oneforall_kjl-0.1.1-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (114)
  1. OneForAll/__init__.py +15 -0
  2. OneForAll/brute.py +503 -0
  3. OneForAll/common/check.py +41 -0
  4. OneForAll/common/crawl.py +10 -0
  5. OneForAll/common/database.py +277 -0
  6. OneForAll/common/domain.py +63 -0
  7. OneForAll/common/ipasn.py +42 -0
  8. OneForAll/common/ipreg.py +139 -0
  9. OneForAll/common/lookup.py +28 -0
  10. OneForAll/common/module.py +369 -0
  11. OneForAll/common/query.py +9 -0
  12. OneForAll/common/records.py +363 -0
  13. OneForAll/common/request.py +264 -0
  14. OneForAll/common/resolve.py +173 -0
  15. OneForAll/common/search.py +78 -0
  16. OneForAll/common/similarity.py +138 -0
  17. OneForAll/common/tablib/__init__.py +0 -0
  18. OneForAll/common/tablib/format.py +89 -0
  19. OneForAll/common/tablib/tablib.py +360 -0
  20. OneForAll/common/tldextract.py +240 -0
  21. OneForAll/common/utils.py +789 -0
  22. OneForAll/config/__init__.py +17 -0
  23. OneForAll/config/api.py +94 -0
  24. OneForAll/config/default.py +255 -0
  25. OneForAll/config/log.py +38 -0
  26. OneForAll/config/setting.py +108 -0
  27. OneForAll/export.py +72 -0
  28. OneForAll/modules/altdns.py +216 -0
  29. OneForAll/modules/autotake/github.py +105 -0
  30. OneForAll/modules/certificates/censys_api.py +73 -0
  31. OneForAll/modules/certificates/certspotter.py +48 -0
  32. OneForAll/modules/certificates/crtsh.py +84 -0
  33. OneForAll/modules/certificates/google.py +48 -0
  34. OneForAll/modules/certificates/myssl.py +46 -0
  35. OneForAll/modules/certificates/racent.py +49 -0
  36. OneForAll/modules/check/axfr.py +97 -0
  37. OneForAll/modules/check/cdx.py +44 -0
  38. OneForAll/modules/check/cert.py +58 -0
  39. OneForAll/modules/check/csp.py +94 -0
  40. OneForAll/modules/check/nsec.py +58 -0
  41. OneForAll/modules/check/robots.py +44 -0
  42. OneForAll/modules/check/sitemap.py +44 -0
  43. OneForAll/modules/collect.py +70 -0
  44. OneForAll/modules/crawl/archivecrawl.py +59 -0
  45. OneForAll/modules/crawl/commoncrawl.py +59 -0
  46. OneForAll/modules/datasets/anubis.py +45 -0
  47. OneForAll/modules/datasets/bevigil.py +50 -0
  48. OneForAll/modules/datasets/binaryedge_api.py +50 -0
  49. OneForAll/modules/datasets/cebaidu.py +45 -0
  50. OneForAll/modules/datasets/chinaz.py +45 -0
  51. OneForAll/modules/datasets/chinaz_api.py +49 -0
  52. OneForAll/modules/datasets/circl_api.py +49 -0
  53. OneForAll/modules/datasets/cloudflare_api.py +130 -0
  54. OneForAll/modules/datasets/dnsdb_api.py +51 -0
  55. OneForAll/modules/datasets/dnsdumpster.py +52 -0
  56. OneForAll/modules/datasets/dnsgrep.py +44 -0
  57. OneForAll/modules/datasets/fullhunt.py +48 -0
  58. OneForAll/modules/datasets/hackertarget.py +45 -0
  59. OneForAll/modules/datasets/ip138.py +45 -0
  60. OneForAll/modules/datasets/ipv4info_api.py +73 -0
  61. OneForAll/modules/datasets/netcraft.py +66 -0
  62. OneForAll/modules/datasets/passivedns_api.py +51 -0
  63. OneForAll/modules/datasets/qianxun.py +61 -0
  64. OneForAll/modules/datasets/rapiddns.py +45 -0
  65. OneForAll/modules/datasets/riddler.py +45 -0
  66. OneForAll/modules/datasets/robtex.py +58 -0
  67. OneForAll/modules/datasets/securitytrails_api.py +56 -0
  68. OneForAll/modules/datasets/sitedossier.py +57 -0
  69. OneForAll/modules/datasets/spyse_api.py +62 -0
  70. OneForAll/modules/datasets/sublist3r.py +45 -0
  71. OneForAll/modules/datasets/urlscan.py +45 -0
  72. OneForAll/modules/datasets/windvane.py +92 -0
  73. OneForAll/modules/dnsquery/mx.py +35 -0
  74. OneForAll/modules/dnsquery/ns.py +35 -0
  75. OneForAll/modules/dnsquery/soa.py +35 -0
  76. OneForAll/modules/dnsquery/spf.py +35 -0
  77. OneForAll/modules/dnsquery/txt.py +35 -0
  78. OneForAll/modules/enrich.py +72 -0
  79. OneForAll/modules/finder.py +206 -0
  80. OneForAll/modules/intelligence/alienvault.py +50 -0
  81. OneForAll/modules/intelligence/riskiq_api.py +58 -0
  82. OneForAll/modules/intelligence/threatbook_api.py +50 -0
  83. OneForAll/modules/intelligence/threatminer.py +45 -0
  84. OneForAll/modules/intelligence/virustotal.py +60 -0
  85. OneForAll/modules/intelligence/virustotal_api.py +59 -0
  86. OneForAll/modules/iscdn.py +86 -0
  87. OneForAll/modules/search/ask.py +69 -0
  88. OneForAll/modules/search/baidu.py +96 -0
  89. OneForAll/modules/search/bing.py +79 -0
  90. OneForAll/modules/search/bing_api.py +78 -0
  91. OneForAll/modules/search/fofa_api.py +74 -0
  92. OneForAll/modules/search/gitee.py +71 -0
  93. OneForAll/modules/search/github_api.py +86 -0
  94. OneForAll/modules/search/google.py +83 -0
  95. OneForAll/modules/search/google_api.py +77 -0
  96. OneForAll/modules/search/hunter_api.py +72 -0
  97. OneForAll/modules/search/quake_api.py +72 -0
  98. OneForAll/modules/search/shodan_api.py +53 -0
  99. OneForAll/modules/search/so.py +75 -0
  100. OneForAll/modules/search/sogou.py +72 -0
  101. OneForAll/modules/search/wzsearch.py +68 -0
  102. OneForAll/modules/search/yahoo.py +81 -0
  103. OneForAll/modules/search/yandex.py +80 -0
  104. OneForAll/modules/search/zoomeye_api.py +73 -0
  105. OneForAll/modules/srv.py +75 -0
  106. OneForAll/modules/wildcard.py +319 -0
  107. OneForAll/oneforall.py +275 -0
  108. OneForAll/takeover.py +168 -0
  109. OneForAll/test.py +23 -0
  110. oneforall_kjl-0.1.1.dist-info/METADATA +18 -0
  111. oneforall_kjl-0.1.1.dist-info/RECORD +114 -0
  112. oneforall_kjl-0.1.1.dist-info/WHEEL +5 -0
  113. oneforall_kjl-0.1.1.dist-info/entry_points.txt +2 -0
  114. oneforall_kjl-0.1.1.dist-info/top_level.txt +1 -0
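The hunks below show the nine search modules from OneForAll/modules/search/ listed above (gitee.py, github_api.py, google.py, google_api.py, hunter_api.py, quake_api.py, shodan_api.py, so.py and sogou.py). Each file defines a class derived from common.search.Search plus a module-level run(domain) helper. As a rough orientation sketch (not part of the package), such a module can be driven directly as follows, assuming the OneForAll directory itself is on sys.path, since the code uses top-level imports such as `from common.search import Search`:

    import sys

    # Assumption: illustrative path handling only; the installed console script
    # declared in entry_points.txt is the normal way to launch OneForAll.
    sys.path.insert(0, 'OneForAll')

    from modules.search import gitee  # any of the search modules shown below

    # Every search module exposes the same module-level entry point.
    gitee.run('example.com')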
OneForAll/modules/search/gitee.py
@@ -0,0 +1,71 @@
+ import time
+ from bs4 import BeautifulSoup
+ from common.search import Search
+ from config.log import logger
+
+
+ class Gitee(Search):
+     def __init__(self, domain):
+         Search.__init__(self)
+         self.source = 'GiteeSearch'
+         self.module = 'Search'
+         self.addr = 'https://search.gitee.com/'
+         self.domain = domain
+
+     def search(self):
+         """
+         Query the interface and match subdomains in the response
+         """
+         page_num = 1
+         while True:
+             time.sleep(self.delay)
+             self.header = self.get_header()
+             self.proxy = self.get_proxy(self.source)
+             params = {'pageno': page_num, 'q': self.domain, 'type': 'code'}
+             try:
+                 resp = self.get(self.addr, params=params)
+             except Exception as e:
+                 logger.log('ERROR', e.args)
+                 break
+             if not resp:
+                 break
+             if resp.status_code != 200:
+                 logger.log('ERROR', f'{self.source} module query failed')
+                 break
+             if 'class="empty-box"' in resp.text:  # no results for this query
+                 break
+             soup = BeautifulSoup(resp.text, 'html.parser')
+             subdomains = self.match_subdomains(soup, fuzzy=False)
+             if not self.check_subdomains(subdomains):
+                 break
+             self.subdomains.update(subdomains)
+             if '<li class="disabled"><a href="###">' in resp.text:  # no next page
+                 break
+             page_num += 1
+             if page_num >= 100:
+                 break
+
+     def run(self):
+         """
+         Class execution entry point
+         """
+         self.begin()
+         self.search()
+         self.finish()
+         self.save_json()
+         self.gen_result()
+         self.save_db()
+
+
+ def run(domain):
+     """
+     Unified invocation entry point
+
+     :param str domain: domain name
+     """
+     query = Gitee(domain)
+     query.run()
+
+
+ if __name__ == '__main__':
+     run('qq.com')
OneForAll/modules/search/github_api.py
@@ -0,0 +1,86 @@
+ import time
+
+ from config import settings
+ from common.search import Search
+ from config.log import logger
+
+
+ class GithubAPI(Search):
+     def __init__(self, domain):
+         Search.__init__(self)
+         self.source = 'GithubAPISearch'
+         self.module = 'Search'
+         self.addr = 'https://api.github.com/search/code'
+         self.domain = domain
+         self.delay = 5
+         self.token = settings.github_api_token
+
+     def search(self):
+         """
+         Query the API and match subdomains in the response
+         """
+         self.header = self.get_header()
+         self.proxy = self.get_proxy(self.source)
+         self.header.update(
+             {'Accept': 'application/vnd.github.v3.text-match+json'})
+         self.header.update(
+             {'Authorization': 'token ' + self.token})
+
+         page = 1
+         while True:
+             time.sleep(self.delay)
+             params = {'q': self.domain, 'per_page': 100,
+                       'page': page, 'sort': 'indexed',
+                       'access_token': self.token}
+             try:
+                 resp = self.get(self.addr, params=params)
+             except Exception as e:
+                 logger.log('ERROR', e.args)
+                 break
+             if not resp or resp.status_code != 200:
+                 logger.log('ERROR', f'{self.source} module query failed')
+                 break
+             subdomains = self.match_subdomains(resp)
+             if not subdomains:
+                 break
+             self.subdomains.update(subdomains)
+             page += 1
+             try:
+                 resp_json = resp.json()
+             except Exception as e:
+                 logger.log('ERROR', e.args)
+                 break
+             total_count = resp_json.get('total_count')
+             if not isinstance(total_count, int):
+                 break
+             if page * 100 > total_count:
+                 break
+             if page * 100 > 1000:  # GitHub code search only returns the first 1000 results
+                 break
+
+     def run(self):
+         """
+         Class execution entry point
+         """
+         if not self.have_api(self.token):
+             return
+         self.begin()
+         self.search()
+         self.finish()
+         self.save_json()
+         self.gen_result()
+         self.save_db()
+
+
+ def run(domain):
+     """
+     Unified invocation entry point
+
+     :param str domain: domain name
+     """
+     query = GithubAPI(domain)
+     query.run()
+
+
+ if __name__ == '__main__':
+     run('freebuf.com')
OneForAll/modules/search/google.py
@@ -0,0 +1,83 @@
+ import random
+ import time
+ from common.search import Search
+
+
+ class Google(Search):
+     def __init__(self, domain):
+         Search.__init__(self)
+         self.domain = domain
+         self.module = 'Search'
+         self.source = 'GoogleSearch'
+         self.init = 'https://www.google.com/'
+         self.addr = 'https://www.google.com/search'
+
+     def search(self, domain, filtered_subdomain=''):
+         """
+         Send search requests and match subdomains in the results
+
+         :param str domain: domain name
+         :param str filtered_subdomain: subdomain exclusion statement
+         """
+         page_num = 1
+         per_page_num = 50
+         self.header = self.get_header()
+         self.header.update({'User-Agent': 'Googlebot',
+                             'Referer': 'https://www.google.com'})
+         self.proxy = self.get_proxy(self.source)
+         resp = self.get(self.init)
+         if not resp:
+             return
+         self.cookie = resp.cookies
+         while True:
+             self.delay = random.randint(1, 5)
+             time.sleep(self.delay)
+             self.proxy = self.get_proxy(self.source)
+             word = 'site:.' + domain + filtered_subdomain
+             payload = {'q': word, 'start': page_num, 'num': per_page_num,
+                        'filter': '0', 'btnG': 'Search', 'gbv': '1', 'hl': 'en'}
+             resp = self.get(url=self.addr, params=payload)
+             subdomains = self.match_subdomains(resp, fuzzy=False)
+             if not self.check_subdomains(subdomains):
+                 break
+             self.subdomains.update(subdomains)
+             page_num += per_page_num
+             if 'start=' + str(page_num) not in resp.text:  # no next page
+                 break
+             if '302 Moved' in resp.text:
+                 break
+
+     def run(self):
+         """
+         Class execution entry point
+         """
+         self.begin()
+
+         self.search(self.domain)
+
+         # Exclude subdomains that dominate the results in order to discover new ones
+         for statement in self.filter(self.domain, self.subdomains):
+             self.search(self.domain, filtered_subdomain=statement)
+
+         # Recursively search the next level of subdomains
+         if self.recursive_search:
+             for subdomain in self.recursive_subdomain():
+                 self.search(subdomain)
+         self.finish()
+         self.save_json()
+         self.gen_result()
+         self.save_db()
+
+
+ def run(domain):
+     """
+     Unified invocation entry point
+
+     :param str domain: domain name
+     """
+     search = Google(domain)
+     search.run()
+
+
+ if __name__ == '__main__':
+     run('example.com')
OneForAll/modules/search/google_api.py
@@ -0,0 +1,77 @@
+ import time
+ from config import settings
+ from common.search import Search
+
+
+ class GoogleAPI(Search):
+     def __init__(self, domain):
+         Search.__init__(self)
+         self.domain = domain
+         self.module = 'Search'
+         self.source = 'GoogleAPISearch'
+         self.addr = 'https://www.googleapis.com/customsearch/v1'
+         self.delay = 1
+         self.key = settings.google_api_key
+         self.id = settings.google_api_id
+         self.per_page_num = 10  # only 10 results can be requested per call
+
+     def search(self, domain, filtered_subdomain=''):
+         """
+         Send search requests and match subdomains in the results
+
+         :param str domain: domain name
+         :param str filtered_subdomain: subdomain exclusion statement
+         """
+         self.page_num = 1
+         while True:
+             word = 'site:.' + domain + filtered_subdomain
+             time.sleep(self.delay)
+             self.header = self.get_header()
+             self.proxy = self.get_proxy(self.source)
+             params = {'key': self.key, 'cx': self.id,
+                       'q': word, 'fields': 'items/link',
+                       'start': self.page_num, 'num': self.per_page_num}
+             resp = self.get(self.addr, params)
+             subdomains = self.match_subdomains(resp)
+             if not self.check_subdomains(subdomains):
+                 break
+             self.subdomains.update(subdomains)
+             self.page_num += self.per_page_num
+             if self.page_num > 100:  # the free API only returns the first 100 results
+                 break
+
+     def run(self):
+         """
+         Class execution entry point
+         """
+         if not self.have_api(self.id, self.key):
+             return
+         self.begin()
+         self.search(self.domain)
+
+         # Exclude subdomains that dominate the results in order to discover new ones
+         for statement in self.filter(self.domain, self.subdomains):
+             self.search(self.domain, filtered_subdomain=statement)
+
+         # Recursively search the next level of subdomains
+         if self.recursive_search:
+             for subdomain in self.recursive_subdomain():
+                 self.search(subdomain)
+         self.finish()
+         self.save_json()
+         self.gen_result()
+         self.save_db()
+
+
+ def run(domain):
+     """
+     Unified invocation entry point
+
+     :param str domain: domain name
+     """
+     search = GoogleAPI(domain)
+     search.run()
+
+
+ if __name__ == '__main__':
+     run('mi.com')
OneForAll/modules/search/hunter_api.py
@@ -0,0 +1,72 @@
+ import base64
+ import time
+
+ from config import settings
+ from common.search import Search
+
+
+ class Hunter(Search):
+     def __init__(self, domain):
+         Search.__init__(self)
+         self.domain = domain
+         self.module = 'Search'
+         self.source = 'HunterAPISearch'
+         self.addr = 'https://hunter.qianxin.com/openApi/search'
+         self.delay = 1
+         self.key = settings.hunter_api_key
+
+     def search(self):
+         """
+         Send search requests and match subdomains in the results
+         """
+         self.page_num = 1
+         subdomain_encode = f'domain_suffix="{self.domain}"'.encode('utf-8')
+         query_data = base64.b64encode(subdomain_encode)
+         while 100 * self.page_num < settings.cam_records_maximum_per_domain:
+             time.sleep(self.delay)
+             self.header = self.get_header()
+             self.proxy = self.get_proxy(self.source)
+             query = {'api-key': self.key,
+                      'search': query_data,
+                      'page': self.page_num,
+                      'page_size': 100,
+                      'is_web': 1}
+             resp = self.get(self.addr, query)
+             if not resp:
+                 return
+             resp_json = resp.json()
+             subdomains = self.match_subdomains(resp)
+             if not subdomains:  # stop searching when no subdomains were found
+                 break
+             self.subdomains.update(subdomains)
+             total = resp_json.get('data').get('total')
+             if self.page_num * 100 >= int(total):
+                 break
+             self.page_num += 1
+
+     def run(self):
+         """
+         Class execution entry point
+         """
+         if not self.have_api(self.key):
+             return
+         self.begin()
+         self.search()
+         self.finish()
+         self.save_json()
+         self.gen_result()
+         self.save_db()
+
+
+ def run(domain):
+     """
+     Unified invocation entry point
+
+     :param str domain: domain name
+     """
+     search = Hunter(domain)
+     search.run()
+
+
+ if __name__ == '__main__':
+     run('freebuf.com')
OneForAll/modules/search/quake_api.py
@@ -0,0 +1,72 @@
+ import time
+
+ from config import settings
+ from common.search import Search
+
+
+ class Quake(Search):
+     def __init__(self, domain):
+         Search.__init__(self)
+         self.domain = domain
+         self.module = 'Quake'
+         self.source = 'QuakeAPISearch'
+         self.addr = 'https://quake.360.net/api/v3/search/quake_service'
+         self.delay = 1
+         self.key = settings.quake_api_key
+
+     def search(self):
+         """
+         Send search requests and match subdomains in the results
+         """
+         self.per_page_num = 100
+         self.page_num = 0
+         while self.per_page_num * self.page_num < settings.cam_records_maximum_per_domain:
+             time.sleep(self.delay)
+             self.header = self.get_header()
+             self.header.update({'Content-Type': 'application/json'})
+             self.header.update({'X-QuakeToken': self.key})
+             self.proxy = self.get_proxy(self.source)
+
+             query = {'query': 'domain:"' + self.domain + '"',
+                      'start': self.page_num * self.per_page_num,
+                      'size': self.per_page_num,
+                      'include': ["service.http.host"]}
+             resp = self.post(self.addr, json=query)
+             if not resp:
+                 return
+             resp_json = resp.json()
+             subdomains = self.match_subdomains(resp)
+             if not subdomains:  # stop searching when no subdomains were found
+                 break
+             self.subdomains.update(subdomains)
+             total = resp_json.get('meta').get('pagination').get('total')
+             self.page_num += 1
+             if self.page_num * self.per_page_num >= int(total):
+                 break
+
+     def run(self):
+         """
+         Class execution entry point
+         """
+         if not self.have_api(self.key):
+             return
+         self.begin()
+         self.search()
+         self.finish()
+         self.save_json()
+         self.gen_result()
+         self.save_db()
+
+
+ def run(domain):
+     """
+     Unified invocation entry point
+
+     :param str domain: domain name
+     """
+     query = Quake(domain)
+     query.run()
+
+
+ if __name__ == '__main__':
+     run('nosugartech.com')
OneForAll/modules/search/shodan_api.py
@@ -0,0 +1,53 @@
+ from config import settings
+ from common.search import Search
+
+
+ class ShodanAPI(Search):
+     def __init__(self, domain):
+         Search.__init__(self)
+         self.domain = domain
+         self.module = 'Search'
+         self.source = 'ShodanAPISearch'
+         self.key = settings.shodan_api_key
+
+     def search(self):
+         """
+         Send search requests and match subdomains in the results
+         """
+         self.header = self.get_header()
+         self.proxy = self.get_proxy(self.source)
+         url = f'https://api.shodan.io/dns/domain/{self.domain}?key={self.key}'
+         resp = self.get(url)
+         if not resp:
+             return
+         data = resp.json()
+         names = data.get('subdomains')
+         subdomain_str = str(set(map(lambda name: f'{name}.{self.domain}', names)))
+         self.subdomains = self.collect_subdomains(subdomain_str)
+
+     def run(self):
+         """
+         Class execution entry point
+         """
+         if not self.have_api(self.key):
+             return
+         self.begin()
+         self.search()
+         self.finish()
+         self.save_json()
+         self.gen_result()
+         self.save_db()
+
+
+ def run(domain):
+     """
+     Unified invocation entry point
+
+     :param str domain: domain name
+     """
+     search = ShodanAPI(domain)
+     search.run()
+
+
+ if __name__ == '__main__':
+     run('freebuf.com')
OneForAll/modules/search/so.py
@@ -0,0 +1,75 @@
+ import time
+
+ from common.search import Search
+
+
+ class So(Search):
+     def __init__(self, domain):
+         Search.__init__(self)
+         self.domain = domain
+         self.module = 'Search'
+         self.source = 'SoSearch'
+         self.addr = 'https://www.so.com/s'
+         self.limit_num = 640  # limit on the number of search results
+         self.per_page_num = 10  # 10 results per page by default
+
+     def search(self, domain, filtered_subdomain=''):
+         """
+         Send search requests and match subdomains in the results
+
+         :param str domain: domain name
+         :param str filtered_subdomain: subdomain exclusion statement
+         """
+         page_num = 1
+         while True:
+             time.sleep(self.delay)
+             self.header = self.get_header()
+             self.proxy = self.get_proxy(self.source)
+             word = 'site:.' + domain + filtered_subdomain
+             payload = {'q': word, 'pn': page_num}
+             resp = self.get(url=self.addr, params=payload)
+             subdomains = self.match_subdomains(resp, fuzzy=False)
+             if not self.check_subdomains(subdomains):
+                 break
+             self.subdomains.update(subdomains)
+             page_num += 1
+             # Stop searching when the results page has no next-page link
+             if '<a id="snext"' not in resp.text:
+                 break
+             # Limit on the number of search results
+             if page_num * self.per_page_num >= self.limit_num:
+                 break
+
+     def run(self):
+         """
+         Class execution entry point
+         """
+         self.begin()
+         self.search(self.domain)
+
+         # Exclude subdomains that dominate the results in order to discover new ones
+         for statement in self.filter(self.domain, self.subdomains):
+             self.search(self.domain, filtered_subdomain=statement)
+
+         # Recursively search the next level of subdomains
+         if self.recursive_search:
+             for subdomain in self.recursive_subdomain():
+                 self.search(subdomain)
+         self.finish()
+         self.save_json()
+         self.gen_result()
+         self.save_db()
+
+
+ def run(domain):
+     """
+     Unified invocation entry point
+
+     :param str domain: domain name
+     """
+     search = So(domain)
+     search.run()
+
+
+ if __name__ == '__main__':
+     run('mi.com')
OneForAll/modules/search/sogou.py
@@ -0,0 +1,72 @@
+ from common.search import Search
+
+
+ class Sogou(Search):
+     def __init__(self, domain):
+         Search.__init__(self)
+         self.domain = domain
+         self.module = 'Search'
+         self.source = 'SogouSearch'
+         self.addr = 'https://www.sogou.com/web'
+         self.limit_num = 1000  # limit on the number of search results
+
+     def search(self, domain, filtered_subdomain=''):
+         """
+         Send search requests and match subdomains in the results
+
+         :param str domain: domain name
+         :param str filtered_subdomain: subdomain exclusion statement
+         """
+         self.page_num = 1
+         while True:
+             self.header = self.get_header()
+             self.proxy = self.get_proxy(self.source)
+             word = 'site:.' + domain + filtered_subdomain
+             payload = {'query': word, 'page': self.page_num,
+                        'num': self.per_page_num}
+             resp = self.get(self.addr, payload)
+             subdomains = self.match_subdomains(resp, fuzzy=False)
+             if not self.check_subdomains(subdomains):
+                 break
+             self.subdomains.update(subdomains)
+             self.page_num += 1
+             # Stop searching when the results page has no next-page link
+             if '<a id="sogou_next"' not in resp.text:
+                 break
+             # Limit on the number of search results
+             if self.page_num * self.per_page_num >= self.limit_num:
+                 break
+
+     def run(self):
+         """
+         Class execution entry point
+         """
+         self.begin()
+         self.search(self.domain)
+
+         # Exclude subdomains that dominate the results in order to discover new ones
+         for statement in self.filter(self.domain, self.subdomains):
+             self.search(self.domain, filtered_subdomain=statement)
+
+         # Recursively search the next level of subdomains
+         if self.recursive_search:
+             for subdomain in self.recursive_subdomain():
+                 self.search(subdomain)
+         self.finish()
+         self.save_json()
+         self.gen_result()
+         self.save_db()
+
+
+ def run(domain):
+     """
+     Unified invocation entry point
+
+     :param str domain: domain name
+     """
+     search = Sogou(domain)
+     search.run()
+
+
+ if __name__ == '__main__':
+     run('example.com')
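All of the search modules above follow the same run() lifecycle inherited from the Search base class in OneForAll/common/search.py (the base class itself is not part of these hunks). A condensed, hypothetical skeleton of that shared flow, with the method names taken from the code above:

    # Hypothetical outline only; begin/finish/save_* are provided by the real
    # Search base class and are listed here just to document the call order.
    def run_module(module_instance):
        module_instance.begin()       # bookkeeping before the query starts
        module_instance.search()      # query the source and fill self.subdomains
        module_instance.finish()      # bookkeeping after the query ends
        module_instance.save_json()   # dump the raw results
        module_instance.gen_result()  # normalize the collected subdomains
        module_instance.save_db()     # persist the results to the database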