oneforall-kjl 0.1.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (114) hide show
  1. OneForAll/__init__.py +15 -0
  2. OneForAll/brute.py +503 -0
  3. OneForAll/common/check.py +41 -0
  4. OneForAll/common/crawl.py +10 -0
  5. OneForAll/common/database.py +277 -0
  6. OneForAll/common/domain.py +63 -0
  7. OneForAll/common/ipasn.py +42 -0
  8. OneForAll/common/ipreg.py +139 -0
  9. OneForAll/common/lookup.py +28 -0
  10. OneForAll/common/module.py +369 -0
  11. OneForAll/common/query.py +9 -0
  12. OneForAll/common/records.py +363 -0
  13. OneForAll/common/request.py +264 -0
  14. OneForAll/common/resolve.py +173 -0
  15. OneForAll/common/search.py +78 -0
  16. OneForAll/common/similarity.py +138 -0
  17. OneForAll/common/tablib/__init__.py +0 -0
  18. OneForAll/common/tablib/format.py +89 -0
  19. OneForAll/common/tablib/tablib.py +360 -0
  20. OneForAll/common/tldextract.py +240 -0
  21. OneForAll/common/utils.py +789 -0
  22. OneForAll/config/__init__.py +17 -0
  23. OneForAll/config/api.py +94 -0
  24. OneForAll/config/default.py +255 -0
  25. OneForAll/config/log.py +38 -0
  26. OneForAll/config/setting.py +108 -0
  27. OneForAll/export.py +72 -0
  28. OneForAll/modules/altdns.py +216 -0
  29. OneForAll/modules/autotake/github.py +105 -0
  30. OneForAll/modules/certificates/censys_api.py +73 -0
  31. OneForAll/modules/certificates/certspotter.py +48 -0
  32. OneForAll/modules/certificates/crtsh.py +84 -0
  33. OneForAll/modules/certificates/google.py +48 -0
  34. OneForAll/modules/certificates/myssl.py +46 -0
  35. OneForAll/modules/certificates/racent.py +49 -0
  36. OneForAll/modules/check/axfr.py +97 -0
  37. OneForAll/modules/check/cdx.py +44 -0
  38. OneForAll/modules/check/cert.py +58 -0
  39. OneForAll/modules/check/csp.py +94 -0
  40. OneForAll/modules/check/nsec.py +58 -0
  41. OneForAll/modules/check/robots.py +44 -0
  42. OneForAll/modules/check/sitemap.py +44 -0
  43. OneForAll/modules/collect.py +70 -0
  44. OneForAll/modules/crawl/archivecrawl.py +59 -0
  45. OneForAll/modules/crawl/commoncrawl.py +59 -0
  46. OneForAll/modules/datasets/anubis.py +45 -0
  47. OneForAll/modules/datasets/bevigil.py +50 -0
  48. OneForAll/modules/datasets/binaryedge_api.py +50 -0
  49. OneForAll/modules/datasets/cebaidu.py +45 -0
  50. OneForAll/modules/datasets/chinaz.py +45 -0
  51. OneForAll/modules/datasets/chinaz_api.py +49 -0
  52. OneForAll/modules/datasets/circl_api.py +49 -0
  53. OneForAll/modules/datasets/cloudflare_api.py +130 -0
  54. OneForAll/modules/datasets/dnsdb_api.py +51 -0
  55. OneForAll/modules/datasets/dnsdumpster.py +52 -0
  56. OneForAll/modules/datasets/dnsgrep.py +44 -0
  57. OneForAll/modules/datasets/fullhunt.py +48 -0
  58. OneForAll/modules/datasets/hackertarget.py +45 -0
  59. OneForAll/modules/datasets/ip138.py +45 -0
  60. OneForAll/modules/datasets/ipv4info_api.py +73 -0
  61. OneForAll/modules/datasets/netcraft.py +66 -0
  62. OneForAll/modules/datasets/passivedns_api.py +51 -0
  63. OneForAll/modules/datasets/qianxun.py +61 -0
  64. OneForAll/modules/datasets/rapiddns.py +45 -0
  65. OneForAll/modules/datasets/riddler.py +45 -0
  66. OneForAll/modules/datasets/robtex.py +58 -0
  67. OneForAll/modules/datasets/securitytrails_api.py +56 -0
  68. OneForAll/modules/datasets/sitedossier.py +57 -0
  69. OneForAll/modules/datasets/spyse_api.py +62 -0
  70. OneForAll/modules/datasets/sublist3r.py +45 -0
  71. OneForAll/modules/datasets/urlscan.py +45 -0
  72. OneForAll/modules/datasets/windvane.py +92 -0
  73. OneForAll/modules/dnsquery/mx.py +35 -0
  74. OneForAll/modules/dnsquery/ns.py +35 -0
  75. OneForAll/modules/dnsquery/soa.py +35 -0
  76. OneForAll/modules/dnsquery/spf.py +35 -0
  77. OneForAll/modules/dnsquery/txt.py +35 -0
  78. OneForAll/modules/enrich.py +72 -0
  79. OneForAll/modules/finder.py +206 -0
  80. OneForAll/modules/intelligence/alienvault.py +50 -0
  81. OneForAll/modules/intelligence/riskiq_api.py +58 -0
  82. OneForAll/modules/intelligence/threatbook_api.py +50 -0
  83. OneForAll/modules/intelligence/threatminer.py +45 -0
  84. OneForAll/modules/intelligence/virustotal.py +60 -0
  85. OneForAll/modules/intelligence/virustotal_api.py +59 -0
  86. OneForAll/modules/iscdn.py +86 -0
  87. OneForAll/modules/search/ask.py +69 -0
  88. OneForAll/modules/search/baidu.py +96 -0
  89. OneForAll/modules/search/bing.py +79 -0
  90. OneForAll/modules/search/bing_api.py +78 -0
  91. OneForAll/modules/search/fofa_api.py +74 -0
  92. OneForAll/modules/search/gitee.py +71 -0
  93. OneForAll/modules/search/github_api.py +86 -0
  94. OneForAll/modules/search/google.py +83 -0
  95. OneForAll/modules/search/google_api.py +77 -0
  96. OneForAll/modules/search/hunter_api.py +72 -0
  97. OneForAll/modules/search/quake_api.py +72 -0
  98. OneForAll/modules/search/shodan_api.py +53 -0
  99. OneForAll/modules/search/so.py +75 -0
  100. OneForAll/modules/search/sogou.py +72 -0
  101. OneForAll/modules/search/wzsearch.py +68 -0
  102. OneForAll/modules/search/yahoo.py +81 -0
  103. OneForAll/modules/search/yandex.py +80 -0
  104. OneForAll/modules/search/zoomeye_api.py +73 -0
  105. OneForAll/modules/srv.py +75 -0
  106. OneForAll/modules/wildcard.py +319 -0
  107. OneForAll/oneforall.py +275 -0
  108. OneForAll/takeover.py +168 -0
  109. OneForAll/test.py +23 -0
  110. oneforall_kjl-0.1.1.dist-info/METADATA +18 -0
  111. oneforall_kjl-0.1.1.dist-info/RECORD +114 -0
  112. oneforall_kjl-0.1.1.dist-info/WHEEL +5 -0
  113. oneforall_kjl-0.1.1.dist-info/entry_points.txt +2 -0
  114. oneforall_kjl-0.1.1.dist-info/top_level.txt +1 -0
@@ -0,0 +1,360 @@
1
+ from collections import OrderedDict
2
+ from .format import registry
3
+
4
+
5
class Row:
    """Internal Row object. Mainly used for filtering."""

    __slots__ = ['_row', 'tags']

    def __init__(self, row=None, tags=None):
        # Copy both inputs so the Row never aliases caller-owned lists.
        self._row = [] if row is None else list(row)
        self.tags = [] if tags is None else list(tags)

    def __iter__(self):
        return iter(self._row)

    def __len__(self):
        return len(self._row)

    def __repr__(self):
        return repr(self._row)

    def __getitem__(self, i):
        return self._row[i]

    def __setitem__(self, i, value):
        self._row[i] = value

    def __delitem__(self, i):
        del self._row[i]

    def __getstate__(self):
        # __slots__ classes have no __dict__, so pickling needs an explicit
        # mapping of slot name -> value.
        return {slot: getattr(self, slot) for slot in self.__slots__}

    def __setstate__(self, state):
        for key, value in state.items():
            setattr(self, key, value)

    def rpush(self, value):
        """Append *value* at the right-hand end of the row."""
        self.insert(len(self._row), value)

    def append(self, value):
        """Alias for :meth:`rpush`."""
        self.rpush(value)

    def insert(self, index, value):
        """Insert *value* at position *index*."""
        self._row.insert(index, value)

    def __contains__(self, item):
        return item in self._row

    @property
    def tuple(self):
        """Tuple representation of :class:`Row`."""
        return tuple(self._row)
66
+
67
+
68
class Dataset:
    """The :class:`Dataset` object is the heart of Tablib. It provides all core
    functionality.

    Usually you create a :class:`Dataset` instance in your main module, and append
    rows as you collect data. ::

        data = tablib.Dataset()
        data.headers = ('name', 'age')

        for (name, age) in some_collector():
            data.append((name, age))


    Setting columns is similar. The column data length must equal the
    current height of the data and headers must be set. ::

        data = tablib.Dataset()
        data.headers = ('first_name', 'last_name')

        data.append(('John', 'Adams'))
        data.append(('George', 'Washington'))

        data.append_col((90, 67), header='age')


    You can also set rows and headers upon instantiation. This is useful if
    dealing with dozens or hundreds of :class:`Dataset` objects. ::

        headers = ('first_name', 'last_name')
        data = [('John', 'Adams'), ('George', 'Washington')]

        data = tablib.Dataset(*data, headers=headers)

    :param \\*args: (optional) list of rows to populate Dataset
    :param headers: (optional) list strings for Dataset header row
    :param title: (optional) string to use as title of the Dataset


    .. admonition:: Format Attributes Definition

     If you look at the code, the various output/import formats are not
     defined within the :class:`Dataset` object. To add support for a new format, see
     :ref:`Adding New Formats <newformats>`.

    """

    def __init__(self, *args, **kwargs):
        # Each positional argument becomes one Row of the dataset.
        self._data = list(Row(arg) for arg in args)
        self.__headers = None

        # ('title', index) tuples
        self._separators = []

        # (column, callback) tuples
        self._formatters = []

        self.headers = kwargs.get('headers')
        self.title = kwargs.get('title')

    def __len__(self):
        return self.height

    def _validate(self, row=None, col=None, safety=False):
        """Assures size of every row in dataset is of proper proportions.

        :param row: candidate row; valid when its length matches the width.
        :param col: candidate column; valid when its length matches the height.
        :param safety: when True, return False instead of raising.
        :raises InvalidDimensions: on size mismatch, unless *safety* is set.
        """
        if row:
            is_valid = (len(row) == self.width) if self.width else True
        elif col:
            if len(col) < 1:
                is_valid = True
            else:
                is_valid = (len(col) == self.height) if self.height else True
        else:
            is_valid = all(len(x) == self.width for x in self._data)

        if is_valid:
            return True
        if not safety:
            raise InvalidDimensions
        return False

    def _package(self, dicts=True, ordered=True):
        """Packages Dataset into lists of dictionaries for transmission.

        :param dicts: when True and headers are set, emit one dict per row;
            otherwise emit plain lists (headers prepended as the first list).
        :param ordered: use OrderedDict instead of dict for row packaging.
        :raises InvalidDatasetIndex: when a formatter column is out of range.
        """
        # TODO: Dicts default to false?
        dict_pack = OrderedDict if ordered else dict

        # Execute formatters on *copies* of the rows. Previously formatters
        # wrote through the shallow list(self._data) copy into the stored Row
        # objects, so every export permanently re-applied them to the data.
        if self._formatters:
            _data = [Row(list(row), tags=row.tags) for row in self._data]
            for row_i, row in enumerate(_data):
                for col, callback in self._formatters:
                    try:
                        if col is None:
                            # A formatter with no column applies to every cell.
                            for j, c in enumerate(row):
                                _data[row_i][j] = callback(c)
                        else:
                            _data[row_i][col] = callback(row[col])
                    except IndexError:
                        raise InvalidDatasetIndex
        else:
            _data = list(self._data)

        if self.headers:
            if dicts:
                data = [dict_pack(list(zip(self.headers, data_row)))
                        for data_row in _data]
            else:
                data = [list(self.headers)] + list(_data)
        else:
            data = [list(row) for row in _data]

        return data

    def _get_headers(self):
        """An *optional* list of strings to be used for header rows and attribute names.

        This must be set manually. The given list length must equal :class:`Dataset.width`.

        """
        return self.__headers

    def _set_headers(self, collection):
        """Validating headers setter.

        :raises InvalidDimensions: when the header count differs from width.
        :raises TypeError: when *collection* is not iterable.
        """
        self._validate(collection)
        if collection:
            # list() raises TypeError itself for non-iterables; no need to
            # catch and re-raise a bare TypeError as the old code did.
            self.__headers = list(collection)
        else:
            self.__headers = None

    headers = property(_get_headers, _set_headers)

    def _get_dict(self):
        """A native Python representation of the :class:`Dataset` object. If headers have
        been set, a list of Python dictionaries will be returned. If no headers have been
        set, a list of tuples (rows) will be returned instead.

        A dataset object can also be imported by setting the `Dataset.dict` attribute: ::

            data = tablib.Dataset()
            data.dict = [{'age': 90, 'first_name': 'Kenneth', 'last_name': 'Reitz'}]

        """
        return self._package()

    def _set_dict(self, pickle):
        """A native Python representation of the Dataset object. If headers have been
        set, a list of Python dictionaries will be returned. If no headers have been
        set, a list of tuples (rows) will be returned instead.

        A dataset object can also be imported by setting the :class:`Dataset.dict` attribute. ::

            data = tablib.Dataset()
            data.dict = [{'age': 90, 'first_name': 'Kenneth', 'last_name': 'Reitz'}]

        :raises UnsupportedFormat: when *pickle* is neither a list of lists
            nor a list of dicts.
        """
        if not len(pickle):
            return

        # if list of rows
        if isinstance(pickle[0], list):
            self.wipe()
            for row in pickle:
                self.append(Row(row))

        # if list of objects
        elif isinstance(pickle[0], dict):
            self.wipe()
            # Keys of the first dict define the headers and column order.
            self.headers = list(pickle[0].keys())
            for row in pickle:
                self.append(Row(list(row.values())))
        else:
            raise UnsupportedFormat

    dict = property(_get_dict, _set_dict)

    @property
    def height(self):
        """The number of rows currently in the :class:`Dataset`.
        Cannot be directly modified.
        """
        return len(self._data)

    @property
    def width(self):
        """The number of columns currently in the :class:`Dataset`.
        Cannot be directly modified.
        """
        try:
            return len(self._data[0])
        except IndexError:
            # No rows yet: fall back to the header count, or 0 when no
            # headers have been set either (len(None) -> TypeError).
            try:
                return len(self.headers)
            except TypeError:
                return 0

    def export(self, format, **kwargs):
        """
        Export :class:`Dataset` object to `format`.

        :param format: export format
        :param kwargs: (optional) custom configuration to the format `export_set`.
        :raises Exception: when the registered format has no `export_set`.
        """
        fmt = registry.get_format(format)
        if not hasattr(fmt, 'export_set'):
            raise Exception('Format {} cannot be exported.'.format(format))

        return fmt.export_set(self, **kwargs)

    # ----
    # Rows
    # ----

    def insert(self, index, row, tags=None):
        """Inserts a row to the :class:`Dataset` at the given index.

        Rows inserted must be the correct size (height or width).

        The default behaviour is to insert the given row to the :class:`Dataset`
        object at the given index.

        :raises InvalidDimensions: when *row* does not match the width.
        """
        if tags is None:
            tags = list()
        self._validate(row)
        self._data.insert(index, Row(row, tags=tags))

    def rpush(self, row, tags=None):
        """Adds a row to the end of the :class:`Dataset`.
        See :class:`Dataset.insert` for additional documentation.
        """
        if tags is None:
            tags = list()
        self.insert(self.height, row=row, tags=tags)

    def append(self, row, tags=None):
        """Adds a row to the :class:`Dataset`.
        See :class:`Dataset.insert` for additional documentation.
        """
        if tags is None:
            tags = list()
        self.rpush(row, tags)

    def extend(self, rows, tags=None):
        """Adds a list of rows to the :class:`Dataset` using
        :class:`Dataset.append`
        """
        if tags is None:
            tags = list()
        for row in rows:
            self.append(row, tags)

    # ----
    # Misc
    # ----

    def remove_duplicates(self):
        """Removes all duplicate rows from the :class:`Dataset` object
        while maintaining the original order."""
        seen = set()
        # seen.add() returns None (falsy), so the 'or' both records the row
        # and keeps the first occurrence.
        self._data[:] = [row for row in self._data if
                         not (tuple(row) in seen or seen.add(tuple(row)))]

    def wipe(self):
        """Removes all content and headers from the :class:`Dataset` object."""
        self._data = list()
        self.__headers = None
346
+
347
+
348
+ registry.register_builtins()
349
+
350
+
351
class InvalidDimensions(Exception):
    """Invalid size: a row or column does not match the Dataset's dimensions."""
353
+
354
+
355
class InvalidDatasetIndex(Exception):
    """Outside of Dataset size: a formatter referenced a non-existent column."""
357
+
358
+
359
class UnsupportedFormat(NotImplementedError):
    """Format is not supported: imported data is neither rows nor dicts."""
@@ -0,0 +1,240 @@
1
+ # -*- coding: utf-8 -*-
2
+ """`tldextract` accurately separates the gTLD or ccTLD (generic or country code
3
+ top-level domain) from the registered domain and subdomains of a URL.
4
+
5
+ >>> import tldextract
6
+
7
+ >>> tldextract.extract('http://forums.news.cnn.com/')
8
+ ExtractResult(subdomain='forums.news', domain='cnn', suffix='com')
9
+
10
+ >>> tldextract.extract('http://forums.bbc.co.uk/') # United Kingdom
11
+ ExtractResult(subdomain='forums', domain='bbc', suffix='co.uk')
12
+
13
+ >>> tldextract.extract('http://www.worldbank.org.kg/') # Kyrgyzstan
14
+ ExtractResult(subdomain='www', domain='worldbank', suffix='org.kg')
15
+
16
+ `ExtractResult` is a namedtuple, so it's simple to access the parts you want.
17
+
18
+ >>> ext = tldextract.extract('http://forums.bbc.co.uk')
19
+ >>> (ext.subdomain, ext.domain, ext.suffix)
20
+ ('forums', 'bbc', 'co.uk')
21
+ >>> # rejoin subdomain and domain
22
+ >>> '.'.join(ext[:2])
23
+ 'forums.bbc'
24
+ >>> # a common alias
25
+ >>> ext.registered_domain
26
+ 'bbc.co.uk'
27
+
28
+ Note subdomain and suffix are _optional_. Not all URL-like inputs have a
29
+ subdomain or a valid suffix.
30
+
31
+ >>> tldextract.extract('google.com')
32
+ ExtractResult(subdomain='', domain='google', suffix='com')
33
+
34
+ >>> tldextract.extract('google.notavalidsuffix')
35
+ ExtractResult(subdomain='google', domain='notavalidsuffix', suffix='')
36
+
37
+ >>> tldextract.extract('http://127.0.0.1:8080/deployed/')
38
+ ExtractResult(subdomain='', domain='127.0.0.1', suffix='')
39
+
40
+ If you want to rejoin the whole namedtuple, regardless of whether a subdomain
41
+ or suffix were found:
42
+
43
+ >>> ext = tldextract.extract('http://127.0.0.1:8080/deployed/')
44
+ >>> # this has unwanted dots
45
+ >>> '.'.join(ext)
46
+ '.127.0.0.1.'
47
+ """
48
+
49
+
50
+ import os
51
+ import re
52
+ import json
53
+ import collections
54
+ from urllib.parse import scheme_chars
55
+ from functools import wraps
56
+
57
+ import idna
58
+
59
+ from common import utils
60
+
61
# Anchored dotted-quad IPv4 matcher: four octets, each constrained to 0-255.
IP_RE = re.compile(r'^(([0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5])\.){3}([0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5])$') # pylint: disable=line-too-long

# Optional URL scheme (e.g. 'http:') followed by '//'; used to strip the
# scheme from a URL before isolating the host.
SCHEME_RE = re.compile(r'^([' + scheme_chars + ']+:)?//')
64
+
65
+
66
class ExtractResult(collections.namedtuple('ExtractResult', 'subdomain domain suffix')):
    """namedtuple of a URL's subdomain, domain, and suffix."""

    # Keep instances __dict__-free (namedtuple subclasses need this explicitly).
    __slots__ = ()

    @property
    def registered_domain(self):
        """
        Joins the domain and suffix fields with a dot, if they're both set.

        >>> extract('http://forums.bbc.co.uk').registered_domain
        'bbc.co.uk'
        >>> extract('http://localhost:8080').registered_domain
        ''
        """
        if not (self.domain and self.suffix):
            return ''
        return '.'.join((self.domain, self.suffix))

    @property
    def fqdn(self):
        """
        Returns a Fully Qualified Domain Name, if there is a proper domain/suffix.

        >>> extract('http://forums.bbc.co.uk/path/to/file').fqdn
        'forums.bbc.co.uk'
        >>> extract('http://localhost:8080').fqdn
        ''
        """
        if not (self.domain and self.suffix):
            return ''
        # self iterates as (subdomain, domain, suffix); skip empty parts.
        parts = [part for part in self if part]
        return '.'.join(parts)

    @property
    def ipv4(self):
        """
        Returns the ipv4 if that is what the presented domain/url is

        >>> extract('http://127.0.0.1/path/to/file').ipv4
        '127.0.0.1'
        >>> extract('http://127.0.0.1.1/path/to/file').ipv4
        ''
        >>> extract('http://256.1.1.1').ipv4
        ''
        """
        # Only a bare dotted-quad qualifies: no suffix, no subdomain.
        has_other_parts = self.suffix or self.subdomain
        if not has_other_parts and IP_RE.match(self.domain):
            return self.domain
        return ''
116
+
117
+
118
class TLDExtract(object):
    """A callable for extracting, subdomain, domain, and suffix components from a URL."""

    def __init__(self, cache_file=None):
        """
        Constructs a callable for extracting subdomain, domain, and suffix
        components from a URL.

        :param cache_file: optional path to a JSON file containing the list
            of known public suffixes ('~' is expanded).
        """
        self.cache_file = os.path.expanduser(cache_file or '')
        self._extractor = None  # lazily-built _PublicSuffixListTLDExtractor

    def __call__(self, url):
        """
        Takes a string URL and splits it into its subdomain, domain, and
        suffix (effective TLD, gTLD, ccTLD, etc.) component.

        >>> ext = TLDExtract()
        >>> ext('http://forums.news.cnn.com/')
        ExtractResult(subdomain='forums.news', domain='cnn', suffix='com')
        >>> ext('http://forums.bbc.co.uk/')
        ExtractResult(subdomain='forums', domain='bbc', suffix='co.uk')
        """
        # Strip scheme, path, query, fragment, userinfo, port and any
        # trailing dot to isolate the host portion of the URL.
        netloc = SCHEME_RE.sub("", url) \
            .partition("/")[0] \
            .partition("?")[0] \
            .partition("#")[0] \
            .split("@")[-1] \
            .partition(":")[0] \
            .strip() \
            .rstrip(".")

        labels = netloc.split(".")

        # Suffix matching runs on punycode-decoded, lower-cased labels, but
        # the returned parts keep the original spelling.
        translations = [_decode_punycode(label) for label in labels]
        suffix_index = self._get_tld_extractor().suffix_index(translations)

        suffix = ".".join(labels[suffix_index:])
        if not suffix and netloc and utils.looks_like_ip(netloc):
            return ExtractResult('', netloc, '')

        subdomain = ".".join(labels[:suffix_index - 1]) if suffix_index else ""
        domain = labels[suffix_index - 1] if suffix_index else ""
        return ExtractResult(subdomain, domain, suffix)

    @property
    def tlds(self):
        # The full suffix set, loading/caching it on first access.
        return self._get_tld_extractor().tlds

    def _get_tld_extractor(self):
        """Get or compute this object's TLDExtractor. Looks up the TLDExtractor
        in roughly the following order, based on the settings passed to
        __init__:

        1. Memoized on `self`
        2. Local system cache file

        :raises Exception: when no suffix list could be loaded.
        """
        # pylint: disable=no-else-return
        if self._extractor:
            return self._extractor
        tlds = self._get_cached_tlds()
        if tlds:
            self._extractor = _PublicSuffixListTLDExtractor(tlds)
            return self._extractor
        else:
            raise Exception("tlds is empty, cannot proceed without tlds.")

    def _get_cached_tlds(self):
        """Read the local TLD cache file. Returns None on IOError or other
        error, or if this object is not set to use the cache
        file."""
        if not self.cache_file:
            return None

        # Bug fix: a missing or corrupt cache file previously raised,
        # contradicting the documented "returns None on error" contract.
        # json.JSONDecodeError is a ValueError subclass.
        try:
            with open(self.cache_file) as cache_file:
                return json.loads(cache_file.read())
        except (IOError, ValueError):
            return None
194
+
195
+
196
# Module-level singleton (no cache file configured) backing the convenience
# extract() function.
TLD_EXTRACTOR = TLDExtract()
197
+
198
+
199
@wraps(TLD_EXTRACTOR.__call__)
def extract(url):
    """Split *url* into (subdomain, domain, suffix) via the shared TLD_EXTRACTOR."""
    return TLD_EXTRACTOR(url)
202
+
203
+
204
+ class _PublicSuffixListTLDExtractor(object):
205
+ """Wrapper around this project's main algo for PSL
206
+ lookups.
207
+ """
208
+ def __init__(self, tlds):
209
+ self.tlds = frozenset(tlds)
210
+
211
+ def suffix_index(self, lower_spl):
212
+ """Returns the index of the first suffix label.
213
+ Returns len(spl) if no suffix is found
214
+ """
215
+ length = len(lower_spl)
216
+ for i in range(length):
217
+ maybe_tld = '.'.join(lower_spl[i:])
218
+ exception_tld = '!' + maybe_tld
219
+ if exception_tld in self.tlds:
220
+ return i + 1
221
+
222
+ if maybe_tld in self.tlds:
223
+ return i
224
+
225
+ wildcard_tld = '*.' + '.'.join(lower_spl[i + 1:])
226
+ if wildcard_tld in self.tlds:
227
+ return i
228
+
229
+ return length
230
+
231
+
232
+ def _decode_punycode(label):
233
+ lowered = label.lower()
234
+ looks_like_puny = lowered.startswith('xn--')
235
+ if looks_like_puny:
236
+ try:
237
+ return idna.decode(label.encode('ascii')).lower()
238
+ except (UnicodeError, IndexError):
239
+ pass
240
+ return lowered