tldts 5.7.112 → 6.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +63 -54
- package/bin/cli.js +4 -6
- package/dist/cjs/index.js +48 -37
- package/dist/cjs/index.js.map +1 -1
- package/dist/cjs/src/data/trie.js +4 -6
- package/dist/cjs/src/data/trie.js.map +1 -1
- package/dist/cjs/src/suffix-trie.js +11 -9
- package/dist/cjs/src/suffix-trie.js.map +1 -1
- package/dist/cjs/tsconfig.tsbuildinfo +1 -1
- package/dist/es6/src/data/trie.js +4 -6
- package/dist/es6/src/data/trie.js.map +1 -1
- package/dist/es6/src/suffix-trie.js +11 -9
- package/dist/es6/src/suffix-trie.js.map +1 -1
- package/dist/es6/tsconfig.bundle.tsbuildinfo +1 -1
- package/dist/index.cjs.min.js +1 -1
- package/dist/index.cjs.min.js.map +1 -1
- package/dist/index.esm.min.js +1 -1
- package/dist/index.esm.min.js.map +1 -1
- package/dist/index.umd.min.js +1 -1
- package/dist/index.umd.min.js.map +1 -1
- package/dist/types/src/data/trie.d.ts +3 -6
- package/package.json +9 -12
- package/src/data/trie.ts +5 -11
- package/src/suffix-trie.ts +9 -11
package/README.md
CHANGED
|
@@ -3,13 +3,14 @@
|
|
|
3
3
|
`tldts` is a JavaScript library to extract hostnames, domains, public suffixes, top-level domains and subdomains from URLs.
|
|
4
4
|
|
|
5
5
|
**Features**:
|
|
6
|
+
|
|
6
7
|
1. Tuned for **performance** (order of 0.1 to 1 μs per input)
|
|
7
8
|
2. Handles both URLs and hostnames
|
|
8
9
|
3. Full Unicode/IDNA support
|
|
9
10
|
4. Support parsing email addresses
|
|
10
11
|
5. Detect IPv4 and IPv6 addresses
|
|
11
12
|
6. Continuously updated version of the public suffix list
|
|
12
|
-
7. **TypeScript**, ships with `umd`, `esm`, `cjs` bundles and *type definitions*
|
|
13
|
+
7. **TypeScript**, ships with `umd`, `esm`, `cjs` bundles and _type definitions_
|
|
13
14
|
8. Small bundles and small memory footprint
|
|
14
15
|
9. Battle tested: full test coverage and production use
|
|
15
16
|
|
|
@@ -22,6 +23,7 @@ npm install --save tldts
|
|
|
22
23
|
# Usage
|
|
23
24
|
|
|
24
25
|
Using the command-line interface:
|
|
26
|
+
|
|
25
27
|
```js
|
|
26
28
|
$ npx tldts 'http://www.writethedocs.org/conf/eu/2017/'
|
|
27
29
|
{
|
|
@@ -37,6 +39,7 @@ $ npx tldts 'http://www.writethedocs.org/conf/eu/2017/'
|
|
|
37
39
|
```
|
|
38
40
|
|
|
39
41
|
Programmatically:
|
|
42
|
+
|
|
40
43
|
```js
|
|
41
44
|
const { parse } = require('tldts');
|
|
42
45
|
|
|
@@ -52,22 +55,22 @@ parse('http://www.writethedocs.org/conf/eu/2017/');
|
|
|
52
55
|
// subdomain: 'www' }
|
|
53
56
|
```
|
|
54
57
|
|
|
55
|
-
Modern *ES6 modules import* is also supported:
|
|
58
|
+
Modern _ES6 modules import_ is also supported:
|
|
56
59
|
|
|
57
60
|
```js
|
|
58
61
|
import { parse } from 'tldts';
|
|
59
62
|
```
|
|
60
63
|
|
|
61
|
-
Alternatively, you can try it *directly in your browser* here: https://npm.runkit.com/tldts
|
|
64
|
+
Alternatively, you can try it _directly in your browser_ here: https://npm.runkit.com/tldts
|
|
62
65
|
|
|
63
66
|
# API
|
|
64
67
|
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
68
|
+
- `tldts.parse(url | hostname, options)`
|
|
69
|
+
- `tldts.getHostname(url | hostname, options)`
|
|
70
|
+
- `tldts.getDomain(url | hostname, options)`
|
|
71
|
+
- `tldts.getPublicSuffix(url | hostname, options)`
|
|
72
|
+
- `tldts.getSubdomain(url | hostname, options)`
|
|
73
|
+
- `tldts.getDomainWithoutSuffix(url | hostname, options)`
|
|
71
74
|
|
|
72
75
|
The behavior of `tldts` can be customized using an `options` argument for all
|
|
73
76
|
the functions exposed as part of the public API. This is useful to both change
|
|
@@ -114,7 +117,10 @@ tldts.parse('https://spark-public.s3.amazonaws.com/dataanalysis/loansData.csv');
|
|
|
114
117
|
// publicSuffix: 'com',
|
|
115
118
|
// subdomain: 'spark-public.s3' }
|
|
116
119
|
|
|
117
|
-
tldts.parse(
|
|
120
|
+
tldts.parse(
|
|
121
|
+
'https://spark-public.s3.amazonaws.com/dataanalysis/loansData.csv',
|
|
122
|
+
{ allowPrivateDomains: true },
|
|
123
|
+
);
|
|
118
124
|
// { domain: 'spark-public.s3.amazonaws.com',
|
|
119
125
|
// domainWithoutSuffix: 'spark-public',
|
|
120
126
|
// hostname: 'spark-public.s3.amazonaws.com',
|
|
@@ -134,7 +140,7 @@ tldts.parse('gopher://domain.unknown/');
|
|
|
134
140
|
// publicSuffix: 'unknown',
|
|
135
141
|
// subdomain: '' }
|
|
136
142
|
|
|
137
|
-
tldts.parse('https://192.168.0.0') // IPv4
|
|
143
|
+
tldts.parse('https://192.168.0.0'); // IPv4
|
|
138
144
|
// { domain: null,
|
|
139
145
|
// domainWithoutSuffix: null,
|
|
140
146
|
// hostname: '192.168.0.0',
|
|
@@ -144,7 +150,7 @@ tldts.parse('https://192.168.0.0') // IPv4
|
|
|
144
150
|
// publicSuffix: null,
|
|
145
151
|
// subdomain: null }
|
|
146
152
|
|
|
147
|
-
tldts.parse('https://[::1]') // IPv6
|
|
153
|
+
tldts.parse('https://[::1]'); // IPv6
|
|
148
154
|
// { domain: null,
|
|
149
155
|
// domainWithoutSuffix: null,
|
|
150
156
|
// hostname: '::1',
|
|
@@ -154,7 +160,7 @@ tldts.parse('https://[::1]') // IPv6
|
|
|
154
160
|
// publicSuffix: null,
|
|
155
161
|
// subdomain: null }
|
|
156
162
|
|
|
157
|
-
tldts.parse('tldts@emailprovider.co.uk') // email
|
|
163
|
+
tldts.parse('tldts@emailprovider.co.uk'); // email
|
|
158
164
|
// { domain: 'emailprovider.co.uk',
|
|
159
165
|
// domainWithoutSuffix: 'emailprovider',
|
|
160
166
|
// hostname: 'emailprovider.co.uk',
|
|
@@ -166,7 +172,7 @@ tldts.parse('tldts@emailprovider.co.uk') // email
|
|
|
166
172
|
```
|
|
167
173
|
|
|
168
174
|
| Property Name | Type | Description |
|
|
169
|
-
|
|
175
|
+
| :-------------------- | :----- | :---------------------------------------------- |
|
|
170
176
|
| `hostname` | `str` | `hostname` of the input extracted automatically |
|
|
171
177
|
| `domain` | `str` | Domain (tld + sld) |
|
|
172
178
|
| `domainWithoutSuffix` | `str` | Domain without public suffix |
|
|
@@ -176,7 +182,6 @@ tldts.parse('tldts@emailprovider.co.uk') // email
|
|
|
176
182
|
| `isPrivate` | `bool` | Does TLD come from Private part of the list |
|
|
177
183
|
| `isIP` | `bool` | Is `hostname` an IP address? |
|
|
178
184
|
|
|
179
|
-
|
|
180
185
|
## Single purpose methods
|
|
181
186
|
|
|
182
187
|
These methods are shorthands if you want to retrieve only a single value (and
|
|
@@ -189,13 +194,15 @@ Returns the hostname from a given string.
|
|
|
189
194
|
```javascript
|
|
190
195
|
const { getHostname } = require('tldts');
|
|
191
196
|
|
|
192
|
-
getHostname('google.com');
|
|
193
|
-
getHostname('fr.google.com');
|
|
194
|
-
getHostname('fr.google.google');
|
|
195
|
-
getHostname('foo.google.co.uk');
|
|
196
|
-
getHostname('t.co');
|
|
197
|
-
getHostname('fr.t.co');
|
|
198
|
-
getHostname(
|
|
197
|
+
getHostname('google.com'); // returns `google.com`
|
|
198
|
+
getHostname('fr.google.com'); // returns `fr.google.com`
|
|
199
|
+
getHostname('fr.google.google'); // returns `fr.google.google`
|
|
200
|
+
getHostname('foo.google.co.uk'); // returns `foo.google.co.uk`
|
|
201
|
+
getHostname('t.co'); // returns `t.co`
|
|
202
|
+
getHostname('fr.t.co'); // returns `fr.t.co`
|
|
203
|
+
getHostname(
|
|
204
|
+
'https://user:password@example.co.uk:8080/some/path?and&query#hash',
|
|
205
|
+
); // returns `example.co.uk`
|
|
199
206
|
```
|
|
200
207
|
|
|
201
208
|
### getDomain(url | hostname, options?)
|
|
@@ -205,12 +212,12 @@ Returns the fully qualified domain from a given string.
|
|
|
205
212
|
```javascript
|
|
206
213
|
const { getDomain } = require('tldts');
|
|
207
214
|
|
|
208
|
-
getDomain('google.com');
|
|
209
|
-
getDomain('fr.google.com');
|
|
210
|
-
getDomain('fr.google.google');
|
|
211
|
-
getDomain('foo.google.co.uk');
|
|
212
|
-
getDomain('t.co');
|
|
213
|
-
getDomain('fr.t.co');
|
|
215
|
+
getDomain('google.com'); // returns `google.com`
|
|
216
|
+
getDomain('fr.google.com'); // returns `google.com`
|
|
217
|
+
getDomain('fr.google.google'); // returns `google.google`
|
|
218
|
+
getDomain('foo.google.co.uk'); // returns `google.co.uk`
|
|
219
|
+
getDomain('t.co'); // returns `t.co`
|
|
220
|
+
getDomain('fr.t.co'); // returns `t.co`
|
|
214
221
|
getDomain('https://user:password@example.co.uk:8080/some/path?and&query#hash'); // returns `example.co.uk`
|
|
215
222
|
```
|
|
216
223
|
|
|
@@ -221,13 +228,15 @@ Returns the domain (as returned by `getDomain(...)`) without the public suffix p
|
|
|
221
228
|
```javascript
|
|
222
229
|
const { getDomainWithoutSuffix } = require('tldts');
|
|
223
230
|
|
|
224
|
-
getDomainWithoutSuffix('google.com');
|
|
225
|
-
getDomainWithoutSuffix('fr.google.com');
|
|
226
|
-
getDomainWithoutSuffix('fr.google.google');
|
|
227
|
-
getDomainWithoutSuffix('foo.google.co.uk');
|
|
228
|
-
getDomainWithoutSuffix('t.co');
|
|
229
|
-
getDomainWithoutSuffix('fr.t.co');
|
|
230
|
-
getDomainWithoutSuffix(
|
|
231
|
+
getDomainWithoutSuffix('google.com'); // returns `google`
|
|
232
|
+
getDomainWithoutSuffix('fr.google.com'); // returns `google`
|
|
233
|
+
getDomainWithoutSuffix('fr.google.google'); // returns `google`
|
|
234
|
+
getDomainWithoutSuffix('foo.google.co.uk'); // returns `google`
|
|
235
|
+
getDomainWithoutSuffix('t.co'); // returns `t`
|
|
236
|
+
getDomainWithoutSuffix('fr.t.co'); // returns `t`
|
|
237
|
+
getDomainWithoutSuffix(
|
|
238
|
+
'https://user:password@example.co.uk:8080/some/path?and&query#hash',
|
|
239
|
+
); // returns `example`
|
|
231
240
|
```
|
|
232
241
|
|
|
233
242
|
### getSubdomain(url | hostname, options?)
|
|
@@ -237,14 +246,16 @@ Returns the complete subdomain for a given string.
|
|
|
237
246
|
```javascript
|
|
238
247
|
const { getSubdomain } = require('tldts');
|
|
239
248
|
|
|
240
|
-
getSubdomain('google.com');
|
|
241
|
-
getSubdomain('fr.google.com');
|
|
242
|
-
getSubdomain('google.co.uk');
|
|
243
|
-
getSubdomain('foo.google.co.uk');
|
|
244
|
-
getSubdomain('moar.foo.google.co.uk');
|
|
245
|
-
getSubdomain('t.co');
|
|
246
|
-
getSubdomain('fr.t.co');
|
|
247
|
-
getSubdomain(
|
|
249
|
+
getSubdomain('google.com'); // returns ``
|
|
250
|
+
getSubdomain('fr.google.com'); // returns `fr`
|
|
251
|
+
getSubdomain('google.co.uk'); // returns ``
|
|
252
|
+
getSubdomain('foo.google.co.uk'); // returns `foo`
|
|
253
|
+
getSubdomain('moar.foo.google.co.uk'); // returns `moar.foo`
|
|
254
|
+
getSubdomain('t.co'); // returns ``
|
|
255
|
+
getSubdomain('fr.t.co'); // returns `fr`
|
|
256
|
+
getSubdomain(
|
|
257
|
+
'https://user:password@secure.example.co.uk:443/some/path?and&query#hash',
|
|
258
|
+
); // returns `secure`
|
|
248
259
|
```
|
|
249
260
|
|
|
250
261
|
### getPublicSuffix(url | hostname, options?)
|
|
@@ -254,19 +265,19 @@ Returns the [public suffix][] for a given string.
|
|
|
254
265
|
```javascript
|
|
255
266
|
const { getPublicSuffix } = require('tldts');
|
|
256
267
|
|
|
257
|
-
getPublicSuffix('google.com');
|
|
258
|
-
getPublicSuffix('fr.google.com');
|
|
259
|
-
getPublicSuffix('google.co.uk');
|
|
268
|
+
getPublicSuffix('google.com'); // returns `com`
|
|
269
|
+
getPublicSuffix('fr.google.com'); // returns `com`
|
|
270
|
+
getPublicSuffix('google.co.uk'); // returns `co.uk`
|
|
260
271
|
getPublicSuffix('s3.amazonaws.com'); // returns `com`
|
|
261
272
|
getPublicSuffix('s3.amazonaws.com', { allowPrivateDomains: true }); // returns `s3.amazonaws.com`
|
|
262
|
-
getPublicSuffix('tld.is.unknown');
|
|
273
|
+
getPublicSuffix('tld.is.unknown'); // returns `unknown`
|
|
263
274
|
```
|
|
264
275
|
|
|
265
276
|
# Troubleshooting
|
|
266
277
|
|
|
267
278
|
## Retrieving subdomain of `localhost` and custom hostnames
|
|
268
279
|
|
|
269
|
-
`tldts` methods `getDomain` and `getSubdomain` are designed to **work only with *known and valid* TLDs**.
|
|
280
|
+
`tldts` methods `getDomain` and `getSubdomain` are designed to **work only with _known and valid_ TLDs**.
|
|
270
281
|
This way, you can trust what a domain is.
|
|
271
282
|
|
|
272
283
|
`localhost` is a valid hostname but not a TLD. You can pass additional options to each method exposed by `tldts`:
|
|
@@ -274,11 +285,11 @@ This way, you can trust what a domain is.
|
|
|
274
285
|
```js
|
|
275
286
|
const tldts = require('tldts');
|
|
276
287
|
|
|
277
|
-
tldts.getDomain('localhost');
|
|
278
|
-
tldts.getSubdomain('vhost.localhost');
|
|
288
|
+
tldts.getDomain('localhost'); // returns null
|
|
289
|
+
tldts.getSubdomain('vhost.localhost'); // returns null
|
|
279
290
|
|
|
280
291
|
tldts.getDomain('localhost', { validHosts: ['localhost'] }); // returns 'localhost'
|
|
281
|
-
tldts.getSubdomain('vhost.localhost', { validHosts: ['localhost'] });
|
|
292
|
+
tldts.getSubdomain('vhost.localhost', { validHosts: ['localhost'] }); // returns 'vhost'
|
|
282
293
|
```
|
|
283
294
|
|
|
284
295
|
## Updating the TLDs List
|
|
@@ -291,7 +302,7 @@ If you keep `tldts` updated, the lists should be up-to-date as well!
|
|
|
291
302
|
|
|
292
303
|
# Performance
|
|
293
304
|
|
|
294
|
-
`tldts` is the
|
|
305
|
+
`tldts` is the _fastest JavaScript library_ available for parsing hostnames. It is able to parse _millions of inputs per second_ (typically 2-3M depending on your hardware and inputs). It also offers granular options to fine-tune the behavior and performance of the library depending on the kind of inputs you are dealing with (e.g.: if you know you only manipulate valid hostnames you can disable the hostname extraction step with `{ extractHostname: false }`).
|
|
295
306
|
|
|
296
307
|
Please see [this detailed comparison](./comparison/comparison.md) with other available libraries.
|
|
297
308
|
|
|
@@ -310,9 +321,7 @@ This project would not be possible without the amazing Mozilla's
|
|
|
310
321
|
|
|
311
322
|
[badge-ci]: https://secure.travis-ci.org/remusao/tldts.svg?branch=master
|
|
312
323
|
[badge-downloads]: https://img.shields.io/npm/dm/tldts.svg
|
|
313
|
-
|
|
314
324
|
[public suffix list]: https://publicsuffix.org/list/
|
|
315
325
|
[list the recent changes]: https://github.com/publicsuffix/list/commits/master
|
|
316
326
|
[changes Atom Feed]: https://github.com/publicsuffix/list/commits/master.atom
|
|
317
|
-
|
|
318
327
|
[public suffix]: https://publicsuffix.org/learn/
|
package/bin/cli.js
CHANGED
|
@@ -15,9 +15,7 @@ if (process.argv.length > 2) {
|
|
|
15
15
|
const rlInterface = readline.createInterface({
|
|
16
16
|
input: process.stdin,
|
|
17
17
|
});
|
|
18
|
-
rlInterface.on('line', function(line){
|
|
19
|
-
console.log(
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
})
|
|
23
|
-
}
|
|
18
|
+
rlInterface.on('line', function (line) {
|
|
19
|
+
console.log(JSON.stringify(parse(line), null, 2));
|
|
20
|
+
});
|
|
21
|
+
}
|