@bartificer/linkify 2.1.0 → 2.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@bartificer/linkify",
3
- "version": "2.1.0",
3
+ "version": "2.3.1",
4
4
  "description": "A module for converting URLs into pretty links in any format.",
5
5
  "publishConfig": {
6
6
  "access": "public"
@@ -15,7 +15,7 @@
15
15
  ],
16
16
  "scripts": {
17
17
  "build": "webpack",
18
- "publish": "npm run build && npm publish"
18
+ "release": "npm run build && npm publish"
19
19
  },
20
20
  "repository": {
21
21
  "type": "git",
@@ -32,6 +32,7 @@
32
32
  "clipboardy": "^5.3.1",
33
33
  "mustache": "^4.2.0",
34
34
  "node-fetch": "^3.3.2",
35
+ "title-case": "^4.3.2",
35
36
  "urijs": "^1.19.10",
36
37
  "url-slug": "^5.0.0"
37
38
  },
@@ -26,6 +26,16 @@ export class Linkifier {
26
26
  }
27
27
  };
28
28
 
29
+ /**
30
+ * A mapping of domain names to default template names.
31
+ *
32
+ * @private
33
+ * @type {Object.<FQDN, templateName>}
34
+ */
35
+ this._pageDataToLinkTemplateName = {
36
+ '.' : 'html' // default to the 'html' template for all domains unless otherwise specified
37
+ };
38
+
29
39
  /**
30
40
  * The registered link templates.
31
41
  *
@@ -34,6 +44,15 @@ export class Linkifier {
34
44
  */
35
45
  this._linkTemplates = {};
36
46
 
47
+ /**
48
+ * The loaded list of words with customised capitalisations.
49
+ *
50
+ * @private
51
+ * @type {string[]}
52
+ */
53
+ this._speciallyCapitalisedWords = [];
54
+ defaults.speciallyCapitalisedWords.map(word => this._speciallyCapitalisedWords.push(word));
55
+
37
56
  /**
38
57
  * A collection of utility functions.
39
58
  *
@@ -50,13 +69,6 @@ export class Linkifier {
50
69
  }
51
70
  }
52
71
 
53
- /**
54
- * @type {string[]} A list of the names of the registered link templates.
55
- */
56
- get templateNames() {
57
- return Object.keys(this._linkTemplates);
58
- }
59
-
60
72
  /**
61
73
  * @type {Object.<string, Function>}
62
74
  */
@@ -71,6 +83,24 @@ export class Linkifier {
71
83
  return this._utilities;
72
84
  }
73
85
 
86
+ /**
87
+ * @returns {string[]} The current list of known words with special capitalisations.
88
+ */
89
+ get speciallyCapitalisedWords(){
90
+ const ans = [];
91
+ this._speciallyCapitalisedWords.map(word => ans.push(word));
92
+ return ans;
93
+ }
94
+
95
+ /**
96
+ * @param {string[]} words - a list of words with special capitalisations
97
+ */
98
+ set speciallyCapitalisedWords(words){
99
+ // TO DO - add validation
100
+
101
+ this._speciallyCapitalisedWords = words;
102
+ }
103
+
74
104
  /**
75
105
  * Register a data transformer function for a given domain.
76
106
  *
@@ -126,6 +156,39 @@ export class Linkifier {
126
156
  return this._pageDataToLinkDataTransmformers['.'];
127
157
  }
128
158
 
159
+ /**
160
+ * @type {string[]} A list of the names of the registered link templates.
161
+ */
162
+ get templateNames() {
163
+ return Object.keys(this._linkTemplates);
164
+ }
165
+
166
+ /**
167
+ * @returns {string} The name of the default template.
168
+ */
169
+ get defaultTemplateName(){
170
+ return this._pageDataToLinkTemplateName['.'];
171
+ }
172
+
173
+ /**
174
+ * @param {string} templateName - The name of the default template to use.
175
+ * @throws {ValidationError} A validation error is thrown if the template name is missing, invalid, or doesn't correspond to a registered template.
176
+ */
177
+ set defaultTemplateName(templateName){
178
+ const tplName = String(templateName);
179
+ if(!this._linkTemplates[tplName]){
180
+ throw new ValidationError(`No template named '${tplName}' is registered`);
181
+ }
182
+ this._pageDataToLinkTemplateName['.'] = tplName;
183
+ }
184
+
185
+ /**
186
+ * @type {LinkTemplate} The default link template.
187
+ */
188
+ get defaultTemplate(){
189
+ return this._linkTemplates[this._pageDataToLinkTemplateName['.']];
190
+ }
191
+
129
192
  /**
130
193
  * Register a link template.
131
194
  *
@@ -136,8 +199,88 @@ export class Linkifier {
136
199
  */
137
200
  registerTemplate(name, template){
138
201
  // TO DO - add validation
202
+ const tplName = String(name);
139
203
 
140
- this._linkTemplates[name] = template;
204
+ this._linkTemplates[tplName] = template;
205
+ }
206
+
207
+ /**
208
+ * Get a registered link template by name.
209
+ *
210
+ * @param {string} templateName
211
+ * @returns {LinkTemplate}
212
+ * @throws {ValidationError} A validation error is thrown unless a valid name is passed and corresponds to a registered template.
213
+ */
214
+ getTemplate(templateName){
215
+ const tplName = String(templateName);
216
+
217
+ if(!this._linkTemplates[tplName]){
218
+ throw new ValidationError(`No template named '${tplName}' is registered`);
219
+ }
220
+ return this._linkTemplates[tplName];
221
+ }
222
+
223
+ /**
224
+ * Register a default template for use with a given domain. This template will
225
+ * override the overall default for this domain and all its subdomains.
226
+ *
227
+ * @param {domainName} domain - The domain for which this template should be used by default.
228
+ * @param {templateName} templateName - The name of the template to use.
229
+ * @throws {ValidationError} A validation error is thrown if either parameter
230
+ * is missing or invalid.
231
+ */
232
+ registerDefaultTemplateMapping(domain, templateName){
233
+ // TO DO - add validation
234
+
235
+ let fqdn = String(domain);
236
+ if(!fqdn.match(/[.]$/)){
237
+ fqdn += '.';
238
+ }
239
+ this._pageDataToLinkTemplateName[fqdn] = templateName;
240
+ }
241
+
242
+ /**
243
+ * Get the data transformer function for a given domain.
244
+ *
245
+ * Note that domains are searched from the subdomain up. For example, if passed
246
+ * the domain `www.bartificer.net` the function will first look for a
247
+ * transformer for the domain `www.bartificer.net`, if there's no transformer
248
+ * registered for that domain it will look for a transformer for the domain
249
+ * `bartificer.net`, if there's no transformer for that domain either it will
250
+ * return the default transformer.
251
+ *
252
+ * @param {domainName} domain - The domain to get the data transformer for.
253
+ * @returns {dataTransformer}
254
+ * @throws {ValidationError} A validation error is thrown unless a valid domain
255
+ * name is passed.
256
+ */
257
+ getTemplateNameForDomain(domain){
258
+ // TO DO - add validation
259
+
260
+ let fqdn = String(domain);
261
+ if(!fqdn.match(/[.]$/)){
262
+ fqdn += '.';
263
+ }
264
+
265
+ // return the most exact match
266
+ while(fqdn.match(/[.][^.]+[.]$/)){
267
+ if(this._pageDataToLinkTemplateName[fqdn]){
268
+ let tplName = this._pageDataToLinkTemplateName[fqdn];
269
+
270
+ // make sure the template exists
271
+ if(!this._linkTemplates[tplName]){
272
+ console.warn(`No template named '${tplName}' is registered, falling back to global default '${this._pageDataToLinkTemplateName['.']}'`);
273
+ return this._pageDataToLinkTemplateName['.'];
274
+ }
275
+
276
+ //console.log(`returning template name for '${fqdn}'`);
277
+ return this._pageDataToLinkTemplateName[fqdn];
278
+ }
279
+ //console.log(`no template name found for '${fqdn}'`);
280
+ fqdn = fqdn.replace(/^[^.]+[.]/, '');
281
+ }
282
+ //console.log('returning default template name');
283
+ return this._pageDataToLinkTemplateName['.'];
141
284
  }
142
285
 
143
286
  /**
@@ -164,9 +307,8 @@ export class Linkifier {
164
307
  webDownloadResponseBody = await webDownloadResponse.text();
165
308
  } catch (err) {
166
309
  // fall back to extracting the title from the URL slug
167
- console.warn(`Failed to fetch page data for '${url}': ${err.message}`);
168
- console.warn('Falling back to reversing the URL slug for the title');
169
- ans.title = this.utilities.extractSlug(url) || 'Untitled';
310
+ console.warn(`Falling back to de-slugifying URL (${err.message})`);
311
+ ans.title = this.utilities.extractSlug(url, this._speciallyCapitalisedWords) || 'Untitled';
170
312
  return ans;
171
313
  }
172
314
  let $ = cheerio.load(webDownloadResponseBody);
@@ -183,7 +325,9 @@ export class Linkifier {
183
325
  }
184
326
 
185
327
  /**
186
- * Generate a link given a URL.
328
+ * Generate a link given a URL. By default the registered template for the
329
+ * URL's domain will be used, or, if none is registered, the overall
330
+ * default will be used (`html`).
187
331
  *
188
332
  * @async
189
333
  * @param {URL} url
@@ -194,16 +338,52 @@ export class Linkifier {
194
338
  */
195
339
  async generateLink(url, templateName){
196
340
  // TO DO - add validation
197
-
198
- let tplName = templateName && typeof templateName === 'string' ? templateName : 'html';
341
+
342
+ //
343
+ // -- resolve the template name to use for this URL --
344
+ //
345
+ let tplName = '';
346
+
347
+ // resolve the template — if a template name is passed, try use it,
348
+ // otherwise resolve the default for this URL's domain
349
+ if(templateName && typeof templateName === 'string'){
350
+ tplName = templateName;
351
+
352
+ // make sure the template exists
353
+ if(!this._linkTemplates[tplName]){
354
+ console.warn(`No template named '${tplName}' is registered, falling back to global default '${this._pageDataToLinkTemplateName['.']}'`);
355
+ tplName = this._pageDataToLinkTemplateName['.'];
356
+ }
357
+ } else {
358
+ tplName = this.getTemplateNameForDomain((new URL(url)).hostname);
359
+ }
360
+ const template = this._linkTemplates[tplName];
199
361
 
200
362
  // get the page data
201
- let pData = await this.fetchPageData(url);
363
+ const pageData = await this.fetchPageData(url);
202
364
 
203
365
  // transform the page data to link data
204
- let lData = this.getTransformerForDomain(pData.uri.hostname())(pData);
366
+ const linkData = this.getTransformerForDomain(pageData.uri.hostname())(pageData);
367
+
368
+ // apply field-specific filters to the link data
369
+ const fieldNames = ['url', 'text', 'description'];
370
+ const templateData = linkData.asPlainObject();
371
+ for(let fieldName of fieldNames){
372
+ let fieldFilters = template.filtersFor(fieldName);
373
+ for(let filterFn of fieldFilters){
374
+ templateData[fieldName] = filterFn(templateData[fieldName]);
375
+ }
376
+ }
377
+
378
+ // apply the universal filters to all the link data fields
379
+ let globalFilters = template.filtersFor('all');
380
+ for(let filterFn of globalFilters){
381
+ for(let fieldName of fieldNames){
382
+ templateData[fieldName] = filterFn(templateData[fieldName]);
383
+ }
384
+ }
205
385
 
206
386
  // render the link
207
- return Mustache.render(this._linkTemplates[tplName].templateString, lData.asPlainObject());
387
+ return Mustache.render(this._linkTemplates[tplName].templateString, templateData);
208
388
  }
209
389
  };
package/src/defaults.mjs CHANGED
@@ -1,10 +1,76 @@
1
1
  import { LinkTemplate } from './LinkTemplate.class.mjs';
2
+ import * as utilities from "./utilities.mjs";
2
3
 
3
4
  /**
4
- * @type {Object.<string, LinkTemplate>} A collection of named link templates.
5
+ * The collection of named link templates loaded by the Linkifier constructor.
6
+ * @type {Object.<string, LinkTemplate>}
5
7
  */
6
8
  export const linkTemplates = {
7
- html: new LinkTemplate('<a href="{{{url}}}" title="{{description}}">{{text}}</a>'),
8
- htmlNewTab: new LinkTemplate('<a href="{{{url}}}" title="{{description}}" target="_blank" rel="noopener">{{text}}</a>'),
9
- markdown: new LinkTemplate('[{{{text}}}]({{{url}}})')
9
+ html: new LinkTemplate(
10
+ '<a href="{{{url}}}" title="{{description}}">{{text}}</a>',
11
+ [
12
+ ['url', utilities.stripUTMParameters],
13
+ ['text', utilities.regulariseWhitespace],
14
+ ['description', utilities.regulariseWhitespace]
15
+ ]
16
+ ),
17
+ htmlNewTab: new LinkTemplate(
18
+ '<a href="{{{url}}}" title="{{description}}" target="_blank" rel="noopener">{{text}}</a>',
19
+ [
20
+ ['url', utilities.stripUTMParameters],
21
+ ['text', utilities.regulariseWhitespace],
22
+ ['description', utilities.regulariseWhitespace]
23
+ ]
24
+ ),
25
+ markdown: new LinkTemplate(
26
+ '[{{{text}}}]({{{url}}})',
27
+ [
28
+ ['url', utilities.stripUTMParameters],
29
+ ['text', utilities.regulariseWhitespace]
30
+ ]
31
+ )
10
32
  };
33
+
34
/**
 * The default list of words with special capitalisations: generic acronyms
 * first, followed by tech jargon.
 * @type {string[]}
 */
export const speciallyCapitalisedWords = [].concat(
    // generic acronyms
    [
        'FBI', 'CIA', 'USA', 'UK', 'EU',
        'NASA', 'NSA', 'OS', 'OSes', 'ID',
        'IDs', 'MLB', 'NFL', 'NASCAR', 'TV',
        'VR', 'BAFTA', 'BBC', 'AI', 'VP'
    ],
    // tech jargon
    [
        'iOS', 'macOS', 'iPad', 'iPod', 'iPadOS',
        'watchOS', 'tvOS', 'CarPlay', 'AirPods', 'MacBook',
        'iTunes', 'XProtect', 'LinkedIn', 'ChatGPT'
    ]
);
package/src/utilities.mjs CHANGED
@@ -1,5 +1,17 @@
1
1
  import URI from 'urijs';
2
2
  import * as urlSlug from 'url-slug';
3
+ import { titleCase } from "title-case";
4
+ import * as defaults from './defaults.mjs';
5
+
6
/**
 * Regularise white space by replacing all sequences of whitespace characters
 * with a single space and trimming leading and trailing whitespace.
 *
 * `null` and `undefined` are treated as an empty string, so a missing value
 * does not turn into the literal text "null" or "undefined". Every other
 * value is coerced with `String()`.
 *
 * @param {string} text
 * @return {string}
 */
export function regulariseWhitespace(text){
    if(text === null || text === undefined){
        return '';
    }

    // `\s` already covers newlines, so no separate `\n` is needed
    return String(text).replace(/\s+/g, ' ').trim();
}
3
15
 
4
16
  /**
5
17
  * Strip the query string from a URL.
@@ -22,13 +34,69 @@ export function stripUTMParameters(url){
22
34
  return URI(url).removeQuery(['utm_source', 'utm_medium', 'utm_campaign', 'utm_term', 'utm_content']).toString();
23
35
  };
24
36
 
37
/**
 * Escape a string for use in regular expressions.
 *
 * @param {string} str - the string to escape; coerced with `String()`.
 * @returns {string}
 * @note much to my surprise, not a standard Javascript feature!
 * @see https://stackoverflow.com/a/3561711/174985
 */
export function escapeRegex(str) {
    return String(str).replace(/[/\-\\^$*+?.()|[\]{}]/g, '\\$&');
}

/**
 * Batch-customise word casings in a string. E.g. force `fbi` to `FBI`, `ios` to `iOS`, etc..
 *
 * Words are matched case-insensitively on word boundaries. If the word list
 * contains no usable words the string is returned unchanged.
 *
 * @param {string} str - the string to apply the replacements to; coerced with `String()`.
 * @param {string[]} [words] - the words in their desired capitalisations. Defaults to the default list of custom capitalisations. Entries that are not non-empty strings are ignored.
 * @returns {string}
 */
export function batchFixCustomWordCases(str, words = defaults.speciallyCapitalisedWords){
    // coerce the first argument to a string, and use the coerced value throughout
    const text = String(str);

    // TO DO — add validation of the word list

    // build a mapping from the lower-case version of each word to its custom version
    const lowerToCustom = new Map();
    for(const word of words){
        if(typeof word === 'string' && word !== ''){
            lowerToCustom.set(word.toLowerCase(), word);
        }
    }

    // an empty alternation would compile to an empty RE that matches at every
    // position in the string, so there is nothing to do without words
    if(lowerToCustom.size === 0){
        return text;
    }

    // assemble an RE from all the words, longest first
    const alternatives = [...lowerToCustom.keys()]
        .sort((a, b) => b.length - a.length)
        .map(word => `\\b${escapeRegex(word)}\\b`);
    const wordRE = new RegExp(alternatives.join('|'), 'gi');

    // replace all the matches at once, leaving a match untouched if it has no mapping
    return text.replace(wordRE, match => {
        const custom = lowerToCustom.get(match.toLowerCase());
        return custom === undefined ? match : custom;
    });
}
84
+
25
85
  /**
26
86
  * Extract the slug from a URL and convert it to a title-case string.
27
87
  *
28
88
  * @param {string} url
89
+ * @param {[string[]]} words - a list of words with custom capitalisations to correct after title-casing.
29
90
  * @return {string}
30
91
  */
31
- export function extractSlug(url){
92
+ export function extractSlug(url, words){
93
+ // TO DO - add validation
94
+
95
+ // resolve the list of words with custom capitalisations
96
+ if(arguments.length < 2){
97
+ words = [];
98
+ }
99
+
32
100
  // example URLs to try support:
33
101
  // ----------------------------
34
102
  // https://www.macobserver.com/news/apple-q2-2026-earnings-call-date-confirmed-heres-what-to-expect/
@@ -40,7 +108,8 @@ export function extractSlug(url){
40
108
  // 2. Trim leading and trailing slashes
41
109
  // 3. Split the path on / into segments and take the last segment.
42
110
  // 4. Remove any file extension.
43
- // 5. Call slug reversing function with Title Case option.
111
+ // 5. Call slug reversing function with the lower-case option.
112
+ // 6. Intelligently Title-case the title.
44
113
 
45
114
  // extract the path from the URL and clean up both ends
46
115
  const uri = URI(url);
@@ -49,6 +118,17 @@ export function extractSlug(url){
49
118
  let slug = path.split('/').pop() || ''; // get last segment of the path
50
119
  slug = slug.replace(/\.[^/.]+$/, ''); // trim any file extension that might be present
51
120
 
52
- // reverse the slug into a title-case string
53
- return urlSlug.revert(slug, { transformer: urlSlug.TITLECASE_TRANSFORMER });
121
+ // reverse the slug into a lower-case string
122
+ let title = urlSlug.revert(slug, {
123
+ transformer: urlSlug.LOWERCASE_TRANSFORMER,
124
+ camelCase: false
125
+ });
126
+
127
+ // convert the title to title case
128
+ title = titleCase(title);
129
+
130
+ // fix any words with unusual customisations
131
+ title = batchFixCustomWordCases(title, words);
132
+
133
+ return title;
54
134
  };