@bartificer/linkify 2.2.0 → 2.3.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@bartificer/linkify",
3
- "version": "2.2.0",
3
+ "version": "2.3.2",
4
4
  "description": "A module for converting URLs into pretty links in any format.",
5
5
  "publishConfig": {
6
6
  "access": "public"
@@ -15,7 +15,7 @@
15
15
  ],
16
16
  "scripts": {
17
17
  "build": "webpack",
18
- "publish": "npm run build && npm publish"
18
+ "release": "npm run build && npm publish"
19
19
  },
20
20
  "repository": {
21
21
  "type": "git",
@@ -32,6 +32,7 @@
32
32
  "clipboardy": "^5.3.1",
33
33
  "mustache": "^4.2.0",
34
34
  "node-fetch": "^3.3.2",
35
+ "title-case": "^4.3.2",
35
36
  "urijs": "^1.19.10",
36
37
  "url-slug": "^5.0.0"
37
38
  },
@@ -44,6 +44,15 @@ export class Linkifier {
44
44
  */
45
45
  this._linkTemplates = {};
46
46
 
47
+ /**
48
+ * The loaded list of words with customised capitalisations.
49
+ *
50
+ * @private
51
+ * @type {string[]}
52
+ */
53
+ this._speciallyCapitalisedWords = [];
54
+ defaults.speciallyCapitalisedWords.map(word => this._speciallyCapitalisedWords.push(word));
55
+
47
56
  /**
48
57
  * A collection of utility functions.
49
58
  *
@@ -74,6 +83,24 @@ export class Linkifier {
74
83
  return this._utilities;
75
84
  }
76
85
 
86
+ /**
87
+ * @returns {string[]} The current list of known words with special capitalisations.
88
+ */
89
+ get speciallyCapitalisedWords(){
90
+ const ans = [];
91
+ this._speciallyCapitalisedWords.map(word => ans.push(word));
92
+ return ans;
93
+ }
94
+
95
+ /**
96
+ * @param {string[]} words - a list of words with special capitalisations
97
+ */
98
+ set speciallyCapitalisedWords(words){
99
+ // TO DO - add validation
100
+
101
+ this._speciallyCapitalisedWords = words;
102
+ }
103
+
77
104
  /**
78
105
  * Register a data transformer function for a given domain.
79
106
  *
@@ -280,9 +307,8 @@ export class Linkifier {
280
307
  webDownloadResponseBody = await webDownloadResponse.text();
281
308
  } catch (err) {
282
309
  // fall back to extracting the title from the URL slug
283
- console.warn(`Failed to fetch page data for '${url}': ${err.message}`);
284
- console.warn('Falling back to reversing the URL slug for the title');
285
- ans.title = this.utilities.extractSlug(url) || 'Untitled';
310
+ console.warn(`Falling back to de-slugifying URL (${err.message})`);
311
+ ans.title = this.utilities.extractSlug(url, this._speciallyCapitalisedWords) || 'Untitled';
286
312
  return ans;
287
313
  }
288
314
  let $ = cheerio.load(webDownloadResponseBody);
package/src/defaults.mjs CHANGED
@@ -2,7 +2,8 @@ import { LinkTemplate } from './LinkTemplate.class.mjs';
2
2
  import * as utilities from "./utilities.mjs";
3
3
 
4
4
  /**
5
- * @type {Object.<string, LinkTemplate>} A collection of named link templates.
5
+ * The collection of named link templates loaded by the Linkifier constructor.
6
+ * @type {Object.<string, LinkTemplate>}
6
7
  */
7
8
  export const linkTemplates = {
8
9
  html: new LinkTemplate(
@@ -29,3 +30,58 @@ export const linkTemplates = {
29
30
  ]
30
31
  )
31
32
  };
33
+
34
+ /**
35
+ * The default list of words with special capitalisations.
36
+ * @type {string[]}
37
+ */
38
+ export const speciallyCapitalisedWords = [
39
+ // generic acronyms
40
+ 'FBI',
41
+ 'CIA',
42
+ 'USA',
43
+ 'UK',
44
+ 'EU',
45
+ 'NASA',
46
+ 'NSA',
47
+ 'OS',
48
+ 'OSes',
49
+ 'ID',
50
+ 'IDs',
51
+ 'MLB',
52
+ 'NFL',
53
+ 'NASCAR',
54
+ 'FIFA',
55
+ 'TV',
56
+ 'VR',
57
+ 'BAFTA',
58
+ 'BBC',
59
+ 'AI',
60
+ 'VP',
61
+ 'II',
62
+ 'III',
63
+ 'IV',
64
+
65
+ // tech jargon
66
+ 'iOS',
67
+ 'macOS',
68
+ 'iPhone',
69
+ 'iPad',
70
+ 'iPod',
71
+ 'AirTag',
72
+ 'iPadOS',
73
+ 'watchOS',
74
+ 'tvOS',
75
+ 'CarPlay',
76
+ 'AirPods',
77
+ 'MacBook',
78
+ 'iTunes',
79
+ 'WWDC',
80
+ 'XDR',
81
+ 'XProtect',
82
+ 'VESA',
83
+ 'HDMI',
84
+ 'DisplayPort',
85
+ 'LinkedIn',
86
+ 'ChatGPT'
87
+ ];
package/src/utilities.mjs CHANGED
@@ -1,5 +1,7 @@
1
1
  import URI from 'urijs';
2
2
  import * as urlSlug from 'url-slug';
3
+ import * as titleCase from "title-case";
4
+ import * as defaults from './defaults.mjs';
3
5
 
4
6
  /**
5
7
  * Regularise white space by replacing all sequences of whitespace characters with a single space and trimming leading and trailing whitespace.
@@ -32,13 +34,69 @@ export function stripUTMParameters(url){
32
34
  return URI(url).removeQuery(['utm_source', 'utm_medium', 'utm_campaign', 'utm_term', 'utm_content']).toString();
33
35
  };
34
36
 
37
/**
 * Escape a string for use in regular expressions.
 *
 * Each of the characters `/ - \ ^ $ * + ? . ( ) | [ ] { }` is prefixed with a
 * backslash so it is matched literally.
 *
 * @param {string} str - the string to escape; coerced with `String()`.
 * @returns {string} The escaped string.
 * @note much to my surprise, not a standard Javascript feature!
 * @see https://stackoverflow.com/a/3561711/174985
 */
export function escapeRegex(str) {
    return String(str).replace(/[/\-\\^$*+?.()|[\]{}]/g, '\\$&');
}

/**
 * Batch-customise word casings in a string. E.g. force `fbi` to `FBI`, `ios` to `iOS`, etc..
 *
 * Words are matched case-insensitively and only as whole words (between `\b`
 * word boundaries), then replaced with the spelling given in the word list.
 * Because of the `\b` anchors, a word that begins or ends with a non-word
 * character (e.g. `C++`) may not match as expected.
 *
 * @param {string} str - the string to apply the replacements to; coerced with `String()`.
 * @param {string[]} [words] - an array of words in their desired capitalisations. Defaults to the default list of custom capitalisations.
 * @returns {string} The string with all known words re-cased.
 * @throws {TypeError} If `words` is passed but is not an array of strings.
 */
export function batchFixCustomWordCases(str, words){
    // coerce the first argument to a string, and use the coerced value from here on
    const text = String(str);

    // resolve the word list, falling back to the default list if none was passed
    let wordList = words;
    if(wordList === undefined || wordList === null){
        wordList = defaults.speciallyCapitalisedWords;
    }
    if(!Array.isArray(wordList)){
        throw new TypeError('words must be an array of strings');
    }

    // build a mapping from the lower-case version of each word to its custom version;
    // a Map is used so words like `constructor` or `__proto__` are treated as plain keys
    const lowerToCustomMap = new Map();
    for(const word of wordList){
        if(typeof word !== 'string'){
            throw new TypeError('words must be an array of strings');
        }
        if(word.length > 0){
            lowerToCustomMap.set(word.toLowerCase(), word);
        }
    }

    // with no words there is nothing to fix; an empty pattern would otherwise
    // match at every position and corrupt the string
    if(lowerToCustomMap.size === 0){
        return text;
    }

    // assemble a single RE from all the words, longest first
    const sortedWords = [...lowerToCustomMap.keys()].sort((a, b) => b.length - a.length);
    const wordRE = new RegExp(
        sortedWords.map(word => `\\b${escapeRegex(word)}\\b`).join('|'),
        'gi'
    );

    // replace all the matches at once, leaving a match untouched if it has no mapping
    return text.replace(wordRE, match => {
        const custom = lowerToCustomMap.get(match.toLowerCase());
        return custom === undefined ? match : custom;
    });
}
84
+
35
85
  /**
36
86
  * Extract the slug from a URL and convert it to a title-case string.
37
87
  *
38
88
  * @param {string} url
89
+ * @param {string[]} [words] - an optional list of words with custom capitalisations to correct after title-casing; defaults to an empty list when omitted.
39
90
  * @return {string}
40
91
  */
41
- export function extractSlug(url){
92
+ export function extractSlug(url, words){
93
+ // TO DO - add validation
94
+
95
+ // resolve the list of words with custom capitalisations
96
+ if(arguments.length < 2){
97
+ words = [];
98
+ }
99
+
42
100
  // example URLs to try support:
43
101
  // ----------------------------
44
102
  // https://www.macobserver.com/news/apple-q2-2026-earnings-call-date-confirmed-heres-what-to-expect/
@@ -50,7 +108,8 @@ export function extractSlug(url){
50
108
  // 2. Trim leading and trailing slashes
51
109
  // 3. Split the path on / into segments and take the last segment.
52
110
  // 4. Remove any file extension.
53
- // 5. Call slug reversing function with Title Case option.
111
+ // 5. Call slug reversing function with the lower-case option.
112
+ // 6. Intelligently Title-case the title.
54
113
 
55
114
  // extract the path from the URL and clean up both ends
56
115
  const uri = URI(url);
@@ -59,6 +118,18 @@ export function extractSlug(url){
59
118
  let slug = path.split('/').pop() || ''; // get last segment of the path
60
119
  slug = slug.replace(/\.[^/.]+$/, ''); // trim any file extension that might be present
61
120
 
62
- // reverse the slug into a title-case string
63
- return urlSlug.revert(slug, { transformer: urlSlug.TITLECASE_TRANSFORMER });
121
+ // reverse the slug into a lower-case string
122
+ let title = urlSlug.revert(slug, {
123
+ transformer: urlSlug.LOWERCASE_TRANSFORMER,
124
+ camelCase: false
125
+ });
126
+
127
+ // convert the title to title case
128
+ titleCase.SMALL_WORDS.add('is');
129
+ title = titleCase.titleCase(title);
130
+
131
+ // fix any words with unusual customisations
132
+ title = batchFixCustomWordCases(title, words);
133
+
134
+ return title;
64
135
  };