@bartificer/linkify 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,257 @@
1
+ import {default as URI} from 'urijs';
2
+
3
/**
 * The data gathered about a single web page: its URL (held as a URI.js
 * object), its title, and the text of its `h1` and `h2` headings.
 */
export class PageData {
    /**
     * Create a page data object for a URL, with an empty title and no
     * headings.
     *
     * Validation is still to be added (see the TO DO below), so this class
     * does not itself reject bad input; the value is handed to URI.js as-is.
     *
     * @param {string|URL} url - The page's full URL.
     */
    constructor(url){
        // TO DO - add validation

        /**
         * The page's URL as a URI object.
         *
         * @private
         * @type {URIObject}
         */
        this._uri = URI(); // placeholder, replaced by the `url` setter below

        /**
         * The page's title.
         *
         * @private
         * @type {string}
         */
        this._title = '';

        /**
         * The section headings on the page as arrays of strings indexed by
         * `h1` and `h2`.
         *
         * @private
         * @type {plainObject}
         */
        this._headings = {
            h1: [],
            h2: [],
        };

        // store the URL (goes through the setter so it gets normalised)
        this.url = url;
    }

    /**
     * The page's URL, serialised from the stored URI object.
     *
     * @returns {string}
     */
    get url(){
        return this._uri.toString();
    }

    /**
     * Replace the page's URL. The value is parsed by URI.js and normalised
     * before being stored. No additional validation is performed yet.
     *
     * @param {string} url - A URL as a string.
     */
    set url(url){
        this._uri = URI(url).normalize();
    }

    /**
     * A copy of the stored URI object, so callers cannot alter this page's
     * URL through it.
     *
     * @returns {Object} A URI.js object.
     */
    get uri(){
        return this._uri.clone();
    }

    /**
     * Get the domain-part of the URL as a string.
     *
     * @returns {string} The domain-part of the URL.
     */
    get domain(){
        return this._uri.hostname();
    }

    /**
     * @returns {string} The path-part of the URL.
     */
    get path(){
        return this._uri.path();
    }

    /**
     * The page's title.
     *
     * @returns {string}
     */
    get title(){
        return this._title;
    }

    /**
     * @param {string} title - the page's title as a string. Values passed will be coerced to strings.
     */
    set title(title){
        this._title = String(title);
    }

    /**
     * Get the page's section headings.
     *
     * The arrays are fresh copies; changing them does not affect this object.
     *
     * @returns {Object} A plain object containing arrays of strings indexed by `h1` and `h2`.
     */
    get headings(){
        return {
            h1: [...this._headings.h1],
            h2: [...this._headings.h2],
        };
    }

    /**
     * The page's top-level headings (`h1` tags), as a fresh copy.
     *
     * @returns {string[]}
     */
    get topLevelHeadings(){
        return [...this._headings.h1];
    }

    /**
     * An alias for `.topLevelHeadings`.
     * @see PageData#topLevelHeadings
     */
    get h1s(){
        return this.topLevelHeadings;
    }

    /**
     * The page's secondary headings (`h2` tags), as a fresh copy.
     *
     * @returns {string[]}
     */
    get secondaryHeadings(){
        return [...this._headings.h2];
    }

    /**
     * An alias for `.secondaryHeadings`.
     * @see PageData#secondaryHeadings
     */
    get h2s(){
        return this.secondaryHeadings;
    }

    /**
     * The text from the most important heading on the page. If the page
     * has `h1` tags, the first one will be used, if not, the first `h2` tag
     * will be used, and if there's none of those either, an empty string will
     * be returned.
     *
     * @returns {string} Heading text as a string, or an empty string.
     */
    get mainHeading(){
        const { h1, h2 } = this._headings;
        if(h1.length > 0){
            return h1[0];
        }
        if(h2.length > 0){
            return h2[0];
        }
        return '';
    }

    /**
     * Add a top-level heading.
     *
     * @param {string} h1Text - The heading text. Values passed will be
     * coerced to strings, matching the behaviour of the `title` setter.
     * @returns {PageData} A reference to self to
     * facilitate function chaining.
     */
    addTopLevelHeading(h1Text){
        // coerce so the heading getters really do return strings
        this._headings.h1.push(String(h1Text));
        return this;
    }

    /**
     * Add a secondary heading.
     *
     * @param {string} h2Text - The heading text. Values passed will be
     * coerced to strings, matching the behaviour of the `title` setter.
     * @returns {PageData} A reference to self to
     * facilitate function chaining.
     */
    addSecondaryHeading(h2Text){
        // coerce so the heading getters really do return strings
        this._headings.h2.push(String(h2Text));
        return this;
    }

    /**
     * Get the page data as a plain object of the form:
     * ```
     * {
     *     url: 'http://www.bartificer.net/',
     *     title: 'the page title',
     *     topLevelHeadings: [ 'first h1', 'second h1' ],
     *     secondaryHeadings: [ 'first h2', 'second h2' ],
     *     mainHeading: 'first h1',
     *     uri: {
     *         hostname: 'www.bartificer.net',
     *         path: '/',
     *         hasPath: false
     *     }
     * }
     * ```
     *
     * Note that the `uri` could contain more fields - it's initialised with
     * output from the `URI.parse()` function from the `URI` module.
     *
     * @returns {Object} A plain object containing the page data.
     * @see {@link https://medialize.github.io/URI.js/docs.html#static-parse}
     */
    asPlainObject(){
        const uriParts = URI.parse(this.url);

        // a bare '/' is treated as "no path"
        uriParts.hasPath = uriParts.path !== '/';

        return {
            url: this.url,
            title: this.title,
            topLevelHeadings: this.topLevelHeadings,
            secondaryHeadings: this.secondaryHeadings,
            mainHeading: this.mainHeading,
            uri: uriParts,
        };
    }
}

/**
 * A shortcut for `.addTopLevelHeading()`.
 *
 * @function
 * @see PageData#addTopLevelHeading
 *
 */
PageData.prototype.h1 = PageData.prototype.addTopLevelHeading;

/**
 * A shortcut for `.addSecondaryHeading()`.
 *
 * @function
 * @see PageData#addSecondaryHeading
 *
 */
PageData.prototype.h2 = PageData.prototype.addSecondaryHeading;
package/src/index.js ADDED
@@ -0,0 +1,12 @@
1
+ import { Linkifier } from "./Linkifier.class.mjs";
2
+ import { LinkData } from "./LinkData.class.mjs";
3
+ import { LinkTemplate } from "./LinkTemplate.class.mjs";
4
+ import { PageData } from "./PageData.class.mjs";
5
+
6
+ // TO DO — more intelligently handle default transformers, ATM they're hard-coded in the Linkifier constructor!!!
7
+
8
+ // export the public API
9
+ const VERSION = process.env.VERSION; // Webpack replaces this line with the actual version string during build
10
+ const linkify = new Linkifier();
11
+ export { linkify, Linkifier, PageData, LinkData, LinkTemplate, VERSION};
12
+ export { linkify as default };
@@ -0,0 +1,54 @@
1
+ import URI from 'urijs';
2
+ import * as urlSlug from 'url-slug';
3
+
4
/**
 * Return a URL with its query string set to empty.
 *
 * @param {string} url - The URL to clean.
 * @return {string} The URL as re-serialised by URI.js once its query has
 * been set to the empty string.
 */
export function stripQueryString(url){
    const parsed = URI(url);
    parsed.query('');
    return parsed.toString();
}
13
+
14
/**
 * Return a URL with the five standard UTM tracking parameters removed from
 * its query string; other query parameters are not targeted.
 *
 * @param {string} url - The URL to clean.
 * @return {string} The URL as re-serialised by URI.js after the removal.
 * @see https://en.wikipedia.org/wiki/UTM_parameters
 */
export function stripUTMParameters(url){
    const utmKeys = [
        'utm_source',
        'utm_medium',
        'utm_campaign',
        'utm_term',
        'utm_content',
    ];
    const parsed = URI(url);
    parsed.removeQuery(utmKeys);
    return parsed.toString();
}
24
+
25
/**
 * Extract the slug from a URL and convert it to a title-case string.
 *
 * The slug is the last non-empty segment of the URL's path, with any
 * file extension removed. If the path has no non-empty segment, an empty
 * slug is passed to the slug-reverting function.
 *
 * @param {string} url - The URL to take the slug from.
 * @return {string} The reverted slug, as produced by `urlSlug.revert()`
 * with the title-case transformer.
 */
export function extractSlug(url){
    // example URLs to try support:
    // ----------------------------
    // https://www.macobserver.com/news/apple-q2-2026-earnings-call-date-confirmed-heres-what-to-expect/
    // https://appleinsider.com/articles/26/04/01/studio-display-xdr-without-tilt-adjustable-stand-now-costs-less
    // https://www.bloomberg.com/news/articles/2026-04-03/ireland-tests-digital-id-to-verify-age-of-social-media-users?srnd=phx-technology&embedded-checkout=true
    //
    // Based on those examples, implement the following algorithm:
    // 1. Parse the URL and extract the path (be sure not to capture the query string or fragment).
    // 2. Split the path on / and ignore empty segments (leading, trailing or doubled slashes).
    // 3. Take the last remaining segment.
    // 4. Remove any file extension.
    // 5. Call slug reversing function with Title Case option.

    // only the path is used, so the query string and fragment never leak into the slug
    const path = URI(url).path();

    // dropping empty segments copes with any number of leading/trailing slashes
    const segments = path.split('/').filter((segment) => segment !== '');
    const lastSegment = segments.length > 0 ? segments[segments.length - 1] : '';

    // Only a short alphanumeric suffix containing at least one letter counts as
    // a file extension ('.html', '.mp4'). This keeps dotted slugs intact, e.g.
    // 'node.js-tips' and 'ios-26.4' are left alone rather than truncated.
    const slug = lastSegment.replace(/\.(?=[a-z0-9]*[a-z])[a-z0-9]{1,10}$/i, '');

    // reverse the slug into a title-case string
    return urlSlug.revert(slug, { transformer: urlSlug.TITLECASE_TRANSFORMER });
}