@leanbase.com/js 0.1.1 → 0.1.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,46 +1,46 @@
1
1
  {
2
- "name": "@leanbase.com/js",
3
- "version": "0.1.1",
4
- "description": "Leanbase browser SDK - event tracking, autocapture, and session replay",
5
- "repository": {
6
- "type": "git",
7
- "url": "https://github.com/PostHog/posthog-js",
8
- "directory": "packages/leanbase"
9
- },
10
- "author": "leanbase",
11
- "license": "Copyrighted by Leanflag Limited",
12
- "main": "dist/index.cjs",
13
- "module": "dist/index.mjs",
14
- "types": "dist/index.d.ts",
15
- "unpkg": "dist/leanbase.iife.js",
16
- "jsdelivr": "dist/leanbase.iife.js",
17
- "files": [
18
- "dist",
19
- "src",
20
- "README.md"
21
- ],
22
- "publishConfig": {
23
- "access": "public"
24
- },
25
- "dependencies": {
26
- "@posthog/core": "1.3.1"
27
- },
28
- "devDependencies": {
29
- "jest": "^29.7.0",
30
- "jest-environment-jsdom": "^29.7.0",
31
- "rollup": "^4.44.1",
32
- "rimraf": "^6.0.1",
33
- "@posthog-tooling/tsconfig-base": "1.0.0",
34
- "@posthog-tooling/rollup-utils": "1.0.0"
35
- },
36
- "scripts": {
37
- "clean": "rimraf dist coverage",
38
- "test:unit": "jest -c jest.config.js",
39
- "lint": "eslint src test",
40
- "lint:fix": "eslint src test --fix",
41
- "prebuild": "node -p \"'export const version = \\'' + require('./package.json').version + '\\''\" > src/version.ts",
42
- "build": "rollup -c",
43
- "dev": "rollup -c -w",
44
- "package": "mkdir -p ../../target && pnpm pack --pack-destination ../../target"
45
- }
46
- }
2
+ "name": "@leanbase.com/js",
3
+ "version": "0.1.3",
4
+ "description": "Leanbase browser SDK - event tracking, autocapture, and session replay",
5
+ "repository": {
6
+ "type": "git",
7
+ "directory": "packages/leanbase"
8
+ },
9
+ "author": "leanbase",
10
+ "license": "Copyrighted by Leanflag Limited",
11
+ "main": "dist/index.cjs",
12
+ "module": "dist/index.mjs",
13
+ "types": "dist/index.d.ts",
14
+ "unpkg": "dist/leanbase.iife.js",
15
+ "jsdelivr": "dist/leanbase.iife.js",
16
+ "scripts": {
17
+ "clean": "rimraf dist coverage",
18
+ "test:unit": "jest -c jest.config.js",
19
+ "lint": "eslint src test",
20
+ "lint:fix": "eslint src test --fix",
21
+ "prebuild": "node -p \"'export const version = \\'' + require('./package.json').version + '\\''\" > src/version.ts",
22
+ "build": "rollup -c",
23
+ "dev": "rollup -c -w",
24
+ "prepublishOnly": "pnpm lint && pnpm test:unit && pnpm build",
25
+ "package": "mkdir -p ../../target && pnpm pack --pack-destination ../../target"
26
+ },
27
+ "files": [
28
+ "dist",
29
+ "src",
30
+ "README.md"
31
+ ],
32
+ "publishConfig": {
33
+ "access": "public"
34
+ },
35
+ "dependencies": {
36
+ "@posthog/core": "1.3.1"
37
+ },
38
+ "devDependencies": {
39
+ "jest": "^29.7.0",
40
+ "jest-environment-jsdom": "^29.7.0",
41
+ "rollup": "^4.44.1",
42
+ "rimraf": "^6.0.1",
43
+ "@posthog-tooling/tsconfig-base": "1.0.0",
44
+ "@posthog-tooling/rollup-utils": "1.0.0"
45
+ }
46
+ }
@@ -0,0 +1,550 @@
1
+ import { AutocaptureConfig, LeanbaseConfig, Properties } from './types'
2
+ import { each, entries } from './utils'
3
+
4
+ import { isNullish, isString, isUndefined, isArray, isBoolean } from '@posthog/core'
5
+ import { isDocumentFragment, isElementNode, isTag, isTextNode } from './utils/element-utils'
6
+ import { includes, trim } from '@posthog/core'
7
+ import { logger } from './leanbase-logger'
8
+
9
/*
 * Split a whitespace-separated class string into its individual class names.
 * A falsy input (such as the empty string) yields an empty array.
 * @param {string} s - raw class string
 * @returns {string[]} the class names found in the string
 */
export function splitClassString(s: string): string[] {
    if (!s) {
        return []
    }
    const trimmed = trim(s)
    return trimmed.split(/\s+/)
}
12
+
13
/*
 * Check whether the current page URL matches at least one entry of the given list.
 * Entries are handed to String.prototype.match, so string entries are interpreted
 * as regular expression sources rather than literal substrings.
 * @param {(string | RegExp)[]} urlsList - patterns to test the current URL against
 * @returns {boolean} true when any pattern matches; false when nothing matches or
 *                    when there is no global `window` (for example during server-side rendering)
 */
function checkForURLMatches(urlsList: (string | RegExp)[]): boolean {
    // `typeof` is the only safe probe for a global that may not be declared at all;
    // `window?.` still throws a ReferenceError in that situation
    if (typeof window === 'undefined') {
        return false
    }
    const url = window.location?.href
    return !!(url && urlsList && urlsList.some((regex) => url.match(regex)))
}
17
+
18
/*
 * Return the class names of an element as an array.
 *
 * `className` is normally a string, but it can be an object (for example an
 * SVGAnimatedString, which exposes its value as `baseVal`). For objects we prefer
 * `baseVal`, then fall back to the raw `class` attribute. Any other type is
 * treated as "no classes".
 * @param {Element} el - element to read the classes from
 * @returns {string[]} the element's class names
 */
export function getClassNames(el: Element): string[] {
    const rawClassName: unknown = el.className

    if (typeof rawClassName === 'string') {
        return splitClassString(rawClassName)
    }

    if (typeof rawClassName === 'object') {
        // typeof null is also 'object', hence the truthiness check before using `in`
        const baseVal = rawClassName && 'baseVal' in rawClassName ? (rawClassName as any).baseVal : null
        return splitClassString(baseVal || el.getAttribute('class') || '')
    }

    return splitClassString('')
}
43
+
44
/*
 * Prepare a piece of text for capture.
 * Tokens rejected by shouldCaptureValue are dropped, line breaks and runs of spaces
 * are collapsed to a single space, and the result is cut to 255 characters.
 * @param {string | null | undefined} s - text to clean
 * @returns {string | null} the cleaned text, or null when the input is null/undefined
 */
export function makeSafeText(s: string | null | undefined): string | null {
    if (isNullish(s)) {
        return null
    }

    // the capturing group keeps the whitespace runs in the token list,
    // so the surviving tokens can be re-joined with their original spacing
    const tokens = trim(s).split(/(\s+)/)

    // scrub potentially sensitive values
    const scrubbed = tokens.filter((token) => shouldCaptureValue(token)).join('')

    // normalize whitespace
    const singleSpaced = scrubbed.replace(/[\r\n]/g, ' ').replace(/[ ]+/g, ' ')

    // truncate
    return singleSpaced.substring(0, 255)
}
62
+
63
/*
 * Collect the direct text of an element without pulling in text from child elements.
 * Only the element's own text-node children are read (element.textContent would also
 * include the content of nested elements, which may be sensitive), and every piece is
 * passed through makeSafeText. Nothing is read when the element fails
 * shouldCaptureElement or is flagged by isSensitiveElement.
 * @param {Element} el - element to get the text of
 * @returns {string} the element's direct text content, trimmed
 */
export function getSafeText(el: Element): string {
    const pieces: string[] = []
    const mayReadText = shouldCaptureElement(el) && !isSensitiveElement(el) && el.childNodes && el.childNodes.length

    if (mayReadText) {
        each(el.childNodes, function (child) {
            if (isTextNode(child) && child.textContent) {
                pieces.push(makeSafeText(child.textContent) ?? '')
            }
        })
    }

    return trim(pieces.join(''))
}
85
+
86
/*
 * Resolve the element an event originated from.
 * Falls back to the legacy `srcElement` when `target` is undefined, and uses the first
 * entry of the composed path when the target hosts a shadow root.
 * @param {Event} e - event to inspect
 * @returns {Element | null} the originating element, or null when none can be found
 */
export function getEventTarget(e: Event): Element | null {
    // https://developer.mozilla.org/en-US/docs/Web/API/Event/target#Compatibility_notes
    if (isUndefined(e.target)) {
        return (e.srcElement as Element) || null
    }

    const target = e.target as HTMLElement
    const resolved = target?.shadowRoot ? e.composedPath()[0] : target
    return (resolved as Element) || null
}
97
+
98
// Lowercase tag names that the autocapture checks in this file compare element tag names against
export const autocaptureCompatibleElements = [
    'a',
    'button',
    'form',
    'input',
    'select',
    'textarea',
    'label',
]
99
+
100
/*
 * Decide whether a list of elements passes the configured element allow list.
 *  - no config, or no allow list in the config: everything passes
 *  - allow list present: passes as soon as one element's lowercased tag name is in the list
 * Callers are expected to supply the element together with the parents they care about.
 * @param {Element[]} elements - elements to check
 * @param {AutocaptureConfig | undefined} autocaptureConfig - autocapture config, if any
 * @returns {boolean} whether the elements pass the allow list
 */
function checkIfElementTreePassesElementAllowList(
    elements: Element[],
    autocaptureConfig: AutocaptureConfig | undefined
): boolean {
    const allowlist = autocaptureConfig?.element_allowlist
    if (isUndefined(allowlist)) {
        // everything is allowed, when there is no allow list
        return true
    }

    // one matching element anywhere in the list is enough
    return elements.some((el) => allowlist.some((elementType) => el.tagName.toLowerCase() === elementType))
}
126
+
127
/*
 * Decide whether any of the given elements matches any of the given CSS selectors.
 *  - selector list undefined: every element counts as a match
 *  - selector list empty: nothing matches
 *  - otherwise: true as soon as one element matches one selector
 * @param {Element[]} elements - elements to check
 * @param {string[] | undefined} selectorList - CSS selectors to test with Element.matches
 * @returns {boolean} whether a match was found
 */
function checkIfElementsMatchCSSSelector(elements: Element[], selectorList: string[] | undefined): boolean {
    if (isUndefined(selectorList)) {
        // everything is allowed, when there is no selector list
        return true
    }

    return elements.some((el) => selectorList.some((selector) => el.matches(selector)))
}
146
+
147
/*
 * Return the parent of an element when that parent is itself an element node.
 * @param {Element} curEl - element whose parent is wanted
 * @returns {Element | false} the parent element, or false when there is no parent
 *                            or the parent is not an element node
 */
export function getParentElement(curEl: Element): Element | false {
    const candidate = curEl.parentNode
    if (candidate && isElementNode(candidate)) {
        return candidate
    }
    return false
}
152
+
153
// autocapture check will already filter for ph-no-capture,
// but we include it here to protect against future changes accidentally removing that check
const DEFAULT_RAGE_CLICK_IGNORE_LIST = ['.ph-no-rageclick', '.ph-no-capture']

/*
 * Decide whether a rage click on the given element should be captured.
 *  - config `false`: never capture
 *  - config `true` or no config: capture unless the element or one of its collected
 *    parents matches the default ignore selectors
 *  - config object: same, using its `css_selector_ignorelist` when one is provided
 * @param {Element | null} el - element that was clicked
 * @param {LeanbaseConfig['rageclick']} _config - rageclick configuration
 * @returns {boolean} whether the rage click should be captured
 */
export function shouldCaptureRageclick(el: Element | null, _config: LeanbaseConfig['rageclick']) {
    // `typeof` is the only safe probe for a global that may not be declared (e.g. server-side rendering);
    // the explicit `!el` check narrows `el` to `Element` for the helper call further down
    if (typeof window === 'undefined' || !el || cannotCheckForAutocapture(el)) {
        return false
    }

    let selectorIgnoreList: string[] | boolean
    if (isBoolean(_config)) {
        selectorIgnoreList = _config ? DEFAULT_RAGE_CLICK_IGNORE_LIST : false
    } else {
        selectorIgnoreList = _config?.css_selector_ignorelist ?? DEFAULT_RAGE_CLICK_IGNORE_LIST
    }

    if (selectorIgnoreList === false) {
        return false
    }

    const { targetElementList } = getElementAndParentsForElement(el, false)
    // we don't capture if we match the ignore list
    return !checkIfElementsMatchCSSSelector(targetElementList, selectorIgnoreList)
}
176
+
177
// True when autocapture checks cannot run for `el`: it is missing, it is the <html> tag,
// or it is not an element node
const cannotCheckForAutocapture = (el: Element | null) => {
    if (!el) {
        return true
    }
    return isTag(el, 'html') || !isElementNode(el)
}
180
+
181
/*
 * Walk from an element up through its parents, stopping at <body>, and collect them.
 * While walking, record whether any parent looks "useful" for autocapture: every parent
 * counts when `captureOnAnyElement` is set, otherwise a parent counts when its tag is in
 * autocaptureCompatibleElements or its computed cursor is `pointer`.
 * A document-fragment parent (a shadow root) is stepped over by continuing from its host.
 * @param {Element} el - element to start from
 * @param {boolean | undefined} captureOnAnyElement - treat every parent as useful
 * @returns {{ parentIsUsefulElement: boolean, targetElementList: Element[] }} the collected
 *          elements (starting with `el`) and the usefulness flag; an empty list when the
 *          checks cannot run
 */
const getElementAndParentsForElement = (el: Element, captureOnAnyElement: false | true | undefined) => {
    // `typeof` is the only safe probe for a global that may not be declared (e.g. server-side rendering)
    if (typeof window === 'undefined' || cannotCheckForAutocapture(el)) {
        return { parentIsUsefulElement: false, targetElementList: [] }
    }

    let parentIsUsefulElement = false
    const targetElementList: Element[] = [el]
    let curEl: Element = el
    while (curEl.parentNode && !isTag(curEl, 'body')) {
        // If element is a shadow root, we skip it
        if (isDocumentFragment(curEl.parentNode)) {
            const host = (curEl.parentNode as any).host as Element | undefined
            if (!host) {
                // a plain DocumentFragment has no host; there is nothing further up to walk
                break
            }
            targetElementList.push(host)
            curEl = host
            continue
        }
        const parentNode = getParentElement(curEl)
        if (!parentNode) break
        if (captureOnAnyElement || autocaptureCompatibleElements.indexOf(parentNode.tagName.toLowerCase()) > -1) {
            parentIsUsefulElement = true
        } else {
            const compStyles = window.getComputedStyle(parentNode)
            if (compStyles && compStyles.getPropertyValue('cursor') === 'pointer') {
                parentIsUsefulElement = true
            }
        }

        targetElementList.push(parentNode)
        curEl = parentNode
    }
    return { parentIsUsefulElement, targetElementList }
}
212
+
213
/*
 * Check whether a DOM event should be "captured" or if it may contain sensitive data
 * using a variety of heuristics.
 *
 * The checks run in this order, and the first failing one rejects the event:
 * URL allow list, URL ignore list, DOM event allow list, element allow list,
 * CSS selector allow list. After that a click on an element whose computed cursor is
 * `pointer` is always captured; everything else is decided per tag.
 * @param {Element} el - element to check
 * @param {Event} event - event to check
 * @param {Object} autocaptureConfig - autocapture config
 * @param {boolean} captureOnAnyElement - whether to capture on any element, clipboard autocapture doesn't restrict to "clickable" elements
 * @param {string[]} allowedEventTypes - event types to capture, normally just 'click', but some autocapture types react to different events, some elements have fixed events (e.g., form has "submit")
 * @returns {boolean} whether the event should be captured
 */
export function shouldCaptureDomEvent(
    el: Element,
    event: Event,
    autocaptureConfig: AutocaptureConfig | undefined = undefined,
    captureOnAnyElement?: boolean,
    allowedEventTypes?: string[]
): boolean {
    // `typeof` is the only safe probe for a global that may not be declared (e.g. server-side rendering)
    if (typeof window === 'undefined' || cannotCheckForAutocapture(el)) {
        return false
    }

    // if the current URL is not in the allow list, don't capture
    if (autocaptureConfig?.url_allowlist && !checkForURLMatches(autocaptureConfig.url_allowlist)) {
        return false
    }

    // if the current URL is in the ignore list, don't capture
    if (autocaptureConfig?.url_ignorelist && checkForURLMatches(autocaptureConfig.url_ignorelist)) {
        return false
    }

    const domEventAllowlist = autocaptureConfig?.dom_event_allowlist
    if (domEventAllowlist && !domEventAllowlist.some((eventType) => event.type === eventType)) {
        return false
    }

    const { parentIsUsefulElement, targetElementList } = getElementAndParentsForElement(el, captureOnAnyElement)

    if (!checkIfElementTreePassesElementAllowList(targetElementList, autocaptureConfig)) {
        return false
    }

    if (!checkIfElementsMatchCSSSelector(targetElementList, autocaptureConfig?.css_selector_allowlist)) {
        return false
    }

    const compStyles = window.getComputedStyle(el)
    if (compStyles && compStyles.getPropertyValue('cursor') === 'pointer' && event.type === 'click') {
        return true
    }

    // the caller's event types win; otherwise each tag family has its own defaults
    const isAllowedEvent = (defaultEventTypes: string[]): boolean =>
        (allowedEventTypes || defaultEventTypes).indexOf(event.type) >= 0

    const tag = el.tagName.toLowerCase()
    switch (tag) {
        case 'html':
            return false
        case 'form':
            return isAllowedEvent(['submit'])
        case 'input':
        case 'select':
        case 'textarea':
            return isAllowedEvent(['change', 'click'])
        default:
            if (parentIsUsefulElement) return isAllowedEvent(['click'])
            return (
                isAllowedEvent(['click']) &&
                (autocaptureCompatibleElements.indexOf(tag) > -1 || el.getAttribute('contenteditable') === 'true')
            )
    }
}
288
+
289
/*
 * Check whether a DOM element should be "captured" or if it may contain sensitive data
 * using a variety of heuristics:
 *  - the element or an ancestor below <body> carries `ph-sensitive` or `ph-no-capture`: never
 *  - the element carries `ph-include`: always
 *  - `hidden` and `password` typed fields: never
 *  - a name (or id) that looks like a credit card, password or similar field: never
 * @param {Element} el - element to check
 * @returns {boolean} whether the element should be captured
 */
export function shouldCaptureElement(el: Element): boolean {
    let curEl: Element = el
    while (curEl.parentNode && !isTag(curEl, 'body')) {
        const classes = getClassNames(curEl)
        if (includes(classes, 'ph-sensitive') || includes(classes, 'ph-no-capture')) {
            return false
        }
        curEl = curEl.parentNode as Element
    }

    if (includes(getClassNames(el), 'ph-include')) {
        return true
    }

    // don't include hidden or password fields
    const type = (el as HTMLInputElement).type || ''
    // it's possible for el.type to be a DOM element if el is a form with a child input[name="type"]
    if (isString(type)) {
        const loweredType = type.toLowerCase()
        if (loweredType === 'hidden' || loweredType === 'password') {
            return false
        }
    }

    // filter out data from fields that look like sensitive fields
    const name = (el as HTMLInputElement).name || el.id || ''
    // See https://github.com/posthog/posthog-js/issues/165
    // Under specific circumstances a bug caused .replace to be called on a DOM element
    // instead of a string, removing the element from the page. Ensure this issue is mitigated.
    // it's possible for el.name or el.id to be a DOM element if el is a form with a child input[name="name"]
    if (isString(name)) {
        const sensitiveNameRegex =
            /^cc|cardnum|ccnum|creditcard|csc|cvc|cvv|exp|pass|pwd|routing|seccode|securitycode|securitynum|socialsec|socsec|ssn/i
        const alphanumericName = name.replace(/[^a-zA-Z0-9]/g, '')
        if (sensitiveNameRegex.test(alphanumericName)) {
            return false
        }
    }

    return true
}
335
+
336
/*
 * Check whether a DOM element is 'sensitive' and we should only capture limited data.
 * Inputs (other than button, checkbox, submit and reset types), selects, textareas and
 * contenteditable elements are all considered sensitive, because client-side javascript
 * may place sensitive data in their attributes.
 * @param {Element} el - element to check
 * @returns {boolean} whether the element is sensitive
 */
export function isSensitiveElement(el: Element): boolean {
    const allowedInputTypes = ['button', 'checkbox', 'submit', 'reset']

    const isFreeFormInput = isTag(el, 'input') && !allowedInputTypes.includes((el as HTMLInputElement).type)
    if (isFreeFormInput) {
        return true
    }
    if (isTag(el, 'select')) {
        return true
    }
    if (isTag(el, 'textarea')) {
        return true
    }
    return el.getAttribute('contenteditable') === 'true'
}
355
+
356
// Credit card number pattern, shared by the anchored and unanchored expressions below
const coreCCPattern =
    '(4[0-9]{12}(?:[0-9]{3})?)|(5[1-5][0-9]{14})|(6(?:011|5[0-9]{2})[0-9]{12})|(3[47][0-9]{13})|(3(?:0[0-5]|[68][0-9])[0-9]{11})|((?:2131|1800|35[0-9]{3})[0-9]{11})'
// Anchored: the whole string has to be a card number
const anchoredCCRegex = new RegExp('^(?:' + coreCCPattern + ')$')
// Unanchored: a card number anywhere inside the string is enough
const unanchoredCCRegex = new RegExp(coreCCPattern)

// Social security number pattern; the dashes between the digit groups are optional
const coreSSNPattern = '\\d{3}-?\\d{2}-?\\d{4}'
// Anchored: the whole string has to be an SSN
const anchoredSSNRegex = new RegExp('^(' + coreSSNPattern + ')$')
// Unanchored: an SSN anywhere inside the string is enough
const unanchoredSSNRegex = new RegExp('(' + coreSSNPattern + ')')
369
+
370
/*
 * Check whether a string value should be "captured" or if it may contain sensitive data.
 * Null/undefined values are never captured. String values are rejected when they look
 * like a credit card number (dashes and spaces are ignored for that test) or like a
 * social security number. Any other value is captured.
 * @param {string} value - string value to check
 * @param {boolean} anchorRegexes - whether to anchor the regexes to the start and end of the string
 * @returns {boolean} whether the value should be captured
 */
export function shouldCaptureValue(value: string, anchorRegexes = true): boolean {
    if (isNullish(value)) {
        return false
    }
    if (!isString(value)) {
        return true
    }

    const trimmed = trim(value)

    // check to see if input value looks like a credit card number
    // see: https://www.safaribooksonline.com/library/view/regular-expressions-cookbook/9781449327453/ch04s20.html
    const ccRegex = anchorRegexes ? anchoredCCRegex : unanchoredCCRegex
    const looksLikeCreditCard = ccRegex.test((trimmed || '').replace(/[- ]/g, ''))
    if (looksLikeCreditCard) {
        return false
    }

    // check to see if input value looks like a social security number
    const ssnRegex = anchorRegexes ? anchoredSSNRegex : unanchoredSSNRegex
    return !ssnRegex.test(trimmed)
}
401
+
402
/*
 * Check whether an attribute name is an Angular style attr (either _ngcontent or _nghost)
 * These update on each build and lead to noise in the element chain
 * More details on the attributes here: https://angular.io/guide/view-encapsulation
 * @param {string} attributeName - string value to check
 * @returns {boolean} whether the attribute name starts with one of the Angular prefixes
 */
export function isAngularStyleAttr(attributeName: string): boolean {
    if (!isString(attributeName)) {
        return false
    }
    const angularPrefixes = ['_ngcontent', '_nghost']
    return angularPrefixes.some((prefix) => attributeName.substring(0, prefix.length) === prefix)
}
415
+
416
/*
 * Combine the direct text of an element with the text of its nested span tags,
 * separated by a space. The combined text is dropped entirely when
 * shouldCaptureValue rejects it.
 * @param {Element} target - element to check
 * @returns {string} text content of the target element and its child span tags, or ''
 */
export function getDirectAndNestedSpanText(target: Element): string {
    const directText = getSafeText(target)
    const nestedText = getNestedSpanText(target)
    const combined = `${directText} ${nestedText}`.trim()

    if (!shouldCaptureValue(combined)) {
        return ''
    }
    return combined
}
428
+
429
/*
 * Walk the children of a target element and gather the safe text of every span tag,
 * descending into spans nested inside those spans. The pieces are joined with spaces.
 * A failure while reading one span is logged and does not stop the walk.
 * @param {Element} target - element to check
 * @returns {string} text content of span tags
 */
export function getNestedSpanText(target: Element): string {
    if (!target || !target.childNodes || !target.childNodes.length) {
        return ''
    }

    let collected = ''
    each(target.childNodes, function (child) {
        const isSpan = child && child.tagName?.toLowerCase() === 'span'
        if (!isSpan) {
            return
        }
        try {
            const spanText = getSafeText(child)
            collected = `${collected} ${spanText}`.trim()

            if (child.childNodes && child.childNodes.length) {
                collected = `${collected} ${getNestedSpanText(child)}`.trim()
            }
        } catch (e) {
            logger.error('[AutoCapture]', e)
        }
    })
    return collected
}
455
+
456
/*
 Back in the day storing events in Postgres we use Elements for autocapture events.
 Now we're using elements_chain. We used to do this parsing/processing during ingestion.
 This code is just copied over from ingestion, but we should optimize it
 to create elements_chain string directly.

 Converts the captured element properties into the elements_chain string in two steps:
 extractElements first, elementsToString second.
*/
export function getElementsChainString(elements: Properties[]): string {
    const extracted = extractElements(elements)
    return elementsToString(extracted)
}
465
+
466
// This interface is called 'Element' in plugin-scaffold https://github.com/PostHog/plugin-scaffold/blob/b07d3b879796ecc7e22deb71bf627694ba05386b/src/types.ts#L200
// However 'Element' is a DOM Element when run in the browser, so we have to rename it
interface PHElement {
    // fields filled in by extractElements and serialised by elementsToString
    tag_name?: string
    text?: string
    href?: string
    attr_id?: string
    attr_class?: Array<string>
    nth_child?: number
    nth_of_type?: number
    attributes?: Record<string, any>

    // part of the shape, but not read or written by the code in this file
    event_id?: number
    order?: number
    group_id?: number
}
481
+
482
// Backslash-escape every double quote in the input.
// A quote that is already preceded by a backslash is matched together with that backslash,
// so it comes out unchanged instead of being escaped twice.
function escapeQuotes(input: string): string {
    const quotePattern = /"|\\"/g
    const escapedQuote = '\\"'
    return input.replace(quotePattern, escapedQuote)
}
485
+
486
/*
 * Serialise a list of elements into an elements_chain string.
 * Each element becomes `tag.class1.class2:key="value"key="value"`, with the classes
 * sorted, the attributes sorted by key, and double quotes escaped. Elements are joined with ';'.
 * The input elements are left untouched.
 * @param {PHElement[]} elements - elements to serialise
 * @returns {string} the elements_chain string
 */
function elementsToString(elements: PHElement[]): string {
    const ret = elements.map((element) => {
        let el_string = ''
        if (element.tag_name) {
            el_string += element.tag_name
        }
        if (element.attr_class) {
            // sort a copy: Array.prototype.sort works in place and would reorder the caller's array
            const sortedClasses = [...element.attr_class].sort()
            for (const single_class of sortedClasses) {
                el_string += `.${single_class.replace(/"/g, '')}`
            }
        }
        const attributes: Record<string, any> = {
            ...(element.text ? { text: element.text } : {}),
            'nth-child': element.nth_child ?? 0,
            'nth-of-type': element.nth_of_type ?? 0,
            ...(element.href ? { href: element.href } : {}),
            ...(element.attr_id ? { attr_id: element.attr_id } : {}),
            ...element.attributes,
        }
        const sortedAttributes: Record<string, any> = {}
        entries(attributes)
            .sort(([a], [b]) => a.localeCompare(b))
            .forEach(([key, value]) => {
                // a null/undefined attribute value has no toString(); leave it out rather than throw
                if (isNullish(value)) {
                    return
                }
                sortedAttributes[escapeQuotes(key.toString())] = escapeQuotes(value.toString())
            })
        el_string += ':'
        el_string += entries(sortedAttributes)
            .map(([key, value]) => `${key}="${value}"`)
            .join('')
        return el_string
    })
    return ret.join(';')
}
520
+
521
/*
 * Convert captured element properties into PHElement objects.
 * The text is cut to 400 characters and the href to 2048; every property whose key
 * starts with `attr__` is copied into `attributes` under its original key.
 * @param {Properties[]} elements - captured element properties
 * @returns {PHElement[]} one PHElement per input entry
 */
function extractElements(elements: Properties[]): PHElement[] {
    return elements.map((el) => {
        const attributes: { [id: string]: any } = {}
        for (const [key, value] of entries(el)) {
            if (key.indexOf('attr__') === 0) {
                attributes[key] = value
            }
        }

        return {
            text: el['$el_text']?.slice(0, 400),
            tag_name: el['tag_name'],
            href: el['attr__href']?.slice(0, 2048),
            attr_class: extractAttrClass(el),
            attr_id: el['attr__id'],
            nth_child: el['nth_child'],
            nth_of_type: el['nth_of_type'],
            attributes,
        }
    })
}
540
+
541
/*
 * Read the `attr__class` property of a captured element as a list of class names.
 * An array is returned as it is, any other truthy value is split with splitClassString,
 * and a missing or falsy value yields undefined.
 * @param {Properties} el - captured element properties
 * @returns {string[] | undefined} the class names, if any
 */
function extractAttrClass(el: Properties): PHElement['attr_class'] {
    const attr_class = el['attr__class']
    if (!attr_class) {
        return undefined
    }
    return isArray(attr_class) ? attr_class : splitClassString(attr_class)
}