@letsrunit/playwright 0.21.0 → 0.21.1

This diff shows the changes between two publicly available versions of a package that have been released to one of the supported registries. It is provided for informational purposes only and reflects the differences between those package versions as they appear in the public registry.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@letsrunit/playwright",
3
- "version": "0.21.0",
3
+ "version": "0.21.1",
4
4
  "description": "Playwright extensions and utilities for letsrunit",
5
5
  "keywords": [
6
6
  "testing",
@@ -42,18 +42,12 @@
42
42
  },
43
43
  "packageManager": "yarn@4.10.3",
44
44
  "dependencies": {
45
- "@letsrunit/utils": "0.21.0",
45
+ "@letsrunit/utils": "0.21.1",
46
46
  "@playwright/test": "1.58.2",
47
47
  "case": "^1.6.3",
48
48
  "diff": "^8.0.3",
49
49
  "fast-json-stable-stringify": "^2.1.0",
50
50
  "jsdom": "^27.4.0",
51
- "metascraper-description": "^5.49.15",
52
- "metascraper-image": "^5.49.15",
53
- "metascraper-logo": "^5.49.15",
54
- "metascraper-logo-favicon": "^5.49.15",
55
- "metascraper-title": "^5.49.15",
56
- "metascraper-url": "^5.49.15",
57
51
  "rehype-format": "^5.0.1",
58
52
  "rehype-parse": "^9.0.1",
59
53
  "rehype-stringify": "^10.0.1",
package/src/page-info.ts CHANGED
@@ -1,33 +1,22 @@
1
- import metascraper, { type MetascraperOptions } from 'metascraper';
2
- import metascraperDescription from 'metascraper-description';
3
- import metascraperImage from 'metascraper-image';
4
- import metascraperLang from 'metascraper-lang';
5
- import metascraperLogo from 'metascraper-logo';
6
- import metascraperLogoFavicon from 'metascraper-logo-favicon';
7
- import metascraperTitle from 'metascraper-title';
8
- import metascraperUrl from 'metascraper-url';
1
+ import type { Page } from '@playwright/test';
2
+ import { extractPageMetadata } from './page-metadata';
3
+ import { screenshot as takeScreenshot } from './screenshot';
9
4
  import type { PageInfo, Snapshot } from './types';
5
+ import { isPage } from './utils/type-check';
10
6
 
11
- const scrape = metascraper([
12
- metascraperTitle(),
13
- metascraperDescription(),
14
- metascraperImage(),
15
- metascraperLogo(),
16
- metascraperLogoFavicon(),
17
- metascraperLang(),
18
- metascraperUrl(),
19
- ]);
7
+ type PageLike = Page | Partial<Snapshot> & { url: string; html: string };
20
8
 
21
- export async function extractPageInfo(options: MetascraperOptions & Partial<Snapshot>): Promise<PageInfo> {
22
- const meta = await scrape(options);
9
+ export async function extractPageInfo(page: PageLike): Promise<PageInfo> {
10
+ const snapshot = isPage(page)
11
+ ? {
12
+ url: page.url(),
13
+ html: await page.content(),
14
+ screenshot: await takeScreenshot(page),
15
+ }
16
+ : page;
23
17
 
24
18
  return {
25
- url: meta.url || options.url,
26
- name: meta.title || undefined,
27
- description: meta.description || undefined,
28
- image: meta.image || undefined,
29
- favicon: meta.logo || undefined,
30
- lang: meta.lang || undefined,
31
- screenshot: options.screenshot,
19
+ ...extractPageMetadata(snapshot),
20
+ screenshot: snapshot.screenshot,
32
21
  };
33
22
  }
@@ -0,0 +1,69 @@
1
+ import { JSDOM } from 'jsdom';
2
+ import type { PageInfo, Snapshot } from './types';
3
+
4
+ export function extractPageMetadata(snapshot: Pick<Snapshot, 'url' | 'html'>): Omit<PageInfo, 'screenshot'> {
5
+ const dom = new JSDOM(snapshot.html, { url: snapshot.url });
6
+ const doc = dom.window.document;
7
+
8
+ return {
9
+ url: resolveCanonicalUrl(doc, snapshot.url),
10
+ name: firstNonEmpty(
11
+ metaContent(doc, 'property', 'og:title'),
12
+ metaContent(doc, 'name', 'twitter:title'),
13
+ doc.title,
14
+ ),
15
+ description: firstNonEmpty(
16
+ metaContent(doc, 'name', 'description'),
17
+ metaContent(doc, 'property', 'og:description'),
18
+ metaContent(doc, 'name', 'twitter:description'),
19
+ ),
20
+ image: resolveUrl(
21
+ snapshot.url,
22
+ firstNonEmpty(
23
+ metaContent(doc, 'property', 'og:image'),
24
+ metaContent(doc, 'name', 'twitter:image'),
25
+ ),
26
+ ),
27
+ logo: resolveUrl(snapshot.url, firstLinkHref(doc, ['link[rel~="apple-touch-icon"]', 'link[rel~="icon"]'])),
28
+ author: firstNonEmpty(metaContent(doc, 'name', 'author')),
29
+ publisher: firstNonEmpty(
30
+ metaContent(doc, 'property', 'article:publisher'),
31
+ metaContent(doc, 'name', 'publisher'),
32
+ metaContent(doc, 'property', 'og:site_name'),
33
+ ),
34
+ lang: firstNonEmpty(doc.documentElement.lang),
35
+ favicon: resolveUrl(snapshot.url, firstLinkHref(doc, ['link[rel~="icon"]', 'link[rel="shortcut icon"]'])),
36
+ };
37
+ }
38
+
39
+ function metaContent(doc: Document, attr: 'name' | 'property', value: string): string | undefined {
40
+ return firstNonEmpty(doc.querySelector(`meta[${attr}="${value}"]`)?.getAttribute('content') ?? undefined);
41
+ }
42
+
43
+ function firstLinkHref(doc: Document, selectors: string[]): string | undefined {
44
+ for (const selector of selectors) {
45
+ const href = firstNonEmpty(doc.querySelector(selector)?.getAttribute('href') ?? undefined);
46
+ if (href) return href;
47
+ }
48
+
49
+ return undefined;
50
+ }
51
+
52
+ function resolveCanonicalUrl(doc: Document, fallbackUrl: string): string {
53
+ return resolveUrl(fallbackUrl, firstLinkHref(doc, ['link[rel="canonical"]'])) ?? fallbackUrl;
54
+ }
55
+
56
+ function resolveUrl(baseUrl: string, candidate?: string): string | undefined {
57
+ const value = firstNonEmpty(candidate);
58
+ if (!value) return undefined;
59
+
60
+ try {
61
+ return new URL(value, baseUrl).toString();
62
+ } catch {
63
+ return value;
64
+ }
65
+ }
66
+
67
+ function firstNonEmpty(...values: Array<string | undefined>): string | undefined {
68
+ return values.find((value) => value !== undefined && value.trim() !== '')?.trim();
69
+ }
Binary file