@d-zero/archaeologist 1.1.3 → 3.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (38)
  1. package/README.md +9 -3
  2. package/dist/analyze-child-process.d.ts +12 -0
  3. package/dist/analyze-child-process.js +118 -0
  4. package/dist/analyze-main-process.d.ts +8 -0
  5. package/dist/analyze-main-process.js +59 -0
  6. package/dist/cli.js +43 -12
  7. package/dist/freeze-child-process.d.ts +3 -0
  8. package/dist/freeze-child-process.js +22 -0
  9. package/dist/freeze-main-process.d.ts +7 -0
  10. package/dist/freeze-main-process.js +34 -0
  11. package/dist/index.d.ts +1 -1
  12. package/dist/index.js +1 -1
  13. package/dist/modules/analize-url.d.ts +11 -0
  14. package/dist/{analize-url.js → modules/analize-url.js} +10 -0
  15. package/dist/modules/diff-text.d.ts +21 -0
  16. package/dist/modules/diff-text.js +45 -0
  17. package/dist/{get-data.d.ts → modules/get-data.d.ts} +5 -2
  18. package/dist/{get-data.js → modules/get-data.js} +10 -9
  19. package/dist/modules/normalize-text-document.d.ts +5 -0
  20. package/dist/modules/normalize-text-document.js +15 -0
  21. package/dist/parse-types.d.ts +5 -0
  22. package/dist/parse-types.js +8 -0
  23. package/dist/read-config.js +2 -8
  24. package/dist/types.d.ts +23 -2
  25. package/package.json +16 -11
  26. package/dist/analize-url.d.ts +0 -11
  27. package/dist/analyze.d.ts +0 -14
  28. package/dist/analyze.js +0 -111
  29. package/dist/archaeologist.d.ts +0 -10
  30. package/dist/archaeologist.js +0 -23
  31. package/dist/freeze.d.ts +0 -13
  32. package/dist/freeze.js +0 -34
  33. package/dist/output-utils.d.ts +0 -6
  34. package/dist/output-utils.js +0 -11
  35. /package/dist/{diff-images.d.ts → modules/diff-images.d.ts} +0 -0
  36. /package/dist/{diff-images.js → modules/diff-images.js} +0 -0
  37. /package/dist/{diff-tree.d.ts → modules/diff-tree.d.ts} +0 -0
  38. /package/dist/{diff-tree.js → modules/diff-tree.js} +0 -0
package/README.md CHANGED
@@ -12,16 +12,22 @@
12
12
  ## CLI
13
13
 
14
14
  ```sh
15
- npx @d-zero/archaeologist -f <filepath> [--limit <number>] [--debug]
15
+ npx @d-zero/archaeologist -f <listfile> [options]
16
16
  ```
17
17
 
18
18
  URLリストを持つファイルを指定して実行します。
19
19
 
20
20
  ### オプション
21
21
 
22
- - `-f, --file <filepath>`: URLリストを持つファイルのパス(必須)
22
+ - `-f, --listfile <filepath>`: URLリストを持つファイルのパス(必須)
23
+ - `-t, --type <types>`: 比較タイプの指定(`image,dom,text`、カンマ区切り)
24
+ - `-s, --selector <selector>`: 比較対象を限定するCSSセレクター
25
+ - `-i, --ignore <selector>`: 無視するCSSセレクター
26
+ - `--devices <devices>`: デバイス指定(`desktop,mobile,tablet`、カンマ区切り)
27
+ - `--freeze <filepath>`: フリーズモード用ファイルパス
23
28
  - `--limit <number>`: 並列実行数の上限(デフォルト: 10)
24
29
  - `--debug`: デバッグモード(デフォルト: false)
30
+ - `--verbose`: 詳細ログモード(デフォルト: false)
25
31
 
26
32
  ### ファイルフォーマット
27
33
 
@@ -53,7 +59,7 @@ https://example.com/xyz/001
53
59
 
54
60
  ## ページフック
55
61
 
56
- [Frontmatter](https://jekyllrb.com/docs/front-matter/)の`hooks`に配列としてスクリプトファイルのパスを渡すと、ページを開いた後(厳密にはPuppetterの`waitUntil: 'networkidle0'`のタイミング直後)にそれらのスクリプトを実行します。スクリプトは配列の順番通りに逐次実行されます。
62
+ [Frontmatter](https://jekyllrb.com/docs/front-matter/)の`hooks`に配列としてスクリプトファイルのパスを渡すと、ページを開いた後(厳密にはPuppeteerの`waitUntil: 'networkidle0'`のタイミング直後)にそれらのスクリプトを実行します。スクリプトは配列の順番通りに逐次実行されます。
57
63
 
58
64
  ```txt
59
65
  ---
@@ -0,0 +1,12 @@
1
+ import type { URLPair } from './types.js';
2
+ import type { PageHook } from '@d-zero/puppeteer-page-scan';
3
+ export type ChildProcessParams = {
4
+ list: readonly URLPair[];
5
+ dir: string;
6
+ useOldMode: boolean;
7
+ types?: readonly string[];
8
+ selector?: string;
9
+ ignore?: string;
10
+ devices?: readonly string[];
11
+ hooks?: readonly PageHook[];
12
+ };
@@ -0,0 +1,118 @@
1
+ import { writeFile } from 'node:fs/promises';
2
+ import path from 'node:path';
3
+ import { createChildProcess } from '@d-zero/puppeteer-dealer';
4
+ import { delay } from '@d-zero/shared/delay';
5
+ import c from 'ansi-colors';
6
+ import { diffImages } from './modules/diff-images.js';
7
+ import { diffText } from './modules/diff-text.js';
8
+ import { diffTree } from './modules/diff-tree.js';
9
+ import { getData } from './modules/get-data.js';
10
+ import { normalizeTextDocument } from './modules/normalize-text-document.js';
11
+ import { score } from './utils.js';
12
+ createChildProcess((param) => {
13
+ const { list, dir, types = ['image', 'dom', 'text'], selector, ignore, devices, } = param;
14
+ return {
15
+ async eachPage({ page, url: urlA, index }, logger) {
16
+ const urlPair = list.find(([url]) => url === urlA);
17
+ if (!urlPair) {
18
+ throw new Error(`Failed to find urlPair: ${urlA}`);
19
+ }
20
+ const dataPair = [];
21
+ for (const url of urlPair) {
22
+ const data = await getData(page, url, {
23
+ htmlDiffOnly: !types.includes('image'),
24
+ selector,
25
+ ignore,
26
+ devices,
27
+ }, logger);
28
+ dataPair.push(data);
29
+ await delay(600);
30
+ }
31
+ const [a, b] = dataPair;
32
+ if (!a || !b) {
33
+ throw new Error('Failed to get screenshots');
34
+ }
35
+ const screenshotResult = {};
36
+ const outputUrl = 'vs ' + c.gray(urlPair[1]);
37
+ for (const [name, screenshotA] of Object.entries(a.screenshots)) {
38
+ const screenshotB = b.screenshots[name];
39
+ const sizeName = c.bgMagenta(` ${name} `);
40
+ const id = `${index}_${name}`;
41
+ if (!screenshotB) {
42
+ throw new Error(`Failed to get screenshotB: ${id}`);
43
+ }
44
+ let image = null;
45
+ if (types.includes('image')) {
46
+ const imageDiff = await diffImages(screenshotA, screenshotB, (phase, data) => {
47
+ switch (phase) {
48
+ case 'create': {
49
+ logger(`${sizeName} ${outputUrl} 🖼️ Create images`);
50
+ break;
51
+ }
52
+ case 'resize': {
53
+ const { width, height } = data;
54
+ logger(`${sizeName} ${outputUrl} ↔️ Resize images to ${width}x${height}`);
55
+ break;
56
+ }
57
+ case 'diff': {
58
+ logger(`${sizeName} ${outputUrl} 📊 Compare images`);
59
+ break;
60
+ }
61
+ }
62
+ });
63
+ if (imageDiff) {
64
+ logger(`${sizeName} ${outputUrl} 🧩 Matches ${score(imageDiff.matches, 0.9)}`);
65
+ await delay(1500);
66
+ await writeFile(path.resolve(dir, `${id}_a.png`), imageDiff.images.a);
67
+ await writeFile(path.resolve(dir, `${id}_b.png`), imageDiff.images.b);
68
+ const outFilePath = path.resolve(dir, `${id}_diff.png`);
69
+ logger(`${sizeName} ${outputUrl} 📊 Save diff image to ${path.relative(dir, outFilePath)}`);
70
+ await writeFile(outFilePath, imageDiff.images.diff);
71
+ image = {
72
+ matches: imageDiff.matches,
73
+ file: outFilePath,
74
+ };
75
+ }
76
+ }
77
+ let dom = null;
78
+ if (types.includes('dom')) {
79
+ const htmlDiff = diffTree(a.url, b.url, screenshotA.domTree, screenshotB.domTree);
80
+ const outFilePath = path.resolve(dir, `${id}_html.diff`);
81
+ await writeFile(outFilePath, htmlDiff.result, { encoding: 'utf8' });
82
+ dom = {
83
+ matches: htmlDiff.matches,
84
+ diff: htmlDiff.changed ? htmlDiff.result : null,
85
+ file: outFilePath,
86
+ };
87
+ }
88
+ let text = null;
89
+ if (types.includes('text')) {
90
+ const contentA = normalizeTextDocument(screenshotA.text.textContent);
91
+ const contentB = normalizeTextDocument(screenshotB.text.textContent);
92
+ const altTextListA = screenshotA.text.altTextList.join('\n');
93
+ const altTextListB = screenshotB.text.altTextList.join('\n');
94
+ const textA = `${contentA}\n\n${altTextListA}`;
95
+ const textB = `${contentB}\n\n${altTextListB}`;
96
+ const textDiff = diffText(a.url, b.url, textA, textB);
97
+ const outFilePath = path.resolve(dir, `${id}_text.diff`);
98
+ await writeFile(outFilePath, `${textDiff.phrases.result}\n\n${textDiff.tokens.result}`, { encoding: 'utf8' });
99
+ text = {
100
+ matches: textDiff.tokens.matches,
101
+ diff: textDiff.tokens.changed ? textDiff.tokens.result : null,
102
+ file: outFilePath,
103
+ };
104
+ }
105
+ screenshotResult[name] = {
106
+ image,
107
+ dom,
108
+ text,
109
+ };
110
+ }
111
+ const result = {
112
+ target: [a.url, b.url],
113
+ screenshots: screenshotResult,
114
+ };
115
+ return result;
116
+ },
117
+ };
118
+ });
@@ -0,0 +1,8 @@
1
+ import type { AnalyzeOptions, URLPair } from './types.js';
2
+ import type { DealOptions } from '@d-zero/dealer';
3
+ /**
4
+ *
5
+ * @param list
6
+ * @param options
7
+ */
8
+ export declare function analyze(list: readonly URLPair[], options?: AnalyzeOptions & DealOptions): Promise<void>;
@@ -0,0 +1,59 @@
1
+ import { writeFile, mkdir } from 'node:fs/promises';
2
+ import path from 'node:path';
3
+ import { createProcess, deal } from '@d-zero/puppeteer-dealer';
4
+ import c from 'ansi-colors';
5
+ import stripAnsi from 'strip-ansi';
6
+ import { analyzeUrlList } from './modules/analize-url.js';
7
+ import { score } from './utils.js';
8
+ /**
9
+ *
10
+ * @param list
11
+ * @param options
12
+ */
13
+ export async function analyze(list, options) {
14
+ const results = [];
15
+ const dir = path.resolve(process.cwd(), '.archaeologist');
16
+ await mkdir(dir, { recursive: true }).catch(() => { });
17
+ const urlInfo = analyzeUrlList(list);
18
+ const useOldMode = urlInfo.hasAuth || urlInfo.hasNoSSL;
19
+ await deal(list.map(([urlA]) => ({ id: null, url: urlA })), (_, done, total) => {
20
+ return `${c.bold.magenta('🕵️ Archaeologist')} ${done}/${total}`;
21
+ }, () => {
22
+ return createProcess(path.resolve(import.meta.dirname, 'analyze-child-process.js'), {
23
+ list,
24
+ dir,
25
+ useOldMode,
26
+ types: options?.types,
27
+ selector: options?.selector,
28
+ ignore: options?.ignore,
29
+ devices: options?.devices,
30
+ hooks: options?.hooks ?? [],
31
+ }, {
32
+ ...options,
33
+ headless: useOldMode ? 'shell' : true,
34
+ });
35
+ }, {
36
+ ...options,
37
+ each(result) {
38
+ results.push(result);
39
+ },
40
+ });
41
+ const output = [];
42
+ for (const result of results) {
43
+ output.push(c.gray(`${result.target.join(' vs ')}`));
44
+ for (const [sizeName, { image, dom, text }] of Object.entries(result.screenshots)) {
45
+ if (image) {
46
+ const { matches, file } = image;
47
+ output.push(` ${c.bgMagenta(` ${sizeName} `)} ${score(matches, 0.9)} ${file}`);
48
+ }
49
+ if (dom) {
50
+ output.push(` ${c.bgBlueBright(' HTML ')}: ${score(dom.matches, 0.995)} ${dom.file}`);
51
+ }
52
+ if (text) {
53
+ output.push(` ${c.bgGreenBright(' TEXT ')}: ${score(text.matches, 0.995)} ${text.file}`);
54
+ }
55
+ }
56
+ }
57
+ await writeFile(path.resolve(dir, 'RESULT.txt'), stripAnsi(output.join('\n').replaceAll(dir, '.')), 'utf8');
58
+ process.stdout.write(output.join('\n') + '\n');
59
+ }
package/dist/cli.js CHANGED
@@ -1,21 +1,52 @@
1
1
  #!/usr/bin/env node
2
- import minimist from 'minimist';
3
- import { archaeologist } from './archaeologist.js';
2
+ import { createCLI, parseCommonOptions, parseList } from '@d-zero/cli-core';
3
+ import { analyze } from './analyze-main-process.js';
4
+ import { freeze } from './freeze-main-process.js';
4
5
  import { readConfig } from './read-config.js';
5
- const cli = minimist(process.argv.slice(2), {
6
- alias: {
6
+ const { options, hasConfigFile } = createCLI({
7
+ aliases: {
7
8
  f: 'listfile',
9
+ t: 'type',
10
+ s: 'selector',
11
+ i: 'ignore',
12
+ },
13
+ usage: ['Usage: archaeologist -f <listfile> [--limit <number>]'],
14
+ parseArgs: (cli) => ({
15
+ ...parseCommonOptions(cli),
16
+ listfile: cli.listfile,
17
+ type: cli.type,
18
+ freeze: cli.freeze,
19
+ selector: cli.selector,
20
+ ignore: cli.ignore,
21
+ devices: cli.devices ??
22
+ // Alias for devices
23
+ cli.device,
24
+ }),
25
+ validateArgs: (options) => {
26
+ return !!(options.listfile?.length || options.freeze?.length);
8
27
  },
9
28
  });
10
- if (cli.listfile?.length) {
11
- const { pairList, hooks } = await readConfig(cli.listfile);
12
- await archaeologist(pairList, {
29
+ if (hasConfigFile) {
30
+ const { pairList, hooks } = await readConfig(options.listfile);
31
+ await analyze(pairList, {
32
+ hooks,
33
+ types: options.type ? parseList(options.type) : undefined,
34
+ selector: options.selector,
35
+ ignore: options.ignore,
36
+ devices: options.devices ? parseList(options.devices) : undefined,
37
+ limit: options.limit,
38
+ debug: options.debug,
39
+ verbose: options.verbose,
40
+ });
41
+ process.exit(0);
42
+ }
43
+ if (options.freeze) {
44
+ const { pairList, hooks } = await readConfig(options.freeze);
45
+ const list = pairList.map(([urlA]) => urlA);
46
+ await freeze(list, {
13
47
  hooks,
14
- limit: cli.limit ? Number.parseInt(cli.limit) : undefined,
15
- debug: !!cli.debug,
16
- htmlDiffOnly: !!cli.htmlDiffOnly,
48
+ limit: options.limit,
49
+ debug: options.debug,
17
50
  });
18
51
  process.exit(0);
19
52
  }
20
- process.stderr.write('Usage: archaeologist -f <listfile> [--limit <number>]\n');
21
- process.exit(1);
@@ -0,0 +1,3 @@
1
+ export type ChildProcessParams = {
2
+ dir: string;
3
+ };
@@ -0,0 +1,22 @@
1
+ import { writeFile } from 'node:fs/promises';
2
+ import path from 'node:path';
3
+ import { createChildProcess } from '@d-zero/puppeteer-dealer';
4
+ import { delay } from '@d-zero/shared/delay';
5
+ import { getData } from './modules/get-data.js';
6
+ createChildProcess((param) => {
7
+ const { dir } = param;
8
+ return {
9
+ async eachPage({ page, id, url }, logger) {
10
+ const data = await getData(page, url, {}, logger);
11
+ await delay(600);
12
+ for (const size of Object.values(data.screenshots)) {
13
+ const jsonFile = path.resolve(dir, `${id}_${size.id}.html`);
14
+ const ssFile = path.resolve(dir, `${id}_${size.id}.png`);
15
+ await writeFile(jsonFile, size.dom, 'utf8');
16
+ if (size.binary) {
17
+ await writeFile(ssFile, size.binary);
18
+ }
19
+ }
20
+ },
21
+ };
22
+ });
@@ -0,0 +1,7 @@
1
+ import type { FreezeOptions } from './types.js';
2
+ /**
3
+ *
4
+ * @param list
5
+ * @param options
6
+ */
7
+ export declare function freeze(list: readonly string[], options?: FreezeOptions): Promise<string>;
@@ -0,0 +1,34 @@
1
+ import { mkdir, writeFile } from 'node:fs/promises';
2
+ import path from 'node:path';
3
+ import { zip } from '@d-zero/fs/zip';
4
+ import { createProcess, deal } from '@d-zero/puppeteer-dealer';
5
+ import { timestamp } from '@d-zero/shared/timestamp';
6
+ import c from 'ansi-colors';
7
+ import { analyzeUrlList } from './modules/analize-url.js';
8
+ /**
9
+ *
10
+ * @param list
11
+ * @param options
12
+ */
13
+ export async function freeze(list, options) {
14
+ const name = `${timestamp('YYYYMMDD')}.archae`;
15
+ const dir = path.resolve(process.cwd(), `.${name}`);
16
+ await mkdir(dir, { recursive: true }).catch(() => { });
17
+ const urlInfo = analyzeUrlList(list);
18
+ const useOldMode = urlInfo.hasAuth || urlInfo.hasNoSSL;
19
+ await deal(list.map((url) => ({ id: null, url })), (_, done, total) => {
20
+ return `${c.bold.magenta('🕵️ Archaeologist Freeze❄️')} ${done}/${total}`;
21
+ }, () => {
22
+ return createProcess(path.resolve(import.meta.dirname, 'freeze-child-process.js'), {
23
+ dir,
24
+ }, {
25
+ ...options,
26
+ headless: useOldMode ? 'shell' : true,
27
+ });
28
+ });
29
+ const urlListPath = path.resolve(dir, '_URL_LIST.json');
30
+ await writeFile(urlListPath, JSON.stringify(list, null, '\t'), 'utf8');
31
+ const zipPath = path.resolve(process.cwd(), `${name}.zip`);
32
+ await zip(zipPath, dir);
33
+ return zipPath;
34
+ }
package/dist/index.d.ts CHANGED
@@ -1,2 +1,2 @@
1
- export { archaeologist } from './archaeologist.js';
1
+ export { analyze as archaeologist } from './analyze-main-process.js';
2
2
  export * from './types.js';
package/dist/index.js CHANGED
@@ -1,2 +1,2 @@
1
- export { archaeologist } from './archaeologist.js';
1
+ export { analyze as archaeologist } from './analyze-main-process.js';
2
2
  export * from './types.js';
@@ -0,0 +1,11 @@
1
+ import type { URLPair } from '../types.js';
2
+ type AnalyzedUrlList = {
3
+ hasAuth: boolean;
4
+ hasNoSSL: boolean;
5
+ };
6
+ /**
7
+ *
8
+ * @param list
9
+ */
10
+ export declare function analyzeUrlList(list: readonly (URLPair | string)[]): AnalyzedUrlList;
11
+ export {};
@@ -8,6 +8,16 @@ export function analyzeUrlList(list) {
8
8
  hasNoSSL: false,
9
9
  };
10
10
  for (const urlPair of list) {
11
+ if (typeof urlPair === 'string') {
12
+ const urlObj = new URL(urlPair);
13
+ if (urlObj.username || urlObj.password) {
14
+ result.hasAuth = true;
15
+ }
16
+ if (urlObj.protocol === 'http:') {
17
+ result.hasNoSSL = true;
18
+ }
19
+ continue;
20
+ }
11
21
  for (const url of urlPair) {
12
22
  const urlObj = new URL(url);
13
23
  if (urlObj.username || urlObj.password) {
@@ -0,0 +1,21 @@
1
+ /**
2
+ *
3
+ * @param urlA
4
+ * @param urlB
5
+ * @param phraseA
6
+ * @param phraseB
7
+ */
8
+ export declare function diffText(urlA: string, urlB: string, phraseA: string, phraseB: string): {
9
+ phrases: {
10
+ changed: boolean;
11
+ maxLine: number;
12
+ matches: number;
13
+ result: string;
14
+ };
15
+ tokens: {
16
+ changed: boolean;
17
+ maxLine: number;
18
+ matches: number;
19
+ result: string;
20
+ };
21
+ };
@@ -0,0 +1,45 @@
1
+ import { getTokenizer } from 'kuromojin';
2
+ import { diffTree } from './diff-tree.js';
3
+ const tokenizer = await getTokenizer();
4
+ /**
5
+ *
6
+ * @param text
7
+ */
8
+ function tokenList(text) {
9
+ return tokenizer
10
+ .tokenize(text)
11
+ .filter((token) => token.surface_form.trim() !== '')
12
+ .map((token) => `${token.surface_form}:${token.pos}:${token.pos_detail_1}`);
13
+ }
14
+ /**
15
+ *
16
+ * @param tokens
17
+ */
18
+ function frequencyMap(tokens) {
19
+ const map = new Map();
20
+ for (const token of tokens) {
21
+ map.set(token, (map.get(token) ?? 0) + 1);
22
+ }
23
+ return map
24
+ .entries()
25
+ .map(([token, frequency]) => `${token} x${frequency}`)
26
+ .toArray()
27
+ .toSorted((a, b) => a.localeCompare(b));
28
+ }
29
+ /**
30
+ *
31
+ * @param urlA
32
+ * @param urlB
33
+ * @param phraseA
34
+ * @param phraseB
35
+ */
36
+ export function diffText(urlA, urlB, phraseA, phraseB) {
37
+ const tokensA = tokenList(phraseA);
38
+ const tokensB = tokenList(phraseB);
39
+ const frequencyMapA = frequencyMap(tokensA).join('\n');
40
+ const frequencyMapB = frequencyMap(tokensB).join('\n');
41
+ return {
42
+ phrases: diffTree(urlA, urlB, phraseA, phraseB),
43
+ tokens: diffTree(urlA, urlB, frequencyMapA, frequencyMapB),
44
+ };
45
+ }
@@ -1,9 +1,12 @@
1
- import type { PageData } from './types.js';
2
- import type { Page } from '@d-zero/puppeteer-page';
1
+ import type { PageData } from '../types.js';
3
2
  import type { PageHook } from '@d-zero/puppeteer-screenshot';
3
+ import type { Page } from 'puppeteer';
4
4
  export interface GetDataOptions {
5
5
  readonly hooks?: readonly PageHook[];
6
6
  readonly htmlDiffOnly?: boolean;
7
+ readonly selector?: string;
8
+ readonly ignore?: string;
9
+ readonly devices?: readonly string[];
7
10
  }
8
11
  /**
9
12
  *
@@ -1,4 +1,5 @@
1
1
  import { distill } from '@d-zero/html-distiller';
2
+ import { defaultSizes } from '@d-zero/puppeteer-page-scan';
2
3
  import { screenshotListener, screenshot } from '@d-zero/puppeteer-screenshot';
3
4
  /**
4
5
  *
@@ -9,19 +10,19 @@ import { screenshotListener, screenshot } from '@d-zero/puppeteer-screenshot';
9
10
  */
10
11
  export async function getData(page, url, options, update) {
11
12
  const htmlDiffOnly = options.htmlDiffOnly ?? false;
13
+ const devices = options.devices ?? ['desktop', 'mobile'];
14
+ const sizes = {};
15
+ for (const device of devices) {
16
+ // @ts-ignore
17
+ sizes[device] = defaultSizes[device];
18
+ }
12
19
  const screenshots = await screenshot(page, url, {
13
- sizes: {
14
- desktop: {
15
- width: 1280,
16
- },
17
- mobile: {
18
- width: 375,
19
- resolution: 2,
20
- },
21
- },
20
+ sizes,
22
21
  hooks: options?.hooks ?? [],
23
22
  listener: screenshotListener(update),
24
23
  domOnly: htmlDiffOnly,
24
+ selector: options.selector,
25
+ ignore: options.ignore,
25
26
  });
26
27
  const data = { url, screenshots: {} };
27
28
  for (const [sizeName, screenshot] of Object.entries(screenshots)) {
@@ -0,0 +1,5 @@
1
+ /**
2
+ *
3
+ * @param text
4
+ */
5
+ export declare function normalizeTextDocument(text: string): string;
@@ -0,0 +1,15 @@
1
+ /**
2
+ *
3
+ * @param text
4
+ */
5
+ export function normalizeTextDocument(text) {
6
+ return (text
7
+ .trim()
8
+ // Spaces
9
+ .replaceAll(/\s+/g, '\n')
10
+ // Periods
11
+ .replaceAll('。', '。\n')
12
+ // Newlines
13
+ .replaceAll(/\n+/g, '\n')
14
+ .trim());
15
+ }
@@ -0,0 +1,5 @@
1
+ /**
2
+ *
3
+ * @param typeQuery
4
+ */
5
+ export declare function parseTypes(typeQuery: string): string[];
@@ -0,0 +1,8 @@
1
+ /**
2
+ *
3
+ * @param typeQuery
4
+ */
5
+ export function parseTypes(typeQuery) {
6
+ const types = typeQuery.split(',').map((type) => type.trim());
7
+ return types;
8
+ }
@@ -1,17 +1,12 @@
1
- import fs from 'node:fs/promises';
2
- import path from 'node:path';
3
1
  import { readPageHooks } from '@d-zero/puppeteer-page-scan';
4
2
  import { toList } from '@d-zero/readtext/list';
5
- import fm from 'front-matter';
3
+ import { readConfigFile } from '@d-zero/shared/config-reader';
6
4
  /**
7
5
  *
8
6
  * @param filePath
9
7
  */
10
8
  export async function readConfig(filePath) {
11
- const fileContent = await fs.readFile(filePath, 'utf8');
12
- const content =
13
- // @ts-ignore
14
- fm(fileContent);
9
+ const { content, baseDir } = await readConfigFile(filePath);
15
10
  const urlList = toList(content.body);
16
11
  const pairList = urlList.map((urlStr) => {
17
12
  const url = new URL(urlStr);
@@ -20,7 +15,6 @@ export async function readConfig(filePath) {
20
15
  `${content.attributes.comparisonHost}${url.pathname}${url.search}`,
21
16
  ];
22
17
  });
23
- const baseDir = path.dirname(filePath);
24
18
  const hooks = await readPageHooks(content.attributes?.hooks ?? [], baseDir);
25
19
  return {
26
20
  pairList,
package/dist/types.d.ts CHANGED
@@ -1,5 +1,5 @@
1
1
  export type { PageHook } from '@d-zero/puppeteer-screenshot';
2
- import type { Screenshot } from '@d-zero/puppeteer-screenshot';
2
+ import type { PageHook, Screenshot } from '@d-zero/puppeteer-screenshot';
3
3
  export type PageData = {
4
4
  url: string;
5
5
  screenshots: Record<string, Screenshot & {
@@ -13,7 +13,8 @@ export type Result = {
13
13
  };
14
14
  export type MediaResult = {
15
15
  image: ImageResult | null;
16
- dom: DOMResult;
16
+ dom: DOMResult | null;
17
+ text: TextResult | null;
17
18
  };
18
19
  export type ImageResult = {
19
20
  matches: number;
@@ -24,3 +25,23 @@ export type DOMResult = {
24
25
  diff: string | null;
25
26
  file: string;
26
27
  };
28
+ export type TextResult = {
29
+ matches: number;
30
+ diff: string | null;
31
+ file: string;
32
+ };
33
+ export interface ArchaeologistOptions extends AnalyzeOptions {
34
+ }
35
+ export interface AnalyzeOptions extends GeneralOptions {
36
+ readonly types?: readonly string[];
37
+ readonly selector?: string;
38
+ readonly ignore?: string;
39
+ readonly devices?: readonly string[];
40
+ }
41
+ export interface FreezeOptions extends GeneralOptions {
42
+ }
43
+ interface GeneralOptions {
44
+ readonly hooks: readonly PageHook[];
45
+ readonly limit?: number;
46
+ readonly debug?: boolean;
47
+ }
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@d-zero/archaeologist",
3
- "version": "1.1.3",
3
+ "version": "3.0.0",
4
4
  "description": "Uncover visual and HTML differences in web pages with precision",
5
5
  "author": "D-ZERO",
6
6
  "license": "MIT",
@@ -27,25 +27,30 @@
27
27
  "clean": "tsc --build --clean"
28
28
  },
29
29
  "dependencies": {
30
- "@d-zero/html-distiller": "1.0.2",
31
- "@d-zero/puppeteer-dealer": "0.3.0",
32
- "@d-zero/puppeteer-page-scan": "3.0.0",
33
- "@d-zero/puppeteer-screenshot": "3.0.0",
34
- "@d-zero/readtext": "1.1.2",
30
+ "@d-zero/cli-core": "1.1.0",
31
+ "@d-zero/fs": "0.2.0",
32
+ "@d-zero/html-distiller": "1.0.3",
33
+ "@d-zero/puppeteer-dealer": "0.5.0",
34
+ "@d-zero/puppeteer-page-scan": "4.0.1",
35
+ "@d-zero/puppeteer-screenshot": "3.1.0",
36
+ "@d-zero/readtext": "1.1.4",
37
+ "@d-zero/shared": "0.9.0",
35
38
  "ansi-colors": "4.1.3",
36
- "diff": "7.0.0",
39
+ "diff": "8.0.2",
37
40
  "front-matter": "4.0.2",
38
41
  "jimp": "1.6.0",
42
+ "kuromojin": "3.0.1",
39
43
  "minimist": "1.2.8",
40
44
  "parse-diff": "0.11.1",
41
45
  "pixelmatch": "7.1.0",
42
- "pngjs": "7.0.0"
46
+ "pngjs": "7.0.0",
47
+ "puppeteer": "24.10.1",
48
+ "strip-ansi": "7.1.0"
43
49
  },
44
50
  "devDependencies": {
45
- "@d-zero/puppeteer-page": "0.3.0",
46
- "@types/diff": "7.0.1",
51
+ "@types/diff": "8.0.0",
47
52
  "@types/pixelmatch": "5.2.6",
48
53
  "@types/pngjs": "6.0.5"
49
54
  },
50
- "gitHead": "e4fd17857e31022d121527b00fd7f009dbdb2142"
55
+ "gitHead": "04c6969564182c36ee38ef41e78130936dfa4863"
51
56
  }
@@ -1,11 +0,0 @@
1
- import type { URLPair } from './types.js';
2
- type AnalyzedUrlList = {
3
- hasAuth: boolean;
4
- hasNoSSL: boolean;
5
- };
6
- /**
7
- *
8
- * @param list
9
- */
10
- export declare function analyzeUrlList(list: readonly URLPair[]): AnalyzedUrlList;
11
- export {};
package/dist/analyze.d.ts DELETED
@@ -1,14 +0,0 @@
1
- import type { Result, URLPair } from './types.js';
2
- import type { PageHook } from '@d-zero/puppeteer-screenshot';
3
- export interface AnalyzeOptions {
4
- readonly hooks: readonly PageHook[];
5
- readonly htmlDiffOnly?: boolean;
6
- readonly limit?: number;
7
- readonly debug?: boolean;
8
- }
9
- /**
10
- *
11
- * @param list
12
- * @param options
13
- */
14
- export declare function analyze(list: readonly URLPair[], options?: AnalyzeOptions): Promise<Result[]>;
package/dist/analyze.js DELETED
@@ -1,111 +0,0 @@
1
- import { writeFile, mkdir } from 'node:fs/promises';
2
- import path from 'node:path';
3
- import { deal } from '@d-zero/puppeteer-dealer';
4
- import c from 'ansi-colors';
5
- import { analyzeUrlList } from './analize-url.js';
6
- import { diffImages } from './diff-images.js';
7
- import { diffTree } from './diff-tree.js';
8
- import { getData } from './get-data.js';
9
- import { score } from './output-utils.js';
10
- /**
11
- *
12
- * @param list
13
- * @param options
14
- */
15
- export async function analyze(list, options) {
16
- const results = [];
17
- const dir = path.resolve(process.cwd(), '.archaeologist');
18
- await mkdir(dir, { recursive: true }).catch(() => { });
19
- const urlInfo = analyzeUrlList(list);
20
- const useOldMode = urlInfo.hasAuth || urlInfo.hasNoSSL;
21
- await deal(list.map(([urlA]) => ({ id: null, url: urlA })), (_, done, total) => {
22
- return `${c.bold.magenta('🕵️ Archaeologist')} ${done}/${total}`;
23
- }, {
24
- async deal(page, _, urlA, logger, index) {
25
- const urlPair = list.find(([url]) => url === urlA);
26
- if (!urlPair) {
27
- throw new Error(`Failed to find urlPair: ${urlA}`);
28
- }
29
- const dataPair = [];
30
- for (const url of urlPair) {
31
- const data = await getData(page, url, {
32
- ...options,
33
- }, logger);
34
- dataPair.push(data);
35
- await delay(600);
36
- }
37
- const [a, b] = dataPair;
38
- if (!a || !b) {
39
- throw new Error('Failed to get screenshots');
40
- }
41
- const screenshotResult = {};
42
- const outputUrl = 'vs ' + c.gray(urlPair[1]);
43
- for (const [name, screenshotA] of Object.entries(a.screenshots)) {
44
- const screenshotB = b.screenshots[name];
45
- const sizeName = c.bgMagenta(` ${name} `);
46
- const id = `${index}_${name}`;
47
- if (!screenshotB) {
48
- throw new Error(`Failed to get screenshotB: ${id}`);
49
- }
50
- const imageDiff = await diffImages(screenshotA, screenshotB, (phase, data) => {
51
- switch (phase) {
52
- case 'create': {
53
- logger(`${sizeName} ${outputUrl} 🖼️ Create images`);
54
- break;
55
- }
56
- case 'resize': {
57
- const { width, height } = data;
58
- logger(`${sizeName} ${outputUrl} ↔️ Resize images to ${width}x${height}`);
59
- break;
60
- }
61
- case 'diff': {
62
- logger(`${sizeName} ${outputUrl} 📊 Compare images`);
63
- break;
64
- }
65
- }
66
- });
67
- let image = null;
68
- if (imageDiff) {
69
- logger(`${sizeName} ${outputUrl} 🧩 Matches ${score(imageDiff.matches, 0.9)}`);
70
- await delay(1500);
71
- await writeFile(path.resolve(dir, `${id}_a.png`), imageDiff.images.a);
72
- await writeFile(path.resolve(dir, `${id}_b.png`), imageDiff.images.b);
73
- const outFilePath = path.resolve(dir, `${id}_diff.png`);
74
- logger(`${sizeName} ${outputUrl} 📊 Save diff image to ${path.relative(dir, outFilePath)}`);
75
- await writeFile(outFilePath, imageDiff.images.diff);
76
- image = {
77
- matches: imageDiff.matches,
78
- file: outFilePath,
79
- };
80
- }
81
- const htmlDiff = diffTree(a.url, b.url, screenshotA.domTree, screenshotB.domTree);
82
- const outFilePath = path.resolve(dir, `${id}_html.diff`);
83
- await writeFile(outFilePath, htmlDiff.result, { encoding: 'utf8' });
84
- screenshotResult[name] = {
85
- image,
86
- dom: {
87
- matches: htmlDiff.matches,
88
- diff: htmlDiff.changed ? htmlDiff.result : null,
89
- file: outFilePath,
90
- },
91
- };
92
- }
93
- const result = {
94
- target: [a.url, b.url],
95
- screenshots: screenshotResult,
96
- };
97
- results.push(result);
98
- },
99
- }, {
100
- ...options,
101
- headless: useOldMode ? 'shell' : true,
102
- });
103
- return results;
104
- }
105
- /**
106
- *
107
- * @param ms
108
- */
109
- function delay(ms) {
110
- return new Promise((resolve) => setTimeout(resolve, ms));
111
- }
@@ -1,10 +0,0 @@
1
- import type { AnalyzeOptions } from './analyze.js';
2
- import type { URLPair } from './types.js';
3
- export interface ArchaeologistOptions extends AnalyzeOptions {
4
- }
5
- /**
6
- *
7
- * @param list
8
- * @param options
9
- */
10
- export declare function archaeologist(list: readonly URLPair[], options?: ArchaeologistOptions): Promise<void>;
@@ -1,23 +0,0 @@
1
- import c from 'ansi-colors';
2
- import { analyze } from './analyze.js';
3
- import { score } from './output-utils.js';
4
- /**
5
- *
6
- * @param list
7
- * @param options
8
- */
9
- export async function archaeologist(list, options) {
10
- const results = await analyze(list, options);
11
- const output = [];
12
- for (const result of results) {
13
- output.push(c.gray(`${result.target.join(' vs ')}`));
14
- for (const [sizeName, { image, dom }] of Object.entries(result.screenshots)) {
15
- if (image) {
16
- const { matches, file } = image;
17
- output.push(` ${c.bgMagenta(` ${sizeName} `)} ${score(matches, 0.9)} ${file}`);
18
- }
19
- output.push(` ${c.bgBlueBright(' HTML ')}: ${score(dom.matches, 0.995)} ${dom.file}`);
20
- }
21
- }
22
- process.stdout.write(output.join('\n') + '\n');
23
- }
package/dist/freeze.d.ts DELETED
@@ -1,13 +0,0 @@
1
- import type { PageData } from './types.js';
2
- import type { PageHook } from '@d-zero/puppeteer-screenshot';
3
- export interface FreezeOptions {
4
- readonly hooks: readonly PageHook[];
5
- readonly limit?: number;
6
- readonly debug?: boolean;
7
- }
8
- /**
9
- *
10
- * @param list
11
- * @param options
12
- */
13
- export declare function freeze(list: readonly string[], options?: FreezeOptions): Promise<PageData[]>;
package/dist/freeze.js DELETED
@@ -1,34 +0,0 @@
1
- import { mkdir } from 'node:fs/promises';
2
- import path from 'node:path';
3
- import { deal } from '@d-zero/puppeteer-dealer';
4
- import { delay } from '@d-zero/shared/delay';
5
- import c from 'ansi-colors';
6
- import { analyzeUrlList } from './analize-url.js';
7
- import { getData } from './get-data.js';
8
- /**
9
- *
10
- * @param list
11
- * @param options
12
- */
13
- export async function freeze(list, options) {
14
- const results = [];
15
- const dir = path.resolve(process.cwd(), '.archaeologist');
16
- await mkdir(dir, { recursive: true }).catch(() => { });
17
- const urlInfo = analyzeUrlList(list);
18
- const useOldMode = urlInfo.hasAuth || urlInfo.hasNoSSL;
19
- await deal(list.map((url) => ({ id: null, url })), (_, done, total) => {
20
- return `${c.bold.magenta('🕵️ Archaeologist Freeze❄️')} ${done}/${total}`;
21
- }, {
22
- async deal(page, _, url, logger) {
23
- const data = await getData(page, url, {
24
- ...options,
25
- }, logger);
26
- await delay(600);
27
- results.push(data);
28
- },
29
- }, {
30
- ...options,
31
- headless: useOldMode ? 'shell' : true,
32
- });
33
- return results;
34
- }
@@ -1,6 +0,0 @@
1
- /**
2
- *
3
- * @param matches
4
- * @param threshold
5
- */
6
- export declare function score(matches: number, threshold: number): string;
@@ -1,11 +0,0 @@
1
- import c from 'ansi-colors';
2
- /**
3
- *
4
- * @param matches
5
- * @param threshold
6
- */
7
- export function score(matches, threshold) {
8
- const color = matches > threshold ? c.green : c.red;
9
- const num = (matches * 100).toFixed(1);
10
- return c.bold(color(`${num}%`));
11
- }
File without changes
File without changes
File without changes