@pseolint/core 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +53 -0
- package/dist/algorithms/entity-mask.d.ts +3 -0
- package/dist/algorithms/entity-mask.d.ts.map +1 -0
- package/dist/algorithms/entity-mask.js +8 -0
- package/dist/algorithms/entity-mask.js.map +1 -0
- package/dist/algorithms/entity-mask.test.d.ts +2 -0
- package/dist/algorithms/entity-mask.test.d.ts.map +1 -0
- package/dist/algorithms/entity-mask.test.js +23 -0
- package/dist/algorithms/entity-mask.test.js.map +1 -0
- package/dist/algorithms/simhash.d.ts +4 -0
- package/dist/algorithms/simhash.d.ts.map +1 -0
- package/dist/algorithms/simhash.js +64 -0
- package/dist/algorithms/simhash.js.map +1 -0
- package/dist/algorithms/simhash.test.d.ts +2 -0
- package/dist/algorithms/simhash.test.d.ts.map +1 -0
- package/dist/algorithms/simhash.test.js +23 -0
- package/dist/algorithms/simhash.test.js.map +1 -0
- package/dist/algorithms/tf-idf.d.ts +8 -0
- package/dist/algorithms/tf-idf.d.ts.map +1 -0
- package/dist/algorithms/tf-idf.js +55 -0
- package/dist/algorithms/tf-idf.js.map +1 -0
- package/dist/auditor.d.ts +3 -0
- package/dist/auditor.d.ts.map +1 -0
- package/dist/auditor.js +730 -0
- package/dist/auditor.js.map +1 -0
- package/dist/auditor.test.d.ts +2 -0
- package/dist/auditor.test.d.ts.map +1 -0
- package/dist/auditor.test.js +134 -0
- package/dist/auditor.test.js.map +1 -0
- package/dist/enrich-findings.d.ts +9 -0
- package/dist/enrich-findings.d.ts.map +1 -0
- package/dist/enrich-findings.js +436 -0
- package/dist/enrich-findings.js.map +1 -0
- package/dist/formatters/console.d.ts +6 -0
- package/dist/formatters/console.d.ts.map +1 -0
- package/dist/formatters/console.js +237 -0
- package/dist/formatters/console.js.map +1 -0
- package/dist/formatters/html.d.ts +3 -0
- package/dist/formatters/html.d.ts.map +1 -0
- package/dist/formatters/html.js +170 -0
- package/dist/formatters/html.js.map +1 -0
- package/dist/formatters/index.d.ts +6 -0
- package/dist/formatters/index.d.ts.map +1 -0
- package/dist/formatters/index.js +5 -0
- package/dist/formatters/index.js.map +1 -0
- package/dist/formatters/json.d.ts +3 -0
- package/dist/formatters/json.d.ts.map +1 -0
- package/dist/formatters/json.js +4 -0
- package/dist/formatters/json.js.map +1 -0
- package/dist/formatters/markdown.d.ts +3 -0
- package/dist/formatters/markdown.d.ts.map +1 -0
- package/dist/formatters/markdown.js +93 -0
- package/dist/formatters/markdown.js.map +1 -0
- package/dist/index.d.ts +45 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +45 -0
- package/dist/index.js.map +1 -0
- package/dist/page-classifier.d.ts +4 -0
- package/dist/page-classifier.d.ts.map +1 -0
- package/dist/page-classifier.js +133 -0
- package/dist/page-classifier.js.map +1 -0
- package/dist/parser.d.ts +3 -0
- package/dist/parser.d.ts.map +1 -0
- package/dist/parser.js +131 -0
- package/dist/parser.js.map +1 -0
- package/dist/parser.test.d.ts +2 -0
- package/dist/parser.test.d.ts.map +1 -0
- package/dist/parser.test.js +37 -0
- package/dist/parser.test.js.map +1 -0
- package/dist/renderer.d.ts +15 -0
- package/dist/renderer.d.ts.map +1 -0
- package/dist/renderer.js +124 -0
- package/dist/renderer.js.map +1 -0
- package/dist/rule-references.d.ts +2 -0
- package/dist/rule-references.d.ts.map +1 -0
- package/dist/rule-references.js +35 -0
- package/dist/rule-references.js.map +1 -0
- package/dist/rules/cannibal/keyword-collision.d.ts +3 -0
- package/dist/rules/cannibal/keyword-collision.d.ts.map +1 -0
- package/dist/rules/cannibal/keyword-collision.js +25 -0
- package/dist/rules/cannibal/keyword-collision.js.map +1 -0
- package/dist/rules/cannibal/title-overlap.d.ts +3 -0
- package/dist/rules/cannibal/title-overlap.d.ts.map +1 -0
- package/dist/rules/cannibal/title-overlap.js +43 -0
- package/dist/rules/cannibal/title-overlap.js.map +1 -0
- package/dist/rules/cannibal/url-pattern.d.ts +3 -0
- package/dist/rules/cannibal/url-pattern.d.ts.map +1 -0
- package/dist/rules/cannibal/url-pattern.js +48 -0
- package/dist/rules/cannibal/url-pattern.js.map +1 -0
- package/dist/rules/content/eeat-signals.d.ts +3 -0
- package/dist/rules/content/eeat-signals.d.ts.map +1 -0
- package/dist/rules/content/eeat-signals.js +46 -0
- package/dist/rules/content/eeat-signals.js.map +1 -0
- package/dist/rules/content/heading-uniqueness.d.ts +3 -0
- package/dist/rules/content/heading-uniqueness.d.ts.map +1 -0
- package/dist/rules/content/heading-uniqueness.js +56 -0
- package/dist/rules/content/heading-uniqueness.js.map +1 -0
- package/dist/rules/content/meta-uniqueness.d.ts +3 -0
- package/dist/rules/content/meta-uniqueness.d.ts.map +1 -0
- package/dist/rules/content/meta-uniqueness.js +28 -0
- package/dist/rules/content/meta-uniqueness.js.map +1 -0
- package/dist/rules/content/missing-author.d.ts +3 -0
- package/dist/rules/content/missing-author.d.ts.map +1 -0
- package/dist/rules/content/missing-author.js +26 -0
- package/dist/rules/content/missing-author.js.map +1 -0
- package/dist/rules/content/unique-value.d.ts +3 -0
- package/dist/rules/content/unique-value.d.ts.map +1 -0
- package/dist/rules/content/unique-value.js +26 -0
- package/dist/rules/content/unique-value.js.map +1 -0
- package/dist/rules/links/cluster-connectivity.d.ts +7 -0
- package/dist/rules/links/cluster-connectivity.d.ts.map +1 -0
- package/dist/rules/links/cluster-connectivity.js +73 -0
- package/dist/rules/links/cluster-connectivity.js.map +1 -0
- package/dist/rules/links/cluster-key.d.ts +3 -0
- package/dist/rules/links/cluster-key.d.ts.map +1 -0
- package/dist/rules/links/cluster-key.js +22 -0
- package/dist/rules/links/cluster-key.js.map +1 -0
- package/dist/rules/links/dead-ends.d.ts +3 -0
- package/dist/rules/links/dead-ends.d.ts.map +1 -0
- package/dist/rules/links/dead-ends.js +13 -0
- package/dist/rules/links/dead-ends.js.map +1 -0
- package/dist/rules/links/hub-pages.d.ts +7 -0
- package/dist/rules/links/hub-pages.d.ts.map +1 -0
- package/dist/rules/links/hub-pages.js +73 -0
- package/dist/rules/links/hub-pages.js.map +1 -0
- package/dist/rules/links/link-depth.d.ts +3 -0
- package/dist/rules/links/link-depth.d.ts.map +1 -0
- package/dist/rules/links/link-depth.js +46 -0
- package/dist/rules/links/link-depth.js.map +1 -0
- package/dist/rules/links/orphan-pages.d.ts +3 -0
- package/dist/rules/links/orphan-pages.d.ts.map +1 -0
- package/dist/rules/links/orphan-pages.js +19 -0
- package/dist/rules/links/orphan-pages.js.map +1 -0
- package/dist/rules/schema/consistency.d.ts +3 -0
- package/dist/rules/schema/consistency.d.ts.map +1 -0
- package/dist/rules/schema/consistency.js +44 -0
- package/dist/rules/schema/consistency.js.map +1 -0
- package/dist/rules/schema/json-ld-valid.d.ts +3 -0
- package/dist/rules/schema/json-ld-valid.d.ts.map +1 -0
- package/dist/rules/schema/json-ld-valid.js +47 -0
- package/dist/rules/schema/json-ld-valid.js.map +1 -0
- package/dist/rules/schema/required-fields.d.ts +3 -0
- package/dist/rules/schema/required-fields.d.ts.map +1 -0
- package/dist/rules/schema/required-fields.js +60 -0
- package/dist/rules/schema/required-fields.js.map +1 -0
- package/dist/rules/spam/boilerplate-ratio.d.ts +3 -0
- package/dist/rules/spam/boilerplate-ratio.d.ts.map +1 -0
- package/dist/rules/spam/boilerplate-ratio.js +50 -0
- package/dist/rules/spam/boilerplate-ratio.js.map +1 -0
- package/dist/rules/spam/doorway-pattern.d.ts +4 -0
- package/dist/rules/spam/doorway-pattern.d.ts.map +1 -0
- package/dist/rules/spam/doorway-pattern.js +47 -0
- package/dist/rules/spam/doorway-pattern.js.map +1 -0
- package/dist/rules/spam/entity-swap.d.ts +7 -0
- package/dist/rules/spam/entity-swap.d.ts.map +1 -0
- package/dist/rules/spam/entity-swap.js +26 -0
- package/dist/rules/spam/entity-swap.js.map +1 -0
- package/dist/rules/spam/near-duplicate.d.ts +11 -0
- package/dist/rules/spam/near-duplicate.d.ts.map +1 -0
- package/dist/rules/spam/near-duplicate.js +25 -0
- package/dist/rules/spam/near-duplicate.js.map +1 -0
- package/dist/rules/spam/publication-velocity.d.ts +3 -0
- package/dist/rules/spam/publication-velocity.d.ts.map +1 -0
- package/dist/rules/spam/publication-velocity.js +25 -0
- package/dist/rules/spam/publication-velocity.js.map +1 -0
- package/dist/rules/spam/template-coverage.d.ts +3 -0
- package/dist/rules/spam/template-coverage.d.ts.map +1 -0
- package/dist/rules/spam/template-coverage.js +87 -0
- package/dist/rules/spam/template-coverage.js.map +1 -0
- package/dist/rules/spam/template-diversity.d.ts +3 -0
- package/dist/rules/spam/template-diversity.d.ts.map +1 -0
- package/dist/rules/spam/template-diversity.js +19 -0
- package/dist/rules/spam/template-diversity.js.map +1 -0
- package/dist/rules/spam/thin-content.d.ts +6 -0
- package/dist/rules/spam/thin-content.d.ts.map +1 -0
- package/dist/rules/spam/thin-content.js +22 -0
- package/dist/rules/spam/thin-content.js.map +1 -0
- package/dist/rules/tech/canonical-consistency.d.ts +4 -0
- package/dist/rules/tech/canonical-consistency.d.ts.map +1 -0
- package/dist/rules/tech/canonical-consistency.js +78 -0
- package/dist/rules/tech/canonical-consistency.js.map +1 -0
- package/dist/rules/tech/canonical-noindex-conflict.d.ts +3 -0
- package/dist/rules/tech/canonical-noindex-conflict.d.ts.map +1 -0
- package/dist/rules/tech/canonical-noindex-conflict.js +27 -0
- package/dist/rules/tech/canonical-noindex-conflict.js.map +1 -0
- package/dist/rules/tech/hreflang-consistency.d.ts +3 -0
- package/dist/rules/tech/hreflang-consistency.d.ts.map +1 -0
- package/dist/rules/tech/hreflang-consistency.js +99 -0
- package/dist/rules/tech/hreflang-consistency.js.map +1 -0
- package/dist/rules/tech/og-completeness.d.ts +3 -0
- package/dist/rules/tech/og-completeness.d.ts.map +1 -0
- package/dist/rules/tech/og-completeness.js +35 -0
- package/dist/rules/tech/og-completeness.js.map +1 -0
- package/dist/rules/tech/redirect-chain.d.ts +3 -0
- package/dist/rules/tech/redirect-chain.d.ts.map +1 -0
- package/dist/rules/tech/redirect-chain.js +20 -0
- package/dist/rules/tech/redirect-chain.js.map +1 -0
- package/dist/rules/tech/robots-noindex-conflict.d.ts +3 -0
- package/dist/rules/tech/robots-noindex-conflict.d.ts.map +1 -0
- package/dist/rules/tech/robots-noindex-conflict.js +30 -0
- package/dist/rules/tech/robots-noindex-conflict.js.map +1 -0
- package/dist/rules/tech/robots-sitemap-presence.d.ts +3 -0
- package/dist/rules/tech/robots-sitemap-presence.d.ts.map +1 -0
- package/dist/rules/tech/robots-sitemap-presence.js +61 -0
- package/dist/rules/tech/robots-sitemap-presence.js.map +1 -0
- package/dist/rules/tech/sitemap-completeness.d.ts +3 -0
- package/dist/rules/tech/sitemap-completeness.d.ts.map +1 -0
- package/dist/rules/tech/sitemap-completeness.js +40 -0
- package/dist/rules/tech/sitemap-completeness.js.map +1 -0
- package/dist/rules/tech/soft-404.d.ts +3 -0
- package/dist/rules/tech/soft-404.d.ts.map +1 -0
- package/dist/rules/tech/soft-404.js +24 -0
- package/dist/rules/tech/soft-404.js.map +1 -0
- package/dist/types.d.ts +170 -0
- package/dist/types.d.ts.map +1 -0
- package/dist/types.js +2 -0
- package/dist/types.js.map +1 -0
- package/dist/url-normalize.d.ts +10 -0
- package/dist/url-normalize.d.ts.map +1 -0
- package/dist/url-normalize.js +52 -0
- package/dist/url-normalize.js.map +1 -0
- package/package.json +46 -0
package/dist/index.js
ADDED
|
@@ -0,0 +1,45 @@
|
|
|
1
|
+
export * from "./types.js";
|
|
2
|
+
export * from "./auditor.js";
|
|
3
|
+
export * from "./parser.js";
|
|
4
|
+
export * from "./url-normalize.js";
|
|
5
|
+
export * from "./algorithms/simhash.js";
|
|
6
|
+
export * from "./algorithms/entity-mask.js";
|
|
7
|
+
export * from "./rules/spam/near-duplicate.js";
|
|
8
|
+
export * from "./rules/spam/entity-swap.js";
|
|
9
|
+
export * from "./rules/spam/thin-content.js";
|
|
10
|
+
export * from "./rules/spam/boilerplate-ratio.js";
|
|
11
|
+
export * from "./rules/spam/template-diversity.js";
|
|
12
|
+
export * from "./rules/spam/publication-velocity.js";
|
|
13
|
+
export * from "./rules/spam/doorway-pattern.js";
|
|
14
|
+
export * from "./rules/spam/template-coverage.js";
|
|
15
|
+
export * from "./rules/content/unique-value.js";
|
|
16
|
+
export * from "./rules/content/heading-uniqueness.js";
|
|
17
|
+
export * from "./rules/content/meta-uniqueness.js";
|
|
18
|
+
export * from "./rules/content/missing-author.js";
|
|
19
|
+
export * from "./rules/content/eeat-signals.js";
|
|
20
|
+
export * from "./rules/links/orphan-pages.js";
|
|
21
|
+
export * from "./rules/links/dead-ends.js";
|
|
22
|
+
export * from "./rules/links/link-depth.js";
|
|
23
|
+
export * from "./rules/links/cluster-connectivity.js";
|
|
24
|
+
export * from "./rules/links/hub-pages.js";
|
|
25
|
+
export * from "./rules/tech/canonical-consistency.js";
|
|
26
|
+
export * from "./rules/tech/canonical-noindex-conflict.js";
|
|
27
|
+
export * from "./rules/tech/robots-noindex-conflict.js";
|
|
28
|
+
export * from "./rules/tech/sitemap-completeness.js";
|
|
29
|
+
export * from "./rules/tech/redirect-chain.js";
|
|
30
|
+
export * from "./rules/tech/soft-404.js";
|
|
31
|
+
export * from "./rules/tech/og-completeness.js";
|
|
32
|
+
export * from "./rules/tech/hreflang-consistency.js";
|
|
33
|
+
export * from "./rules/schema/json-ld-valid.js";
|
|
34
|
+
export * from "./rules/schema/required-fields.js";
|
|
35
|
+
export * from "./rules/schema/consistency.js";
|
|
36
|
+
export * from "./algorithms/tf-idf.js";
|
|
37
|
+
export * from "./rules/cannibal/title-overlap.js";
|
|
38
|
+
export * from "./rules/cannibal/keyword-collision.js";
|
|
39
|
+
export * from "./rules/cannibal/url-pattern.js";
|
|
40
|
+
export * from "./rule-references.js";
|
|
41
|
+
export * from "./page-classifier.js";
|
|
42
|
+
export * from "./formatters/index.js";
|
|
43
|
+
export * from "./renderer.js";
|
|
44
|
+
export * from "./enrich-findings.js";
|
|
45
|
+
//# sourceMappingURL=index.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.js","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA,cAAc,YAAY,CAAC;AAC3B,cAAc,cAAc,CAAC;AAC7B,cAAc,aAAa,CAAC;AAC5B,cAAc,oBAAoB,CAAC;AACnC,cAAc,yBAAyB,CAAC;AACxC,cAAc,6BAA6B,CAAC;AAC5C,cAAc,gCAAgC,CAAC;AAC/C,cAAc,6BAA6B,CAAC;AAC5C,cAAc,8BAA8B,CAAC;AAC7C,cAAc,mCAAmC,CAAC;AAClD,cAAc,oCAAoC,CAAC;AACnD,cAAc,sCAAsC,CAAC;AACrD,cAAc,iCAAiC,CAAC;AAChD,cAAc,mCAAmC,CAAC;AAClD,cAAc,iCAAiC,CAAC;AAChD,cAAc,uCAAuC,CAAC;AACtD,cAAc,oCAAoC,CAAC;AACnD,cAAc,mCAAmC,CAAC;AAClD,cAAc,iCAAiC,CAAC;AAChD,cAAc,+BAA+B,CAAC;AAC9C,cAAc,4BAA4B,CAAC;AAC3C,cAAc,6BAA6B,CAAC;AAC5C,cAAc,uCAAuC,CAAC;AACtD,cAAc,4BAA4B,CAAC;AAC3C,cAAc,uCAAuC,CAAC;AACtD,cAAc,4CAA4C,CAAC;AAC3D,cAAc,yCAAyC,CAAC;AACxD,cAAc,sCAAsC,CAAC;AACrD,cAAc,gCAAgC,CAAC;AAC/C,cAAc,0BAA0B,CAAC;AACzC,cAAc,iCAAiC,CAAC;AAChD,cAAc,sCAAsC,CAAC;AACrD,cAAc,iCAAiC,CAAC;AAChD,cAAc,mCAAmC,CAAC;AAClD,cAAc,+BAA+B,CAAC;AAC9C,cAAc,wBAAwB,CAAC;AACvC,cAAc,mCAAmC,CAAC;AAClD,cAAc,uCAAuC,CAAC;AACtD,cAAc,iCAAiC,CAAC;AAChD,cAAc,sBAAsB,CAAC;AACrC,cAAc,sBAAsB,CAAC;AACrC,cAAc,uBAAuB,CAAC;AACtC,cAAc,eAAe,CAAC;AAC9B,cAAc,sBAAsB,CAAC"}
|
|
@@ -0,0 +1,4 @@
|
|
|
1
|
+
import type { PageGroupConfig, ParsedPage } from "./types.js";
|
|
2
|
+
/**
 * Buckets pages by the first configured group whose glob pattern(s) match
 * the page URL. The returned map has one entry per configured group name
 * plus a "__default" entry for pages that matched no group; when `groups`
 * is undefined or empty, every page is placed under "__default".
 */
export declare function classifyPages(pages: ParsedPage[], groups: Record<string, PageGroupConfig> | undefined): Map<string, ParsedPage[]>;
|
|
3
|
+
/**
 * Reports whether `ruleId` is enabled by the `rules` list. An undefined
 * list enables every rule and an empty list enables none; otherwise an
 * entry must equal `ruleId` or be a "prefix/*" entry whose "prefix/"
 * starts `ruleId`.
 */
export declare function isRuleEnabled(ruleId: string, rules: string[] | undefined): boolean;
|
|
4
|
+
//# sourceMappingURL=page-classifier.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"page-classifier.d.ts","sourceRoot":"","sources":["../src/page-classifier.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,eAAe,EAAE,UAAU,EAAE,MAAM,YAAY,CAAC;AAoG9D,wBAAgB,aAAa,CAC3B,KAAK,EAAE,UAAU,EAAE,EACnB,MAAM,EAAE,MAAM,CAAC,MAAM,EAAE,eAAe,CAAC,GAAG,SAAS,GAClD,GAAG,CAAC,MAAM,EAAE,UAAU,EAAE,CAAC,CA8B3B;AAED,wBAAgB,aAAa,CAAC,MAAM,EAAE,MAAM,EAAE,KAAK,EAAE,MAAM,EAAE,GAAG,SAAS,GAAG,OAAO,CAWlF"}
|
|
@@ -0,0 +1,133 @@
|
|
|
1
|
+
// Breaks a glob pattern into an ordered token list. Runs of ordinary
// characters become { kind: "literal", value } tokens, a pair of asterisks
// becomes { kind: "globstar" } and a single asterisk becomes { kind: "star" }.
function tokenize(pattern) {
    const out = [];
    let run = "";
    // Emits the pending literal run (if any) and starts a fresh one.
    const flushRun = () => {
        if (run.length > 0) {
            out.push({ kind: "literal", value: run });
            run = "";
        }
    };
    for (let idx = 0; idx < pattern.length; idx += 1) {
        const ch = pattern[idx];
        if (ch !== "*") {
            run += ch;
            continue;
        }
        flushRun();
        if (pattern[idx + 1] === "*") {
            out.push({ kind: "globstar" });
            idx += 1; // also consume the second asterisk of the pair
        }
        else {
            out.push({ kind: "star" });
        }
    }
    flushRun();
    return out;
}
|
|
31
|
+
// Glob matcher driven by sets of reachable positions instead of a RegExp,
// so there is no backtracking and no ReDoS exposure.
// Supports * (any run of non-slash chars) and ** (any run of chars,
// slashes included). Backslashes in `value` are treated as forward slashes.
// The match is anchored to the end of `value`; it may begin at index 0 or
// on any "/" character, so a pattern such as "/about" matches at a
// slash-delimited segment boundary inside a longer string.
//
// pattern: glob pattern string.
// value:   string to test (for example a URL or a file path).
// Returns true when some allowed start position lets the whole pattern
// consume `value` through to its final character.
//
// Each wildcard token is propagated in one left-to-right pass over the
// position set, so cost is bounded by value length times pattern length
// rather than growing quadratically with the number of slashes in `value`.
function matchGlob(pattern, value) {
    const normalized = value.replace(/\\/g, "/");
    const tokens = tokenize(pattern);
    const n = normalized.length;
    // pos[j] === 1 means "the tokens consumed so far can end at index j".
    // Seed index 0 and every index that sits ON a slash.
    let pos = new Uint8Array(n + 1);
    pos[0] = 1;
    for (let j = 0; j < n; j++) {
        if (normalized[j] === "/") {
            pos[j] = 1;
        }
    }
    for (const tok of tokens) {
        const next = new Uint8Array(n + 1);
        if (tok.kind === "literal") {
            const lit = tok.value;
            const ll = lit.length;
            for (let j = 0; j <= n - ll; j++) {
                if (pos[j] && normalized.startsWith(lit, j)) {
                    next[j + ll] = 1;
                }
            }
        }
        else if (tok.kind === "star") {
            // Zero or more non-slash characters: a reachable position stays
            // reachable while we move right, until a slash has been crossed.
            let reachable = false;
            for (let j = 0; j <= n; j++) {
                if (j > 0 && normalized[j - 1] === "/") {
                    reachable = false;
                }
                if (pos[j]) {
                    reachable = true;
                }
                if (reachable) {
                    next[j] = 1;
                }
            }
        }
        else {
            // globstar: everything from the first reachable position to the
            // end of the string becomes reachable.
            const first = pos.indexOf(1);
            if (first !== -1) {
                next.fill(1, first);
            }
        }
        pos = next;
    }
    return pos[n] === 1;
}
|
|
88
|
+
// Reports whether `url` satisfies at least one of the group's glob
// patterns. `config.match` may be a single pattern or an array of patterns.
function matchesGroup(url, config) {
    const { match } = config;
    const candidates = Array.isArray(match) ? match : [match];
    const accepts = (glob) => matchGlob(glob, url);
    return candidates.some(accepts);
}
|
|
92
|
+
/**
 * Buckets pages by the first configured group whose pattern matches the
 * page URL.
 *
 * @param pages  Pages to classify; each page's `url` is tested.
 * @param groups Group configurations keyed by group name, or undefined.
 * @returns Map with one entry per group name plus "__default" for pages that
 *          matched no group. With no groups configured, the map holds only
 *          "__default", containing a copy of `pages`.
 */
export function classifyPages(pages, groups) {
    const buckets = new Map();
    const entries = groups ? Object.entries(groups) : [];
    if (entries.length === 0) {
        buckets.set("__default", [...pages]);
        return buckets;
    }
    // Create every bucket up front so empty groups still appear in the result.
    for (const [groupName] of entries) {
        buckets.set(groupName, []);
    }
    buckets.set("__default", []);
    for (const page of pages) {
        // Groups are tried in declaration order; the first hit wins.
        const hit = entries.find(([, config]) => matchesGroup(page.url, config));
        const bucketName = hit ? hit[0] : "__default";
        buckets.get(bucketName).push(page);
    }
    return buckets;
}
|
|
118
|
+
/**
 * Decides whether a rule is switched on by a list of rule selectors.
 *
 * @param ruleId Rule identifier, e.g. "spam/thin-content".
 * @param rules  Selectors, or undefined. Undefined enables every rule and
 *               an empty list enables none.
 * @returns true when a selector equals `ruleId`, or is of the form
 *          "prefix/*" and `ruleId` begins with "prefix/".
 */
export function isRuleEnabled(ruleId, rules) {
    if (rules === undefined) {
        return true;
    }
    const covers = (pattern) => pattern === ruleId ||
        (pattern.endsWith("/*") && ruleId.startsWith(`${pattern.slice(0, -2)}/`));
    return rules.length !== 0 && rules.some(covers);
}
|
|
133
|
+
//# sourceMappingURL=page-classifier.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"page-classifier.js","sourceRoot":"","sources":["../src/page-classifier.ts"],"names":[],"mappings":"AAQA,0DAA0D;AAC1D,SAAS,QAAQ,CAAC,OAAe;IAC/B,MAAM,MAAM,GAAY,EAAE,CAAC;IAC3B,IAAI,CAAC,GAAG,CAAC,CAAC;IACV,IAAI,OAAO,GAAG,EAAE,CAAC;IACjB,OAAO,CAAC,GAAG,OAAO,CAAC,MAAM,EAAE,CAAC;QAC1B,IAAI,OAAO,CAAC,CAAC,CAAC,KAAK,GAAG,EAAE,CAAC;YACvB,IAAI,OAAO,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;gBACvB,MAAM,CAAC,IAAI,CAAC,EAAE,IAAI,EAAE,SAAS,EAAE,KAAK,EAAE,OAAO,EAAE,CAAC,CAAC;gBACjD,OAAO,GAAG,EAAE,CAAC;YACf,CAAC;YACD,IAAI,OAAO,CAAC,CAAC,GAAG,CAAC,CAAC,KAAK,GAAG,EAAE,CAAC;gBAC3B,MAAM,CAAC,IAAI,CAAC,EAAE,IAAI,EAAE,UAAU,EAAE,CAAC,CAAC;gBAClC,CAAC,IAAI,CAAC,CAAC;YACT,CAAC;iBAAM,CAAC;gBACN,MAAM,CAAC,IAAI,CAAC,EAAE,IAAI,EAAE,MAAM,EAAE,CAAC,CAAC;gBAC9B,CAAC,IAAI,CAAC,CAAC;YACT,CAAC;QACH,CAAC;aAAM,CAAC;YACN,OAAO,IAAI,OAAO,CAAC,CAAC,CAAC,CAAC;YACtB,CAAC,IAAI,CAAC,CAAC;QACT,CAAC;IACH,CAAC;IACD,IAAI,OAAO,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QACvB,MAAM,CAAC,IAAI,CAAC,EAAE,IAAI,EAAE,SAAS,EAAE,KAAK,EAAE,OAAO,EAAE,CAAC,CAAC;IACnD,CAAC;IACD,OAAO,MAAM,CAAC;AAChB,CAAC;AAED,iEAAiE;AACjE,uEAAuE;AACvE,yEAAyE;AACzE,sCAAsC;AACtC,SAAS,SAAS,CAAC,OAAe,EAAE,KAAa;IAC/C,MAAM,UAAU,GAAG,KAAK,CAAC,OAAO,CAAC,KAAK,EAAE,GAAG,CAAC,CAAC;IAC7C,MAAM,MAAM,GAAG,QAAQ,CAAC,OAAO,CAAC,CAAC;IAEjC,MAAM,CAAC,GAAG,UAAU,CAAC,MAAM,CAAC;IAC5B,MAAM,CAAC,GAAG,MAAM,CAAC,MAAM,CAAC;IAExB,iEAAiE;IACjE,kEAAkE;IAClE,0EAA0E;IAC1E,IAAI,GAAG,GAAG,IAAI,UAAU,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC;IAChC,GAAG,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC;IACX,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC;QAC3B,IAAI,UAAU,CAAC,CAAC,CAAC,KAAK,GAAG;YAAE,GAAG,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC;IACxC,CAAC;IAED,KAAK,IAAI,EAAE,GAAG,CAAC,EAAE,EAAE,GAAG,CAAC,EAAE,EAAE,EAAE,EAAE,CAAC;QAC9B,MAAM,GAAG,GAAG,MAAM,CAAC,EAAE,CAAC,CAAC;QACvB,MAAM,IAAI,GAAG,IAAI,UAAU,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC;QAEnC,IAAI,GAAG,CAAC,IAAI,KAAK,SAAS,EAAE,CAAC;YAC3B,MAAM,GAAG,GAAG,GAAG,CAAC,KAAK,CAAC;YACtB,MAAM,EAAE,GAAG,GAAG,CAAC,MAAM,CAAC;YACtB,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,IAAI,C
AAC,GAAG,EAAE,EAAE,CAAC,EAAE,EAAE,CAAC;gBACjC,IAAI,GAAG,CAAC,CAAC,CAAC,IAAI,UAAU,CAAC,UAAU,CAAC,GAAG,EAAE,CAAC,CAAC,EAAE,CAAC;oBAC5C,IAAI,CAAC,CAAC,GAAG,EAAE,CAAC,GAAG,CAAC,CAAC;gBACnB,CAAC;YACH,CAAC;QACH,CAAC;aAAM,IAAI,GAAG,CAAC,IAAI,KAAK,MAAM,EAAE,CAAC;YAC/B,6CAA6C;YAC7C,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,IAAI,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC;gBAC5B,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC;oBAAE,SAAS;gBACtB,IAAI,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC;gBACZ,KAAK,IAAI,CAAC,GAAG,CAAC,GAAG,CAAC,EAAE,CAAC,IAAI,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC;oBAChC,IAAI,UAAU,CAAC,CAAC,GAAG,CAAC,CAAC,KAAK,GAAG;wBAAE,MAAM;oBACrC,IAAI,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC;gBACd,CAAC;YACH,CAAC;QACH,CAAC;aAAM,CAAC;YACN,+DAA+D;YAC/D,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,IAAI,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC;gBAC5B,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC;oBAAE,SAAS;gBACtB,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,IAAI,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC;oBAC5B,IAAI,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC;gBACd,CAAC;YACH,CAAC;QACH,CAAC;QAED,GAAG,GAAG,IAAI,CAAC;IACb,CAAC;IAED,OAAO,GAAG,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC;AACtB,CAAC;AAED,SAAS,YAAY,CAAC,GAAW,EAAE,MAAuB;IACxD,MAAM,QAAQ,GAAG,KAAK,CAAC,OAAO,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC;IAC7E,OAAO,QAAQ,CAAC,IAAI,CAAC,CAAC,OAAO,EAAE,EAAE,CAAC,SAAS,CAAC,OAAO,EAAE,GAAG,CAAC,CAAC,CAAC;AAC7D,CAAC;AAED,MAAM,UAAU,aAAa,CAC3B,KAAmB,EACnB,MAAmD;IAEnD,MAAM,MAAM,GAAG,IAAI,GAAG,EAAwB,CAAC;IAE/C,IAAI,CAAC,MAAM,IAAI,MAAM,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QAChD,MAAM,CAAC,GAAG,CAAC,WAAW,EAAE,CAAC,GAAG,KAAK,CAAC,CAAC,CAAC;QACpC,OAAO,MAAM,CAAC;IAChB,CAAC;IAED,KAAK,MAAM,SAAS,IAAI,MAAM,CAAC,IAAI,CAAC,MAAM,CAAC,EAAE,CAAC;QAC5C,MAAM,CAAC,GAAG,CAAC,SAAS,EAAE,EAAE,CAAC,CAAC;IAC5B,CAAC;IACD,MAAM,CAAC,GAAG,CAAC,WAAW,EAAE,EAAE,CAAC,CAAC;IAE5B,MAAM,YAAY,GAAG,MAAM,CAAC,OAAO,CAAC,MAAM,CAAC,CAAC;IAE5C,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;QACzB,IAAI,OAAO,GAAG,KAAK,CAAC;QACpB,KAAK,MAAM,CAAC,IAAI,EAAE,MAAM,CAAC,IAAI,YAAY,EAAE,CAAC;YAC1C,IAAI,YAAY,CAAC,IAAI,CAAC,GAAG,EAAE,M
AAM,CAAC,EAAE,CAAC;gBACnC,MAAM,CAAC,GAAG,CAAC,IAAI,CAAE,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;gBAC7B,OAAO,GAAG,IAAI,CAAC;gBACf,MAAM;YACR,CAAC;QACH,CAAC;QACD,IAAI,CAAC,OAAO,EAAE,CAAC;YACb,MAAM,CAAC,GAAG,CAAC,WAAW,CAAE,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QACtC,CAAC;IACH,CAAC;IAED,OAAO,MAAM,CAAC;AAChB,CAAC;AAED,MAAM,UAAU,aAAa,CAAC,MAAc,EAAE,KAA2B;IACvE,IAAI,KAAK,KAAK,SAAS;QAAE,OAAO,IAAI,CAAC;IACrC,IAAI,KAAK,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,KAAK,CAAC;IACrC,OAAO,KAAK,CAAC,IAAI,CAAC,CAAC,OAAO,EAAE,EAAE;QAC5B,IAAI,OAAO,KAAK,MAAM;YAAE,OAAO,IAAI,CAAC;QACpC,IAAI,OAAO,CAAC,QAAQ,CAAC,IAAI,CAAC,EAAE,CAAC;YAC3B,MAAM,MAAM,GAAG,OAAO,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC;YACpC,OAAO,MAAM,CAAC,UAAU,CAAC,MAAM,GAAG,GAAG,CAAC,CAAC;QACzC,CAAC;QACD,OAAO,KAAK,CAAC;IACf,CAAC,CAAC,CAAC;AACL,CAAC"}
|
package/dist/parser.d.ts
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"parser.d.ts","sourceRoot":"","sources":["../src/parser.ts"],"names":[],"mappings":"AAEA,OAAO,KAAK,EAAE,gBAAgB,EAAE,UAAU,EAAE,MAAM,YAAY,CAAC;AAiE/D,wBAAgB,aAAa,CAAC,IAAI,EAAE,MAAM,EAAE,GAAG,EAAE,MAAM,EAAE,OAAO,CAAC,EAAE,gBAAgB,GAAG,UAAU,CA2F/F"}
|
package/dist/parser.js
ADDED
|
@@ -0,0 +1,131 @@
|
|
|
1
|
+
import { load } from "cheerio";
|
|
2
|
+
import { dirname, resolve } from "node:path";
|
|
3
|
+
import { mergeNormalizeUrlOptions, normalizeAuditUrl } from "./url-normalize.js";
|
|
4
|
+
// Collapses every run of whitespace in `input` to a single space and drops
// leading/trailing whitespace.
function normalizedText(input) {
    const words = input.split(/\s+/).filter(Boolean);
    return words.join(" ");
}
|
|
7
|
+
// Builds a "tag:count|tag:count" fingerprint of the markup. The HTML is
// lower-cased and every "<name" that is followed by whitespace or ">" is
// counted as one opening tag; entries are ordered by tag name using
// localeCompare.
function buildStructureSignature(html) {
    const tally = new Map();
    for (const [, tagName] of html.toLowerCase().matchAll(/<([a-z0-9-]+)(\s|>)/g)) {
        tally.set(tagName, (tally.get(tagName) ?? 0) + 1);
    }
    const orderedNames = [...tally.keys()].sort((left, right) => left.localeCompare(right));
    const parts = [];
    for (const tagName of orderedNames) {
        parts.push(`${tagName}:${tally.get(tagName)}`);
    }
    return parts.join("|");
}
|
|
18
|
+
// Turns one href found on a page into a normalized audit URL, or null when
// it cannot be used.
//   - Blank hrefs yield null.
//   - Absolute http(s) hrefs are normalized as-is.
//   - On an http(s) page, other hrefs are resolved against the page URL;
//     results that are not http(s), or that fail to resolve, yield null.
//   - Otherwise the page is treated as a local path and the href is resolved
//     against the page's directory.
function resolveHref(href, pageUrl, normalizeOpts) {
    const target = href.trim();
    if (target === "") {
        return null;
    }
    const isWebUrl = (candidate) => /^https?:\/\//i.test(candidate);
    if (isWebUrl(target)) {
        return normalizeAuditUrl(target, normalizeOpts);
    }
    if (!isWebUrl(pageUrl)) {
        const localTarget = resolve(dirname(pageUrl), target);
        return normalizeAuditUrl(localTarget, normalizeOpts);
    }
    try {
        const absolute = new URL(target, pageUrl);
        const isHttp = absolute.protocol === "http:" || absolute.protocol === "https:";
        return isHttp ? normalizeAuditUrl(absolute.href, normalizeOpts) : null;
    }
    catch {
        return null;
    }
}
|
|
41
|
+
/**
 * Resolves the raw hrefs collected from a page into a de-duplicated list of
 * targets, in first-seen order: http(s) URLs on web bases, file paths for a
 * local `pageUrl`. Blank hrefs, fragment-only hrefs ("#...") and
 * mailto:/tel:/javascript:/data: hrefs are skipped, as are hrefs that
 * resolveHref rejects with null.
 */
function resolveAbsoluteHrefs(hrefs, pageUrl, normalizeOpts) {
    const targets = new Set();
    for (const rawHref of hrefs) {
        const href = rawHref.trim();
        if (!href || href.startsWith("#")) {
            continue;
        }
        if (/^mailto:|^tel:|^javascript:|^data:/i.test(href)) {
            continue;
        }
        const target = resolveHref(href, pageUrl, normalizeOpts);
        if (target !== null) {
            targets.add(target);
        }
    }
    return [...targets];
}
|
|
52
|
+
/**
 * Parses an HTML document into the page record consumed by the audit rules.
 *
 * @param html    Raw HTML source of the page.
 * @param url     URL (or local path) of the page; used to resolve links.
 * @param options Optional settings; `options.normalizeUrl` is merged with the
 *                URL-normalization defaults before links are resolved.
 * @returns Record holding the title, meta description, canonical, robots
 *          meta, Open Graph fields, hreflang alternates, published date,
 *          h1/h2 headings, parsed JSON-LD blocks, author signals, resolved
 *          link targets, a structure signature, the body text, and the
 *          original HTML.
 */
export function parseHtmlPage(html, url, options) {
    const normalizeOpts = mergeNormalizeUrlOptions(options?.normalizeUrl);
    const $ = load(html);
    // Small readers shared by the field extraction below.
    const attrText = (selector, attribute) => normalizedText($(selector).attr(attribute) ?? "");
    const metaContent = (selector) => attrText(selector, "content");
    const headingTexts = (tag) => $(tag)
        .toArray()
        .map((el) => normalizedText($(el).text()))
        .filter(Boolean);
    const title = normalizedText($("title").first().text());
    const metaDescription = metaContent('meta[name="description"]');
    const canonical = attrText('link[rel="canonical"]', "href");
    const robotsMeta = metaContent('meta[name="robots"]');
    const og = {
        title: metaContent('meta[property="og:title"]'),
        description: metaContent('meta[property="og:description"]'),
        image: metaContent('meta[property="og:image"]')
    };
    // Alternates without a usable hreflang value are dropped.
    const hreflangs = [];
    $('link[rel="alternate"][hreflang]').each((_idx, el) => {
        const lang = normalizedText(String($(el).attr("hreflang") ?? ""));
        if (lang.length === 0) {
            return;
        }
        hreflangs.push({ lang, href: normalizedText(String($(el).attr("href") ?? "")) });
    });
    // First available source wins: article meta, datePublished meta, <time>.
    const rawPublished = $('meta[property="article:published_time"]').attr("content") ??
        $('meta[name="datePublished"]').attr("content") ??
        $("time[datetime]").first().attr("datetime") ??
        "";
    const publishedDate = normalizedText(rawPublished);
    const headings = { h1: headingTexts("h1"), h2: headingTexts("h2") };
    const rawHrefs = $("a[href]")
        .toArray()
        .map((el) => String($(el).attr("href") ?? ""));
    const resolvedHrefs = resolveAbsoluteHrefs(rawHrefs, url, normalizeOpts);
    // Unparseable JSON-LD is kept as a marker object so rules can report it.
    const jsonLd = $('script[type="application/ld+json"]')
        .toArray()
        .map((el) => {
        const raw = $(el).html() ?? "";
        try {
            return JSON.parse(raw);
        }
        catch {
            return { __parseError: true, __raw: raw };
        }
    });
    const authorSignals = {
        metaAuthor: metaContent('meta[name="author"]'),
        schemaAuthor: jsonLd.some((ld) => typeof ld === "object" && ld !== null && "author" in ld),
        bylineElement: $("[class*='author'], [class*='byline'], [rel='author']").length > 0,
        relAuthorLink: $('a[rel="author"], link[rel="author"]').length > 0
    };
    // Destructive step: everything above must be read before the chrome and
    // non-content elements are stripped for the body text.
    $("header, footer, nav, script, style, noscript").remove();
    const contentText = normalizedText($("body").text());
    return {
        url,
        title,
        metaDescription,
        canonical,
        robotsMeta,
        og,
        hreflangs,
        publishedDate: publishedDate || undefined,
        headings,
        jsonLd,
        authorSignals,
        resolvedHrefs,
        structureSignature: buildStructureSignature(html),
        contentText,
        html
    };
}
|
|
131
|
+
//# sourceMappingURL=parser.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"parser.js","sourceRoot":"","sources":["../src/parser.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,IAAI,EAAE,MAAM,SAAS,CAAC;AAC/B,OAAO,EAAE,OAAO,EAAE,OAAO,EAAE,MAAM,WAAW,CAAC;AAE7C,OAAO,EAAE,wBAAwB,EAAE,iBAAiB,EAAE,MAAM,oBAAoB,CAAC;AAEjF,SAAS,cAAc,CAAC,KAAa;IACnC,OAAO,KAAK,CAAC,OAAO,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC,IAAI,EAAE,CAAC;AAC3C,CAAC;AAED,SAAS,uBAAuB,CAAC,IAAY;IAC3C,MAAM,IAAI,GAAG,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,WAAW,EAAE,CAAC,QAAQ,CAAC,sBAAsB,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;IAC9F,MAAM,MAAM,GAAG,IAAI,GAAG,EAAkB,CAAC;IACzC,KAAK,MAAM,GAAG,IAAI,IAAI,EAAE,CAAC;QACvB,MAAM,CAAC,GAAG,CAAC,GAAG,EAAE,CAAC,MAAM,CAAC,GAAG,CAAC,GAAG,CAAC,IAAI,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC;IAC9C,CAAC;IAED,OAAO,KAAK,CAAC,IAAI,CAAC,MAAM,CAAC,OAAO,EAAE,CAAC;SAChC,IAAI,CAAC,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,aAAa,CAAC,CAAC,CAAC,CAAC;SACtC,GAAG,CAAC,CAAC,CAAC,GAAG,EAAE,KAAK,CAAC,EAAE,EAAE,CAAC,GAAG,GAAG,IAAI,KAAK,EAAE,CAAC;SACxC,IAAI,CAAC,GAAG,CAAC,CAAC;AACf,CAAC;AAED,SAAS,WAAW,CAClB,IAAY,EACZ,OAAe,EACf,aAA0D;IAE1D,MAAM,OAAO,GAAG,IAAI,CAAC,IAAI,EAAE,CAAC;IAC5B,IAAI,CAAC,OAAO,EAAE,CAAC;QACb,OAAO,IAAI,CAAC;IACd,CAAC;IAED,IAAI,eAAe,CAAC,IAAI,CAAC,OAAO,CAAC,EAAE,CAAC;QAClC,OAAO,iBAAiB,CAAC,OAAO,EAAE,aAAa,CAAC,CAAC;IACnD,CAAC;IACD,IAAI,eAAe,CAAC,IAAI,CAAC,OAAO,CAAC,EAAE,CAAC;QAClC,IAAI,CAAC;YACH,MAAM,QAAQ,GAAG,IAAI,GAAG,CAAC,OAAO,EAAE,OAAO,CAAC,CAAC,IAAI,CAAC;YAChD,MAAM,CAAC,GAAG,IAAI,GAAG,CAAC,QAAQ,CAAC,CAAC;YAC5B,IAAI,CAAC,CAAC,QAAQ,KAAK,OAAO,IAAI,CAAC,CAAC,QAAQ,KAAK,QAAQ,EAAE,CAAC;gBACtD,OAAO,IAAI,CAAC;YACd,CAAC;YACD,OAAO,iBAAiB,CAAC,QAAQ,EAAE,aAAa,CAAC,CAAC;QACpD,CAAC;QAAC,MAAM,CAAC;YACP,OAAO,IAAI,CAAC;QACd,CAAC;IACH,CAAC;IACD,OAAO,iBAAiB,CAAC,OAAO,CAAC,OAAO,CAAC,OAAO,CAAC,EAAE,OAAO,CAAC,EAAE,aAAa,CAAC,CAAC;AAC9E,CAAC;AAED,8EAA8E;AAC9E,SAAS,oBAAoB,CAC3B,KAAe,EACf,OAAe,EACf,aAA0D;IAE1D,MAAM,QAAQ,GAAG,KAAK;SACnB,GAAG,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,IAAI,CAAC,IAAI,EAAE,CAAC;SAC1B,MAAM,CAAC,OAAO,CAAC;SACf,MAAM,
CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,CAAC,IAAI,CAAC,UAAU,CAAC,GAAG,CAAC,CAAC;SACvC,MAAM,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,CAAC,qCAAqC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;SACnE,GAAG,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,WAAW,CAAC,IAAI,EAAE,OAAO,EAAE,aAAa,CAAC,CAAC;SACxD,MAAM,CAAC,CAAC,CAAC,EAAe,EAAE,CAAC,CAAC,KAAK,IAAI,CAAC,CAAC;IAE1C,OAAO,KAAK,CAAC,IAAI,CAAC,IAAI,GAAG,CAAC,QAAQ,CAAC,CAAC,CAAC;AACvC,CAAC;AAED,MAAM,UAAU,aAAa,CAAC,IAAY,EAAE,GAAW,EAAE,OAA0B;IACjF,MAAM,aAAa,GAAG,wBAAwB,CAAC,OAAO,EAAE,YAAY,CAAC,CAAC;IACtE,MAAM,CAAC,GAAG,IAAI,CAAC,IAAI,CAAC,CAAC;IAErB,MAAM,KAAK,GAAG,cAAc,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,KAAK,EAAE,CAAC,IAAI,EAAE,CAAC,CAAC;IACxD,MAAM,eAAe,GAAG,cAAc,CAAC,CAAC,CAAC,0BAA0B,CAAC,CAAC,IAAI,CAAC,SAAS,CAAC,IAAI,EAAE,CAAC,CAAC;IAC5F,MAAM,SAAS,GAAG,cAAc,CAAC,CAAC,CAAC,uBAAuB,CAAC,CAAC,IAAI,CAAC,MAAM,CAAC,IAAI,EAAE,CAAC,CAAC;IAChF,MAAM,UAAU,GAAG,cAAc,CAAC,CAAC,CAAC,qBAAqB,CAAC,CAAC,IAAI,CAAC,SAAS,CAAC,IAAI,EAAE,CAAC,CAAC;IAClF,MAAM,OAAO,GAAG,cAAc,CAAC,CAAC,CAAC,2BAA2B,CAAC,CAAC,IAAI,CAAC,SAAS,CAAC,IAAI,EAAE,CAAC,CAAC;IACrF,MAAM,aAAa,GAAG,cAAc,CAAC,CAAC,CAAC,iCAAiC,CAAC,CAAC,IAAI,CAAC,SAAS,CAAC,IAAI,EAAE,CAAC,CAAC;IACjG,MAAM,OAAO,GAAG,cAAc,CAAC,CAAC,CAAC,2BAA2B,CAAC,CAAC,IAAI,CAAC,SAAS,CAAC,IAAI,EAAE,CAAC,CAAC;IACrF,MAAM,SAAS,GAAG,CAAC,CAAC,iCAAiC,CAAC;SACnD,GAAG,CAAC,CAAC,IAAI,EAAE,IAAI,EAAE,EAAE,CAAC,CAAC;QACpB,IAAI,EAAE,cAAc,CAAC,MAAM,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,IAAI,CAAC,UAAU,CAAC,IAAI,EAAE,CAAC,CAAC;QAC5D,IAAI,EAAE,cAAc,CAAC,MAAM,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,IAAI,CAAC,MAAM,CAAC,IAAI,EAAE,CAAC,CAAC;KACzD,CAAC,CAAC;SACF,GAAG,EAAE;SACL,MAAM,CAAC,CAAC,KAAK,EAAE,EAAE,CAAC,KAAK,CAAC,IAAI,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC;IAC5C,MAAM,aAAa,GAAG,cAAc,CAClC,CAAC,CAAC,yCAAyC,CAAC,CAAC,IAAI,CAAC,SAAS,CAAC;QAC1D,CAAC,CAAC,4BAA4B,CAAC,CAAC,IAAI,CAAC,SAAS,CAAC;QAC/C,CAAC,CAAC,gBAAgB,CAAC,CAAC,KAAK,EAAE,CAAC,IAAI,CAAC,UAAU,CAAC;QAC5C,EAAE,CACL,CAAC;IAEF,MAAM,EAAE,GAAG,CAAC,CAAC,IAAI,CAAC;SACf,GAAG,CAAC,CAAC,IAAI,EAAE,IAAI,EAAE,EAAE,CAAC,cAAc,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC;S
ACnD,GAAG,EAAE;SACL,MAAM,CAAC,OAAO,CAAC,CAAC;IAEnB,MAAM,EAAE,GAAG,CAAC,CAAC,IAAI,CAAC;SACf,GAAG,CAAC,CAAC,IAAI,EAAE,IAAI,EAAE,EAAE,CAAC,cAAc,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC;SACnD,GAAG,EAAE;SACL,MAAM,CAAC,OAAO,CAAC,CAAC;IAEnB,MAAM,aAAa,GAAG,oBAAoB,CACxC,CAAC,CAAC,SAAS,CAAC;SACT,GAAG,CAAC,CAAC,IAAI,EAAE,IAAI,EAAE,EAAE,CAAC,MAAM,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,IAAI,CAAC,MAAM,CAAC,IAAI,EAAE,CAAC,CAAC;SACvD,GAAG,EAAE,EACR,GAAG,EACH,aAAa,CACd,CAAC;IAEF,MAAM,MAAM,GAAc,EAAE,CAAC;IAC7B,CAAC,CAAC,oCAAoC,CAAC,CAAC,IAAI,CAAC,CAAC,IAAI,EAAE,IAAI,EAAE,EAAE;QAC1D,IAAI,CAAC;YACH,MAAM,MAAM,GAAG,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,IAAI,EAAE,IAAI,EAAE,CAAC,CAAC;YAChD,MAAM,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;QACtB,CAAC;QAAC,MAAM,CAAC;YACP,MAAM,CAAC,IAAI,CAAC,EAAE,YAAY,EAAE,IAAI,EAAE,KAAK,EAAE,CAAC,CAAC,IAAI,CAAC,CAAC,IAAI,EAAE,IAAI,EAAE,EAAE,CAAC,CAAC;QACnE,CAAC;IACH,CAAC,CAAC,CAAC;IAEH,MAAM,UAAU,GAAG,cAAc,CAAC,CAAC,CAAC,qBAAqB,CAAC,CAAC,IAAI,CAAC,SAAS,CAAC,IAAI,EAAE,CAAC,CAAC;IAClF,MAAM,YAAY,GAAG,MAAM,CAAC,IAAI,CAAC,CAAC,EAAE,EAAE,EAAE;QACtC,IAAI,OAAO,EAAE,KAAK,QAAQ,IAAI,EAAE,KAAK,IAAI;YAAE,OAAO,KAAK,CAAC;QACxD,OAAO,QAAQ,IAAI,EAAE,CAAC;IACxB,CAAC,CAAC,CAAC;IACH,MAAM,aAAa,GACjB,CAAC,CAAC,sDAAsD,CAAC,CAAC,MAAM,GAAG,CAAC,CAAC;IACvE,MAAM,aAAa,GAAG,CAAC,CAAC,qCAAqC,CAAC,CAAC,MAAM,GAAG,CAAC,CAAC;IAE1E,CAAC,CAAC,8CAA8C,CAAC,CAAC,MAAM,EAAE,CAAC;IAC3D,MAAM,WAAW,GAAG,cAAc,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC;IAErD,OAAO;QACL,GAAG;QACH,KAAK;QACL,eAAe;QACf,SAAS;QACT,UAAU;QACV,EAAE,EAAE;YACF,KAAK,EAAE,OAAO;YACd,WAAW,EAAE,aAAa;YAC1B,KAAK,EAAE,OAAO;SACf;QACD,SAAS;QACT,aAAa,EAAE,aAAa,IAAI,SAAS;QACzC,QAAQ,EAAE,EAAE,EAAE,EAAE,EAAE,EAAE;QACpB,MAAM;QACN,aAAa,EAAE;YACb,UAAU;YACV,YAAY;YACZ,aAAa;YACb,aAAa;SACd;QACD,aAAa;QACb,kBAAkB,EAAE,uBAAuB,CAAC,IAAI,CAAC;QACjD,WAAW;QACX,IAAI;KACL,CAAC;AACJ,CAAC"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"parser.test.d.ts","sourceRoot":"","sources":["../src/parser.test.ts"],"names":[],"mappings":""}
|
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
import { describe, expect, test } from "vitest";
|
|
2
|
+
import { parseHtmlPage } from "./parser.js";
|
|
3
|
+
// Smoke test for the HTML parser: one representative page exercising metadata
// extraction, heading collection, the structure signature and chrome removal.
describe("parseHtmlPage", () => {
    test("extracts title, metadata, headings, and cleaned content text", () => {
        const pageUrl = "https://example.dev/templates/california-llc";
        // Fixture: <header>/<footer> hold text that must not reach contentText.
        const html = `
<html>
<head>
<title>California LLC Template</title>
<meta name="description" content="A practical filing guide." />
<link rel="canonical" href="https://example.dev/templates/california-llc" />
<meta property="article:published_time" content="2026-04-01" />
</head>
<body>
<header>Global nav should be removed</header>
<main>
<h1>California LLC Template</h1>
<h2>Filing Requirements</h2>
<p>California has a publication rule in some counties.</p>
</main>
<footer>Footer should be removed</footer>
</body>
</html>
`;
        const {
            url,
            title,
            metaDescription,
            canonical,
            publishedDate,
            headings,
            structureSignature,
            contentText,
        } = parseHtmlPage(html, pageUrl);

        // Document-level metadata.
        expect(url).toBe(pageUrl);
        expect(title).toBe("California LLC Template");
        expect(metaDescription).toBe("A practical filing guide.");
        expect(canonical).toBe(pageUrl);
        expect(publishedDate).toBe("2026-04-01");

        // Heading inventory and tag-count signature.
        expect(headings.h1).toEqual(["California LLC Template"]);
        expect(headings.h2).toEqual(["Filing Requirements"]);
        expect(structureSignature).toContain("h1:1");

        // Main content is kept, site chrome is stripped.
        expect(contentText).toContain("California has a publication rule");
        expect(contentText).not.toContain("Global nav should be removed");
    });
});
|
|
37
|
+
//# sourceMappingURL=parser.test.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"parser.test.js","sourceRoot":"","sources":["../src/parser.test.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,QAAQ,EAAE,MAAM,EAAE,IAAI,EAAE,MAAM,QAAQ,CAAC;AAChD,OAAO,EAAE,aAAa,EAAE,MAAM,aAAa,CAAC;AAE5C,QAAQ,CAAC,eAAe,EAAE,GAAG,EAAE;IAC7B,IAAI,CAAC,8DAA8D,EAAE,GAAG,EAAE;QACxE,MAAM,IAAI,GAAG;;;;;;;;;;;;;;;;;;KAkBZ,CAAC;QAEF,MAAM,MAAM,GAAG,aAAa,CAAC,IAAI,EAAE,8CAA8C,CAAC,CAAC;QAEnF,MAAM,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC,IAAI,CAAC,8CAA8C,CAAC,CAAC;QACxE,MAAM,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC,IAAI,CAAC,yBAAyB,CAAC,CAAC;QACrD,MAAM,CAAC,MAAM,CAAC,eAAe,CAAC,CAAC,IAAI,CAAC,2BAA2B,CAAC,CAAC;QACjE,MAAM,CAAC,MAAM,CAAC,SAAS,CAAC,CAAC,IAAI,CAAC,8CAA8C,CAAC,CAAC;QAC9E,MAAM,CAAC,MAAM,CAAC,aAAa,CAAC,CAAC,IAAI,CAAC,YAAY,CAAC,CAAC;QAChD,MAAM,CAAC,MAAM,CAAC,QAAQ,CAAC,EAAE,CAAC,CAAC,OAAO,CAAC,CAAC,yBAAyB,CAAC,CAAC,CAAC;QAChE,MAAM,CAAC,MAAM,CAAC,QAAQ,CAAC,EAAE,CAAC,CAAC,OAAO,CAAC,CAAC,qBAAqB,CAAC,CAAC,CAAC;QAC5D,MAAM,CAAC,MAAM,CAAC,kBAAkB,CAAC,CAAC,SAAS,CAAC,MAAM,CAAC,CAAC;QACpD,MAAM,CAAC,MAAM,CAAC,WAAW,CAAC,CAAC,SAAS,CAAC,mCAAmC,CAAC,CAAC;QAC1E,MAAM,CAAC,MAAM,CAAC,WAAW,CAAC,CAAC,GAAG,CAAC,SAAS,CAAC,8BAA8B,CAAC,CAAC;IAC3E,CAAC,CAAC,CAAC;AACL,CAAC,CAAC,CAAC"}
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
/** Settings accepted by `renderPages`. */
export interface RenderOptions {
    /**
     * WebSocket endpoint of an existing browser to connect to over CDP.
     * When omitted, the renderer falls back to the `PSEOLINT_BROWSER_WS`
     * environment variable and otherwise launches a headless Chromium.
     */
    browserWsEndpoint?: string;
    /** Upper bound on the number of pages rendered at the same time. */
    concurrency: number;
    /** Navigation timeout, in milliseconds, applied to each page load. */
    timeoutMs: number;
}
|
|
6
|
+
/** One successfully rendered page. */
interface RenderedPage {
    /** The `url` of the input entry this result belongs to. */
    url: string;
    /** Serialized HTML of the page after navigation finished. */
    html: string;
}
|
|
10
|
+
/**
 * Renders every entry of `pages` in a browser and resolves with the HTML of
 * the pages that loaded successfully.
 *
 * @param pages     Entries to render; `localPath`, when set together with
 *                  `sourceDir`, is served from a temporary local HTTP server.
 * @param sourceDir Directory to serve local files from, or `null`.
 * @param options   Browser connection, concurrency and timeout settings.
 */
export declare function renderPages(
    pages: Array<{
        url: string;
        localPath?: string;
    }>,
    sourceDir: string | null,
    options: RenderOptions
): Promise<RenderedPage[]>;
|
|
14
|
+
export {};
|
|
15
|
+
//# sourceMappingURL=renderer.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"renderer.d.ts","sourceRoot":"","sources":["../src/renderer.ts"],"names":[],"mappings":"AAIA,MAAM,WAAW,aAAa;IAC5B,iBAAiB,CAAC,EAAE,MAAM,CAAC;IAC3B,WAAW,EAAE,MAAM,CAAC;IACpB,SAAS,EAAE,MAAM,CAAC;CACnB;AAED,UAAU,YAAY;IACpB,GAAG,EAAE,MAAM,CAAC;IACZ,IAAI,EAAE,MAAM,CAAC;CACd;AA0ED,wBAAsB,WAAW,CAC/B,KAAK,EAAE,KAAK,CAAC;IAAE,GAAG,EAAE,MAAM,CAAC;IAAC,SAAS,CAAC,EAAE,MAAM,CAAA;CAAE,CAAC,EACjD,SAAS,EAAE,MAAM,GAAG,IAAI,EACxB,OAAO,EAAE,aAAa,GACrB,OAAO,CAAC,YAAY,EAAE,CAAC,CA8DzB"}
|
package/dist/renderer.js
ADDED
|
@@ -0,0 +1,124 @@
|
|
|
1
|
+
import { createServer } from "node:http";
|
|
2
|
+
import { readFile } from "node:fs/promises";
|
|
3
|
+
import { join, extname } from "node:path";
|
|
4
|
+
/**
 * Content-Type values used by the local static file server, keyed by file
 * extension (lowercase, including the leading dot). Extensions that are not
 * listed are served as "application/octet-stream" by the lookup site.
 *
 * Browsers apply strict MIME checking to module scripts and to streaming
 * WebAssembly compilation, so ".mjs" and ".wasm" have to be listed for pages
 * that use them to render correctly. The remaining additions cover other
 * asset types commonly found in static site output.
 */
const MIME_TYPES = {
    ".html": "text/html",
    ".htm": "text/html",
    ".js": "application/javascript",
    ".mjs": "application/javascript",
    ".css": "text/css",
    ".json": "application/json",
    ".map": "application/json",
    ".webmanifest": "application/manifest+json",
    ".xml": "application/xml",
    ".txt": "text/plain",
    ".wasm": "application/wasm",
    ".png": "image/png",
    ".jpg": "image/jpeg",
    ".jpeg": "image/jpeg",
    ".gif": "image/gif",
    ".webp": "image/webp",
    ".ico": "image/x-icon",
    ".svg": "image/svg+xml",
    ".woff": "font/woff",
    ".woff2": "font/woff2",
};
|
|
14
|
+
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
|
15
|
+
/**
 * Lazily imports `playwright-core`.
 *
 * @returns {Promise<any>} The `playwright-core` module namespace.
 * @throws {Error} With setup instructions when the import fails. The
 *   underlying import failure is attached as `cause` so that a broken
 *   installation can be told apart from a missing one.
 */
async function loadPlaywright() {
    try {
        // @ts-ignore -- playwright-core is an optional peer dependency
        return await import("playwright-core");
    }
    catch (err) {
        throw new Error("--render requires a browser connection.\n" +
            " Option 1: Set PSEOLINT_BROWSER_WS to your CDP endpoint (wss://...)\n" +
            " Option 2: Install playwright-core and Chromium:\n" +
            " npm install playwright-core\n" +
            " npx playwright install chromium", { cause: err });
    }
}
|
|
28
|
+
/**
 * Reports whether an endpoint URL points at the local machine.
 *
 * Recognizes the name "localhost", the IPv4 loopback address 127.0.0.1 and
 * the IPv6 loopback address ::1 (the URL parser reports the latter in its
 * bracketed, canonical form "[::1]").
 *
 * @param {string} endpoint - Absolute URL to inspect.
 * @returns {boolean} `true` for a loopback host; `false` otherwise, including
 *   when `endpoint` cannot be parsed as a URL.
 */
function isLocalhost(endpoint) {
    let hostname;
    try {
        ({ hostname } = new URL(endpoint));
    }
    catch {
        // Not a parseable URL, so it cannot be vouched for as local.
        return false;
    }
    return hostname === "localhost" || hostname === "127.0.0.1" || hostname === "[::1]";
}
|
|
37
|
+
// URL prefixes of the two WebSocket schemes.
const WS_SECURE = "wss://";
// NOTE(review): presumably assembled from two pieces so that scanners do not
// flag a literal insecure scheme -- confirm before simplifying.
const WS_PLAIN = "ws" + "://";
/**
 * Rejects browser endpoints that would carry traffic unencrypted over the
 * network.
 *
 * Accepted: any endpoint using the secure WebSocket scheme, and plain
 * WebSocket endpoints whose host is the local machine. URL schemes are
 * case-insensitive, so the scheme is compared in lowercase.
 *
 * @param {string} endpoint - WebSocket endpoint to check.
 * @returns {void}
 * @throws {Error} When the endpoint is neither secure nor local.
 */
function validateWsEndpoint(endpoint) {
    const lowered = endpoint.toLowerCase();
    if (lowered.startsWith(WS_SECURE)) {
        return;
    }
    if (lowered.startsWith(WS_PLAIN) && isLocalhost(endpoint)) {
        return;
    }
    throw new Error(`Insecure WebSocket endpoint: ${endpoint}. ` +
        `Remote endpoints must use ${WS_SECURE}. ` +
        `Unencrypted ${WS_PLAIN} is only allowed for localhost.`);
}
|
|
48
|
+
/**
 * Starts a throwaway HTTP server on 127.0.0.1 (random free port) that serves
 * files from `rootDir`.
 *
 * Request handling:
 *  - the query string is ignored when resolving the file;
 *  - a path with a malformed percent-escape is answered with 400;
 *  - a path containing a ".." segment (after decoding) is answered with 403,
 *    so requests such as "/..%2fsecret" cannot escape `rootDir`;
 *  - anything that cannot be read is answered with 404.
 *
 * @param {string} rootDir - Directory whose files are served.
 * @returns {Promise<{port: number, close: () => void}>} The bound port and a
 *   function that stops the server. The promise rejects if the server fails
 *   to start listening.
 */
async function startStaticServer(rootDir) {
    return new Promise((resolve, reject) => {
        const server = createServer(async (req, res) => {
            let urlPath;
            try {
                // Drop the query string first: it is not part of the file name.
                const rawPath = (req.url ?? "/").split("?", 1)[0];
                urlPath = decodeURIComponent(rawPath);
            }
            catch {
                // decodeURIComponent throws on malformed escapes such as "%E0%A4%A".
                res.writeHead(400);
                res.end("Bad request");
                return;
            }
            // Checked after decoding so that encoded separators ("..%2f") are caught too.
            if (urlPath.split(/[\\/]/).includes("..")) {
                res.writeHead(403);
                res.end("Forbidden");
                return;
            }
            const filePath = join(rootDir, urlPath);
            try {
                const content = await readFile(filePath);
                const ext = extname(filePath).toLowerCase();
                res.writeHead(200, { "Content-Type": MIME_TYPES[ext] ?? "application/octet-stream" });
                res.end(content);
            }
            catch {
                res.writeHead(404);
                res.end("Not found");
            }
        });
        // Without this a failed listen() would leave the promise pending forever.
        server.once("error", reject);
        server.listen(0, "127.0.0.1", () => {
            const addr = server.address();
            const port = typeof addr === "object" && addr ? addr.port : 0;
            resolve({ port, close: () => server.close() });
        });
    });
}
|
|
71
|
+
/**
 * Renders pages in a browser and returns their final HTML.
 *
 * The browser is reached over CDP when an endpoint is configured
 * (`options.browserWsEndpoint`, else the `PSEOLINT_BROWSER_WS` environment
 * variable); otherwise a headless Chromium is launched. Entries that carry a
 * `localPath` are loaded from a temporary local HTTP server rooted at
 * `sourceDir`; all other entries are loaded from their `url`.
 *
 * Pages that fail to load are skipped (best effort). Results are returned in
 * the order of `pages`. The browser and the local server are always shut
 * down, including when rendering aborts with an error.
 *
 * @param {Array<{url: string, localPath?: string}>} pages - Entries to render.
 * @param {string | null} sourceDir - Directory to serve local files from.
 * @param {{browserWsEndpoint?: string, concurrency: number, timeoutMs: number}} options
 *   Browser endpoint, number of parallel workers (at least one is always
 *   used) and per-page navigation timeout in milliseconds.
 * @returns {Promise<Array<{url: string, html: string}>>} Rendered pages.
 * @throws {Error} When playwright-core cannot be loaded, the endpoint is
 *   rejected as insecure, or the browser/server cannot be started.
 */
export async function renderPages(pages, sourceDir, options) {
    const pw = await loadPlaywright();
    const endpoint = options.browserWsEndpoint
        ?? process.env.PSEOLINT_BROWSER_WS
        ?? null;
    // eslint-disable-next-line @typescript-eslint/no-explicit-any
    let browser;
    if (endpoint) {
        validateWsEndpoint(endpoint);
        browser = await pw.chromium.connectOverCDP(endpoint);
    }
    else {
        browser = await pw.chromium.launch({ headless: true });
    }
    let server = null;
    try {
        if (sourceDir) {
            server = await startStaticServer(sourceDir);
        }
        // One slot per input page keeps the output order independent of
        // which worker happens to finish first.
        const slots = new Array(pages.length).fill(null);
        let index = 0;
        const processNext = async () => {
            while (index < pages.length) {
                const current = index;
                index += 1;
                const entry = pages[current];
                const page = await browser.newPage();
                let navigateUrl = entry.url;
                if (entry.localPath && server) {
                    // Encode each segment so characters such as "#", "?", "%"
                    // or spaces in a file name survive the trip through the URL.
                    // NOTE(review): assumes localPath is a raw filesystem path,
                    // not an already percent-encoded one -- confirm with callers.
                    const relativePath = entry.localPath
                        .replace(/\\/g, "/")
                        .split("/")
                        .map(encodeURIComponent)
                        .join("/");
                    navigateUrl = `http://127.0.0.1:${server.port}/${relativePath}`;
                }
                try {
                    await page.goto(navigateUrl, {
                        waitUntil: "networkidle",
                        timeout: options.timeoutMs,
                    });
                    const html = await page.content();
                    slots[current] = { url: entry.url, html };
                }
                catch {
                    // Skip pages that fail to render
                }
                finally {
                    await page.close();
                }
            }
        };
        // A concurrency of 0, a negative number or NaN would otherwise start
        // no workers and silently return an empty result.
        const requested = Math.max(1, Math.floor(options.concurrency) || 1);
        const workerCount = Math.min(requested, pages.length);
        const workers = Array.from({ length: workerCount }, () => processNext());
        await Promise.all(workers);
        return slots.filter((slot) => slot !== null);
    }
    finally {
        // Always release the port and the browser, even on failure.
        server?.close();
        await browser.close();
    }
}
|
|
124
|
+
//# sourceMappingURL=renderer.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"renderer.js","sourceRoot":"","sources":["../src/renderer.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,YAAY,EAAE,MAAM,WAAW,CAAC;AACzC,OAAO,EAAE,QAAQ,EAAE,MAAM,kBAAkB,CAAC;AAC5C,OAAO,EAAE,IAAI,EAAE,OAAO,EAAE,MAAM,WAAW,CAAC;AAa1C,MAAM,UAAU,GAA2B;IACzC,OAAO,EAAE,WAAW;IACpB,MAAM,EAAE,WAAW;IACnB,KAAK,EAAE,wBAAwB;IAC/B,MAAM,EAAE,UAAU;IAClB,OAAO,EAAE,kBAAkB;IAC3B,MAAM,EAAE,WAAW;IACnB,MAAM,EAAE,YAAY;IACpB,MAAM,EAAE,eAAe;CACxB,CAAC;AAEF,8DAA8D;AAC9D,KAAK,UAAU,cAAc;IAC3B,IAAI,CAAC;QACH,+DAA+D;QAC/D,OAAO,MAAM,MAAM,CAAC,iBAAiB,CAAC,CAAC;IACzC,CAAC;IAAC,MAAM,CAAC;QACP,MAAM,IAAI,KAAK,CACb,2CAA2C;YAC3C,wEAAwE;YACxE,qDAAqD;YACrD,mCAAmC;YACnC,qCAAqC,CACtC,CAAC;IACJ,CAAC;AACH,CAAC;AAED,SAAS,WAAW,CAAC,QAAgB;IACnC,IAAI,CAAC;QACH,MAAM,MAAM,GAAG,IAAI,GAAG,CAAC,QAAQ,CAAC,CAAC;QACjC,OAAO,MAAM,CAAC,QAAQ,KAAK,WAAW,IAAI,MAAM,CAAC,QAAQ,KAAK,WAAW,CAAC;IAC5E,CAAC;IAAC,MAAM,CAAC;QACP,OAAO,KAAK,CAAC;IACf,CAAC;AACH,CAAC;AAED,MAAM,SAAS,GAAG,QAAQ,CAAC;AAC3B,MAAM,QAAQ,GAAG,IAAI,GAAG,KAAK,CAAC;AAE9B,SAAS,kBAAkB,CAAC,QAAgB;IAC1C,IAAI,QAAQ,CAAC,UAAU,CAAC,SAAS,CAAC;QAAE,OAAO;IAC3C,IAAI,QAAQ,CAAC,UAAU,CAAC,QAAQ,CAAC,IAAI,WAAW,CAAC,QAAQ,CAAC;QAAE,OAAO;IACnE,MAAM,IAAI,KAAK,CACb,gCAAgC,QAAQ,IAAI;QAC5C,6BAA6B,SAAS,IAAI;QAC1C,eAAe,QAAQ,iCAAiC,CACzD,CAAC;AACJ,CAAC;AAED,KAAK,UAAU,iBAAiB,CAAC,OAAe;IAC9C,OAAO,IAAI,OAAO,CAAC,CAAC,OAAO,EAAE,EAAE;QAC7B,MAAM,MAAM,GAAG,YAAY,CAAC,KAAK,EAAE,GAAG,EAAE,GAAG,EAAE,EAAE;YAC7C,MAAM,OAAO,GAAG,kBAAkB,CAAC,GAAG,CAAC,GAAG,IAAI,GAAG,CAAC,CAAC;YACnD,MAAM,QAAQ,GAAG,IAAI,CAAC,OAAO,EAAE,OAAO,CAAC,CAAC;YACxC,IAAI,CAAC;gBACH,MAAM,OAAO,GAAG,MAAM,QAAQ,CAAC,QAAQ,CAAC,CAAC;gBACzC,MAAM,GAAG,GAAG,OAAO,CAAC,QAAQ,CAAC,CAAC;gBAC9B,GAAG,CAAC,SAAS,CAAC,GAAG,EAAE,EAAE,cAAc,EAAE,UAAU,CAAC,GAAG,CAAC,IAAI,0BAA0B,EAAE,CAAC,CAAC;gBACtF,GAAG,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC;YACnB,CAAC;YAAC,MAAM,CAAC;gBACP,GAAG,CAAC,SAAS,CAAC,GAAG,CAAC,CAAC;gBACnB,GAAG,CAAC,GAAG,CAAC,WAAW,CAAC,CAAC;YACvB,CAAC;QACH,CAAC,CAAC,CAAC;QACH,MAAM,CAAC,MAAM,CAAC,CAAC,EAAE,WAAW,EAAE,GAAG,EAAE;YACjC,MAAM,IAAI,GAAG,MAAM,
CAAC,OAAO,EAAE,CAAC;YAC9B,MAAM,IAAI,GAAG,OAAO,IAAI,KAAK,QAAQ,IAAI,IAAI,CAAC,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,CAAC;YAC9D,OAAO,CAAC,EAAE,IAAI,EAAE,KAAK,EAAE,GAAG,EAAE,CAAC,MAAM,CAAC,KAAK,EAAE,EAAE,CAAC,CAAC;QACjD,CAAC,CAAC,CAAC;IACL,CAAC,CAAC,CAAC;AACL,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,WAAW,CAC/B,KAAiD,EACjD,SAAwB,EACxB,OAAsB;IAEtB,MAAM,EAAE,GAAG,MAAM,cAAc,EAAE,CAAC;IAElC,MAAM,QAAQ,GAAG,OAAO,CAAC,iBAAiB;WACrC,OAAO,CAAC,GAAG,CAAC,mBAAmB;WAC/B,IAAI,CAAC;IAEV,8DAA8D;IAC9D,IAAI,OAAY,CAAC;IACjB,IAAI,QAAQ,EAAE,CAAC;QACb,kBAAkB,CAAC,QAAQ,CAAC,CAAC;QAC7B,OAAO,GAAG,MAAM,EAAE,CAAC,QAAQ,CAAC,cAAc,CAAC,QAAQ,CAAC,CAAC;IACvD,CAAC;SAAM,CAAC;QACN,OAAO,GAAG,MAAM,EAAE,CAAC,QAAQ,CAAC,MAAM,CAAC,EAAE,QAAQ,EAAE,IAAI,EAAE,CAAC,CAAC;IACzD,CAAC;IAED,IAAI,MAAM,GAA+C,IAAI,CAAC;IAC9D,IAAI,SAAS,EAAE,CAAC;QACd,MAAM,GAAG,MAAM,iBAAiB,CAAC,SAAS,CAAC,CAAC;IAC9C,CAAC;IAED,MAAM,OAAO,GAAmB,EAAE,CAAC;IACnC,IAAI,KAAK,GAAG,CAAC,CAAC;IAEd,KAAK,UAAU,WAAW;QACxB,OAAO,KAAK,GAAG,KAAK,CAAC,MAAM,EAAE,CAAC;YAC5B,MAAM,OAAO,GAAG,KAAK,CAAC;YACtB,KAAK,IAAI,CAAC,CAAC;YACX,MAAM,KAAK,GAAG,KAAK,CAAC,OAAO,CAAC,CAAC;YAC7B,MAAM,IAAI,GAAG,MAAM,OAAO,CAAC,OAAO,EAAE,CAAC;YAErC,IAAI,WAAW,GAAG,KAAK,CAAC,GAAG,CAAC;YAC5B,IAAI,KAAK,CAAC,SAAS,IAAI,MAAM,EAAE,CAAC;gBAC9B,MAAM,YAAY,GAAG,KAAK,CAAC,SAAS,CAAC,OAAO,CAAC,KAAK,EAAE,GAAG,CAAC,CAAC;gBACzD,WAAW,GAAG,oBAAoB,MAAM,CAAC,IAAI,IAAI,YAAY,EAAE,CAAC;YAClE,CAAC;YAED,IAAI,CAAC;gBACH,MAAM,IAAI,CAAC,IAAI,CAAC,WAAW,EAAE;oBAC3B,SAAS,EAAE,aAAa;oBACxB,OAAO,EAAE,OAAO,CAAC,SAAS;iBAC3B,CAAC,CAAC;gBACH,MAAM,IAAI,GAAG,MAAM,IAAI,CAAC,OAAO,EAAE,CAAC;gBAClC,OAAO,CAAC,IAAI,CAAC,EAAE,GAAG,EAAE,KAAK,CAAC,GAAG,EAAE,IAAI,EAAE,CAAC,CAAC;YACzC,CAAC;YAAC,MAAM,CAAC;gBACP,iCAAiC;YACnC,CAAC;oBAAS,CAAC;gBACT,MAAM,IAAI,CAAC,KAAK,EAAE,CAAC;YACrB,CAAC;QACH,CAAC;IACH,CAAC;IAED,MAAM,OAAO,GAAG,KAAK,CAAC,IAAI,CACxB,EAAE,MAAM,EAAE,IAAI,CAAC,GAAG,CAAC,OAAO,CAAC,WAAW,EAAE,KAAK,CAAC,MAAM,CAAC,EAAE,EACvD,GAAG,EAAE,CAAC,WAAW,EAAE,CACpB,CAAC;IACF,MAAM,OAAO,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC;IAE3B,MAAM,EAAE,KAAK,EAAE,CAAC;IAChB,M
AAM,OAAO,CAAC,KAAK,EAAE,CAAC;IAEtB,OAAO,OAAO,CAAC;AACjB,CAAC"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"rule-references.d.ts","sourceRoot":"","sources":["../src/rule-references.ts"],"names":[],"mappings":"AAAA,eAAO,MAAM,eAAe,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAiClD,CAAC"}
|
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
// Shared URL prefixes of the Google Search Central documentation pages that
// the rule references point at.
const SEARCH_DOCS = "https://developers.google.com/search/docs";
const SPAM_POLICIES = `${SEARCH_DOCS}/essentials/spam-policies`;
const HELPFUL_CONTENT = `${SEARCH_DOCS}/fundamentals/creating-helpful-content`;
const SNIPPET = `${SEARCH_DOCS}/appearance/snippet`;
const CRAWLABLE_LINKS = `${SEARCH_DOCS}/crawling-indexing/links-crawlable`;
const DUPLICATE_URLS = `${SEARCH_DOCS}/crawling-indexing/consolidate-duplicate-urls`;
const STRUCTURED_DATA = `${SEARCH_DOCS}/appearance/structured-data/intro-structured-data`;
/**
 * Maps each rule identifier ("<category>/<rule>") to the documentation URL
 * that is reported as the reference for that rule.
 */
export const RULE_REFERENCES = {
    // spam/*
    "spam/near-duplicate": `${SPAM_POLICIES}#scraped-content`,
    "spam/entity-swap": `${SPAM_POLICIES}#doorway-pages`,
    "spam/thin-content": `${SPAM_POLICIES}#thin-content`,
    "spam/boilerplate-ratio": `${SPAM_POLICIES}#thin-content`,
    "spam/template-diversity": `${SPAM_POLICIES}#doorway-pages`,
    "spam/publication-velocity": `${SPAM_POLICIES}#spammy-auto-generated-content`,
    "spam/doorway-pattern": `${SPAM_POLICIES}#doorway-pages`,
    "spam/template-coverage": `${SPAM_POLICIES}#doorway-pages`,
    // content/*
    "content/unique-value": HELPFUL_CONTENT,
    "content/meta-uniqueness": `${SNIPPET}#meta-descriptions`,
    "content/heading-uniqueness": `${SNIPPET}#headings`,
    "content/missing-author": `${HELPFUL_CONTENT}#eeat`,
    "content/eeat-signals": `${HELPFUL_CONTENT}#eeat`,
    // links/*
    "links/orphan-pages": CRAWLABLE_LINKS,
    "links/dead-ends": CRAWLABLE_LINKS,
    "links/cluster-connectivity": CRAWLABLE_LINKS,
    "links/hub-pages": CRAWLABLE_LINKS,
    "links/link-depth": CRAWLABLE_LINKS,
    // tech/*
    "tech/canonical-consistency": DUPLICATE_URLS,
    "tech/canonical-noindex-conflict": DUPLICATE_URLS,
    "tech/robots-noindex-conflict": `${SEARCH_DOCS}/crawling-indexing/block-indexing`,
    "tech/sitemap-completeness": `${SEARCH_DOCS}/crawling-indexing/sitemaps/overview`,
    "tech/redirect-chain": `${SEARCH_DOCS}/crawling-indexing/301-redirects`,
    "tech/soft-404": `${SEARCH_DOCS}/crawling-indexing/soft-404-errors`,
    "tech/og-completeness": SNIPPET,
    "tech/hreflang-consistency": `${SEARCH_DOCS}/specialty/international/managing-multi-regional-sites`,
    // schema/*
    "schema/json-ld-valid": STRUCTURED_DATA,
    "schema/required-fields": STRUCTURED_DATA,
    "schema/consistency": STRUCTURED_DATA,
    // cannibal/*
    "cannibal/title-overlap": DUPLICATE_URLS,
    "cannibal/keyword-collision": DUPLICATE_URLS,
    "cannibal/url-pattern": DUPLICATE_URLS,
};
|
|
35
|
+
//# sourceMappingURL=rule-references.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"rule-references.js","sourceRoot":"","sources":["../src/rule-references.ts"],"names":[],"mappings":"AAAA,MAAM,CAAC,MAAM,eAAe,GAA2B;IACrD,qBAAqB,EAAE,oFAAoF;IAC3G,kBAAkB,EAAE,kFAAkF;IACtG,mBAAmB,EAAE,iFAAiF;IACtG,wBAAwB,EAAE,iFAAiF;IAC3G,yBAAyB,EAAE,kFAAkF;IAC7G,2BAA2B,EAAE,kGAAkG;IAC/H,sBAAsB,EAAE,kFAAkF;IAC1G,wBAAwB,EAAE,kFAAkF;IAC5G,sBAAsB,EAAE,iFAAiF;IACzG,yBAAyB,EAAE,gFAAgF;IAC3G,4BAA4B,EAAE,uEAAuE;IACrG,wBAAwB,EAAE,sFAAsF;IAChH,sBAAsB,EAAE,sFAAsF;IAC9G,oBAAoB,EAAE,6EAA6E;IACnG,iBAAiB,EAAE,6EAA6E;IAChG,4BAA4B,EAAE,6EAA6E;IAC3G,iBAAiB,EAAE,6EAA6E;IAChG,kBAAkB,EAAE,6EAA6E;IACjG,4BAA4B,EAAE,wFAAwF;IACtH,iCAAiC,EAAE,wFAAwF;IAC3H,8BAA8B,EAAE,4EAA4E;IAC5G,2BAA2B,EAAE,+EAA+E;IAC5G,qBAAqB,EAAE,2EAA2E;IAClG,eAAe,EAAE,6EAA6E;IAC9F,sBAAsB,EAAE,8DAA8D;IACtF,2BAA2B,EAAE,iGAAiG;IAC9H,sBAAsB,EAAE,4FAA4F;IACpH,wBAAwB,EAAE,4FAA4F;IACtH,oBAAoB,EAAE,4FAA4F;IAClH,wBAAwB,EAAE,wFAAwF;IAClH,4BAA4B,EAAE,wFAAwF;IACtH,sBAAsB,EAAE,wFAAwF;CACjH,CAAC"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"keyword-collision.d.ts","sourceRoot":"","sources":["../../../src/rules/cannibal/keyword-collision.ts"],"names":[],"mappings":"AACA,OAAO,KAAK,EAAE,UAAU,EAAE,UAAU,EAAE,MAAM,gBAAgB,CAAC;AAE7D,wBAAgB,oBAAoB,CAClC,KAAK,EAAE,UAAU,EAAE,EACnB,SAAS,GAAE,MAAU,GACpB,UAAU,EAAE,CAwBd"}
|