@nitpicker/analyze-search 0.4.4 → 0.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/lib/search-plugin.d.ts +59 -0
- package/lib/search-plugin.js +177 -0
- package/package.json +8 -8
|
@@ -0,0 +1,59 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* A search item with a custom display title.
|
|
3
|
+
* Allows decoupling the search pattern from its column header label
|
|
4
|
+
* so that report columns can have human-friendly names.
|
|
5
|
+
*/
|
|
6
|
+
type Content = {
|
|
7
|
+
/** The search pattern string (keyword or CSS selector). It also forms the column key, `"{type}:{search}"`, in the header map. */
|
|
8
|
+
search: string;
|
|
9
|
+
/** Column header label displayed in the report, used in place of the generated `"Search {type}: {term}"` label. */
|
|
10
|
+
title: string;
|
|
11
|
+
};
|
|
12
|
+
/**
|
|
13
|
+
* Plugin options for the keyword/selector search analysis.
|
|
14
|
+
*/
|
|
15
|
+
type Options = {
|
|
16
|
+
/**
|
|
17
|
+
* CSS selector to narrow the keyword search scope within each page.
|
|
18
|
+
* When omitted, the entire `documentElement` is searched; when it matches no element, the plugin returns `null` for that page.
|
|
19
|
+
* Useful for restricting searches to main content areas and
|
|
20
|
+
* ignoring headers, footers, and navigation.
|
|
21
|
+
*/
|
|
22
|
+
scope?: string;
|
|
23
|
+
/**
|
|
24
|
+
* Keywords to search for in DOM text nodes and element attributes.
|
|
25
|
+
* Each item can be a plain string or a `Content` object with a
|
|
26
|
+
* custom display title. Strings are converted to regex via
|
|
27
|
+
* `strToRegex` (supporting literal and `/pattern/flags` syntax). The reported value is the number of matches collected.
|
|
28
|
+
*/
|
|
29
|
+
keywords?: (string | Content)[];
|
|
30
|
+
/**
|
|
31
|
+
* CSS selectors to check for existence on each page (resolved against the whole document, not `scope`).
|
|
32
|
+
* The result is boolean (present or absent), not a count; only a present selector is recorded, as `true`.
|
|
33
|
+
*/
|
|
34
|
+
selectors?: (string | Content)[];
|
|
35
|
+
};
|
|
36
|
+
/**
|
|
37
|
+
* Analyze plugin that searches page DOMs for keywords and CSS selectors.
|
|
38
|
+
*
|
|
39
|
+
* Keywords are matched using `recursiveSearch()`, which traverses the
|
|
40
|
+
* DOM tree depth-first and checks both text nodes and element attributes (`<script>` and `<style>` elements are excluded); each keyword column holds the number of matches.
|
|
41
|
+
* Selectors are checked with `querySelector()` for simple existence; a selector that is found is recorded as `true`.
|
|
42
|
+
* @example
|
|
43
|
+
* ```jsonc
|
|
44
|
+
* // nitpicker.config.json
|
|
45
|
+
* {
|
|
46
|
+
* "plugins": {
|
|
47
|
+
* "analyze": {
|
|
48
|
+
* "@nitpicker/analyze-search": {
|
|
49
|
+
* "scope": "main",
|
|
50
|
+
* "keywords": ["lorem ipsum", "/\\d{3}-\\d{4}/"],
|
|
51
|
+
* "selectors": [".breadcrumb", "nav.global"]
|
|
52
|
+
* }
|
|
53
|
+
* }
|
|
54
|
+
* }
|
|
55
|
+
* }
|
|
56
|
+
* ```
|
|
57
|
+
*/
|
|
58
|
+
declare const _default: import("@nitpicker/core").PluginFactory<Options, string>;
|
|
59
|
+
export default _default;
|
|
@@ -0,0 +1,177 @@
|
|
|
1
|
+
import { strToRegex } from '@d-zero/shared/str-to-regex';
|
|
2
|
+
import { definePlugin } from '@nitpicker/core';
|
|
3
|
+
/**
 * Analyze plugin that looks for keywords and CSS selectors in each page's DOM.
 *
 * - **Keywords**: every term is compiled with `strToRegex()` and passed to
 *   `recursiveSearch()` together with the scope element. The column value is
 *   the length of the array that `recursiveSearch()` returns.
 * - **Selectors**: every term is passed to `document.querySelector()`. The
 *   column is set to `true` only when an element is found; a selector that is
 *   absent leaves no entry, and a selector that makes `querySelector()` throw
 *   is skipped.
 *
 * `options.scope` only narrows the keyword search; selectors are always
 * resolved against the whole document. When `options.scope` is set but matches
 * no element, the page handler returns `null`.
 * @example
 * ```jsonc
 * // nitpicker.config.json
 * {
 *   "plugins": {
 *     "analyze": {
 *       "@nitpicker/analyze-search": {
 *         "scope": "main",
 *         "keywords": ["lorem ipsum", "/\\d{3}-\\d{4}/"],
 *         "selectors": [".breadcrumb", "nav.global"]
 *       }
 *     }
 *   }
 * }
 * ```
 */
export default definePlugin((options) => {
    // Column key -> column label, keywords first and selectors second.
    const headers = Object.assign(
        {},
        toHeader('keyword', options.keywords),
        toHeader('selector', options.selectors),
    );
    const keywordTerms = toArray(options.keywords);
    const selectorTerms = toArray(options.selectors);

    return {
        label: 'キーワード検索',
        headers,
        eachPage({ window }) {
            const doc = window.document;

            // Pick the root of the keyword search.
            let root;
            if (options.scope) {
                root = doc.querySelector(options.scope);
            } else {
                root = doc.documentElement;
            }
            if (!root) {
                return null;
            }

            const row = {};

            for (const term of keywordTerms) {
                const hits = recursiveSearch(root, strToRegex(term));
                row[`keyword:${term}`] = { value: hits.length };
            }

            for (const term of selectorTerms) {
                let found;
                try {
                    found = doc.querySelector(term);
                } catch {
                    // querySelector() threw (e.g. unparsable selector): leave the column empty.
                    continue;
                }
                if (found) {
                    row[`selector:${term}`] = { value: true };
                }
            }

            return { page: row };
        },
    };
});
|
|
65
|
+
/**
 * Extracts the unique search terms from a mixed array of strings and Content objects.
 *
 * A plain string is its own search term. For a Content object the term is its
 * `search` property; `title` is only the column label and is never searched.
 * Using `search` also keeps the result keys (`"{type}:{term}"`) aligned with the
 * header keys, which are likewise derived from `search`.
 *
 * Deduplication ensures the same keyword/selector is not searched twice even
 * when specified in both plain-string and Content-object forms.
 * @param search - Array of keyword/selector items; may be omitted.
 * @returns Deduplicated array of search term strings, in first-seen order
 *   (empty when `search` is falsy).
 */
function toArray(search) {
    if (!search) {
        return [];
    }
    const terms = new Set();
    for (const item of search) {
        // Content objects carry the pattern in `search`, not in `title`.
        terms.add(typeof item === 'string' ? item : item.search);
    }
    return [...terms];
}
|
|
86
|
+
/**
 * Creates the column header map for one kind of search item.
 *
 * Every entry is keyed by `"{type}:{term}"`, so a keyword and a selector with
 * the same text end up in different columns (`"keyword:foo"` vs
 * `"selector:foo"`). The label is the `title` of a Content object, or a
 * generated `"Search {type}: {term}"` string for a plain string item. When two
 * items produce the same key, the later one supplies the label.
 * @param type - Kind of the items, used as the key prefix and in generated labels.
 * @param search - Search items to build headers for; may be omitted.
 * @returns Header map (empty when `search` is falsy).
 */
function toHeader(type, search) {
    if (!search) {
        return {};
    }
    const pairs = [...search].map((item) => {
        const isPlain = typeof item === 'string';
        const term = isPlain ? item : item.search;
        const label = isPlain ? `Search ${type}: ${item}` : item.title;
        return [`${type}:${term}`, label];
    });
    return Object.fromEntries(pairs);
}
|
|
111
|
+
/**
|
|
112
|
+
* Recursively searches a DOM subtree for regex matches in text nodes
|
|
113
|
+
* and element attributes.
|
|
114
|
+
*
|
|
115
|
+
* ## Traversal algorithm
|
|
116
|
+
*
|
|
117
|
+
* 1. **Depth-first children first**: For each child node, recurse before
|
|
118
|
+
* examining the current node. This ensures matches are collected in
|
|
119
|
+
* document order (deepest nodes first in each subtree).
|
|
120
|
+
*
|
|
121
|
+
* 2. **`<script>` / `<style>` exclusion**: These elements are skipped
|
|
122
|
+
* entirely (including their children) because their text content is
|
|
123
|
+
* code, not user-visible content. The check happens *after* recursion
|
|
124
|
+
* so that nested elements within `<script>` templates are still
|
|
125
|
+
* excluded via the early return.
|
|
126
|
+
*
|
|
127
|
+
* 3. **Text nodes**: `TEXT_NODE` content is tested against the regex.
|
|
128
|
+
* The parent element's tag name is recorded for context.
|
|
129
|
+
*
|
|
130
|
+
* 4. **Element attributes**: For `ELEMENT_NODE`, each attribute is tested
|
|
131
|
+
* except for structural/styling attributes (`href`, `src`, `srcset`,
|
|
132
|
+
* `id`, `class`, `style`, `d`, `data-*`). These are excluded because
|
|
133
|
+
* they contain URLs, identifiers, or CSS that would produce false
|
|
134
|
+
* positives for content-oriented keyword searches.
|
|
135
|
+
* @param el - The root node to search from.
|
|
136
|
+
* @param search - The regex pattern to match against.
|
|
137
|
+
* @returns Array of matches with the element context and matched text.
|
|
138
|
+
*/
|
|
139
|
+
function recursiveSearch(el, search) {
|
|
140
|
+
const result = [];
|
|
141
|
+
for (const child of el.childNodes) {
|
|
142
|
+
result.push(...recursiveSearch(child, search));
|
|
143
|
+
}
|
|
144
|
+
if ('tagName' in el && (el.tagName === 'SCRIPT' || el.tagName === 'STYLE')) {
|
|
145
|
+
return [];
|
|
146
|
+
}
|
|
147
|
+
if (el.nodeType === Node.TEXT_NODE) {
|
|
148
|
+
const textMatched = search.exec(el.textContent || '');
|
|
149
|
+
if (textMatched) {
|
|
150
|
+
for (const matched of textMatched) {
|
|
151
|
+
result.push({
|
|
152
|
+
el: '<' + (el.parentElement?.localName || '???') + '>',
|
|
153
|
+
text: matched,
|
|
154
|
+
});
|
|
155
|
+
}
|
|
156
|
+
}
|
|
157
|
+
}
|
|
158
|
+
if (el.nodeType === Node.ELEMENT_NODE) {
|
|
159
|
+
const _el = el;
|
|
160
|
+
for (const attr of _el.attributes) {
|
|
161
|
+
if (['href', 'src', 'srcset', 'id', 'class', 'style', 'd'].includes(attr.name) ||
|
|
162
|
+
attr.name.startsWith('data-')) {
|
|
163
|
+
continue;
|
|
164
|
+
}
|
|
165
|
+
const attrMatched = search.exec(attr.value);
|
|
166
|
+
if (attrMatched) {
|
|
167
|
+
for (const matched of attrMatched) {
|
|
168
|
+
result.push({
|
|
169
|
+
el: `${_el.localName}[${attr.localName}]`,
|
|
170
|
+
text: matched,
|
|
171
|
+
});
|
|
172
|
+
}
|
|
173
|
+
}
|
|
174
|
+
}
|
|
175
|
+
}
|
|
176
|
+
return result;
|
|
177
|
+
}
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@nitpicker/analyze-search",
|
|
3
|
-
"version": "0.4.4",
|
|
3
|
+
"version": "0.6.0",
|
|
4
4
|
"description": "Nitpicker plugin for keyword and CSS selector search",
|
|
5
5
|
"author": "D-ZERO",
|
|
6
6
|
"license": "Apache-2.0",
|
|
@@ -18,8 +18,8 @@
|
|
|
18
18
|
"type": "module",
|
|
19
19
|
"exports": {
|
|
20
20
|
".": {
|
|
21
|
-
"import": "./lib/
|
|
22
|
-
"types": "./lib/
|
|
21
|
+
"import": "./lib/search-plugin.js",
|
|
22
|
+
"types": "./lib/search-plugin.d.ts"
|
|
23
23
|
}
|
|
24
24
|
},
|
|
25
25
|
"scripts": {
|
|
@@ -27,11 +27,11 @@
|
|
|
27
27
|
"clean": "tsc --build --clean"
|
|
28
28
|
},
|
|
29
29
|
"dependencies": {
|
|
30
|
-
"@d-zero/shared": "0.20.
|
|
31
|
-
"@nitpicker/core": "0.
|
|
32
|
-
"@nitpicker/crawler": "0.
|
|
33
|
-
"@nitpicker/types": "0.
|
|
30
|
+
"@d-zero/shared": "0.20.1",
|
|
31
|
+
"@nitpicker/core": "0.6.0",
|
|
32
|
+
"@nitpicker/crawler": "0.6.0",
|
|
33
|
+
"@nitpicker/types": "0.6.0",
|
|
34
34
|
"jsdom": "28.1.0"
|
|
35
35
|
},
|
|
36
|
-
"gitHead": "
|
|
36
|
+
"gitHead": "eab407f5e4b58fa3c122001d3c034488e7f6da11"
|
|
37
37
|
}
|