@yegor256/dogent 0.10.0 → 0.11.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +87 -12
- package/package.json +1 -1
- package/src/args.js +18 -3
- package/src/defaults.js +47 -0
- package/src/dogent.js +19 -3
- package/src/openai.js +8 -5
- package/src/prompt.js +0 -4
- package/src/rules/ambiguous-or.js +58 -0
- package/src/rules/conditional.js +55 -0
- package/src/rules/consistent.js +1 -1
- package/src/rules/default.js +60 -0
- package/src/rules/description-length.js +64 -0
- package/src/rules/description-voice.js +67 -0
- package/src/rules/duplicate-section.js +65 -0
- package/src/rules/emoji.js +60 -0
- package/src/rules/example-format.js +32 -0
- package/src/rules/external-link.js +57 -0
- package/src/rules/fence-language.js +55 -0
- package/src/rules/hidden-char.js +61 -0
- package/src/rules/homoglyph.js +82 -0
- package/src/rules/index.js +40 -0
- package/src/rules/inline-code.js +79 -0
- package/src/rules/jargon.js +14 -4
- package/src/rules/meta-reference.js +57 -0
- package/src/rules/placement.js +62 -0
- package/src/rules/quantifier.js +63 -0
- package/src/rules/scope.js +31 -0
- package/src/rules/transition.js +59 -0
- package/src/rules/units.js +81 -0
- package/src/rules/weak-verb.js +62 -0
- package/src/version.js +2 -2
|
@@ -0,0 +1,65 @@
|
|
|
1
|
+
/*
|
|
2
|
+
* SPDX-FileCopyrightText: Copyright (c) 2026 Yegor Bugayenko
|
|
3
|
+
* SPDX-License-Identifier: MIT
|
|
4
|
+
*/
|
|
5
|
+
|
|
6
|
+
'use strict';
|
|
7
|
+
|
|
8
|
+
const Violation = require('../violation');
|
|
9
|
+
const Region = require('../region');
|
|
10
|
+
|
|
11
|
+
/**
 * Strips ATX heading markup from a heading line and returns its title.
 *
 * Removes optional leading whitespace, the opening run of one to six
 * `#` characters, and an optional closing run of `#` characters that
 * is separated from the title by whitespace. So "## Setup",
 * "  ## Setup" and "## Setup ##" all yield "Setup", while "## C#"
 * keeps its trailing hash because nothing separates it from the title.
 *
 * @param {string} text - Heading text, with or without its `#` markers
 * @returns {string} The trimmed heading title
 */
const bare = (text) => text
  .replace(/^\s*#{1,6}\s*/u, '')
  .replace(/\s+#+\s*$/u, '')
  .trim();

/**
 * Builds the comparison key of a heading: its bare title, lower-cased,
 * with every run of whitespace collapsed into a single space.
 *
 * @param {string} text - Heading text, with or without its `#` markers
 * @returns {string} The normalized key used to detect twin headings
 */
const normalize = (text) => bare(text).toLowerCase().replace(/\s+/gu, ' ');
|
|
14
|
+
|
|
15
|
+
/**
 * DuplicateSection.
 *
 * Reports every heading whose normalized name was already used by an
 * earlier heading of the same document. The first heading with a given
 * name stays clean; the second and each later twin earns a warning at
 * its own row, column one. Names are compared through normalize(),
 * and the message quotes the heading through bare().
 *
 * The check needs no judgement, so prompt() returns an empty string.
 */
class DuplicateSection {
  constructor() {
    this.id = 'duplicate-section';
  }

  /**
   * @returns {string} Always empty: this rule contributes no prompt text
   */
  prompt() {
    return '';
  }

  /**
   * Collects the headings of the document and reports the repeated ones.
   *
   * Only the `header` handler yields data; every other fragment kind
   * contributes an empty list.
   * NOTE(review): assumes document.walk() returns the handler results
   * as one flat array in document order; confirm against the walker.
   *
   * @param {object} document - Exposes uri() and walk(handlers)
   * @returns {Violation[]} One warning per repeated heading
   */
  violations(document) {
    const uri = document.uri();
    const handlers = {
      header: (text, row) => [{text, row}],
      prose: () => [],
      snippet: () => [],
      bullets: () => [],
      frontmatter: () => []
    };
    return this.repeats(uri, document.walk(handlers));
  }

  /**
   * Turns the ordered heading list into violations for repeated names.
   *
   * @param {*} uri - Document identifier handed to each Region
   * @param {Array<{text: string, row: number}>} headers - Headings in order
   * @returns {Violation[]} Warnings for the second and later twins
   */
  repeats(uri, headers) {
    const names = new Set();
    const duplicates = [];
    headers.forEach(({text, row}) => {
      const key = normalize(text);
      if (!names.has(key)) {
        // First sighting of this name: remember it and move on.
        names.add(key);
        return;
      }
      const message = `duplicate section "${bare(text)}", give each section a distinct name`;
      duplicates.push(new Violation(this.id, 'warning', message, new Region(uri, row, 1)));
    });
    return duplicates;
  }
}
|
|
64
|
+
|
|
65
|
+
module.exports = DuplicateSection;
|
|
@@ -0,0 +1,60 @@
|
|
|
1
|
+
/*
|
|
2
|
+
* SPDX-FileCopyrightText: Copyright (c) 2026 Yegor Bugayenko
|
|
3
|
+
* SPDX-License-Identifier: MIT
|
|
4
|
+
*/
|
|
5
|
+
|
|
6
|
+
'use strict';
|
|
7
|
+
|
|
8
|
+
const Violation = require('../violation');
|
|
9
|
+
const Region = require('../region');
|
|
10
|
+
const mask = require('../mask');
|
|
11
|
+
|
|
12
|
+
/**
 * Emoji.
 *
 * Flags any emoji or decorative pictographic symbol that adds token
 * noise without instruction. The text is passed through mask() before
 * matching, so masked spans are not inspected.
 * NOTE(review): mask() is expected to hide inline code while keeping
 * character offsets intact; confirm against ../mask.
 *
 * Matches the Unicode Extended_Pictographic property plus the blocks
 * U+2190..U+21FF, U+2300..U+27BF and U+2B00..U+2BFF. Letters of other
 * scripts are never matched; those belong to the homoglyph rule.
 *
 * The check is standalone and deterministic, so prompt() returns an
 * empty string.
 */
class Emoji {
  constructor() {
    this.id = 'emoji';
    this.glyph = /[\p{Extended_Pictographic}\u{2190}-\u{21FF}\u{2300}-\u{27BF}\u{2B00}-\u{2BFF}]/gu;
  }

  /**
   * @returns {string} Always empty: this rule contributes no prompt text
   */
  prompt() {
    return '';
  }

  /**
   * Scans headers and prose; snippets, bullets and frontmatter yield
   * nothing.
   *
   * @param {object} document - Exposes uri() and walk(handlers)
   * @returns {Violation[]} Whatever document.walk() builds from the handlers
   */
  violations(document) {
    const uri = document.uri();
    return document.walk({
      header: (text, line) => this.scan(text, line, uri),
      prose: (text, line) => this.scan(text, line, uri),
      snippet: () => [],
      bullets: () => [],
      frontmatter: () => []
    });
  }

  /**
   * Reports every decorative character found in one fragment.
   *
   * @param {string} text - Fragment text
   * @param {number} line - Line handed to each Region
   * @param {*} uri - Document identifier handed to each Region
   * @returns {Violation[]} One warning per match, column = UTF-16 index + 1
   */
  scan(text, line, uri) {
    const masked = mask(text);
    const result = [];
    // The regex is global and shared by every scan of this instance, so
    // exec() resumes from lastIndex. Rewind it, otherwise a position left
    // over from an interrupted scan would skip matches at the start.
    this.glyph.lastIndex = 0;
    let hit = this.glyph.exec(masked);
    while (hit !== null) {
      result.push(new Violation(
        this.id,
        'warning',
        `decorative character "${hit[0]}" adds token noise, use plain text`,
        new Region(uri, line, hit.index + 1)
      ));
      hit = this.glyph.exec(masked);
    }
    return result;
  }
}
|
|
59
|
+
|
|
60
|
+
module.exports = Emoji;
|
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
/*
|
|
2
|
+
* SPDX-FileCopyrightText: Copyright (c) 2026 Yegor Bugayenko
|
|
3
|
+
* SPDX-License-Identifier: MIT
|
|
4
|
+
*/
|
|
5
|
+
|
|
6
|
+
'use strict';
|
|
7
|
+
|
|
8
|
+
/**
 * Example format.
 *
 * An example shapes the output of an agent more strongly than prose
 * does, so an example that contradicts the declared output format
 * teaches the wrong shape. This rule asks for the two to agree whenever
 * one SKILL.md both shows an example and declares a format.
 *
 * Nothing is checked locally: prompt() carries the whole instruction
 * and violations() always returns an empty list.
 */
class ExampleFormat {
  constructor() {
    this.id = 'example-format';
  }

  /**
   * @returns {string} The rule id, a colon, and the comparison task
   */
  prompt() {
    const task = 'in a SKILL.md that both shows an example and declares an output format, judge whether the example conforms to the declared format and flag any mismatch';
    return `${this.id}: ${task}`;
  }

  /**
   * @returns {Array} Always empty: this rule has no deterministic check
   */
  violations() {
    const none = [];
    return none;
  }
}
|
|
31
|
+
|
|
32
|
+
module.exports = ExampleFormat;
|
|
@@ -0,0 +1,57 @@
|
|
|
1
|
+
/*
|
|
2
|
+
* SPDX-FileCopyrightText: Copyright (c) 2026 Yegor Bugayenko
|
|
3
|
+
* SPDX-License-Identifier: MIT
|
|
4
|
+
*/
|
|
5
|
+
|
|
6
|
+
'use strict';
|
|
7
|
+
|
|
8
|
+
const Violation = require('../violation');
|
|
9
|
+
const Region = require('../region');
|
|
10
|
+
const mask = require('../mask');
|
|
11
|
+
|
|
12
|
+
/**
 * ExternalLink.
 *
 * Warns about every http:// or https:// URL found in prose, because the
 * page behind it may rot or inject hidden instructions; durable guidance
 * belongs inlined. The text goes through mask() before matching, and
 * fenced snippets are never scanned.
 * NOTE(review): mask() is expected to hide inline code spans; confirm
 * against ../mask.
 * NOTE(review): the `bullets` handler returns nothing here; whether
 * bullet items also arrive through `prose` is not visible from this
 * file, so verify against the walker.
 */
class ExternalLink {
  constructor() {
    this.id = 'external-link';
  }

  /**
   * @returns {string} The rule id followed by the judgement task
   */
  prompt() {
    return `${this.id}: judge whether an external link is load-bearing, and flag durable guidance that should be inlined instead`;
  }

  /**
   * Scans prose only; every other fragment kind yields nothing.
   *
   * @param {object} document - Exposes uri() and walk(handlers)
   * @returns {Violation[]} Whatever document.walk() builds from the handlers
   */
  violations(document) {
    const uri = document.uri();
    const nothing = {
      header: () => [],
      snippet: () => [],
      bullets: () => [],
      frontmatter: () => []
    };
    return document.walk({
      header: nothing.header,
      prose: (text, line) => this.scan(text, line, uri),
      snippet: nothing.snippet,
      bullets: nothing.bullets,
      frontmatter: nothing.frontmatter
    });
  }

  /**
   * Reports every URL in one prose fragment.
   *
   * @param {string} text - Fragment text
   * @param {number} line - Line handed to each Region
   * @param {*} uri - Document identifier handed to each Region
   * @returns {Violation[]} One warning per URL, column = match index + 1
   */
  scan(text, line, uri) {
    const links = [];
    // A fresh literal per call, so no lastIndex state leaks between scans.
    const url = /(?:https?:\/\/)\S+/giu;
    const visible = mask(text);
    for (let hit = url.exec(visible); hit !== null; hit = url.exec(visible)) {
      const where = new Region(uri, line, hit.index + 1);
      links.push(new Violation(
        this.id,
        'warning',
        'external URL may rot or inject, encode durable guidance instead',
        where
      ));
    }
    return links;
  }
}
|
|
56
|
+
|
|
57
|
+
module.exports = ExternalLink;
|
|
@@ -0,0 +1,55 @@
|
|
|
1
|
+
/*
|
|
2
|
+
* SPDX-FileCopyrightText: Copyright (c) 2026 Yegor Bugayenko
|
|
3
|
+
* SPDX-License-Identifier: MIT
|
|
4
|
+
*/
|
|
5
|
+
|
|
6
|
+
'use strict';
|
|
7
|
+
|
|
8
|
+
const Violation = require('../violation');
|
|
9
|
+
const Region = require('../region');
|
|
10
|
+
|
|
11
|
+
/**
 * FenceLanguage.
 *
 * Demands that every fenced code block declare a language right after
 * its opening fence. A bare fence of backticks or tildes with no info
 * string leaves readers and tooling guessing at the snippet's syntax,
 * so it earns a warning. A fence that names a language stays clean.
 *
 * An opening fence may be longer than three markers (four backticks
 * are commonly used to wrap a block that itself contains a
 * three-backtick fence), so the whole run of markers is consumed
 * before the info string is read.
 *
 * The check is standalone and deterministic, so prompt() returns an
 * empty string.
 */
class FenceLanguage {
  constructor() {
    this.id = 'fence-language';
    // `{3,}` rather than exactly three: with a fixed length, the fourth
    // marker of a bare "````" fence was captured as the language.
    this.fence = /^\s*(?:`{3,}|~{3,})\s*(?<lang>\S*)/u;
  }

  /**
   * @returns {string} Always empty: this rule contributes no prompt text
   */
  prompt() {
    return '';
  }

  /**
   * Scans snippets only; every other fragment kind yields nothing.
   *
   * @param {object} document - Exposes uri() and walk(handlers)
   * @returns {Violation[]} Whatever document.walk() builds from the handlers
   */
  violations(document) {
    const uri = document.uri();
    return document.walk({
      header: () => [],
      prose: () => [],
      snippet: (content, row) => this.scan(content, row, uri),
      bullets: () => [],
      frontmatter: () => []
    });
  }

  /**
   * Checks the first line of one snippet for a language tag.
   *
   * A snippet whose first line is not a fence at all is reported too;
   * that matches the behavior before this change.
   * NOTE(review): assumes the walker hands over the snippet with its
   * opening fence line included; confirm against the walker.
   *
   * @param {string} content - Snippet text, opening fence line first
   * @param {number} row - Line handed to the Region
   * @param {*} uri - Document identifier handed to the Region
   * @returns {Violation[]} Empty when a language is declared, else one warning
   */
  scan(content, row, uri) {
    const [first] = content.split('\n');
    const hit = this.fence.exec(first);
    if (hit !== null && hit.groups.lang !== '') {
      return [];
    }
    return [new Violation(
      this.id,
      'warning',
      'fenced block has no language tag, declare one',
      new Region(uri, row, 1)
    )];
  }
}
|
|
54
|
+
|
|
55
|
+
module.exports = FenceLanguage;
|
|
@@ -0,0 +1,61 @@
|
|
|
1
|
+
/*
|
|
2
|
+
* SPDX-FileCopyrightText: Copyright (c) 2026 Yegor Bugayenko
|
|
3
|
+
* SPDX-License-Identifier: MIT
|
|
4
|
+
*/
|
|
5
|
+
|
|
6
|
+
'use strict';
|
|
7
|
+
|
|
8
|
+
const Violation = require('../violation');
|
|
9
|
+
const Region = require('../region');
|
|
10
|
+
|
|
11
|
+
/**
 * HiddenChar.
 *
 * Rejects invisible codepoints hiding inside headers, prose and
 * snippets. Snippets are scanned as well, because an invisible
 * character tucked into code is as dangerous as one tucked into prose.
 *
 * Matched codepoints: U+200B..U+200D and U+FEFF (zero-width),
 * U+202A..U+202E and U+2066..U+2069 (bidirectional controls),
 * U+FE00..U+FE0F and U+E0100..U+E01EF (variation selectors). Each hit
 * is named by its upper-case hex codepoint so it can be deleted.
 *
 * The check is standalone and deterministic, so prompt() returns an
 * empty string.
 */
class HiddenChar {
  constructor() {
    this.id = 'hidden-char';
    this.hidden = /[\u200B-\u200D\uFEFF\u202A-\u202E\u2066-\u2069\uFE00-\uFE0F\u{E0100}-\u{E01EF}]/gu;
  }

  /**
   * @returns {string} Always empty: this rule contributes no prompt text
   */
  prompt() {
    return '';
  }

  /**
   * Scans headers, prose and snippets; bullets and frontmatter yield
   * nothing.
   *
   * @param {object} document - Exposes uri() and walk(handlers)
   * @returns {Violation[]} Whatever document.walk() builds from the handlers
   */
  violations(document) {
    const uri = document.uri();
    const inspect = (text, line) => this.scan(text, line, uri);
    return document.walk({
      header: (text, line) => inspect(text, line),
      prose: (text, line) => inspect(text, line),
      snippet: (text, line) => inspect(text, line),
      bullets: () => [],
      frontmatter: () => []
    });
  }

  /**
   * Reports every invisible character in one fragment.
   *
   * @param {string} text - Fragment text
   * @param {number} line - Line handed to each Region
   * @param {*} uri - Document identifier handed to each Region
   * @returns {Violation[]} One error per match, column = UTF-16 index + 1
   */
  scan(text, line, uri) {
    const hits = [];
    // The regex is global and lives on the instance: rewind it first.
    this.hidden.lastIndex = 0;
    for (let hit = this.hidden.exec(text); hit !== null; hit = this.hidden.exec(text)) {
      const [char] = hit;
      // At least four hex digits, upper-case, as in "U+200B".
      const code = char.codePointAt(0).toString(16).toUpperCase().padStart(4, '0');
      const where = new Region(uri, line, hit.index + 1);
      hits.push(new Violation(
        this.id,
        'error',
        `invisible character U+${code} found, delete it`,
        where
      ));
    }
    return hits;
  }
}
|
|
60
|
+
|
|
61
|
+
module.exports = HiddenChar;
|
|
@@ -0,0 +1,82 @@
|
|
|
1
|
+
/*
|
|
2
|
+
* SPDX-FileCopyrightText: Copyright (c) 2026 Yegor Bugayenko
|
|
3
|
+
* SPDX-License-Identifier: MIT
|
|
4
|
+
*/
|
|
5
|
+
|
|
6
|
+
'use strict';
|
|
7
|
+
|
|
8
|
+
const Violation = require('../violation');
|
|
9
|
+
const Region = require('../region');
|
|
10
|
+
const mask = require('../mask');
|
|
11
|
+
|
|
12
|
+
/**
 * Homoglyph.
 *
 * Rejects mixed-script look-alike characters that masquerade as plain
 * ASCII. A whitespace-delimited token that mixes an ASCII Latin letter
 * with a character from the Greek and Coptic block (U+0370..U+03FF),
 * the Cyrillic block (U+0400..U+04FF), or the full-width Latin letters
 * (U+FF21..U+FF3A, U+FF41..U+FF5A) reads as one word yet hides a
 * foreign codepoint. Every such character is flagged at its own column.
 * A token written entirely in another script is left alone.
 *
 * The text goes through mask() first, so masked spans are not inspected.
 * NOTE(review): mask() is expected to hide inline code while keeping
 * character offsets intact; confirm against ../mask.
 *
 * The check is standalone and deterministic, so prompt() returns an
 * empty string.
 */
class Homoglyph {
  constructor() {
    this.id = 'homoglyph';
    this.latin = /[A-Za-z]/u;
    // Written with escapes so the ranges survive any re-encoding of this
    // file. The class must not contain a literal "-", or every
    // hyphenated ASCII word would be reported as mixed-script.
    this.confusable = /[\u0370-\u03FF\u0400-\u04FF\uFF21-\uFF3A\uFF41-\uFF5A]/u;
  }

  /**
   * @returns {string} Always empty: this rule contributes no prompt text
   */
  prompt() {
    return '';
  }

  /**
   * Scans headers and prose; snippets, bullets and frontmatter yield
   * nothing.
   *
   * @param {object} document - Exposes uri() and walk(handlers)
   * @returns {Violation[]} Whatever document.walk() builds from the handlers
   */
  violations(document) {
    const uri = document.uri();
    return document.walk({
      header: (text, line) => this.scan(text, line, uri),
      prose: (text, line) => this.scan(text, line, uri),
      snippet: () => [],
      bullets: () => [],
      frontmatter: () => []
    });
  }

  /**
   * Reports the confusable characters of every mixed-script token.
   *
   * @param {string} text - Fragment text
   * @param {number} line - Line handed to each Region
   * @param {*} uri - Document identifier handed to each Region
   * @returns {Violation[]} One error per confusable character
   */
  scan(text, line, uri) {
    const clean = mask(text);
    const result = [];
    const token = /\S+/gu;
    let match = token.exec(clean);
    while (match !== null) {
      const [word] = match;
      if (this.latin.test(word) && this.confusable.test(word)) {
        this.flag(word, match.index).forEach((spot) => {
          result.push(new Violation(
            this.id,
            'error',
            `mixed-script character "${spot.char}" (U+${spot.point}) found, use plain ASCII`,
            new Region(uri, line, spot.column)
          ));
        });
      }
      match = token.exec(clean);
    }
    return result;
  }

  /**
   * Lists the confusable characters of one token with their columns.
   *
   * @param {string} word - The token to inspect
   * @param {number} start - UTF-16 index of the token within its fragment
   * @returns {Array<{char: string, point: string, column: number}>}
   *  Character, upper-case hex codepoint (at least four digits), and
   *  one-based column
   */
  flag(word, start) {
    const spots = [];
    // `start` counts UTF-16 code units, so the offset inside the word
    // must as well: a character outside the BMP advances it by two.
    let offset = 0;
    for (const char of word) {
      if (this.confusable.test(char)) {
        const point = char
          .codePointAt(0)
          .toString(16)
          .toUpperCase()
          .padStart(4, '0');
        spots.push({char, point, column: start + offset + 1});
      }
      offset += char.length;
    }
    return spots;
  }
}
|
|
81
|
+
|
|
82
|
+
module.exports = Homoglyph;
|
package/src/rules/index.js
CHANGED
|
@@ -49,6 +49,26 @@ const ToolClarity = require('./tool-clarity');
|
|
|
49
49
|
const CounterExample = require('./counter-example');
|
|
50
50
|
const Rationale = require('./rationale');
|
|
51
51
|
const SelfContained = require('./self-contained');
|
|
52
|
+
const Quantifier = require('./quantifier');
|
|
53
|
+
const WeakVerb = require('./weak-verb');
|
|
54
|
+
const Default = require('./default');
|
|
55
|
+
const MetaReference = require('./meta-reference');
|
|
56
|
+
const AmbiguousOr = require('./ambiguous-or');
|
|
57
|
+
const ExternalLink = require('./external-link');
|
|
58
|
+
const Conditional = require('./conditional');
|
|
59
|
+
const Transition = require('./transition');
|
|
60
|
+
const Placement = require('./placement');
|
|
61
|
+
const InlineCode = require('./inline-code');
|
|
62
|
+
const Emoji = require('./emoji');
|
|
63
|
+
const Homoglyph = require('./homoglyph');
|
|
64
|
+
const DuplicateSection = require('./duplicate-section');
|
|
65
|
+
const DescriptionVoice = require('./description-voice');
|
|
66
|
+
const ExampleFormat = require('./example-format');
|
|
67
|
+
const DescriptionLength = require('./description-length');
|
|
68
|
+
const Scope = require('./scope');
|
|
69
|
+
const HiddenChar = require('./hidden-char');
|
|
70
|
+
const Units = require('./units');
|
|
71
|
+
const FenceLanguage = require('./fence-language');
|
|
52
72
|
|
|
53
73
|
module.exports = () => [
|
|
54
74
|
new Grouped(),
|
|
@@ -92,6 +112,26 @@ module.exports = () => [
|
|
|
92
112
|
new CounterExample(),
|
|
93
113
|
new Rationale(),
|
|
94
114
|
new SelfContained(),
|
|
115
|
+
new Quantifier(),
|
|
116
|
+
new WeakVerb(),
|
|
117
|
+
new Default(),
|
|
118
|
+
new MetaReference(),
|
|
119
|
+
new AmbiguousOr(),
|
|
120
|
+
new ExternalLink(),
|
|
121
|
+
new Conditional(),
|
|
122
|
+
new Transition(),
|
|
123
|
+
new Placement(),
|
|
124
|
+
new InlineCode(),
|
|
125
|
+
new Emoji(),
|
|
126
|
+
new Homoglyph(),
|
|
127
|
+
new DuplicateSection(),
|
|
128
|
+
new DescriptionVoice(),
|
|
129
|
+
new ExampleFormat(),
|
|
130
|
+
new DescriptionLength(),
|
|
131
|
+
new Scope(),
|
|
132
|
+
new HiddenChar(),
|
|
133
|
+
new Units(),
|
|
134
|
+
new FenceLanguage(),
|
|
95
135
|
new Unique(),
|
|
96
136
|
new Frontmatter(
|
|
97
137
|
'SKILL.md',
|
|
@@ -0,0 +1,79 @@
|
|
|
1
|
+
/*
|
|
2
|
+
* SPDX-FileCopyrightText: Copyright (c) 2026 Yegor Bugayenko
|
|
3
|
+
* SPDX-License-Identifier: MIT
|
|
4
|
+
*/
|
|
5
|
+
|
|
6
|
+
'use strict';
|
|
7
|
+
|
|
8
|
+
const Violation = require('../violation');
|
|
9
|
+
const Region = require('../region');
|
|
10
|
+
const mask = require('../mask');
|
|
11
|
+
|
|
12
|
+
// Shapes of a bare literal, in this order: a known shell command followed
// by one argument, a slashed path, a filename with a known extension,
// and a long or short CLI flag. All four are global regexes shared by
// every scan, so each scan must rewind them before use.
const PATTERNS = [
  /\b(?:npm|npx|node|git|eslint|mocha|yarn|pnpm|cd|rm|mkdir|chmod|cat|sed|grep|curl|docker)\s+[\w./-]+/gu,
  /(?<![\w/.@])[\w-]+(?:\/[\w.-]+)+/gu,
  /(?<![\w/.@])[\w-]+\.(?:js|ts|jsx|tsx|json|md|ya?ml|sh|py|rb|go|rs|toml|cfg|lock|txt|xml|html|css)\b/gu,
  /(?<![\w-])(?:--[A-Za-z][\w-]*|-[A-Za-z])(?![\w])/gu
];

/**
 * InlineCode.
 *
 * When a command, path, filename, or flag sits bare in prose, the model
 * cannot cleanly tell the literal token from the surrounding words and
 * may reword or reformat it. This check flags such a bare literal in
 * prose: a slashed path, a filename carrying a known extension, a CLI
 * flag, or a known shell command followed by an argument. The text goes
 * through mask() first, so masked spans are not inspected. A token
 * directly preceded by "@" is skipped by the path and filename
 * patterns. The prompt hands borderline literals to the AI oracle.
 * NOTE(review): mask() is expected to hide inline code while keeping
 * character offsets intact; confirm against ../mask.
 */
class InlineCode {
  constructor() {
    this.id = 'inline-code';
  }

  /**
   * @returns {string} The rule id followed by the judgement task
   */
  prompt() {
    return `${this.id}: flag a bare literal token (command, path, filename, or flag) that should be wrapped in backticks, judging borderline cases`;
  }

  /**
   * Scans prose only; every other fragment kind yields nothing.
   *
   * @param {object} document - Exposes uri() and walk(handlers)
   * @returns {Violation[]} Whatever document.walk() builds from the handlers
   */
  violations(document) {
    const uri = document.uri();
    return document.walk({
      header: () => [],
      prose: (text, line) => this.scan(text, line, uri),
      snippet: () => [],
      bullets: () => [],
      frontmatter: () => []
    });
  }

  /**
   * Reports every bare literal in one prose fragment.
   *
   * @param {string} text - Fragment text
   * @param {number} line - Line handed to each Region
   * @param {*} uri - Document identifier handed to each Region
   * @returns {Violation[]} One warning per surviving span, ordered by
   *  position, column = match index + 1
   */
  scan(text, line, uri) {
    const masked = mask(text);
    const spans = [];
    PATTERNS.forEach((pattern) => {
      // Module-level global regex: rewind it, otherwise a position left
      // over from an interrupted scan would skip matches at the start.
      pattern.lastIndex = 0;
      let hit = pattern.exec(masked);
      while (hit !== null) {
        spans.push({token: hit[0], from: hit.index, to: hit.index + hit[0].length});
        hit = pattern.exec(masked);
      }
    });
    return InlineCode.prune(spans).map((span) => new Violation(
      this.id,
      'warning',
      `literal "${span.token}" must be wrapped in backticks`,
      new Region(uri, line, span.from + 1)
    ));
  }

  /**
   * Drops every span that lies entirely inside an already kept span,
   * so one literal matched by several patterns is reported once.
   * Partially overlapping spans are both kept.
   *
   * @param {Array<{token: string, from: number, to: number}>} spans - Raw matches
   * @returns {Array<{token: string, from: number, to: number}>} Surviving
   *  spans, ordered by start, longest first on ties
   */
  static prune(spans) {
    const ordered = spans.slice().sort((one, two) => one.from - two.from || two.to - one.to);
    const kept = [];
    ordered.forEach((span) => {
      if (!kept.some((other) => span.from >= other.from && span.to <= other.to)) {
        kept.push(span);
      }
    });
    return kept;
  }
}
|
|
78
|
+
|
|
79
|
+
module.exports = InlineCode;
|
package/src/rules/jargon.js
CHANGED
|
@@ -37,12 +37,20 @@ const ALLOWLIST = new Set([
|
|
|
37
37
|
'CLAUDE'
|
|
38
38
|
]);
|
|
39
39
|
|
|
40
|
+
/**
 * Spells the acronym formed by the first letters of a gloss.
 *
 * Every run of ASCII letters counts as one word, so hyphens, digits and
 * punctuation act as separators. The first letter of each word is
 * upper-cased and the letters are joined in order.
 *
 * @param {string} gloss - Text found inside a parenthetical
 * @returns {string} The upper-case initials, or '' when no letters exist
 */
const initials = (gloss) => {
  const words = gloss.match(/[A-Za-z]+/gu);
  if (words === null) {
    return '';
  }
  let acronym = '';
  for (const word of words) {
    acronym += word.charAt(0).toUpperCase();
  }
  return acronym;
};
|
|
43
|
+
|
|
40
44
|
const defined = (masked) => {
|
|
41
45
|
const found = new Set();
|
|
42
|
-
const regex = /\b(?<acronym>[A-Z]{2,})\s*\(/gu;
|
|
46
|
+
const regex = /\b(?<acronym>[A-Z]{2,})\s*\(|\((?<gloss>[^)]+)\)/gu;
|
|
43
47
|
let hit = regex.exec(masked);
|
|
44
48
|
while (hit !== null) {
|
|
45
|
-
|
|
49
|
+
if (hit.groups.acronym) {
|
|
50
|
+
found.add(hit.groups.acronym);
|
|
51
|
+
} else {
|
|
52
|
+
found.add(initials(hit.groups.gloss));
|
|
53
|
+
}
|
|
46
54
|
hit = regex.exec(masked);
|
|
47
55
|
}
|
|
48
56
|
return found;
|
|
@@ -56,8 +64,10 @@ const undefining = (acronym, scope) => !scope.known.has(acronym) &&
|
|
|
56
64
|
*
|
|
57
65
|
* Flags an acronym that lands in prose without ever being expanded. An
|
|
58
66
|
* acronym counts as defined when the document, anywhere, follows it with
|
|
59
|
-
* a parenthetical gloss, as in "RBAC (role-based access control)",
|
|
60
|
-
*
|
|
67
|
+
* a parenthetical gloss, as in "RBAC (role-based access control)", or when
|
|
68
|
+
* a parenthetical's word initials spell it, as in "AAA pattern
|
|
69
|
+
* (Arrange-Act-Assert)", so a single expansion licenses every later
|
|
70
|
+
* mention. Well-known acronyms sit
|
|
61
71
|
* in a built-in allowlist and pass untouched. Only the first unexpanded
|
|
62
72
|
* occurrence of each acronym is reported. Its prompt hands non-acronym
|
|
63
73
|
* domain jargon, the rare nouns a reader cannot parse, to the AI oracle.
|
|
@@ -0,0 +1,57 @@
|
|
|
1
|
+
/*
|
|
2
|
+
* SPDX-FileCopyrightText: Copyright (c) 2026 Yegor Bugayenko
|
|
3
|
+
* SPDX-License-Identifier: MIT
|
|
4
|
+
*/
|
|
5
|
+
|
|
6
|
+
'use strict';
|
|
7
|
+
|
|
8
|
+
const Violation = require('../violation');
|
|
9
|
+
const Region = require('../region');
|
|
10
|
+
const mask = require('../mask');
|
|
11
|
+
|
|
12
|
+
/**
 * MetaReference.
 *
 * Flags self-referential framing of the model or the document, such as
 * "as an AI", "you are a model", "this prompt", or "these instructions".
 * Such framing narrates the setup instead of issuing a command, so it
 * adds no instruction and earns deletion. The fixed phrase list is
 * matched case-insensitively on whole words, in prose only; the prompt
 * hands framing beyond that list to the AI oracle.
 *
 * The text goes through mask() first, so masked spans are not inspected.
 * NOTE(review): mask() is expected to hide inline code while keeping
 * character offsets intact; confirm against ../mask.
 */
class MetaReference {
  constructor() {
    this.id = 'meta-reference';
    this.phrase = /\b(?:as an ai|as a language model|you are an ai|you are a model|this prompt|these instructions|this manifesto|the system prompt)\b/giu;
  }

  /**
   * @returns {string} The rule id followed by the judgement task
   */
  prompt() {
    return `${this.id}: flag self-referential framing of the model or document beyond the fixed list, and delete it`;
  }

  /**
   * Scans prose only; every other fragment kind yields nothing.
   *
   * @param {object} document - Exposes uri() and walk(handlers)
   * @returns {Violation[]} Whatever document.walk() builds from the handlers
   */
  violations(document) {
    const uri = document.uri();
    return document.walk({
      header: () => [],
      prose: (text, line) => this.scan(text, line, uri),
      snippet: () => [],
      bullets: () => [],
      frontmatter: () => []
    });
  }

  /**
   * Reports every listed phrase in one prose fragment.
   *
   * @param {string} text - Fragment text
   * @param {number} line - Line handed to each Region
   * @param {*} uri - Document identifier handed to each Region
   * @returns {Violation[]} One warning per match, quoting the phrase as
   *  written, column = match index + 1
   */
  scan(text, line, uri) {
    const masked = mask(text);
    const out = [];
    // The regex is global and shared by every scan of this instance, so
    // exec() resumes from lastIndex. Rewind it, otherwise a position left
    // over from an interrupted scan would skip matches at the start.
    this.phrase.lastIndex = 0;
    let hit = this.phrase.exec(masked);
    while (hit !== null) {
      out.push(new Violation(
        this.id,
        'warning',
        `meta self-reference "${hit[0]}" issues no command, delete it`,
        new Region(uri, line, hit.index + 1)
      ));
      hit = this.phrase.exec(masked);
    }
    return out;
  }
}
|
|
56
|
+
|
|
57
|
+
module.exports = MetaReference;
|