@yegor256/dogent 0.9.1 → 0.11.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +104 -12
- package/package.json +3 -2
- package/src/args.js +35 -4
- package/src/defaults.js +47 -0
- package/src/dogent.js +42 -16
- package/src/openai.js +8 -5
- package/src/prompt.js +0 -4
- package/src/report.js +8 -2
- package/src/rules/ambiguous-or.js +58 -0
- package/src/rules/budget.js +50 -0
- package/src/rules/concise.js +48 -0
- package/src/rules/conditional.js +55 -0
- package/src/rules/consistent.js +1 -1
- package/src/rules/counter-example.js +60 -0
- package/src/rules/default.js +60 -0
- package/src/rules/description-length.js +64 -0
- package/src/rules/description-voice.js +67 -0
- package/src/rules/done.js +53 -0
- package/src/rules/duplicate-section.js +65 -0
- package/src/rules/emoji.js +60 -0
- package/src/rules/emphasis.js +81 -0
- package/src/rules/example-format.js +32 -0
- package/src/rules/example.js +60 -0
- package/src/rules/external-link.js +57 -0
- package/src/rules/fence-language.js +55 -0
- package/src/rules/format.js +68 -0
- package/src/rules/hidden-char.js +61 -0
- package/src/rules/homoglyph.js +82 -0
- package/src/rules/index.js +80 -0
- package/src/rules/inline-code.js +79 -0
- package/src/rules/jargon.js +115 -0
- package/src/rules/meta-reference.js +57 -0
- package/src/rules/ordered.js +57 -0
- package/src/rules/persona.js +55 -0
- package/src/rules/placement.js +62 -0
- package/src/rules/positive.js +57 -0
- package/src/rules/pseudo-heading.js +55 -0
- package/src/rules/quantifier.js +63 -0
- package/src/rules/rationale.js +54 -0
- package/src/rules/referential.js +67 -0
- package/src/rules/scope.js +31 -0
- package/src/rules/self-contained.js +66 -0
- package/src/rules/stale.js +62 -0
- package/src/rules/terms.js +77 -0
- package/src/rules/tool-clarity.js +61 -0
- package/src/rules/transition.js +59 -0
- package/src/rules/units.js +81 -0
- package/src/rules/untrusted.js +59 -0
- package/src/rules/vague.js +63 -0
- package/src/rules/weak-verb.js +62 -0
- package/src/version.js +2 -2
|
@@ -0,0 +1,57 @@
|
|
|
1
|
+
/*
|
|
2
|
+
* SPDX-FileCopyrightText: Copyright (c) 2026 Yegor Bugayenko
|
|
3
|
+
* SPDX-License-Identifier: MIT
|
|
4
|
+
*/
|
|
5
|
+
|
|
6
|
+
'use strict';
|
|
7
|
+
|
|
8
|
+
const Violation = require('../violation');
|
|
9
|
+
const Region = require('../region');
|
|
10
|
+
const mask = require('../mask');
|
|
11
|
+
|
|
12
|
+
/**
 * ExternalLink.
 *
 * Reports every bare http:// or https:// URL found in a prose fragment,
 * because the page behind it may rot or inject hidden instructions.
 * Durable guidance belongs inlined, not fetched at run time. Each
 * fragment goes through mask() before matching (presumably blanking
 * inline code spans, so quoted example URLs stay clean — confirm against
 * ../mask). Headers, snippets, bullets, and frontmatter yield nothing.
 * Distinct from dead-import, which targets local @path imports; this
 * one complements untrusted and stale.
 */
class ExternalLink {
  constructor() {
    this.id = 'external-link';
  }

  /**
   * @returns {string} Instruction for the AI oracle, prefixed by the rule id.
   */
  prompt() {
    return `${this.id}: judge whether an external link is load-bearing, and flag durable guidance that should be inlined instead`;
  }

  /**
   * @param {object} document Must offer uri() and walk(handlers).
   * @returns {*} Whatever document.walk() builds from the handler results.
   */
  violations(document) {
    const uri = document.uri();
    const nothing = () => [];
    return document.walk({
      header: nothing,
      prose: (text, line) => this.scan(text, line, uri),
      snippet: nothing,
      bullets: nothing,
      frontmatter: nothing
    });
  }

  /**
   * @param {string} text Fragment to inspect.
   * @param {number} line Line passed through to each Region.
   * @param {string} uri Document identifier passed through to each Region.
   * @returns {Violation[]} One warning per URL, at its 1-based column.
   */
  scan(text, line, uri) {
    const urls = mask(text).matchAll(/(?:https?:\/\/)\S+/giu);
    return Array.from(urls, (url) => new Violation(
      this.id,
      'warning',
      'external URL may rot or inject, encode durable guidance instead',
      new Region(uri, line, url.index + 1)
    ));
  }
}
|
|
56
|
+
|
|
57
|
+
module.exports = ExternalLink;
|
|
@@ -0,0 +1,55 @@
|
|
|
1
|
+
/*
|
|
2
|
+
* SPDX-FileCopyrightText: Copyright (c) 2026 Yegor Bugayenko
|
|
3
|
+
* SPDX-License-Identifier: MIT
|
|
4
|
+
*/
|
|
5
|
+
|
|
6
|
+
'use strict';
|
|
7
|
+
|
|
8
|
+
const Violation = require('../violation');
|
|
9
|
+
const Region = require('../region');
|
|
10
|
+
|
|
11
|
+
/**
 * FenceLanguage.
 *
 * Demands that every fenced code block declare a language right after
 * its opening fence. A bare fence of backticks or tildes with no info
 * string leaves readers and tooling guessing at the snippet's syntax,
 * so it earns a warning. A fence that names a language stays clean.
 * A fence may be three or more backticks or tildes long; the whole run
 * is consumed as the fence, so a longer bare fence is still reported.
 *
 * The check is standalone and deterministic, so prompt() returns an
 * empty string.
 */
class FenceLanguage {
  constructor() {
    this.id = 'fence-language';
    // `{3,}` rather than a literal three-character fence: otherwise the
    // fourth backtick of "````" would be captured as the language name.
    this.fence = /^\s*(?:`{3,}|~{3,})\s*(?<lang>\S*)/u;
  }

  /**
   * @returns {string} Always empty; this rule has no oracle prompt.
   */
  prompt() {
    return '';
  }

  /**
   * @param {object} document Must offer uri() and walk(handlers).
   * @returns {*} Whatever document.walk() builds from the handler results.
   */
  violations(document) {
    const uri = document.uri();
    return document.walk({
      header: () => [],
      prose: () => [],
      snippet: (content, row) => this.scan(content, row, uri),
      bullets: () => [],
      frontmatter: () => []
    });
  }

  /**
   * Inspects only the first line of the snippet, which is expected to be
   * the opening fence.
   *
   * @param {string} content Snippet text, opening fence included.
   * @param {number} row Line of the opening fence.
   * @param {string} uri Document identifier passed through to the Region.
   * @returns {Violation[]} Empty when a language is named; otherwise one
   *   warning at column 1 (also when the first line is not a fence).
   */
  scan(content, row, uri) {
    const [first] = content.split('\n');
    const hit = this.fence.exec(first);
    if (hit !== null && hit.groups.lang !== '') {
      return [];
    }
    return [new Violation(
      this.id,
      'warning',
      'fenced block has no language tag, declare one',
      new Region(uri, row, 1)
    )];
  }
}
|
|
54
|
+
|
|
55
|
+
module.exports = FenceLanguage;
|
|
@@ -0,0 +1,68 @@
|
|
|
1
|
+
/*
|
|
2
|
+
* SPDX-FileCopyrightText: Copyright (c) 2026 Yegor Bugayenko
|
|
3
|
+
* SPDX-License-Identifier: MIT
|
|
4
|
+
*/
|
|
5
|
+
|
|
6
|
+
'use strict';
|
|
7
|
+
|
|
8
|
+
const Violation = require('../violation');
|
|
9
|
+
const Region = require('../region');
|
|
10
|
+
|
|
11
|
+
/**
 * Format.
 *
 * Demands that a SKILL.md which produces output pin down that output's
 * shape. The standalone check applies only when the last path segment
 * of the document uri is exactly "SKILL.md". It collects two signals
 * while walking the document: "generates", when a prose fragment uses
 * an output verb (produce, output, return, generate, write, emit), and
 * "declared", when a heading mentions format, schema, structure, or
 * output, or when any snippet exists at all. A document that generates
 * yet declares nothing earns one warning at line 1, column 1. This is
 * distinct from the example rule: an example shows one case, a format
 * spec defines the contract. Its prompt asks the AI oracle whether the
 * declared format is concrete enough to be machine-checkable.
 */
class Format {
  constructor() {
    this.id = 'format';
  }

  /**
   * @returns {string} Instruction for the AI oracle, prefixed by the rule id.
   */
  prompt() {
    return `${this.id}: in a SKILL.md, judge whether the declared output format is concrete and machine-checkable, and flag a generating skill that pins down no format`;
  }

  /**
   * @param {object} document Must offer uri() and walk(handlers); walk()
   *   is expected to return the collected handler results as an array.
   * @returns {Violation[]} Empty, or a single document-level warning.
   */
  violations(document) {
    const uri = document.uri();
    const basename = uri.replace(/^.*\//u, '');
    if (basename !== 'SKILL.md') {
      return [];
    }
    const heading = /^#{1,6}\s+.*\b(?:format|schema|structure|output)\b/iu;
    const verb = /\b(?:produces?|outputs?|returns?|generates?|writes?|emits?)\b/iu;
    const signals = document.walk({
      header: (text) => (heading.test(text) ? ['declared'] : []),
      prose: (text) => (verb.test(text) ? ['generates'] : []),
      snippet: () => ['declared'],
      bullets: () => [],
      frontmatter: () => []
    });
    const generating = signals.includes('generates');
    if (generating && !signals.includes('declared')) {
      return [new Violation(
        this.id,
        'warning',
        'SKILL.md generates output but never declares its format',
        new Region(uri, 1, 1)
      )];
    }
    return [];
  }
}
|
|
67
|
+
|
|
68
|
+
module.exports = Format;
|
|
@@ -0,0 +1,61 @@
|
|
|
1
|
+
/*
|
|
2
|
+
* SPDX-FileCopyrightText: Copyright (c) 2026 Yegor Bugayenko
|
|
3
|
+
* SPDX-License-Identifier: MIT
|
|
4
|
+
*/
|
|
5
|
+
|
|
6
|
+
'use strict';
|
|
7
|
+
|
|
8
|
+
const Violation = require('../violation');
|
|
9
|
+
const Region = require('../region');
|
|
10
|
+
|
|
11
|
+
/**
 * HiddenChar.
 *
 * Demands that every line carry only visible characters, rejecting any
 * invisible codepoint that hides inside the text. Scans headers, prose,
 * and snippets, because a zero-width space, a bidirectional override, or
 * a variation selector tucked into code is just as dangerous as one
 * tucked into prose. Flags U+200B..U+200D, U+FEFF, the bidi controls
 * U+202A..U+202E and U+2066..U+2069, and the variation selectors
 * U+FE00..U+FE0F and U+E0100..U+E01EF, naming each by its hex codepoint
 * so it can be deleted.
 *
 * A fragment may span several lines (a snippet does), so it is scanned
 * row by row: each violation points at the row that really holds the
 * character and at the column inside that row.
 *
 * The check is standalone and deterministic, so prompt() returns an
 * empty string.
 */
class HiddenChar {
  constructor() {
    this.id = 'hidden-char';
    this.hidden = /[\u200B-\u200D\uFEFF\u202A-\u202E\u2066-\u2069\uFE00-\uFE0F\u{E0100}-\u{E01EF}]/gu;
  }

  /**
   * @returns {string} Always empty; this rule has no oracle prompt.
   */
  prompt() {
    return '';
  }

  /**
   * @param {object} document Must offer uri() and walk(handlers).
   * @returns {*} Whatever document.walk() builds from the handler results.
   */
  violations(document) {
    const uri = document.uri();
    return document.walk({
      header: (text, line) => this.scan(text, line, uri),
      prose: (text, line) => this.scan(text, line, uri),
      snippet: (text, line) => this.scan(text, line, uri),
      bullets: () => [],
      frontmatter: () => []
    });
  }

  /**
   * @param {string} text Fragment to inspect; may contain newlines.
   * @param {number} line Line of the fragment's first row.
   * @param {string} uri Document identifier passed through to each Region.
   * @returns {Violation[]} One error per hidden character, in reading
   *   order, located at its own row and 1-based column.
   */
  scan(text, line, uri) {
    return text.split('\n').flatMap((row, offset) => {
      const found = [];
      // The regex is shared and stateful (/g), so start every row at 0.
      this.hidden.lastIndex = 0;
      let hit = this.hidden.exec(row);
      while (hit !== null) {
        const hex = hit[0].codePointAt(0).toString(16).toUpperCase();
        const code = hex.padStart(4, '0');
        found.push(new Violation(
          this.id,
          'error',
          `invisible character U+${code} found, delete it`,
          new Region(uri, line + offset, hit.index + 1)
        ));
        hit = this.hidden.exec(row);
      }
      return found;
    });
  }
}
|
|
60
|
+
|
|
61
|
+
module.exports = HiddenChar;
|
|
@@ -0,0 +1,82 @@
|
|
|
1
|
+
/*
|
|
2
|
+
* SPDX-FileCopyrightText: Copyright (c) 2026 Yegor Bugayenko
|
|
3
|
+
* SPDX-License-Identifier: MIT
|
|
4
|
+
*/
|
|
5
|
+
|
|
6
|
+
'use strict';
|
|
7
|
+
|
|
8
|
+
const Violation = require('../violation');
|
|
9
|
+
const Region = require('../region');
|
|
10
|
+
const mask = require('../mask');
|
|
11
|
+
|
|
12
|
+
/**
 * Homoglyph.
 *
 * Rejects mixed-script look-alike characters that masquerade as plain
 * ASCII. A whitespace-delimited token mixing an ASCII Latin letter with
 * a character from the Cyrillic block (U+0400..U+04FF), the Greek block
 * (U+0370..U+03FF), or the full-width Latin letters (U+FF21..U+FF3A,
 * U+FF41..U+FF5A) reads as one word yet hides a foreign codepoint, so it
 * slips past humans while breaking tools. The check flags every such
 * character at its own column. Text goes through mask() first
 * (presumably blanking inline code, so a deliberately quoted example
 * stays clean — confirm against ../mask). Only headers and prose are
 * scanned.
 *
 * The check is standalone and deterministic, so prompt() returns an
 * empty string.
 */
class Homoglyph {
  constructor() {
    this.id = 'homoglyph';
    this.latin = /[A-Za-z]/u;
    // Ranges are spelled as escapes so they survive any re-encoding of
    // this file, and no stray "-" can sneak into the class and make
    // every hyphenated ASCII word look confusable.
    this.confusable = /[\u0400-\u04FF\u0370-\u03FF\uFF21-\uFF3A\uFF41-\uFF5A]/u;
  }

  /**
   * @returns {string} Always empty; this rule has no oracle prompt.
   */
  prompt() {
    return '';
  }

  /**
   * @param {object} document Must offer uri() and walk(handlers).
   * @returns {*} Whatever document.walk() builds from the handler results.
   */
  violations(document) {
    const uri = document.uri();
    return document.walk({
      header: (text, line) => this.scan(text, line, uri),
      prose: (text, line) => this.scan(text, line, uri),
      snippet: () => [],
      bullets: () => [],
      frontmatter: () => []
    });
  }

  /**
   * @param {string} text Fragment to inspect.
   * @param {number} line Line passed through to each Region.
   * @param {string} uri Document identifier passed through to each Region.
   * @returns {Violation[]} One error per confusable character that sits
   *   in a token also holding an ASCII Latin letter.
   */
  scan(text, line, uri) {
    const clean = mask(text);
    const result = [];
    const token = /\S+/gu;
    let match = token.exec(clean);
    while (match !== null) {
      const [word] = match;
      if (this.latin.test(word) && this.confusable.test(word)) {
        this.flag(word, match.index).forEach((spot) => {
          result.push(new Violation(
            this.id,
            'error',
            `mixed-script character "${spot.char}" (U+${spot.point}) found, use plain ASCII`,
            new Region(uri, line, spot.column)
          ));
        });
      }
      match = token.exec(clean);
    }
    return result;
  }

  /**
   * @param {string} word Token known to mix scripts.
   * @param {number} start 0-based index of the token inside its fragment.
   * @returns {{char: string, point: string, column: number}[]} Every
   *   confusable character with its hex codepoint and 1-based column.
   */
  flag(word, start) {
    const spots = [];
    // Count UTF-16 units, not code points, so the column stays on the
    // same scale as `start` (a regex match index) when the token holds
    // characters outside the BMP.
    let units = 0;
    for (const char of word) {
      if (this.confusable.test(char)) {
        const point = char
          .codePointAt(0)
          .toString(16)
          .toUpperCase()
          .padStart(4, '0');
        spots.push({char, point, column: start + units + 1});
      }
      units += char.length;
    }
    return spots;
  }
}
|
|
81
|
+
|
|
82
|
+
module.exports = Homoglyph;
|
package/src/rules/index.js
CHANGED
|
@@ -21,6 +21,7 @@ const NameMatchesDir = require('./name-matches-dir');
|
|
|
21
21
|
const Polite = require('./polite');
|
|
22
22
|
const Unfinished = require('./unfinished');
|
|
23
23
|
const Crowded = require('./crowded');
|
|
24
|
+
const Budget = require('./budget');
|
|
24
25
|
const DescriptionTriggers = require('./description-triggers');
|
|
25
26
|
const Atomic = require('./atomic');
|
|
26
27
|
const Hedging = require('./hedging');
|
|
@@ -29,6 +30,45 @@ const Unique = require('./unique');
|
|
|
29
30
|
const Consistent = require('./consistent');
|
|
30
31
|
const Simple = require('./simple');
|
|
31
32
|
const SectionLevel = require('./section-level');
|
|
33
|
+
const Format = require('./format');
|
|
34
|
+
const Untrusted = require('./untrusted');
|
|
35
|
+
const Ordered = require('./ordered');
|
|
36
|
+
const Emphasis = require('./emphasis');
|
|
37
|
+
const Persona = require('./persona');
|
|
38
|
+
const Concise = require('./concise');
|
|
39
|
+
const Example = require('./example');
|
|
40
|
+
const Referential = require('./referential');
|
|
41
|
+
const Vague = require('./vague');
|
|
42
|
+
const Positive = require('./positive');
|
|
43
|
+
const Done = require('./done');
|
|
44
|
+
const Terms = require('./terms');
|
|
45
|
+
const Jargon = require('./jargon');
|
|
46
|
+
const PseudoHeading = require('./pseudo-heading');
|
|
47
|
+
const Stale = require('./stale');
|
|
48
|
+
const ToolClarity = require('./tool-clarity');
|
|
49
|
+
const CounterExample = require('./counter-example');
|
|
50
|
+
const Rationale = require('./rationale');
|
|
51
|
+
const SelfContained = require('./self-contained');
|
|
52
|
+
const Quantifier = require('./quantifier');
|
|
53
|
+
const WeakVerb = require('./weak-verb');
|
|
54
|
+
const Default = require('./default');
|
|
55
|
+
const MetaReference = require('./meta-reference');
|
|
56
|
+
const AmbiguousOr = require('./ambiguous-or');
|
|
57
|
+
const ExternalLink = require('./external-link');
|
|
58
|
+
const Conditional = require('./conditional');
|
|
59
|
+
const Transition = require('./transition');
|
|
60
|
+
const Placement = require('./placement');
|
|
61
|
+
const InlineCode = require('./inline-code');
|
|
62
|
+
const Emoji = require('./emoji');
|
|
63
|
+
const Homoglyph = require('./homoglyph');
|
|
64
|
+
const DuplicateSection = require('./duplicate-section');
|
|
65
|
+
const DescriptionVoice = require('./description-voice');
|
|
66
|
+
const ExampleFormat = require('./example-format');
|
|
67
|
+
const DescriptionLength = require('./description-length');
|
|
68
|
+
const Scope = require('./scope');
|
|
69
|
+
const HiddenChar = require('./hidden-char');
|
|
70
|
+
const Units = require('./units');
|
|
71
|
+
const FenceLanguage = require('./fence-language');
|
|
32
72
|
|
|
33
73
|
module.exports = () => [
|
|
34
74
|
new Grouped(),
|
|
@@ -37,6 +77,7 @@ module.exports = () => [
|
|
|
37
77
|
new SectionLevel(),
|
|
38
78
|
new LineLength(80),
|
|
39
79
|
new TokenCount(4000),
|
|
80
|
+
new Concise(200),
|
|
40
81
|
new NoArticles(),
|
|
41
82
|
new Command(),
|
|
42
83
|
new Punctuation(),
|
|
@@ -44,14 +85,53 @@ module.exports = () => [
|
|
|
44
85
|
new Redundant(),
|
|
45
86
|
new Consistent(),
|
|
46
87
|
new Simple(),
|
|
88
|
+
new Referential(),
|
|
47
89
|
new NameMatchesDir(),
|
|
48
90
|
new Polite(),
|
|
49
91
|
new Unfinished(),
|
|
50
92
|
new Crowded(10),
|
|
93
|
+
new Budget(60),
|
|
51
94
|
new DescriptionTriggers(),
|
|
95
|
+
new Example(),
|
|
96
|
+
new Format(),
|
|
52
97
|
new Atomic(),
|
|
98
|
+
new Ordered(),
|
|
53
99
|
new Hedging(),
|
|
100
|
+
new Vague(),
|
|
101
|
+
new ToolClarity(),
|
|
54
102
|
new Passive(),
|
|
103
|
+
new Untrusted(),
|
|
104
|
+
new Emphasis(),
|
|
105
|
+
new Persona(),
|
|
106
|
+
new Positive(),
|
|
107
|
+
new Done(),
|
|
108
|
+
new Terms(),
|
|
109
|
+
new Jargon(),
|
|
110
|
+
new PseudoHeading(),
|
|
111
|
+
new Stale(),
|
|
112
|
+
new CounterExample(),
|
|
113
|
+
new Rationale(),
|
|
114
|
+
new SelfContained(),
|
|
115
|
+
new Quantifier(),
|
|
116
|
+
new WeakVerb(),
|
|
117
|
+
new Default(),
|
|
118
|
+
new MetaReference(),
|
|
119
|
+
new AmbiguousOr(),
|
|
120
|
+
new ExternalLink(),
|
|
121
|
+
new Conditional(),
|
|
122
|
+
new Transition(),
|
|
123
|
+
new Placement(),
|
|
124
|
+
new InlineCode(),
|
|
125
|
+
new Emoji(),
|
|
126
|
+
new Homoglyph(),
|
|
127
|
+
new DuplicateSection(),
|
|
128
|
+
new DescriptionVoice(),
|
|
129
|
+
new ExampleFormat(),
|
|
130
|
+
new DescriptionLength(),
|
|
131
|
+
new Scope(),
|
|
132
|
+
new HiddenChar(),
|
|
133
|
+
new Units(),
|
|
134
|
+
new FenceLanguage(),
|
|
55
135
|
new Unique(),
|
|
56
136
|
new Frontmatter(
|
|
57
137
|
'SKILL.md',
|
|
@@ -0,0 +1,79 @@
|
|
|
1
|
+
/*
|
|
2
|
+
* SPDX-FileCopyrightText: Copyright (c) 2026 Yegor Bugayenko
|
|
3
|
+
* SPDX-License-Identifier: MIT
|
|
4
|
+
*/
|
|
5
|
+
|
|
6
|
+
'use strict';
|
|
7
|
+
|
|
8
|
+
const Violation = require('../violation');
|
|
9
|
+
const Region = require('../region');
|
|
10
|
+
const mask = require('../mask');
|
|
11
|
+
|
|
12
|
+
/**
 * Shapes of a bare literal, in this order: a known shell command with
 * its first argument, a slashed path, a filename with a known
 * extension, and a CLI flag. All are global regexes, so each one must
 * be driven to exhaustion to leave its lastIndex back at zero.
 */
const PATTERNS = [
  /\b(?:npm|npx|node|git|eslint|mocha|yarn|pnpm|cd|rm|mkdir|chmod|cat|sed|grep|curl|docker)\s+[\w./-]+/gu,
  /(?<![\w/.@])[\w-]+(?:\/[\w.-]+)+/gu,
  /(?<![\w/.@])[\w-]+\.(?:js|ts|jsx|tsx|json|md|ya?ml|sh|py|rb|go|rs|toml|cfg|lock|txt|xml|html|css)\b/gu,
  /(?<![\w-])(?:--[A-Za-z][\w-]*|-[A-Za-z])(?![\w])/gu
];

/**
 * InlineCode.
 *
 * When a command, path, filename, or flag sits bare in prose, the model
 * cannot cleanly tell the literal token from the surrounding words and
 * may reword or reformat it. This standalone check flags a bare
 * literal — a slashed path, a filename carrying a known extension, a
 * CLI flag, or a known shell command followed by an argument — found in
 * a prose fragment after it went through mask() (presumably blanking
 * inline-code spans, so an already-backticked literal passes — confirm
 * against ../mask). A match fully covered by another match is dropped,
 * so one literal yields one warning. Tokens preceded by "@" are never
 * matched as paths or filenames. Its prompt hands borderline literals
 * to the AI oracle.
 */
class InlineCode {
  constructor() {
    this.id = 'inline-code';
  }

  /**
   * @returns {string} Instruction for the AI oracle, prefixed by the rule id.
   */
  prompt() {
    return `${this.id}: flag a bare literal token (command, path, filename, or flag) that should be wrapped in backticks, judging borderline cases`;
  }

  /**
   * @param {object} document Must offer uri() and walk(handlers).
   * @returns {*} Whatever document.walk() builds from the handler results.
   */
  violations(document) {
    const uri = document.uri();
    const skip = () => [];
    return document.walk({
      header: skip,
      prose: (text, line) => this.scan(text, line, uri),
      snippet: skip,
      bullets: skip,
      frontmatter: skip
    });
  }

  /**
   * @param {string} text Fragment to inspect.
   * @param {number} line Line passed through to each Region.
   * @param {string} uri Document identifier passed through to each Region.
   * @returns {Violation[]} One warning per surviving literal, ordered by
   *   position, each at its 1-based column.
   */
  scan(text, line, uri) {
    const masked = mask(text);
    const spans = [];
    for (const pattern of PATTERNS) {
      for (let hit = pattern.exec(masked); hit !== null; hit = pattern.exec(masked)) {
        const [token] = hit;
        spans.push({token, from: hit.index, to: hit.index + token.length});
      }
    }
    return InlineCode.prune(spans).map((span) => new Violation(
      this.id,
      'warning',
      `literal "${span.token}" must be wrapped in backticks`,
      new Region(uri, line, span.from + 1)
    ));
  }

  /**
   * Sorts spans by start (longest first on a tie) and drops every span
   * that lies entirely inside one already kept. Partial overlaps stay.
   *
   * @param {{token: string, from: number, to: number}[]} spans Not mutated.
   * @returns {{token: string, from: number, to: number}[]} Surviving spans.
   */
  static prune(spans) {
    const covers = (outer, inner) => inner.from >= outer.from && inner.to <= outer.to;
    const kept = [];
    const ordered = [...spans].sort((one, two) => one.from - two.from || two.to - one.to);
    for (const span of ordered) {
      if (kept.some((other) => covers(other, span))) {
        continue;
      }
      kept.push(span);
    }
    return kept;
  }
}
|
|
78
|
+
|
|
79
|
+
module.exports = InlineCode;
|
|
@@ -0,0 +1,115 @@
|
|
|
1
|
+
/*
|
|
2
|
+
* SPDX-FileCopyrightText: Copyright (c) 2026 Yegor Bugayenko
|
|
3
|
+
* SPDX-License-Identifier: MIT
|
|
4
|
+
*/
|
|
5
|
+
|
|
6
|
+
'use strict';
|
|
7
|
+
|
|
8
|
+
const Violation = require('../violation');
|
|
9
|
+
const Region = require('../region');
|
|
10
|
+
const mask = require('../mask');
|
|
11
|
+
|
|
12
|
+
/**
 * Acronyms accepted without any expansion in the document.
 */
const ALLOWLIST = new Set([
  'AI', 'CI', 'CD', 'CLI', 'API',
  'URL', 'URI', 'HTTP', 'HTTPS', 'JSON',
  'YAML', 'XML', 'HTML', 'CSS', 'SQL',
  'ID', 'OK', 'OS', 'IO', 'NPM',
  'PR', 'MIT', 'SARIF', 'SKILL', 'CLAUDE'
]);
|
|
39
|
+
|
|
40
|
+
/**
 * Spells the acronym a gloss stands for: the upper-cased first letter of
 * every run of ASCII letters, joined together. A gloss with no letters
 * gives an empty string.
 */
const initials = (gloss) => {
  const words = gloss.match(/[A-Za-z]+/gu) || [];
  return words.reduce((letters, word) => letters + word.charAt(0).toUpperCase(), '');
};
|
|
43
|
+
|
|
44
|
+
/**
 * Collects every acronym the text defines. An upper-case run of two or
 * more letters directly followed by "(" (whitespace allowed) defines
 * itself; any other parenthetical defines whatever its word initials
 * spell. Returns a Set of those acronyms in order of appearance.
 */
const defined = (masked) => {
  const pattern = /\b(?<acronym>[A-Z]{2,})\s*\(|\((?<gloss>[^)]+)\)/gu;
  return new Set(Array.from(
    masked.matchAll(pattern),
    ({groups}) => groups.acronym || initials(groups.gloss)
  ));
};
|
|
58
|
+
|
|
59
|
+
/**
 * Tells whether an acronym lacks a definition: true only when it is
 * neither in scope.known nor in the built-in allowlist.
 */
const undefining = (acronym, scope) => {
  if (scope.known.has(acronym)) {
    return false;
  }
  return !ALLOWLIST.has(acronym);
};
|
|
61
|
+
|
|
62
|
+
/**
 * Jargon.
 *
 * Flags an acronym — a standalone run of two or more upper-case
 * letters — that lands in prose without ever being expanded. An acronym
 * counts as defined when the document, anywhere, follows it with a
 * parenthetical gloss, as in "RBAC (role-based access control)", or
 * when a parenthetical's word initials spell it, as in "AAA pattern
 * (Arrange-Act-Assert)", so a single expansion licenses every mention.
 * Well-known acronyms sit in a built-in allowlist and pass untouched.
 * Each acronym is judged only on its first occurrence across the whole
 * walk. Its prompt hands non-acronym domain jargon, the rare nouns a
 * reader cannot parse, to the AI oracle.
 */
class Jargon {
  constructor() {
    this.id = 'jargon';
  }

  /**
   * @returns {string} Instruction for the AI oracle, prefixed by the rule id.
   */
  prompt() {
    return `${this.id}: flag non-acronym domain jargon, rare nouns a fresh reader cannot parse, and ask for a plain-word definition on first use`;
  }

  /**
   * @param {object} document Must offer uri(), text(), and walk(handlers).
   * @returns {*} Whatever document.walk() builds from the handler results.
   */
  violations(document) {
    // One scope for the whole walk: `seen` must persist across fragments
    // so that an acronym is reported at most once per document.
    const scope = {
      uri: document.uri(),
      known: defined(mask(document.text())),
      seen: new Set()
    };
    const none = () => [];
    return document.walk({
      header: none,
      prose: (text, line) => this.scan(text, line, scope),
      snippet: none,
      bullets: none,
      frontmatter: none
    });
  }

  /**
   * @param {string} text Fragment to inspect.
   * @param {number} line Line passed through to each Region.
   * @param {{uri: string, known: Set<string>, seen: Set<string>}} scope
   *   Document-wide state; `seen` gains every acronym met here.
   * @returns {Violation[]} One warning per undefined acronym met for the
   *   first time, at its 1-based column.
   */
  scan(text, line, scope) {
    const found = [];
    for (const hit of mask(text).matchAll(/\b[A-Z]{2,}\b/gu)) {
      const [acronym] = hit;
      const repeated = scope.seen.has(acronym);
      scope.seen.add(acronym);
      if (!repeated && undefining(acronym, scope)) {
        found.push(this.flag(acronym, new Region(scope.uri, line, hit.index + 1)));
      }
    }
    return found;
  }

  /**
   * @param {string} acronym The unexpanded acronym.
   * @param {Region} region Where it was met.
   * @returns {Violation} The warning for that acronym.
   */
  flag(acronym, region) {
    const message = `acronym "${acronym}" never expanded, define it on first use`;
    return new Violation(this.id, 'warning', message, region);
  }
}
|
|
114
|
+
|
|
115
|
+
module.exports = Jargon;
|
|
@@ -0,0 +1,57 @@
|
|
|
1
|
+
/*
|
|
2
|
+
* SPDX-FileCopyrightText: Copyright (c) 2026 Yegor Bugayenko
|
|
3
|
+
* SPDX-License-Identifier: MIT
|
|
4
|
+
*/
|
|
5
|
+
|
|
6
|
+
'use strict';
|
|
7
|
+
|
|
8
|
+
const Violation = require('../violation');
|
|
9
|
+
const Region = require('../region');
|
|
10
|
+
const mask = require('../mask');
|
|
11
|
+
|
|
12
|
+
/**
 * MetaReference.
 *
 * Flags self-referential framing of the model or the document: "as an
 * AI", "as a language model", "you are an AI", "you are a model", "this
 * prompt", "these instructions", "this manifesto", and "the system
 * prompt", matched case-insensitively in prose fragments. Such framing
 * narrates the setup instead of issuing a command, so it adds no
 * instruction and earns deletion. Text goes through mask() first
 * (presumably blanking inline code — confirm against ../mask).
 * Distinct from persona, which targets role assignment like "Act as a
 * reviewer"; this one targets the model talking about itself or the
 * document talking about itself.
 */
class MetaReference {
  constructor() {
    this.id = 'meta-reference';
    this.phrase = /\b(?:as an ai|as a language model|you are an ai|you are a model|this prompt|these instructions|this manifesto|the system prompt)\b/giu;
  }

  /**
   * @returns {string} Instruction for the AI oracle, prefixed by the rule id.
   */
  prompt() {
    return `${this.id}: flag self-referential framing of the model or document beyond the fixed list, and delete it`;
  }

  /**
   * @param {object} document Must offer uri() and walk(handlers).
   * @returns {*} Whatever document.walk() builds from the handler results.
   */
  violations(document) {
    const uri = document.uri();
    return document.walk({
      header: () => [],
      prose: (text, line) => this.scan(text, line, uri),
      snippet: () => [],
      bullets: () => [],
      frontmatter: () => []
    });
  }

  /**
   * @param {string} text Fragment to inspect.
   * @param {number} line Line passed through to each Region.
   * @param {string} uri Document identifier passed through to each Region.
   * @returns {Violation[]} One warning per phrase, quoting it as written,
   *   at its 1-based column.
   */
  scan(text, line, uri) {
    const masked = mask(text);
    const out = [];
    // this.phrase is a shared /g regex: if an earlier scan was cut short
    // (or anything else touched it), a leftover lastIndex would make
    // exec() start mid-string and silently miss matches.
    this.phrase.lastIndex = 0;
    let hit = this.phrase.exec(masked);
    while (hit !== null) {
      out.push(new Violation(
        this.id,
        'warning',
        `meta self-reference "${hit[0]}" issues no command, delete it`,
        new Region(uri, line, hit.index + 1)
      ));
      hit = this.phrase.exec(masked);
    }
    return out;
  }
}
|
|
56
|
+
|
|
57
|
+
module.exports = MetaReference;
|