@yegor256/dogent 0.10.0 → 0.11.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,65 @@
1
+ /*
2
+ * SPDX-FileCopyrightText: Copyright (c) 2026 Yegor Bugayenko
3
+ * SPDX-License-Identifier: MIT
4
+ */
5
+
6
+ 'use strict';
7
+
8
+ const Violation = require('../violation');
9
+ const Region = require('../region');
10
+
/**
 * Strips a leading run of one to six "#" marks, plus any whitespace
 * right after them, from a heading and trims both ends of the rest.
 *
 * @param {string} text - Raw heading text
 * @returns {string} Heading title without its marker
 */
const bare = (text) => {
  const title = text.replace(/^#{1,6}\s*/u, '');
  return title.trim();
};

/**
 * Builds the comparison key of a heading: its bare title, lower-cased,
 * with every run of whitespace collapsed to one space.
 *
 * @param {string} text - Raw heading text
 * @returns {string} Key that is equal for headings differing only by case or spacing
 */
const normalize = (text) => {
  const lowered = bare(text).toLowerCase();
  return lowered.replace(/\s+/gu, ' ');
};

/**
 * DuplicateSection.
 *
 * Reports headings that repeat an earlier heading's name. Headings are
 * gathered in the order document.walk() yields them and compared by
 * their normalized key; the first holder of a key stays clean, every
 * later one earns a warning at column 1 of its row. prompt() is empty,
 * so this rule contributes nothing to the AI oracle.
 */
class DuplicateSection {
  constructor() {
    this.id = 'duplicate-section';
  }

  /**
   * @returns {string} Always an empty string
   */
  prompt() {
    return '';
  }

  /**
   * Collects every heading of the document and reports the repeated ones.
   *
   * @param {object} document - Must offer uri() and walk(handlers)
   * @returns {Array} Violations, one per repeated heading
   */
  violations(document) {
    const uri = document.uri();
    const nothing = () => [];
    // Only headers matter here; every other fragment kind yields nothing.
    const headers = document.walk({
      header: (text, row) => [{text, row}],
      prose: nothing,
      snippet: nothing,
      bullets: nothing,
      frontmatter: nothing
    });
    return this.repeats(uri, headers);
  }

  /**
   * Picks the headings whose key was already taken by an earlier one.
   *
   * @param {*} uri - Passed through to each Region
   * @param {Array<{text: string, row: number}>} headers - Headings in order
   * @returns {Array} Violations for the second and later twins
   */
  repeats(uri, headers) {
    const taken = new Set();
    const twins = headers.filter((entry) => {
      const key = normalize(entry.text);
      const repeated = taken.has(key);
      taken.add(key);
      return repeated;
    });
    return twins.map((entry) => new Violation(
      this.id,
      'warning',
      `duplicate section "${bare(entry.text)}", give each section a distinct name`,
      new Region(uri, entry.row, 1)
    ));
  }
}
64
+
65
+ module.exports = DuplicateSection;
@@ -0,0 +1,60 @@
1
+ /*
2
+ * SPDX-FileCopyrightText: Copyright (c) 2026 Yegor Bugayenko
3
+ * SPDX-License-Identifier: MIT
4
+ */
5
+
6
+ 'use strict';
7
+
8
+ const Violation = require('../violation');
9
+ const Region = require('../region');
10
+ const mask = require('../mask');
11
+
/**
 * Emoji.
 *
 * Flags every character matched by the glyph pattern: anything with the
 * Unicode Extended_Pictographic property plus the ranges U+2190-21FF,
 * U+2300-27BF and U+2B00-2BFF. Only header and prose fragments are
 * scanned, and each text goes through mask() first (a project helper
 * which, per this rule's description, hides inline code so an example
 * may keep a needed glyph). Letters of foreign scripts are outside the
 * pattern; those belong to the homoglyph rule.
 *
 * The check is standalone and deterministic, so prompt() returns an
 * empty string.
 */
class Emoji {
  constructor() {
    this.id = 'emoji';
    this.glyph = /[\p{Extended_Pictographic}\u{2190}-\u{21FF}\u{2300}-\u{27BF}\u{2B00}-\u{2BFF}]/gu;
  }

  /**
   * @returns {string} Always an empty string
   */
  prompt() {
    return '';
  }

  /**
   * Scans header and prose fragments of the document.
   *
   * @param {object} document - Must offer uri() and walk(handlers)
   * @returns {Array} Whatever walk() builds from the handlers' violations
   */
  violations(document) {
    const uri = document.uri();
    return document.walk({
      header: (text, line) => this.scan(text, line, uri),
      prose: (text, line) => this.scan(text, line, uri),
      snippet: () => [],
      bullets: () => [],
      frontmatter: () => []
    });
  }

  /**
   * Reports each decorative character found in one fragment.
   *
   * @param {string} text - Fragment text
   * @param {number} line - Line handed to each Region
   * @param {*} uri - Document identifier handed to each Region
   * @returns {Array} One warning per matched character, column is
   *  the 1-based UTF-16 index in the masked text
   */
  scan(text, line, uri) {
    const masked = mask(text);
    const result = [];
    // The pattern is global and shared by every call on this instance,
    // so it remembers where the previous search stopped. Start from
    // zero, otherwise a scan interrupted earlier (or any outside use of
    // the regex) would make this one skip the beginning of the text.
    this.glyph.lastIndex = 0;
    let hit = this.glyph.exec(masked);
    while (hit !== null) {
      result.push(new Violation(
        this.id,
        'warning',
        `decorative character "${hit[0]}" adds token noise, use plain text`,
        new Region(uri, line, hit.index + 1)
      ));
      hit = this.glyph.exec(masked);
    }
    return result;
  }
}
59
+
60
+ module.exports = Emoji;
@@ -0,0 +1,32 @@
1
+ /*
2
+ * SPDX-FileCopyrightText: Copyright (c) 2026 Yegor Bugayenko
3
+ * SPDX-License-Identifier: MIT
4
+ */
5
+
6
+ 'use strict';
7
+
/**
 * Example format.
 *
 * A judgement-only rule about agreement between a shown example and a
 * declared output format inside one SKILL.md. It has no deterministic
 * part: violations() always returns an empty list, and the whole check
 * is expressed as the instruction returned by prompt(), which is meant
 * for the AI oracle.
 */
class ExampleFormat {
  constructor() {
    this.id = 'example-format';
  }

  /**
   * @returns {string} The rule id, a colon, and the instruction text
   */
  prompt() {
    const task = 'in a SKILL.md that both shows an example and declares an output format, judge whether the example conforms to the declared format and flag any mismatch';
    return `${this.id}: ${task}`;
  }

  /**
   * @returns {Array} Always empty, nothing is detected locally
   */
  violations() {
    return [];
  }
}
31
+
32
+ module.exports = ExampleFormat;
@@ -0,0 +1,57 @@
1
+ /*
2
+ * SPDX-FileCopyrightText: Copyright (c) 2026 Yegor Bugayenko
3
+ * SPDX-License-Identifier: MIT
4
+ */
5
+
6
+ 'use strict';
7
+
8
+ const Violation = require('../violation');
9
+ const Region = require('../region');
10
+ const mask = require('../mask');
11
+
/**
 * ExternalLink.
 *
 * Warns about every http:// or https:// URL found in prose, because the
 * page behind it may rot or inject hidden instructions; durable guidance
 * belongs inlined. Text is passed through mask() before matching (a
 * project helper that, per this rule's description, hides inline code),
 * and snippet fragments are not scanned at all, so URLs in examples are
 * exempt. Only the prose handler scans here.
 * NOTE(review): whether bullet items reach the prose handler depends on
 * document.walk(), which is not visible from this file — confirm.
 */
class ExternalLink {
  constructor() {
    this.id = 'external-link';
  }

  /**
   * @returns {string} Instruction for the AI oracle, prefixed with the rule id
   */
  prompt() {
    return `${this.id}: judge whether an external link is load-bearing, and flag durable guidance that should be inlined instead`;
  }

  /**
   * Scans the prose fragments of the document.
   *
   * @param {object} document - Must offer uri() and walk(handlers)
   * @returns {Array} Whatever walk() builds from the handlers' violations
   */
  violations(document) {
    const uri = document.uri();
    const skip = () => [];
    return document.walk({
      header: skip,
      prose: (text, line) => this.scan(text, line, uri),
      snippet: skip,
      bullets: skip,
      frontmatter: skip
    });
  }

  /**
   * Reports each URL of one prose fragment.
   *
   * @param {string} text - Fragment text
   * @param {number} line - Line handed to each Region
   * @param {*} uri - Document identifier handed to each Region
   * @returns {Array} One warning per URL, column is the 1-based index
   *  of the scheme in the masked text
   */
  scan(text, line, uri) {
    // A fresh literal per call, so no match position leaks between calls.
    const url = /(?:https?:\/\/)\S+/giu;
    const masked = mask(text);
    const found = [];
    for (let hit = url.exec(masked); hit !== null; hit = url.exec(masked)) {
      const where = new Region(uri, line, hit.index + 1);
      found.push(new Violation(
        this.id,
        'warning',
        'external URL may rot or inject, encode durable guidance instead',
        where
      ));
    }
    return found;
  }
}
56
+
57
+ module.exports = ExternalLink;
@@ -0,0 +1,55 @@
1
+ /*
2
+ * SPDX-FileCopyrightText: Copyright (c) 2026 Yegor Bugayenko
3
+ * SPDX-License-Identifier: MIT
4
+ */
5
+
6
+ 'use strict';
7
+
8
+ const Violation = require('../violation');
9
+ const Region = require('../region');
10
+
/**
 * FenceLanguage.
 *
 * Demands that every fenced code block declare a language right after
 * its opening fence. The first line of each snippet is matched against
 * a fence of three or more backticks or three or more tildes; whatever
 * non-space run follows the fence is taken as the language. A snippet
 * whose first line is not such a fence, or whose fence has nothing
 * after it, earns a warning at column 1 of the snippet's row.
 *
 * The check is standalone and deterministic, so prompt() returns an
 * empty string.
 */
class FenceLanguage {
  constructor() {
    this.id = 'fence-language';
    // The whole run of fence marks must be consumed before the language
    // is captured. With a fixed three-mark fence, a bare "````" would
    // leave its fourth backtick to be mistaken for a language name.
    this.fence = /^\s*(?:`{3,}|~{3,})\s*(?<lang>\S*)/u;
  }

  /**
   * @returns {string} Always an empty string
   */
  prompt() {
    return '';
  }

  /**
   * Scans the snippet fragments of the document.
   *
   * @param {object} document - Must offer uri() and walk(handlers)
   * @returns {Array} Whatever walk() builds from the handlers' violations
   */
  violations(document) {
    const uri = document.uri();
    return document.walk({
      header: () => [],
      prose: () => [],
      snippet: (content, row) => this.scan(content, row, uri),
      bullets: () => [],
      frontmatter: () => []
    });
  }

  /**
   * Checks the opening line of one snippet.
   *
   * @param {string} content - Snippet text, its first line being the opening fence
   * @param {number} row - Row handed to the Region
   * @param {*} uri - Document identifier handed to the Region
   * @returns {Array} Empty when a language is declared, one warning otherwise
   */
  scan(content, row, uri) {
    const [first] = content.split('\n');
    const hit = this.fence.exec(first);
    if (hit !== null && hit.groups.lang !== '') {
      return [];
    }
    return [new Violation(
      this.id,
      'warning',
      'fenced block has no language tag, declare one',
      new Region(uri, row, 1)
    )];
  }
}
54
+
55
+ module.exports = FenceLanguage;
@@ -0,0 +1,61 @@
1
+ /*
2
+ * SPDX-FileCopyrightText: Copyright (c) 2026 Yegor Bugayenko
3
+ * SPDX-License-Identifier: MIT
4
+ */
5
+
6
+ 'use strict';
7
+
8
+ const Violation = require('../violation');
9
+ const Region = require('../region');
10
+
/**
 * HiddenChar.
 *
 * Rejects invisible codepoints hiding inside the text. Header, prose
 * and snippet fragments are all scanned, unmasked, because a hidden
 * character tucked into code is as dangerous as one tucked into prose.
 * The pattern covers U+200B-200D, U+FEFF, the bidi controls U+202A-202E
 * and U+2066-2069, and the variation selectors U+FE00-FE0F and
 * U+E0100-E01EF. Each hit is an error naming the codepoint in
 * upper-case hex, padded to at least four digits, so it can be deleted.
 *
 * The check is standalone and deterministic, so prompt() returns an
 * empty string.
 */
class HiddenChar {
  constructor() {
    this.id = 'hidden-char';
    this.hidden = /[\u200B-\u200D\uFEFF\u202A-\u202E\u2066-\u2069\uFE00-\uFE0F\u{E0100}-\u{E01EF}]/gu;
  }

  /**
   * @returns {string} Always an empty string
   */
  prompt() {
    return '';
  }

  /**
   * Scans header, prose and snippet fragments of the document.
   *
   * @param {object} document - Must offer uri() and walk(handlers)
   * @returns {Array} Whatever walk() builds from the handlers' violations
   */
  violations(document) {
    const uri = document.uri();
    const check = (text, line) => this.scan(text, line, uri);
    const skip = () => [];
    return document.walk({
      header: check,
      prose: check,
      snippet: check,
      bullets: skip,
      frontmatter: skip
    });
  }

  /**
   * Reports each invisible character of one fragment.
   *
   * @param {string} text - Fragment text, scanned as is
   * @param {number} line - Line handed to each Region
   * @param {*} uri - Document identifier handed to each Region
   * @returns {Array} One error per hit, column is the 1-based UTF-16 index
   */
  scan(text, line, uri) {
    const pattern = this.hidden;
    // The global regex is shared between calls; always search from the start.
    pattern.lastIndex = 0;
    const found = [];
    for (let hit = pattern.exec(text); hit !== null; hit = pattern.exec(text)) {
      const code = hit[0]
        .codePointAt(0)
        .toString(16)
        .toUpperCase()
        .padStart(4, '0');
      const where = new Region(uri, line, hit.index + 1);
      found.push(new Violation(
        this.id,
        'error',
        `invisible character U+${code} found, delete it`,
        where
      ));
    }
    return found;
  }
}
60
+
61
+ module.exports = HiddenChar;
@@ -0,0 +1,82 @@
1
+ /*
2
+ * SPDX-FileCopyrightText: Copyright (c) 2026 Yegor Bugayenko
3
+ * SPDX-License-Identifier: MIT
4
+ */
5
+
6
+ 'use strict';
7
+
8
+ const Violation = require('../violation');
9
+ const Region = require('../region');
10
+ const mask = require('../mask');
11
+
/**
 * Homoglyph.
 *
 * Rejects mixed-script look-alike characters that masquerade as plain
 * ASCII. A whitespace-delimited token that holds at least one ASCII
 * Latin letter and at least one character from the confusable ranges
 * (the character class below: Cyrillic, Greek, and full-width forms)
 * gets one error per confusable character, each at its own column.
 * Only header and prose fragments are scanned, and text goes through
 * mask() first (a project helper which, per this rule's description,
 * hides inline code so a deliberately quoted example stays clean).
 *
 * The check is standalone and deterministic, so prompt() returns an
 * empty string.
 */
class Homoglyph {
  constructor() {
    this.id = 'homoglyph';
    this.latin = /[A-Za-z]/u;
    this.confusable = /[Ѐ-ӿͰ-Ͽ＀-￯]/u;
  }

  /**
   * @returns {string} Always an empty string
   */
  prompt() {
    return '';
  }

  /**
   * Scans header and prose fragments of the document.
   *
   * @param {object} document - Must offer uri() and walk(handlers)
   * @returns {Array} Whatever walk() builds from the handlers' violations
   */
  violations(document) {
    const uri = document.uri();
    return document.walk({
      header: (text, line) => this.scan(text, line, uri),
      prose: (text, line) => this.scan(text, line, uri),
      snippet: () => [],
      bullets: () => [],
      frontmatter: () => []
    });
  }

  /**
   * Reports the confusable characters of every mixed-script token.
   *
   * @param {string} text - Fragment text
   * @param {number} line - Line handed to each Region
   * @param {*} uri - Document identifier handed to each Region
   * @returns {Array} One error per confusable character in a mixed token
   */
  scan(text, line, uri) {
    const clean = mask(text);
    const result = [];
    // A fresh literal per call, so no match position leaks between calls.
    const token = /\S+/gu;
    let match = token.exec(clean);
    while (match !== null) {
      const [word] = match;
      // A token written wholly in one script is legitimate; only the mix is suspect.
      if (this.latin.test(word) && this.confusable.test(word)) {
        this.flag(word, match.index).forEach((spot) => {
          result.push(new Violation(
            this.id,
            'error',
            `mixed-script character "${spot.char}" (U+${spot.point}) found, use plain ASCII`,
            new Region(uri, line, spot.column)
          ));
        });
      }
      match = token.exec(clean);
    }
    return result;
  }

  /**
   * Locates the confusable characters inside one token.
   *
   * @param {string} word - The token
   * @param {number} start - UTF-16 index of the token in its text
   * @returns {Array<{char: string, point: string, column: number}>}
   *  Each confusable with its upper-case hex codepoint (at least four
   *  digits) and its 1-based column
   */
  flag(word, start) {
    const spots = [];
    // `start` is a UTF-16 index (it comes from RegExp match.index), so
    // the position inside the word must be counted in UTF-16 units too.
    // Counting code points would shift the column left by one for every
    // astral character (an emoji, for instance) that precedes the hit.
    let offset = 0;
    for (const char of word) {
      if (this.confusable.test(char)) {
        const point = char
          .codePointAt(0)
          .toString(16)
          .toUpperCase()
          .padStart(4, '0');
        spots.push({char, point, column: start + offset + 1});
      }
      offset += char.length;
    }
    return spots;
  }
}
81
+
82
+ module.exports = Homoglyph;
@@ -49,6 +49,26 @@ const ToolClarity = require('./tool-clarity');
49
49
  const CounterExample = require('./counter-example');
50
50
  const Rationale = require('./rationale');
51
51
  const SelfContained = require('./self-contained');
52
+ const Quantifier = require('./quantifier');
53
+ const WeakVerb = require('./weak-verb');
54
+ const Default = require('./default');
55
+ const MetaReference = require('./meta-reference');
56
+ const AmbiguousOr = require('./ambiguous-or');
57
+ const ExternalLink = require('./external-link');
58
+ const Conditional = require('./conditional');
59
+ const Transition = require('./transition');
60
+ const Placement = require('./placement');
61
+ const InlineCode = require('./inline-code');
62
+ const Emoji = require('./emoji');
63
+ const Homoglyph = require('./homoglyph');
64
+ const DuplicateSection = require('./duplicate-section');
65
+ const DescriptionVoice = require('./description-voice');
66
+ const ExampleFormat = require('./example-format');
67
+ const DescriptionLength = require('./description-length');
68
+ const Scope = require('./scope');
69
+ const HiddenChar = require('./hidden-char');
70
+ const Units = require('./units');
71
+ const FenceLanguage = require('./fence-language');
52
72
 
53
73
  module.exports = () => [
54
74
  new Grouped(),
@@ -92,6 +112,26 @@ module.exports = () => [
92
112
  new CounterExample(),
93
113
  new Rationale(),
94
114
  new SelfContained(),
115
+ new Quantifier(),
116
+ new WeakVerb(),
117
+ new Default(),
118
+ new MetaReference(),
119
+ new AmbiguousOr(),
120
+ new ExternalLink(),
121
+ new Conditional(),
122
+ new Transition(),
123
+ new Placement(),
124
+ new InlineCode(),
125
+ new Emoji(),
126
+ new Homoglyph(),
127
+ new DuplicateSection(),
128
+ new DescriptionVoice(),
129
+ new ExampleFormat(),
130
+ new DescriptionLength(),
131
+ new Scope(),
132
+ new HiddenChar(),
133
+ new Units(),
134
+ new FenceLanguage(),
95
135
  new Unique(),
96
136
  new Frontmatter(
97
137
  'SKILL.md',
@@ -0,0 +1,79 @@
1
+ /*
2
+ * SPDX-FileCopyrightText: Copyright (c) 2026 Yegor Bugayenko
3
+ * SPDX-License-Identifier: MIT
4
+ */
5
+
6
+ 'use strict';
7
+
8
+ const Violation = require('../violation');
9
+ const Region = require('../region');
10
+ const mask = require('../mask');
11
+
/**
 * Shapes of a bare literal, in order: a known shell command followed by
 * one argument, a slashed path, a filename with a known extension, and
 * a CLI flag (long "--name" or single-letter "-x"). All are global
 * regexes shared by every scan, so their lastIndex is stateful.
 */
const PATTERNS = [
  /\b(?:npm|npx|node|git|eslint|mocha|yarn|pnpm|cd|rm|mkdir|chmod|cat|sed|grep|curl|docker)\s+[\w./-]+/gu,
  /(?<![\w/.@])[\w-]+(?:\/[\w.-]+)+/gu,
  /(?<![\w/.@])[\w-]+\.(?:js|ts|jsx|tsx|json|md|ya?ml|sh|py|rb|go|rs|toml|cfg|lock|txt|xml|html|css)\b/gu,
  /(?<![\w-])(?:--[A-Za-z][\w-]*|-[A-Za-z])(?![\w])/gu
];

/**
 * InlineCode.
 *
 * When a command, path, filename, or flag sits bare in prose, the model
 * cannot cleanly tell the literal token from the surrounding words and
 * may reword or reformat it. This check warns about every bare literal
 * matched by PATTERNS in prose, after the text went through mask() (a
 * project helper which, per this rule's description, hides inline-code
 * spans so an already-backticked literal passes). A match lying fully
 * inside another match is dropped, so one literal yields one warning.
 * The lookbehinds exclude a preceding "@", leaving @-imports to another
 * rule. prompt() hands borderline literals to the AI oracle.
 */
class InlineCode {
  constructor() {
    this.id = 'inline-code';
  }

  /**
   * @returns {string} Instruction for the AI oracle, prefixed with the rule id
   */
  prompt() {
    return `${this.id}: flag a bare literal token (command, path, filename, or flag) that should be wrapped in backticks, judging borderline cases`;
  }

  /**
   * Scans the prose fragments of the document.
   *
   * @param {object} document - Must offer uri() and walk(handlers)
   * @returns {Array} Whatever walk() builds from the handlers' violations
   */
  violations(document) {
    const uri = document.uri();
    return document.walk({
      header: () => [],
      prose: (text, line) => this.scan(text, line, uri),
      snippet: () => [],
      bullets: () => [],
      frontmatter: () => []
    });
  }

  /**
   * Reports each bare literal of one prose fragment.
   *
   * @param {string} text - Fragment text
   * @param {number} line - Line handed to each Region
   * @param {*} uri - Document identifier handed to each Region
   * @returns {Array} One warning per surviving span, ordered by position,
   *  column is the 1-based index of the span in the masked text
   */
  scan(text, line, uri) {
    const masked = mask(text);
    const spans = [];
    PATTERNS.forEach((pattern) => {
      // The patterns are module-level and global, hence shared by all
      // instances and calls. Search from the start every time, so a
      // scan interrupted earlier cannot make this one skip matches.
      pattern.lastIndex = 0;
      let hit = pattern.exec(masked);
      while (hit !== null) {
        spans.push({token: hit[0], from: hit.index, to: hit.index + hit[0].length});
        hit = pattern.exec(masked);
      }
    });
    return InlineCode.prune(spans).map((span) => new Violation(
      this.id,
      'warning',
      `literal "${span.token}" must be wrapped in backticks`,
      new Region(uri, line, span.from + 1)
    ));
  }

  /**
   * Drops every span that lies fully inside an already kept one.
   *
   * @param {Array<{token: string, from: number, to: number}>} spans - Raw matches
   * @returns {Array} Surviving spans sorted by start; the input is not mutated
   */
  static prune(spans) {
    // Earlier start first, and for equal starts the longer span first,
    // so a container is always visited before anything it contains.
    const ordered = spans.slice().sort((one, two) => one.from - two.from || two.to - one.to);
    const kept = [];
    ordered.forEach((span) => {
      if (!kept.some((other) => span.from >= other.from && span.to <= other.to)) {
        kept.push(span);
      }
    });
    return kept;
  }
}
78
+
79
+ module.exports = InlineCode;
@@ -37,12 +37,20 @@ const ALLOWLIST = new Set([
37
37
  'CLAUDE'
38
38
  ]);
39
39
 
/**
 * Spells the acronym a gloss would expand: the upper-cased first letter
 * of every run of ASCII letters in it, joined together. Anything that
 * is not an ASCII letter acts as a separator, so "Arrange-Act-Assert"
 * gives "AAA". A gloss without letters gives an empty string.
 */
const initials = (gloss) => {
  const words = gloss.match(/[A-Za-z]+/gu);
  if (words === null) {
    return '';
  }
  return words
    .map((word) => word.charAt(0).toUpperCase())
    .join('');
};
43
+
40
44
  const defined = (masked) => {
41
45
  const found = new Set();
42
- const regex = /\b(?<acronym>[A-Z]{2,})\s*\(/gu;
46
+ const regex = /\b(?<acronym>[A-Z]{2,})\s*\(|\((?<gloss>[^)]+)\)/gu;
43
47
  let hit = regex.exec(masked);
44
48
  while (hit !== null) {
45
- found.add(hit.groups.acronym);
49
+ if (hit.groups.acronym) {
50
+ found.add(hit.groups.acronym);
51
+ } else {
52
+ found.add(initials(hit.groups.gloss));
53
+ }
46
54
  hit = regex.exec(masked);
47
55
  }
48
56
  return found;
@@ -56,8 +64,10 @@ const undefining = (acronym, scope) => !scope.known.has(acronym) &&
56
64
  *
57
65
  * Flags an acronym that lands in prose without ever being expanded. An
58
66
  * acronym counts as defined when the document, anywhere, follows it with
59
- * a parenthetical gloss, as in "RBAC (role-based access control)", so a
60
- * single expansion licenses every later mention. Well-known acronyms sit
67
+ * a parenthetical gloss, as in "RBAC (role-based access control)", or when
68
+ * a parenthetical's word initials spell it, as in "AAA pattern
69
+ * (Arrange-Act-Assert)", so a single expansion licenses every later
70
+ * mention. Well-known acronyms sit
61
71
  * in a built-in allowlist and pass untouched. Only the first unexpanded
62
72
  * occurrence of each acronym is reported. Its prompt hands non-acronym
63
73
  * domain jargon, the rare nouns a reader cannot parse, to the AI oracle.
@@ -0,0 +1,57 @@
1
+ /*
2
+ * SPDX-FileCopyrightText: Copyright (c) 2026 Yegor Bugayenko
3
+ * SPDX-License-Identifier: MIT
4
+ */
5
+
6
+ 'use strict';
7
+
8
+ const Violation = require('../violation');
9
+ const Region = require('../region');
10
+ const mask = require('../mask');
11
+
/**
 * MetaReference.
 *
 * Flags self-referential framing of the model or the document, such as
 * "as an AI", "you are a model", "this prompt", or "these instructions".
 * Such framing narrates the setup instead of issuing a command, so it
 * adds no instruction and earns deletion. The fixed phrase list is
 * matched case-insensitively on word boundaries, in prose only, after
 * the text went through mask() (a project helper; presumably it hides
 * inline code as it does for sibling rules — confirm). prompt() asks
 * the AI oracle to catch framing beyond the fixed list.
 */
class MetaReference {
  constructor() {
    this.id = 'meta-reference';
    this.phrase = /\b(?:as an ai|as a language model|you are an ai|you are a model|this prompt|these instructions|this manifesto|the system prompt)\b/giu;
  }

  /**
   * @returns {string} Instruction for the AI oracle, prefixed with the rule id
   */
  prompt() {
    return `${this.id}: flag self-referential framing of the model or document beyond the fixed list, and delete it`;
  }

  /**
   * Scans the prose fragments of the document.
   *
   * @param {object} document - Must offer uri() and walk(handlers)
   * @returns {Array} Whatever walk() builds from the handlers' violations
   */
  violations(document) {
    const uri = document.uri();
    return document.walk({
      header: () => [],
      prose: (text, line) => this.scan(text, line, uri),
      snippet: () => [],
      bullets: () => [],
      frontmatter: () => []
    });
  }

  /**
   * Reports each listed phrase found in one prose fragment.
   *
   * @param {string} text - Fragment text
   * @param {number} line - Line handed to each Region
   * @param {*} uri - Document identifier handed to each Region
   * @returns {Array} One warning per phrase, quoting it as written,
   *  column is the 1-based index of the phrase in the masked text
   */
  scan(text, line, uri) {
    const masked = mask(text);
    const out = [];
    // The pattern is global and shared by every call on this instance,
    // so it remembers where the previous search stopped. Start from
    // zero, otherwise a scan interrupted earlier (or any outside use of
    // the regex) would make this one skip the beginning of the text.
    this.phrase.lastIndex = 0;
    let hit = this.phrase.exec(masked);
    while (hit !== null) {
      out.push(new Violation(
        this.id,
        'warning',
        `meta self-reference "${hit[0]}" issues no command, delete it`,
        new Region(uri, line, hit.index + 1)
      ));
      hit = this.phrase.exec(masked);
    }
    return out;
  }
}
56
+
57
+ module.exports = MetaReference;