@yegor256/dogent 0.12.1 → 0.12.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -53,7 +53,7 @@ Most rewrite prompts for you or score a file, while we enforce
53
53
  Run it on any manifesto file, no installation required:
54
54
 
55
55
  ```bash
56
- npx @yegor256/dogent@0.11.0 SKILL.md
56
+ npx @yegor256/dogent@0.12.1 SKILL.md
57
57
  ```
58
58
 
59
59
  Point it at a directory to lint the default manifestos it holds
@@ -266,7 +266,7 @@ Reference `dogent` as a remote hook in `.pre-commit-config.yaml`:
266
266
  ```yaml
267
267
  repos:
268
268
  - repo: https://github.com/yegor256/dogent
269
- rev: 0.11.0
269
+ rev: 0.12.1
270
270
  hooks:
271
271
  - id: dogent
272
272
  ```
package/package.json CHANGED
@@ -40,7 +40,7 @@
40
40
  "lint": "eslint .",
41
41
  "test": "mocha 'test/**/*.js' --timeout 60000"
42
42
  },
43
- "version": "0.12.1",
43
+ "version": "0.12.3",
44
44
  "dependencies": {
45
45
  "minimist": "^1.2.8",
46
46
  "pretty-ms": "^7.0.1"
package/src/dogent.js CHANGED
@@ -73,11 +73,17 @@ const scan = () => {
73
73
  return [];
74
74
  };
75
75
  const scanned = scan();
76
- scanned.forEach((file) => process.stderr.write(`Scanning ${file}\n`));
76
+ const bodies = new Map(scanned.map((file) => [file, fs.readFileSync(file, 'utf8')]));
77
+ scanned.forEach((file) => {
78
+ const body = bodies.get(file);
79
+ const lines = body === '' ? 0 : body.split('\n').length - (body.endsWith('\n') ? 1 : 0);
80
+ const bytes = Buffer.byteLength(body);
81
+ process.stderr.write(`Scanning ${file} (${lines} lines, ${bytes} bytes)\n`);
82
+ });
77
83
  const checks = rules();
78
84
  process.stderr.write(`${scanned.length} files scanned, ${checks.length} rules applied\n`);
79
85
  const documents = scanned.map(
80
- (file) => new Markdown(file, fs.readFileSync(file, 'utf8')).document()
86
+ (file) => new Markdown(file, bodies.get(file)).document()
81
87
  );
82
88
  const started = Date.now();
83
89
  const suppressed = args.suppress();
package/src/oracle.js CHANGED
@@ -14,7 +14,9 @@ const Answer = require('./answer');
14
14
  * The AI second opinion. Wraps the rules and a chat endpoint, builds one
15
15
  * prompt from a document, asks the endpoint, and parses the reply into
16
16
  * violations paired with the token usage the model reported. Mirrors a
17
- * rule, but consults a model instead of guessing.
17
+ * rule, but consults a model instead of guessing. Lets each rule veto an
18
+ * oracle flag it knows to be false, so a deterministic guard overrides
19
+ * the model.
18
20
  */
19
21
  class Oracle {
20
22
  constructor(rules, chat) {
@@ -24,7 +26,11 @@ class Oracle {
24
26
  async violations(document) {
25
27
  const reply = await this.chat.answer(new Prompt(this.rules, document).text());
26
28
  return {
27
- found: new Answer(reply.content).violations(),
29
+ found: new Answer(reply.content).violations().filter(
30
+ (violation) => !this.rules.some(
31
+ (rule) => rule.suppress?.(violation, document)
32
+ )
33
+ ),
28
34
  usage: reply.usage
29
35
  };
30
36
  }
@@ -15,6 +15,8 @@ const Region = require('../region');
15
15
  * checker can only guess: it flags lines that open with a pronoun or
16
16
  * end with a question mark, both signs of description, not order. Its
17
17
  * prompt hands the subtler imperative-mood judgement to the AI oracle.
18
+ * A deterministic guard then drops any oracle flag on a line that
19
+ * shows neither sign, so a base-form imperative is never flagged.
18
20
  */
19
21
  class Command {
20
22
  constructor() {
@@ -37,16 +39,7 @@ class Command {
37
39
  });
38
40
  }
39
41
  judge(text, line, uri) {
40
- const clean = text.replace(/^\s*(?:[-*+]|\d+\.)\s+/u, '').trim();
41
- if (clean === '') {
42
- return [];
43
- }
44
- const first = clean
45
- .split(/\s+/u)[0]
46
- .toLowerCase()
47
- .replace(/[^a-z]/gu, '');
48
- const weak = /^(?:i|you|we|they|he|she|it|this|that|these|those|there|here)$/u;
49
- if (!weak.test(first) && clean.slice(-1) !== '?') {
42
+ if (!this.describes(text)) {
50
43
  return [];
51
44
  }
52
45
  return [new Violation(
@@ -56,6 +49,25 @@ class Command {
56
49
  new Region(uri, line, 1)
57
50
  )];
58
51
  }
52
+ suppress(violation, document) {
53
+ if (violation.rule !== this.id) {
54
+ return false;
55
+ }
56
+ const lines = document.text().split('\n');
57
+ return !this.describes(lines[violation.spot.line() - 1] || '');
58
+ }
59
+ describes(text) {
60
+ const clean = text.replace(/^\s*(?:[-*+]|\d+\.)\s+/u, '').trim();
61
+ if (clean === '') {
62
+ return false;
63
+ }
64
+ const first = clean
65
+ .split(/\s+/u)[0]
66
+ .toLowerCase()
67
+ .replace(/[^a-z]/gu, '');
68
+ const weak = /^(?:i|you|we|they|he|she|it|this|that|these|those|there|here)$/u;
69
+ return weak.test(first) || clean.slice(-1) === '?';
70
+ }
59
71
  }
60
72
 
61
73
  module.exports = Command;
@@ -16,6 +16,9 @@ const Region = require('../region');
16
16
  * short or that never names a trigger with the word "when". Its prompt
17
17
  * hands the deeper judgement to the AI oracle, which weighs whether the
18
18
  * description truly names the situations and phrases that activate it.
19
+ * A quoted example phrase is the strongest form of that naming, so a
20
+ * deterministic guard then vetoes any oracle flag on a description that
21
+ * holds both "when" and a quoted phrase, however the model rules.
19
22
  */
20
23
  class DescriptionTriggers {
21
24
  constructor() {
@@ -26,7 +29,7 @@ class DescriptionTriggers {
26
29
  return 'Name the concrete situations and user phrases that should activate the skill in its description, so the loader knows exactly when to invoke it.';
27
30
  }
28
31
  prompt() {
29
- return `${this.id}: in a SKILL.md, flag a description that is too short or fails to name the concrete situations and user phrases that should activate the skill, even when it contains the word "when"`;
32
+ return `${this.id}: in a SKILL.md, flag a description that is too short or fails to name the concrete situations and user phrases that should activate the skill, even when it contains the word "when"; a description that quotes an example user phrase, such as "file this bug", already names a trigger in its strongest form and must never be flagged`;
30
33
  }
31
34
  violations(document) {
32
35
  const uri = document.uri();
@@ -56,6 +59,30 @@ class DescriptionTriggers {
56
59
  }
57
60
  return [];
58
61
  }
62
+ suppress(violation, document) {
63
+ if (violation.rule !== this.id) {
64
+ return false;
65
+ }
66
+ const value = this.description(document);
67
+ return /\bwhen\b/iu.test(value) && this.quoted(value);
68
+ }
69
+ description(document) {
70
+ const pairs = document.walk({
71
+ header: () => [],
72
+ prose: () => [],
73
+ snippet: () => [],
74
+ bullets: () => [],
75
+ frontmatter: (keys) => keys
76
+ });
77
+ const found = pairs.filter((pair) => pair.key === 'description');
78
+ if (found.length === 0) {
79
+ return '';
80
+ }
81
+ return found[0].value.trim();
82
+ }
83
+ quoted(value) {
84
+ return /["'‘’“”][^"'‘’“”]+["'‘’“”]/u.test(value);
85
+ }
59
86
  flag(message, row, uri) {
60
87
  return new Violation(this.id, 'warning', message, new Region(uri, row, 1));
61
88
  }
@@ -35,6 +35,21 @@ class NoArticles {
35
35
  frontmatter: () => []
36
36
  });
37
37
  }
38
+ suppress(violation, document) {
39
+ if (violation.rule !== this.id) {
40
+ return false;
41
+ }
42
+ return this.headers(document).has(violation.spot.line());
43
+ }
44
+ headers(document) {
45
+ return new Set(document.walk({
46
+ header: (text, line) => [line],
47
+ prose: () => [],
48
+ snippet: () => [],
49
+ bullets: () => [],
50
+ frontmatter: () => []
51
+ }));
52
+ }
38
53
  scan(text, line, uri) {
39
54
  const found = [];
40
55
  const masked = mask(text);
@@ -21,6 +21,10 @@ const mask = require('../mask');
21
21
  * oracle, which rewrites a prohibition with no keyword as a positive
22
22
  * command. The prompt demands an actual negation before flagging, so
23
23
  * an affirmative imperative that already states what to do stays clean.
24
+ * Because the model still misreads plain imperatives as bans, a
25
+ * deterministic guard then drops any oracle flag on a line that carries
26
+ * no negation token at all, so an affirmative imperative can never be
27
+ * reported regardless of what the model returns.
24
28
  */
25
29
  class Positive {
26
30
  constructor() {
@@ -42,6 +46,17 @@ class Positive {
42
46
  frontmatter: () => []
43
47
  });
44
48
  }
49
+ suppress(violation, document) {
50
+ if (violation.rule !== this.id) {
51
+ return false;
52
+ }
53
+ const lines = document.text().split('\n');
54
+ return !this.negated(lines[violation.spot.line() - 1] || '');
55
+ }
56
+ negated(text) {
57
+ const regex = /\b(?:do not|don't|never|avoid|refrain from|must not|no longer|no|not)\b/iu;
58
+ return regex.test(mask(text));
59
+ }
45
60
  scan(text, line, uri) {
46
61
  const regex = /^(?<marker>\s*(?:[-*+]|\d+\.)\s+)?(?:do not|don't|never|avoid|refrain from|must not|no longer)\b/iu;
47
62
  const hit = regex.exec(mask(text));
@@ -44,7 +44,7 @@ class Untrusted {
44
44
  }
45
45
  scan(text, line, uri) {
46
46
  const masked = mask(text);
47
- const verb = /\b(?:read|fetch|open|follow|execute)\b/iu;
47
+ const verb = /(?<!-)\b(?:read|fetch|open|follow|execute)\b(?!-)/iu;
48
48
  const source = /\b(?:page|url|link|email|file|issue|output|comment)\b/iu;
49
49
  const guard = /\b(?:as data|do not follow|treat as untrusted|inside delimiters|untrusted)\b/iu;
50
50
  if (!verb.test(masked) || !source.test(masked) || guard.test(masked)) {
package/src/version.js CHANGED
@@ -9,8 +9,8 @@
9
9
  * Version.
10
10
  *
11
11
  * The current release of dogent, replaced on every release by rultor.
12
- * The default `0.12.1` marks an unreleased build straight from source.
12
+ * The default `0.12.3` marks an unreleased build straight from source.
13
13
  */
14
- const version = '0.12.1';
14
+ const version = '0.12.3';
15
15
 
16
16
  module.exports = version;