@faircopy/rules-nlp 1.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,16 @@
1
import { Rule } from '@faircopy/core';

/** Options for the `no-filter-words` rule. */
interface NoFilterWordsOptions {
    // Phrases to flag; when omitted the rule falls back to its built-in defaults.
    phrases?: string[];
}
/** Rule that bans filter phrases (e.g. "I think", "basically") which distance the claim from the reader. */
declare const noFilterWords: Rule<NoFilterWordsOptions>;

/** Options for the `no-passive-voice` rule. */
interface NoPassiveVoiceOptions {
    // Auxiliary verbs that may open a flagged construction; defaults to common "to be" forms.
    allowedAuxiliaries?: string[];
}
/** Rule that flags likely passive-voice constructions via POS-tag patterns. */
declare const noPassiveVoice: Rule<NoPassiveVoiceOptions>;

/** All NLP rules keyed by their rule ID. */
declare const ruleRegistry: Map<string, Rule>;

export { type NoFilterWordsOptions, type NoPassiveVoiceOptions, noFilterWords, noPassiveVoice, ruleRegistry };
package/dist/index.js ADDED
@@ -0,0 +1,117 @@
1
+ // src/utils.ts
2
+ import nlp from "compromise";
3
/**
 * Wrap raw text in a compromise document so rules can run `.match()` queries on it.
 * @param {string} input - The plain text to analyze.
 * @returns The compromise document view for `input`.
 */
function createDoc(input) {
  return nlp(input);
}
6
/**
 * Convert a compromise match view into absolute character occurrences.
 *
 * Each usable entry yields `{ text, start, end }` where `start`/`end` are
 * offsets into `text` (end exclusive). Entries missing a numeric start or a
 * positive numeric length are silently dropped.
 *
 * @param {string} text - The original source text the matches came from.
 * @param matches - A compromise view; only its `.json()` method is used.
 * @returns Array of `{ text, start, end }` occurrence records.
 */
function getMatchOccurrences(text, matches) {
  const entries = matches.json({ offset: true, text: true, terms: { offset: true } });
  const occurrences = [];
  for (const entry of entries) {
    // Prefer the entry-level offset; fall back to reconstructing it from terms.
    const startOffset = entry.offset?.start ?? entry.terms?.[0]?.offset?.start;
    const matchLength = entry.offset?.length ?? sumTermLengths(entry.terms);
    if (typeof startOffset !== "number" || typeof matchLength !== "number" || matchLength <= 0) {
      continue;
    }
    occurrences.push({
      text: entry.text ?? text.slice(startOffset, startOffset + matchLength),
      start: startOffset,
      end: startOffset + matchLength
    });
  }
  return occurrences;
}

/**
 * Sum the per-term lengths of a match entry.
 * Returns `undefined` when terms are absent/empty or any term lacks a numeric length.
 */
function sumTermLengths(terms) {
  if (!terms?.length) return undefined;
  let total = 0;
  for (const term of terms) {
    const termLength = term.offset?.length;
    if (typeof termLength !== "number") return undefined;
    total += termLength;
  }
  return total;
}
31
+
32
+ // src/no-filter-words.ts
33
// src/no-filter-words.ts
// Built-in filter phrases used when the caller supplies none.
const DEFAULT_PHRASES = ["I think", "it seems", "basically", "in order to"];

/**
 * Rule: ban filter phrases that distance the claim from the reader.
 * Matches each configured phrase via compromise and maps occurrences back
 * through `sourceMap` to original-document offsets.
 */
const noFilterWords = {
  id: "no-filter-words",
  description: "Ban filter phrases that distance the claim from the reader",
  defaults: { phrases: DEFAULT_PHRASES },
  help: "Filter phrases announce a perspective or pad the sentence instead of making the point. Delete the phrase or rewrite the sentence so the claim stands on its own.",
  check({ text, sourceMap, options }) {
    // Empty/missing phrase list falls back to the defaults.
    const phraseList = options.phrases?.length ? options.phrases : DEFAULT_PHRASES;
    const doc = createDoc(text);
    const diagnostics = [];
    for (const phrase of phraseList) {
      for (const occurrence of getMatchOccurrences(text, doc.match(phrase))) {
        // Translate stripped-text offsets to source offsets; skip unmapped spans.
        const mappedStart = sourceMap[occurrence.start];
        const mappedEnd = sourceMap[occurrence.end - 1];
        if (mappedStart === undefined || mappedEnd === undefined) continue;
        diagnostics.push({
          ruleId: "no-filter-words",
          severity: "error",
          message: `remove "${occurrence.text.toLowerCase()}" \u2014 state the claim directly`,
          range: { start: mappedStart, end: mappedEnd + 1 },
          help: noFilterWords.help
        });
      }
    }
    return diagnostics;
  }
};
66
+
67
+ // src/no-passive-voice.ts
68
// src/no-passive-voice.ts
// "To be" forms accepted as the leading auxiliary of a flagged construction.
const DEFAULT_ALLOWED_AUXILIARIES = ["is", "are", "was", "were", "be", "been", "being"];

/**
 * Rule: flag likely passive-voice constructions.
 * Uses the compromise POS pattern "(#Copula|#Auxiliary) #PastTense" and keeps
 * only matches whose first word is one of the allowed auxiliaries.
 */
const noPassiveVoice = {
  id: "no-passive-voice",
  description: "Flag likely passive-voice constructions using POS tagging patterns",
  defaults: { allowedAuxiliaries: DEFAULT_ALLOWED_AUXILIARIES },
  help: "Passive voice often hides the actor and adds drag. Prefer naming who did the action unless the actor genuinely does not matter.",
  check({ text, sourceMap, options }) {
    const allowed = options.allowedAuxiliaries?.length ? options.allowedAuxiliaries : DEFAULT_ALLOWED_AUXILIARIES;
    const auxiliarySet = new Set(allowed.map((word) => word.toLowerCase()));
    const doc = createDoc(text);
    const diagnostics = [];
    for (const occurrence of getMatchOccurrences(text, doc.match("(#Copula|#Auxiliary) #PastTense"))) {
      const tokens = occurrence.text.trim().split(/\s+/);
      // Require at least "aux + verb" and an allowed leading auxiliary.
      if (tokens.length < 2) continue;
      if (!auxiliarySet.has(tokens[0].toLowerCase())) continue;
      // Translate stripped-text offsets to source offsets; skip unmapped spans.
      const mappedStart = sourceMap[occurrence.start];
      const mappedEnd = sourceMap[occurrence.end - 1];
      if (mappedStart === undefined || mappedEnd === undefined) continue;
      diagnostics.push({
        ruleId: "no-passive-voice",
        severity: "warn",
        message: `rewrite passive construction "${occurrence.text}" with a named actor`,
        range: { start: mappedStart, end: mappedEnd + 1 },
        help: noPassiveVoice.help
      });
    }
    // Overlapping POS matches can produce duplicate ranges; drop repeats.
    return dedupeDiagnostics(diagnostics);
  }
};
97
/**
 * Drop diagnostics whose `range` duplicates an earlier diagnostic's range.
 * Keeps the first occurrence of each `start:end` pair; order is otherwise preserved.
 * @param diagnostics - Diagnostics to filter.
 * @returns A new array containing only range-unique diagnostics.
 */
function dedupeDiagnostics(diagnostics) {
  const seenRanges = new Set();
  const unique = [];
  for (const diagnostic of diagnostics) {
    const rangeKey = `${diagnostic.range.start}:${diagnostic.range.end}`;
    if (!seenRanges.has(rangeKey)) {
      seenRanges.add(rangeKey);
      unique.push(diagnostic);
    }
  }
  return unique;
}
106
+
107
+ // src/index.ts
108
+ var ruleRegistry = /* @__PURE__ */ new Map([
109
+ ["no-filter-words", noFilterWords],
110
+ ["no-passive-voice", noPassiveVoice]
111
+ ]);
112
+ export {
113
+ noFilterWords,
114
+ noPassiveVoice,
115
+ ruleRegistry
116
+ };
117
+ //# sourceMappingURL=index.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"sources":["../src/utils.ts","../src/no-filter-words.ts","../src/no-passive-voice.ts","../src/index.ts"],"sourcesContent":["import nlp from 'compromise'\nimport type { DocView, JsonOffsetEntry, JsonOffsetTerm, MatchView } from './types.js'\n\nexport interface MatchOccurrence {\n text: string\n start: number\n end: number\n}\n\nexport function createDoc(text: string): DocView {\n return nlp(text) as unknown as DocView\n}\n\nexport function getMatchOccurrences(text: string, matches: MatchView): MatchOccurrence[] {\n const json = matches.json({ offset: true, text: true, terms: { offset: true } }) as JsonOffsetEntry[]\n\n return json.flatMap((entry) => {\n const start = entry.offset?.start ?? entry.terms?.[0]?.offset?.start\n const length = entry.offset?.length ?? sumTermLengths(entry.terms)\n\n if (typeof start !== 'number' || typeof length !== 'number' || length <= 0) {\n return []\n }\n\n return [{\n text: entry.text ?? text.slice(start, start + length),\n start,\n end: start + length,\n }]\n })\n}\n\nfunction sumTermLengths(terms: JsonOffsetTerm[] | undefined): number | undefined {\n if (!terms?.length) return undefined\n\n let total = 0\n for (const term of terms) {\n const length = term.offset?.length\n if (typeof length !== 'number') return undefined\n total += length\n }\n\n return total\n}\n","import type { Diagnostic, Rule, RuleInput } from '@faircopy/core'\nimport { createDoc, getMatchOccurrences } from './utils.js'\n\nexport interface NoFilterWordsOptions {\n phrases?: string[]\n}\n\nconst DEFAULT_PHRASES = [\n 'I think',\n 'it seems',\n 'basically',\n 'in order to',\n]\n\nexport const noFilterWords: Rule<NoFilterWordsOptions> = {\n id: 'no-filter-words',\n description: 'Ban filter phrases that distance the claim from the reader',\n defaults: { phrases: DEFAULT_PHRASES },\n help: 'Filter phrases announce a perspective or pad the sentence instead of making the point. 
Delete the phrase or rewrite the sentence so the claim stands on its own.',\n\n check({ text, sourceMap, options }: RuleInput<NoFilterWordsOptions>): Diagnostic[] {\n const diagnostics: Diagnostic[] = []\n const phrases = options.phrases?.length ? options.phrases : DEFAULT_PHRASES\n const doc = createDoc(text)\n\n for (const phrase of phrases) {\n const matches = doc.match(phrase)\n for (const occurrence of getMatchOccurrences(text, matches)) {\n const start = sourceMap[occurrence.start]\n const end = sourceMap[occurrence.end - 1]\n if (start === undefined || end === undefined) continue\n\n diagnostics.push({\n ruleId: 'no-filter-words',\n severity: 'error',\n message: `remove \"${occurrence.text.toLowerCase()}\" — state the claim directly`,\n range: { start, end: end + 1 },\n help: noFilterWords.help,\n })\n }\n }\n\n return diagnostics\n },\n}\n","import type { Diagnostic, Rule, RuleInput } from '@faircopy/core'\nimport { createDoc, getMatchOccurrences } from './utils.js'\n\nexport interface NoPassiveVoiceOptions {\n allowedAuxiliaries?: string[]\n}\n\nconst DEFAULT_ALLOWED_AUXILIARIES = ['is', 'are', 'was', 'were', 'be', 'been', 'being']\n\nexport const noPassiveVoice: Rule<NoPassiveVoiceOptions> = {\n id: 'no-passive-voice',\n description: 'Flag likely passive-voice constructions using POS tagging patterns',\n defaults: { allowedAuxiliaries: DEFAULT_ALLOWED_AUXILIARIES },\n help: 'Passive voice often hides the actor and adds drag. Prefer naming who did the action unless the actor genuinely does not matter.',\n\n check({ text, sourceMap, options }: RuleInput<NoPassiveVoiceOptions>): Diagnostic[] {\n const diagnostics: Diagnostic[] = []\n const auxiliaries = new Set((options.allowedAuxiliaries?.length ? 
options.allowedAuxiliaries : DEFAULT_ALLOWED_AUXILIARIES).map(value => value.toLowerCase()))\n const doc = createDoc(text)\n const matches = doc.match('(#Copula|#Auxiliary) #PastTense')\n\n for (const occurrence of getMatchOccurrences(text, matches)) {\n const words = occurrence.text.trim().split(/\\s+/)\n if (words.length < 2) continue\n if (!auxiliaries.has(words[0]!.toLowerCase())) continue\n\n const start = sourceMap[occurrence.start]\n const end = sourceMap[occurrence.end - 1]\n if (start === undefined || end === undefined) continue\n\n diagnostics.push({\n ruleId: 'no-passive-voice',\n severity: 'warn',\n message: `rewrite passive construction \"${occurrence.text}\" with a named actor`,\n range: { start, end: end + 1 },\n help: noPassiveVoice.help,\n })\n }\n\n return dedupeDiagnostics(diagnostics)\n },\n}\n\nfunction dedupeDiagnostics(diagnostics: Diagnostic[]): Diagnostic[] {\n const seen = new Set<string>()\n return diagnostics.filter((diagnostic) => {\n const key = `${diagnostic.range.start}:${diagnostic.range.end}`\n if (seen.has(key)) return false\n seen.add(key)\n return true\n })\n}\n","import type { Rule } from '@faircopy/core'\nimport { noFilterWords } from './no-filter-words.js'\nimport { noPassiveVoice } from './no-passive-voice.js'\n\nexport { noFilterWords } from './no-filter-words.js'\nexport { noPassiveVoice } from './no-passive-voice.js'\nexport type { NoFilterWordsOptions } from './no-filter-words.js'\nexport type { NoPassiveVoiceOptions } from './no-passive-voice.js'\n\n/** All NLP rules keyed by their rule ID. 
*/\nexport const ruleRegistry: Map<string, Rule> = new Map([\n ['no-filter-words', noFilterWords as Rule],\n ['no-passive-voice', noPassiveVoice as Rule],\n])\n"],"mappings":";AAAA,OAAO,SAAS;AAST,SAAS,UAAU,MAAuB;AAC/C,SAAO,IAAI,IAAI;AACjB;AAEO,SAAS,oBAAoB,MAAc,SAAuC;AACvF,QAAM,OAAO,QAAQ,KAAK,EAAE,QAAQ,MAAM,MAAM,MAAM,OAAO,EAAE,QAAQ,KAAK,EAAE,CAAC;AAE/E,SAAO,KAAK,QAAQ,CAAC,UAAU;AAC7B,UAAM,QAAQ,MAAM,QAAQ,SAAS,MAAM,QAAQ,CAAC,GAAG,QAAQ;AAC/D,UAAM,SAAS,MAAM,QAAQ,UAAU,eAAe,MAAM,KAAK;AAEjE,QAAI,OAAO,UAAU,YAAY,OAAO,WAAW,YAAY,UAAU,GAAG;AAC1E,aAAO,CAAC;AAAA,IACV;AAEA,WAAO,CAAC;AAAA,MACN,MAAM,MAAM,QAAQ,KAAK,MAAM,OAAO,QAAQ,MAAM;AAAA,MACpD;AAAA,MACA,KAAK,QAAQ;AAAA,IACf,CAAC;AAAA,EACH,CAAC;AACH;AAEA,SAAS,eAAe,OAAyD;AAC/E,MAAI,CAAC,OAAO,OAAQ,QAAO;AAE3B,MAAI,QAAQ;AACZ,aAAW,QAAQ,OAAO;AACxB,UAAM,SAAS,KAAK,QAAQ;AAC5B,QAAI,OAAO,WAAW,SAAU,QAAO;AACvC,aAAS;AAAA,EACX;AAEA,SAAO;AACT;;;ACpCA,IAAM,kBAAkB;AAAA,EACtB;AAAA,EACA;AAAA,EACA;AAAA,EACA;AACF;AAEO,IAAM,gBAA4C;AAAA,EACvD,IAAI;AAAA,EACJ,aAAa;AAAA,EACb,UAAU,EAAE,SAAS,gBAAgB;AAAA,EACrC,MAAM;AAAA,EAEN,MAAM,EAAE,MAAM,WAAW,QAAQ,GAAkD;AACjF,UAAM,cAA4B,CAAC;AACnC,UAAM,UAAU,QAAQ,SAAS,SAAS,QAAQ,UAAU;AAC5D,UAAM,MAAM,UAAU,IAAI;AAE1B,eAAW,UAAU,SAAS;AAC5B,YAAM,UAAU,IAAI,MAAM,MAAM;AAChC,iBAAW,cAAc,oBAAoB,MAAM,OAAO,GAAG;AAC3D,cAAM,QAAQ,UAAU,WAAW,KAAK;AACxC,cAAM,MAAM,UAAU,WAAW,MAAM,CAAC;AACxC,YAAI,UAAU,UAAa,QAAQ,OAAW;AAE9C,oBAAY,KAAK;AAAA,UACf,QAAQ;AAAA,UACR,UAAU;AAAA,UACV,SAAS,WAAW,WAAW,KAAK,YAAY,CAAC;AAAA,UACjD,OAAO,EAAE,OAAO,KAAK,MAAM,EAAE;AAAA,UAC7B,MAAM,cAAc;AAAA,QACtB,CAAC;AAAA,MACH;AAAA,IACF;AAEA,WAAO;AAAA,EACT;AACF;;;ACrCA,IAAM,8BAA8B,CAAC,MAAM,OAAO,OAAO,QAAQ,MAAM,QAAQ,OAAO;AAE/E,IAAM,iBAA8C;AAAA,EACzD,IAAI;AAAA,EACJ,aAAa;AAAA,EACb,UAAU,EAAE,oBAAoB,4BAA4B;AAAA,EAC5D,MAAM;AAAA,EAEN,MAAM,EAAE,MAAM,WAAW,QAAQ,GAAmD;AAClF,UAAM,cAA4B,CAAC;AACnC,UAAM,cAAc,IAAI,KAAK,QAAQ,oBAAoB,SAAS,QAAQ,qBAAqB,6BAA6B,IAAI,WAAS,MAAM,YAAY,CAAC,CAAC;AAC7J,UAAM,MAAM,UAAU,IAAI;AAC1B,UAAM,UAAU,IAAI,MAAM,iCAAiC;AAE3D,eAAW,cAAc,oBAAoB,MAAM,OAAO,GAAG;AAC3D,YAAM,QAAQ,WAAW,KAAK,KA
AK,EAAE,MAAM,KAAK;AAChD,UAAI,MAAM,SAAS,EAAG;AACtB,UAAI,CAAC,YAAY,IAAI,MAAM,CAAC,EAAG,YAAY,CAAC,EAAG;AAE/C,YAAM,QAAQ,UAAU,WAAW,KAAK;AACxC,YAAM,MAAM,UAAU,WAAW,MAAM,CAAC;AACxC,UAAI,UAAU,UAAa,QAAQ,OAAW;AAE9C,kBAAY,KAAK;AAAA,QACf,QAAQ;AAAA,QACR,UAAU;AAAA,QACV,SAAS,iCAAiC,WAAW,IAAI;AAAA,QACzD,OAAO,EAAE,OAAO,KAAK,MAAM,EAAE;AAAA,QAC7B,MAAM,eAAe;AAAA,MACvB,CAAC;AAAA,IACH;AAEA,WAAO,kBAAkB,WAAW;AAAA,EACtC;AACF;AAEA,SAAS,kBAAkB,aAAyC;AAClE,QAAM,OAAO,oBAAI,IAAY;AAC7B,SAAO,YAAY,OAAO,CAAC,eAAe;AACxC,UAAM,MAAM,GAAG,WAAW,MAAM,KAAK,IAAI,WAAW,MAAM,GAAG;AAC7D,QAAI,KAAK,IAAI,GAAG,EAAG,QAAO;AAC1B,SAAK,IAAI,GAAG;AACZ,WAAO;AAAA,EACT,CAAC;AACH;;;ACzCO,IAAM,eAAkC,oBAAI,IAAI;AAAA,EACrD,CAAC,mBAAmB,aAAqB;AAAA,EACzC,CAAC,oBAAoB,cAAsB;AAC7C,CAAC;","names":[]}
package/package.json ADDED
@@ -0,0 +1,38 @@
1
+ {
2
+ "name": "@faircopy/rules-nlp",
3
+ "version": "1.1.1",
4
+ "description": "Optional NLP-powered ruleset for faircopy using compromise",
5
+ "type": "module",
6
+ "exports": {
7
+ ".": {
8
+ "types": "./dist/index.d.ts",
9
+ "import": "./dist/index.js"
10
+ }
11
+ },
12
+ "files": [
13
+ "dist"
14
+ ],
15
+ "dependencies": {
16
+ "compromise": "^14.15.0",
17
+ "@faircopy/core": "1.1.1"
18
+ },
19
+ "devDependencies": {
20
+ "@types/bun": "latest",
21
+ "tsup": "^8.4.0",
22
+ "typescript": "^5.8.0"
23
+ },
24
+ "publishConfig": {
25
+ "access": "public"
26
+ },
27
+ "license": "MIT",
28
+ "author": "omniaura",
29
+ "repository": {
30
+ "type": "git",
31
+ "url": "https://github.com/omniaura/faircopy"
32
+ },
33
+ "scripts": {
34
+ "build": "tsup",
35
+ "typecheck": "tsc --noEmit",
36
+ "test": "echo 'no tests yet'"
37
+ }
38
+ }