npm - @khanacademy/perseus-linter - Versions diffs - 0.0.0-PR973-20240207204934 → 0.0.0-PR973-20240207213425 - Mend

@khanacademy/perseus-linter 0.0.0-PR973-20240207204934 → 0.0.0-PR973-20240207213425

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (51) hide show

package/.eslintrc.js +12 -0
package/CHANGELOG.md +168 -0
package/package.json +4 -7
package/src/README.md +41 -0
package/src/__tests__/matcher.test.ts +498 -0
package/src/__tests__/rule.test.ts +110 -0
package/src/__tests__/rules.test.ts +548 -0
package/src/__tests__/selector-parser.test.ts +51 -0
package/src/__tests__/tree-transformer.test.ts +444 -0
package/src/index.ts +281 -0
package/src/proptypes.ts +19 -0
package/src/rule.ts +419 -0
package/src/rules/absolute-url.ts +23 -0
package/src/rules/all-rules.ts +71 -0
package/src/rules/blockquoted-math.ts +9 -0
package/src/rules/blockquoted-widget.ts +9 -0
package/src/rules/double-spacing-after-terminal.ts +11 -0
package/src/rules/extra-content-spacing.ts +11 -0
package/src/rules/heading-level-1.ts +13 -0
package/src/rules/heading-level-skip.ts +19 -0
package/src/rules/heading-sentence-case.ts +10 -0
package/src/rules/heading-title-case.ts +68 -0
package/src/rules/image-alt-text.ts +20 -0
package/src/rules/image-in-table.ts +9 -0
package/src/rules/image-spaces-around-urls.ts +34 -0
package/src/rules/image-widget.ts +49 -0
package/src/rules/link-click-here.ts +10 -0
package/src/rules/lint-utils.ts +47 -0
package/src/rules/long-paragraph.ts +13 -0
package/src/rules/math-adjacent.ts +9 -0
package/src/rules/math-align-extra-break.ts +10 -0
package/src/rules/math-align-linebreaks.ts +42 -0
package/src/rules/math-empty.ts +9 -0
package/src/rules/math-font-size.ts +11 -0
package/src/rules/math-frac.ts +9 -0
package/src/rules/math-nested.ts +10 -0
package/src/rules/math-starts-with-space.ts +11 -0
package/src/rules/math-text-empty.ts +9 -0
package/src/rules/math-without-dollars.ts +13 -0
package/src/rules/nested-lists.ts +10 -0
package/src/rules/profanity.ts +9 -0
package/src/rules/table-missing-cells.ts +19 -0
package/src/rules/unbalanced-code-delimiters.ts +13 -0
package/src/rules/unescaped-dollar.ts +9 -0
package/src/rules/widget-in-table.ts +9 -0
package/src/selector.ts +504 -0
package/src/tree-transformer.ts +583 -0
package/src/types.ts +7 -0
package/src/version.ts +10 -0
package/tsconfig-build.json +12 -0
package/tsconfig-build.tsbuildinfo +1 -0

package/src/rule.ts ADDED Viewed

@@ -0,0 +1,419 @@
+/**
+ * The Rule class represents a Perseus lint rule. A Rule instance has a check()
+ * method that takes the same (node, state, content) arguments that a
+ * TreeTransformer traversal callback function does. Call the check() method
+ * during a tree traversal to determine whether the current node of the tree
+ * violates the rule. If there is no violation, then check() returns
+ * null. Otherwise, it returns an object that includes the name of the rule,
+ * an error message, and the start and end positions within the node's content
+ * string of the lint.
+ *
+ * A Perseus lint rule consists of a name, a severity, a selector, a pattern
+ * (RegExp) and two functions. The check() method uses the selector, pattern,
+ * and functions as follows:
+ *
+ * - First, when determining which rules to apply to a particular piece of
+ *   content, each rule can specify an optional function provided in the sixth
+ *   parameter (`applies`) to evaluate whether or not we should be applying
+ *   this rule. If the function returns false, we don't use the rule on this
+ *   content.
+ *
+ * - Next, check() tests whether the node currently being traversed matches
+ *   the selector. If it does not, then the rule does not apply at this node
+ *   and there is no lint and check() returns null.
+ *
+ * - If the selector matched, then check() tests the text content of the node
+ *   (and its children) against the pattern. If the pattern does not match,
+ *   then there is no lint, and check() returns null.
+ *
+ * - If both the selector and pattern match, then check() calls the function
+ *   passing the TraversalState object, the content string for the node, the
+ *   array of nodes returned by the selector match, and the array of strings
+ *   returned by the pattern match. This function can use these arguments to
+ *   implement any kind of lint detection logic it wants. If it determines
+ *   that there is no lint, then it should return null. Otherwise, it should
+ *   return an error message as a string, or an object with `message`, `start`
+ *   and `end` properties. The start and end properties are numbers that mark
+ *   the beginning and end of the problematic content. Note that these numbers
+ *   are relative to the content string passed to the traversal callback, not
+ *   to the entire string that was used to generate the parse tree in the
+ *   first place. TODO(davidflanagan): modify the simple-markdown library to
+ *   have an option to add the text offset of each node to the parse
+ *   tree. This will allow us to pinpoint lint errors within a long string
+ *   of markdown text.
+ *
+ * - If the function returns null, then check() returns null. Otherwise,
+ *   check() returns an object with `rule`, `message`, `start` and `end`
+ *   properties. The value of the `rule` property is the name of the rule,
+ *   which is useful for error reporting purposes.
+ *
+ * The name, severity, selector, pattern and function arguments to the Rule()
+ * constructor are optional, but you may not omit both the selector and the
+ * pattern. If you do not specify a selector, a default selector that matches
+ * any node of type "text" will be used. If you do not specify a pattern, then
+ * any node that matches the selector will be assumed to match the pattern as
+ * well. If you don't pass a function as the fifth argument to the Rule()
+ * constructor, then you must pass an error message string instead. If you do
+ * this, you'll get a default function that unconditionally returns an object
+ * that includes the error message and the start and end indexes of the
+ * portion of the content string that matched the pattern. If you don't pass a
+ * function in the sixth parameter, the rule will be applied in any context.
+ *
+ * One of the design goals of this Rule class is to allow simple lint rules to
+ * be described in JSON files without any JavaScript code. So in addition to
+ * the Rule() constructor, the class also defines a Rule.makeRule() factory
+ * method. This method takes a single object as its argument and expects the
+ * object to have four string properties. The `name` property is passed as the
+ * first argument to the Rule() constructor.  The optional `selector`
+ * property, if specified, is passed to Selector.parse() and the resulting
+ * Selector object is used as the third argument to Rule().  The optional
+ * `pattern` property is converted to a RegExp before being passed as the
+ * fourth argument to Rule(). (See Rule.makePattern() for details on the string
+ * to RegExp conversion). Finally, the `message` property specifies an error
+ * message that is passed as the fifth argument to Rule(). You can also use a
+ * real RegExp as the value of the `pattern` property or define a custom lint
+ * function on the `lint` property instead of setting the `message`
+ * property. Doing either of these things means that your rule description can
+ * no longer be saved in a JSON file, however.
+ *
+ * For example, here are two lint rules defined with Rule.makeRule():
+ *
+ *    let nestedLists = Rule.makeRule({
+ *        name: "nested-lists",
+ *        selector: "list list",
+ *        message: `Nested lists:
+ *    nested lists are hard to read on mobile devices;
+ *    do not use additional indentation.`,
+ *    });
+ *
+ *    let longParagraph = Rule.makeRule({
+ *        name: "long-paragraph",
+ *        selector: "paragraph",
+ *        pattern: /^.{501,}/,
+ *        lint: function(state, content, nodes, match) {
+ *            return `Paragraph too long:
+ *    This paragraph is ${content.length} characters long.
+ *    Shorten it to 500 characters or fewer.`;
+ *        },
+ *    });
+ *
+ * Certain advanced lint rules need additional information about the content
+ * being linted in order to detect lint. For example, a rule to check for
+ * whitespace at the start and end of the URL for an image can't use the
+ * information in the node or content arguments because the markdown parser
+ * strips leading and trailing whitespace when parsing. Nevertheless, these
+ * spaces have been a practical problem for our content translation process, so
+ * in order to check for them, a lint rule needs access to the original
+ * unparsed source text. Similarly, there are various lint rules that check
+ * widget usage. For example, it is easy to write a lint rule to ensure that
+ * images have alt text for images encoded in markdown. But when images are
+ * added to our content via an image widget we also want to be able to check
+ * for alt text. In order to do this, the lint rule needs to be able to look
+ * widgets up by name in the widgets object associated with the parse tree.
+ *
+ * In order to support advanced linting rules like these, the check() method
+ * takes a context object as its optional fourth argument, and passes this
+ * object on to the lint function of each rule. Rules that require extra
+ * context should not assume that they will always get it, and should verify
+ * that the necessary context has been supplied before using it. Currently the
+ * "content" property of the context object is the unparsed source text if
+ * available, and the "widgets" property of the context object is the widget
+ * object associated with that content string in the JSON object that defines
+ * the Perseus article or exercise that is being linted.
+ */
+import {Errors, PerseusError} from "@khanacademy/perseus-error";
+import Selector from "./selector";
+import type {TraversalState, TreeNode} from "./tree-transformer";
+// This represents the type returned by String.match(). It is an
+// array of strings, but also has index:number and input:string properties.
+// TypeScript doesn't handle it well, so we punt and just use any.
+export type PatternMatchType = any;
+// This is the return type of the check() method of a Rule object
+export type RuleCheckReturnType =
+    | {
+          rule: string;
+          message: string;
+          start: number;
+          end: number;
+          severity?: number;
+      }
+    | null
+    | undefined;
+// This is the return type of the lint detection function passed as the 5th
+// argument to the Rule() constructor. It can return null or a string or an
+// object containing a string and two numbers.
+// prettier-ignore
+// (prettier formats this in a way that ka-lint does not like)
+export type LintTesterReturnType = string | {
+    message: string,
+    start: number,
+    end: number
+} | null | undefined;
+export type LintRuleContextObject = any | null | undefined;
+// This is the type of the lint detection function that the Rule() constructor
+// expects as its fifth argument. It is passed the TraversalState object and
+// content string that were passed to check(), the array of nodes returned by
+// the selector match, the array of strings returned by the pattern match, and
+// the context object given to check(). It should return null if no lint is
+// detected or an error message or an object containing an error message.
+export type LintTester = (
+    state: TraversalState,
+    content: string,
+    selectorMatch: ReadonlyArray<TreeNode>,
+    patternMatch: PatternMatchType,
+    context: LintRuleContextObject,
+) => LintTesterReturnType;
+// An optional check to verify whether or not a particular rule should
+// be checked by context. For example, some rules only apply in exercises,
+// and should never be applied to articles. Defaults to true, so if we
+// omit the applies function in a rule, it'll be tested everywhere.
+export type AppliesTester = (context: LintRuleContextObject) => boolean;
+/**
+ * A Rule object describes a Perseus lint rule. See the comment at the top of
+ * this file for detailed description.
+ */
+export default class Rule {
+    name: string; // The name of the rule
+    severity: number; // One of the Rule.Severity values
+    selector: Selector; // The specified selector or the DEFAULT_SELECTOR
+    pattern: RegExp | null | undefined; // A regular expression if one was specified
+    lint: LintTester; // The lint-testing function or a default
+    applies: AppliesTester; // Checks to see if we should apply a rule or not
+    message: string | null | undefined; // The error message for use with the default function
+    // The selector used when the constructor is not given one. It is
+    // assigned right after the class body, at the bottom of this block.
+    static DEFAULT_SELECTOR: Selector;
+    // The comment at the top of this file has detailed docs for
+    // this constructor and its arguments.
+    //
+    // Throws a PerseusError (Errors.InvalidInput) when both the selector and
+    // the pattern are missing. Missing name and severity fall back to
+    // "unnamed rule" and Rule.Severity.BULK_WARNING; a missing applies
+    // function falls back to one that always returns true.
+    constructor(
+        name: string | null | undefined,
+        severity: number | null | undefined,
+        selector: Selector | null | undefined,
+        pattern: RegExp | null | undefined,
+        lint: LintTester | string,
+        applies: AppliesTester,
+    ) {
+        if (!selector && !pattern) {
+            throw new PerseusError(
+                "Lint rules must have a selector or pattern",
+                Errors.InvalidInput,
+                {metadata: {name}},
+            );
+        }
+        this.name = name || "unnamed rule";
+        this.severity = severity || Rule.Severity.BULK_WARNING;
+        this.selector = selector || Rule.DEFAULT_SELECTOR;
+        this.pattern = pattern || null;
+        // If we're called with an error message instead of a function then
+        // use a default function that will return the message.
+        if (typeof lint === "function") {
+            this.lint = lint;
+            this.message = null;
+        } else {
+            this.lint = (...args) => this._defaultLintFunction(...args);
+            this.message = lint;
+        }
+        this.applies =
+            applies ||
+            function () {
+                return true;
+            };
+    }
+    // A factory method for use with rules described in JSON files
+    // See the documentation at the start of this file for details.
+    // The options object supplies name, severity, selector (a string that is
+    // run through Selector.parse()), pattern (a string or RegExp, converted
+    // by Rule.makePattern()), lint or message, and applies.
+    static makeRule(options: any): Rule {
+        return new Rule(
+            options.name,
+            options.severity,
+            options.selector ? Selector.parse(options.selector) : null,
+            Rule.makePattern(options.pattern),
+            options.lint || options.message,
+            options.applies,
+        );
+    }
+    // Check the node n to see if it violates this lint rule.  A return value
+    // of null means there is no lint.  A returned object indicates a lint
+    // error. See the documentation at the top of this file for details.
+    check(
+        node: TreeNode,
+        traversalState: TraversalState,
+        content: string,
+        context: LintRuleContextObject,
+    ): RuleCheckReturnType {
+        // First, see if we match the selector.
+        // If no selector was passed to the constructor, we use a
+        // default selector that matches text nodes.
+        const selectorMatch = this.selector.match(traversalState);
+        // If the selector did not match, then we're done
+        if (!selectorMatch) {
+            return null;
+        }
+        // If the selector matched, then see if the pattern matches
+        let patternMatch;
+        if (this.pattern) {
+            patternMatch = content.match(this.pattern);
+        } else {
+            // If there is no pattern, then just match all of the content.
+            // Use a fake RegExp match object to represent this default match.
+            patternMatch = Rule.FakePatternMatch(content, content, 0);
+        }
+        // If there was a pattern and it didn't match, then we're done
+        if (!patternMatch) {
+            return null;
+        }
+        try {
+            // If we get here, then the selector and pattern have matched
+            // so now we call the lint function to see if there is lint.
+            const error = this.lint(
+                traversalState,
+                content,
+                selectorMatch,
+                patternMatch,
+                context,
+            );
+            if (!error) {
+                return null; // No lint; we're done
+            }
+            if (typeof error === "string") {
+                // If the lint function returned a string we assume it
+                // applies to the entire content of the node and return it.
+                return {
+                    rule: this.name,
+                    severity: this.severity,
+                    message: error,
+                    start: 0,
+                    end: content.length,
+                };
+            }
+            // If the lint function returned an object, then we just
+            // add the rule name to the message, start and end.
+            return {
+                rule: this.name,
+                severity: this.severity,
+                message: error.message,
+                start: error.start,
+                end: error.end,
+            };
+        } catch (e: unknown) {
+            // If the lint function threw an exception we handle that as
+            // a special type of lint. We want the user to see the lint
+            // warning in this case (even though it is out of their control)
+            // so that the bug gets reported. Otherwise we'd never know that
+            // a rule was failing.
+            //
+            // Anything can be thrown, so only read message/stack from real
+            // Error objects and fall back to String() for everything else.
+            const details =
+                e instanceof Error
+                    ? `${e.message}\nStack trace:\n${e.stack}`
+                    : String(e);
+            return {
+                rule: "lint-rule-failure",
+                message: `Exception in rule ${this.name}: ${details}`,
+                start: 0,
+                end: content.length,
+            };
+        }
+    }
+    // This internal method is the default lint function that we use when a
+    // rule is defined without a function. This is useful for rules where the
+    // selector and/or pattern match are enough to indicate lint. This
+    // function unconditionally returns the error message that was passed in
+    // place of a function, but also adds start and end properties that
+    // specify which particular portion of the node content matched the
+    // pattern. Only patternMatch is used; the other parameters exist so the
+    // method has the same shape as a LintTester.
+    _defaultLintFunction(
+        state: TraversalState,
+        content: string,
+        selectorMatch: ReadonlyArray<TreeNode>,
+        patternMatch: PatternMatchType,
+        context: LintRuleContextObject,
+    ): {
+        end: number;
+        message: string;
+        start: number;
+    } {
+        return {
+            message: this.message || "",
+            start: patternMatch.index,
+            end: patternMatch.index + patternMatch[0].length,
+        };
+    }
+    // The makeRule() factory function uses this static method to turn its
+    // argument into a RegExp. If the argument is already a RegExp, we just
+    // return it. Otherwise, we compile it into a RegExp and return that.
+    // The reason this is necessary is that Rule.makeRule() is designed for
+    // use with data from JSON files and JSON files can't include RegExp
+    // literals. Strings passed to this function do not need to be delimited
+    // with / characters unless you want to include flags for the RegExp.
+    // A string that starts with "/" but has no second "/" is not in
+    // /expression/flags form, so it is compiled as-is.
+    //
+    // Examples:
+    //
+    //   input ""        ==> output null
+    //   input /foo/     ==> output /foo/
+    //   input "foo"     ==> output /foo/
+    //   input "/foo/i"  ==> output /foo/i
+    //   input "/foo"    ==> output /\/foo/
+    //
+    static makePattern(
+        pattern?: RegExp | string | null,
+    ): RegExp | null | undefined {
+        if (!pattern) {
+            return null;
+        }
+        if (pattern instanceof RegExp) {
+            return pattern;
+        }
+        if (pattern[0] === "/") {
+            const lastSlash = pattern.lastIndexOf("/");
+            // lastSlash === 0 means the only slash is the leading one. In
+            // that case substring(1, 0) would swap its arguments and the
+            // rest of the string would be misread as flags, so fall through
+            // and compile the whole string instead.
+            if (lastSlash > 0) {
+                const expression = pattern.substring(1, lastSlash);
+                const flags = pattern.substring(lastSlash + 1);
+                return new RegExp(expression, flags);
+            }
+        }
+        return new RegExp(pattern);
+    }
+    // This static method returns a string array with index and input
+    // properties added, in order to simulate the return value of the
+    // String.match() method. We use it when a Rule has no pattern and we
+    // want to simulate a match on the entire content string.
+    static FakePatternMatch(
+        input: string,
+        match: string | null | undefined,
+        index: number,
+    ): PatternMatchType {
+        const result: any = [match];
+        result.index = index;
+        result.input = input;
+        return result;
+    }
+    // Numeric severity levels that rules pass to the constructor.
+    static Severity: {
+        BULK_WARNING: number;
+        ERROR: number;
+        GUIDELINE: number;
+        WARNING: number;
+    } = {
+        ERROR: 1,
+        WARNING: 2,
+        GUIDELINE: 3,
+        BULK_WARNING: 4,
+    };
+}
+Rule.DEFAULT_SELECTOR = Selector.parse("text");

package/src/rules/absolute-url.ts ADDED Viewed

@@ -0,0 +1,23 @@
+import Rule from "../rule";
+import {getHostname} from "./lint-utils";
+export default Rule.makeRule({
+    name: "absolute-url",
+    severity: Rule.Severity.GUIDELINE,
+    selector: "link, image",
+    // Reports a link or image whose target hostname is khanacademy.org or
+    // one of its subdomains; any other hostname produces no lint.
+    lint: function (state, content, nodes, match) {
+        const host = getHostname(nodes[0].target);
+        const pointsAtKhanAcademy =
+            host === "khanacademy.org" || host.endsWith(".khanacademy.org");
+        if (!pointsAtKhanAcademy) {
+            return;
+        }
+        return `Don't use absolute URLs:
+When linking to KA content or images, omit the
+https://www.khanacademy.org URL prefix.
+Use a relative URL beginning with / instead.`;
+    },
+}) as Rule;

package/src/rules/all-rules.ts ADDED Viewed

@@ -0,0 +1,71 @@
+// TODO(davidflanagan):
+// This should probably be converted to use import and to export
+// an object that maps rule names to rules. Also, maybe this should
+// be an auto-generated file with a script that updates it any time
+// we add a new rule?
+import AbsoluteUrl from "./absolute-url";
+import BlockquotedMath from "./blockquoted-math";
+import BlockquotedWidget from "./blockquoted-widget";
+import DoubleSpacingAfterTerminal from "./double-spacing-after-terminal";
+import ExtraContentSpacing from "./extra-content-spacing";
+import HeadingLevel1 from "./heading-level-1";
+import HeadingLevelSkip from "./heading-level-skip";
+import HeadingSentenceCase from "./heading-sentence-case";
+import HeadingTitleCase from "./heading-title-case";
+import ImageAltText from "./image-alt-text";
+import ImageInTable from "./image-in-table";
+import ImageSpacesAroundUrls from "./image-spaces-around-urls";
+import ImageWidget from "./image-widget";
+import LinkClickHere from "./link-click-here";
+import LongParagraph from "./long-paragraph";
+import MathAdjacent from "./math-adjacent";
+import MathAlignExtraBreak from "./math-align-extra-break";
+import MathAlignLinebreaks from "./math-align-linebreaks";
+import MathEmpty from "./math-empty";
+import MathFontSize from "./math-font-size";
+import MathFrac from "./math-frac";
+import MathNested from "./math-nested";
+import MathStartsWithSpace from "./math-starts-with-space";
+import MathTextEmpty from "./math-text-empty";
+import MathWithoutDollars from "./math-without-dollars";
+import NestedLists from "./nested-lists";
+import Profanity from "./profanity";
+import TableMissingCells from "./table-missing-cells";
+import UnbalancedCodeDelimiters from "./unbalanced-code-delimiters";
+import UnescapedDollar from "./unescaped-dollar";
+import WidgetInTable from "./widget-in-table";
+export default [
+    AbsoluteUrl,
+    BlockquotedMath,
+    BlockquotedWidget,
+    DoubleSpacingAfterTerminal,
+    ExtraContentSpacing,
+    HeadingLevel1,
+    HeadingLevelSkip,
+    HeadingSentenceCase,
+    HeadingTitleCase,
+    ImageAltText,
+    ImageInTable,
+    LinkClickHere,
+    LongParagraph,
+    MathAdjacent,
+    MathAlignExtraBreak,
+    MathAlignLinebreaks,
+    MathEmpty,
+    MathFontSize,
+    MathFrac,
+    MathNested,
+    MathStartsWithSpace,
+    MathTextEmpty,
+    NestedLists,
+    TableMissingCells,
+    UnescapedDollar,
+    WidgetInTable,
+    Profanity, // from here on the entries are not in alphabetical order
+    MathWithoutDollars,
+    UnbalancedCodeDelimiters,
+    ImageSpacesAroundUrls,
+    ImageWidget,
+];

package/src/rules/blockquoted-math.ts ADDED Viewed

@@ -0,0 +1,9 @@
+import Rule from "../rule";
+export default Rule.makeRule({
+    name: "blockquoted-math",
+    severity: Rule.Severity.WARNING,
+    selector: "blockQuote math, blockQuote blockMath", // no pattern or lint function: a selector match alone reports the message
+    message: `Blockquoted math:
+math should not be indented.`,
+}) as Rule;

package/src/rules/blockquoted-widget.ts ADDED Viewed

@@ -0,0 +1,9 @@
+import Rule from "../rule";
+export default Rule.makeRule({
+    name: "blockquoted-widget",
+    severity: Rule.Severity.WARNING,
+    selector: "blockQuote widget", // no pattern or lint function: a selector match alone reports the message
+    message: `Blockquoted widget:
+widgets should not be indented.`,
+}) as Rule;

package/src/rules/double-spacing-after-terminal.ts ADDED Viewed

@@ -0,0 +1,11 @@
+/* eslint-disable no-useless-escape */
+import Rule from "../rule";
+export default Rule.makeRule({
+    name: "double-spacing-after-terminal",
+    severity: Rule.Severity.BULK_WARNING,
+    selector: "paragraph",
+    pattern: /[.!\?] {2}/i, // ".", "!" or "?" followed by two spaces; the i flag has no effect here (no letters in the pattern)
+    message: `Use a single space after a sentence-ending period, or
+any other kind of terminal punctuation.`,
+}) as Rule;

package/src/rules/extra-content-spacing.ts ADDED Viewed

@@ -0,0 +1,11 @@
+import Rule from "../rule";
+export default Rule.makeRule({
+    name: "extra-content-spacing",
+    // No severity is given, so the Rule constructor's default applies.
+    selector: "paragraph",
+    pattern: /\s+$/, // one or more whitespace characters at the very end
+    // This rule is only used when the context says the content is an
+    // article. The context object is optional (its type allows null and
+    // undefined), so a missing context means the rule does not apply
+    // instead of throwing a TypeError.
+    applies: function (context) {
+        return context != null && context.contentType === "article";
+    },
+    message: `No extra whitespace at the end of content blocks.`,
+}) as Rule;

package/src/rules/heading-level-1.ts ADDED Viewed

@@ -0,0 +1,13 @@
+import Rule from "../rule";
+export default Rule.makeRule({
+    name: "heading-level-1",
+    severity: Rule.Severity.WARNING,
+    selector: "heading",
+    // Only a heading whose level is exactly 1 is reported; every other
+    // heading yields undefined, which check() treats as "no lint".
+    lint: function (state, content, nodes, match) {
+        const heading = nodes[0];
+        return heading.level === 1
+            ? `Don't use level-1 headings:
+Begin headings with two or more # characters.`
+            : undefined;
+    },
+}) as Rule;

package/src/rules/heading-level-skip.ts ADDED Viewed

@@ -0,0 +1,19 @@
+import Rule from "../rule";
+export default Rule.makeRule({
+    name: "heading-level-skip",
+    severity: Rule.Severity.WARNING,
+    selector: "heading ~ heading",
+    // nodes[0] is the previous heading and nodes[1] is the heading being
+    // checked. Keeping the same level, moving to a lower level number, or
+    // going exactly one level deeper is fine; a jump of two or more levels
+    // is reported.
+    lint: function (state, content, nodes, match) {
+        const [before, current] = [nodes[0], nodes[1]];
+        const jumpedTooDeep = current.level > before.level + 1;
+        if (!jumpedTooDeep) {
+            return;
+        }
+        return `Skipped heading level:
+this heading is level ${current.level} but
+the previous heading was level ${before.level}`;
+    },
+}) as Rule;

package/src/rules/heading-sentence-case.ts ADDED Viewed

@@ -0,0 +1,10 @@
+import Rule from "../rule";
+export default Rule.makeRule({
+    name: "heading-sentence-case",
+    severity: Rule.Severity.GUIDELINE,
+    selector: "heading",
+    pattern: /^\W*[a-z]/, // optional leading non-word characters, then a lowercase ASCII letter
+    message: `First letter is lowercase:
+the first letter of a heading should be capitalized.`,
+}) as Rule;

package/src/rules/heading-title-case.ts ADDED Viewed

@@ -0,0 +1,68 @@
+import Rule from "../rule";
+// Words of 3 or more letters that we would not expect to be capitalized
+// even in a title-case heading. Keys are lowercase and are looked up as-is. See
+// http://blog.apastyle.org/apastyle/2012/03/title-case-and-sentence-case-capitalization-in-apa-style.html
+const littleWords = {
+    and: true,
+    nor: true,
+    but: true,
+    the: true,
+    for: true,
+} as const;
+// True when the first character of `word` is unchanged by toUpperCase(), so
+// a leading digit or punctuation mark also counts as capitalized.
+function isCapitalized(word: any) {
+    const first = word[0];
+    const upper = first.toUpperCase();
+    return upper === first;
+}
+export default Rule.makeRule({
+    name: "heading-title-case",
+    severity: Rule.Severity.GUIDELINE,
+    selector: "heading",
+    pattern: /[^\s:]\s+[A-Z]+[a-z]/,
+    locale: "en",
+    lint: function (state, content, nodes, match) {
+        // Headings should be in sentence case, not title case. The pattern
+        // above fires when a capitalized word follows something other than
+        // whitespace or a colon; words that are entirely upper case
+        // (acronyms) do not trigger it.
+        //
+        // A pattern match alone is not enough to warn, because proper nouns
+        // are allowed to be capitalized and we do not look words up in a
+        // dictionary. Instead we use a heuristic: drop the first word, drop
+        // words of one or two characters and the words listed in
+        // littleWords, and warn only when at least three words remain and
+        // every one of them is capitalized. A single remaining word that is
+        // not capitalized means the heading is not in title case.
+        //
+        // TODO(davidflanagan): if this rule causes a lot of false
+        // positives, we should tweak it or remove it. Note that it will
+        // fail for headings like "World War II in Russia"
+        //
+        // TODO(davidflanagan): This rule is specific to English.
+        // It is marked with a locale property above, but that is NYI
+        const significantWords = content
+            .trim()
+            .split(/\s+/)
+            .slice(1) // the first word is expected to be capitalized anyway
+            .filter(
+                // eslint-disable-next-line no-prototype-builtins
+                (w) => w.length > 2 && !littleWords.hasOwnProperty(w),
+            );
+        if (significantWords.length < 3) {
+            return;
+        }
+        for (const w of significantWords) {
+            if (!isCapitalized(w)) {
+                return;
+            }
+        }
+        return `Title-case heading:
+This heading appears to be in title-case, but should be sentence-case.
+Only capitalize the first letter and proper nouns.`;
+    },
+}) as Rule;