ilib-lint 2.2.1 → 2.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,15 @@
1
1
  Release Notes
2
2
  =============
3
3
 
4
+ ### v2.3.0
5
+
6
+ - implemented the XML match rule. If there are XML tags and entities in the
7
+ source, then the translations must match. The order of XML tags can change,
8
+ as the grammar of other languages might require that, but the number and
9
+ type of XML tags must match or an error will be recorded.
10
+ - this rule will also record an error if the XML in the source is
11
+ well-formed, but the XML in the translation is not
12
+
4
13
  ### v2.2.1
5
14
 
6
15
  - fixed the output from the LintableFile class so that if there is only one
@@ -0,0 +1,58 @@
1
+ # resource-xml
2
+
3
+ If the source string contains XML-like tags, then the translation must contain
4
+ the same tags. The tags themselves may be reordered or nested differently than
5
+ in the source, but:
6
+
7
+ - they should include the same number of tags
8
+ - the tags should have the same name as ones in the source
9
+ - the XML must be well-formed. That is, tags are nested properly and every
10
+ open tag has a corresponding closing tag
11
+ - unnamed tags such as `<>` and `</>` are not allowed
12
+
13
+ Self closing tags such as `<p/>` are allowed.
14
+
15
+ Example of correctly matched tags in a German translation:
16
+
17
+ - source: `You must <b>wait</b> for the <a href="url">job</a>.`
18
+ - target: `Sie müssen auf den <a href="url">Job</a> <b>warten</b>.`
19
+
20
+ Example of incorrectly matched tags in a German translation:
21
+
22
+ - source: `You must <b>wait</b> for the <a href="url">job</a>.`
23
+ - target: `Sie <b>müssen</c> auf den <a href="url">Job</a> <c>warten</c>.`
24
+
25
+ Problems in the above translation:
26
+
27
+ 1. The `<b>` tag has a closing `</c>` tag, making it not well-formed
28
+ 2. The number of tags is different than the source
29
+ 3. The names of tags are different than the source
30
+
31
+ ## Exceptions for HTML Tags
32
+
33
+ HTML4 tags that are commonly written without a closing tag are allowed.
34
+ The code first checks if the tags are well-formed already. If not, then it
35
+ treats these HTML tags as if they were a self-closing tag without having
36
+ the trailing slash inside the angle brackets.
37
+
38
+ Example: `<p>` (start paragraph) is treated as if it were `<p/>`
39
+
40
+ Here is the list of HTML4 tags that are treated as if they were self-closing
41
+ if they are not well-formed:
42
+
43
+ - `<area>`
44
+ - `<base>`
45
+ - `<bdi>`
46
+ - `<bdo>`
47
+ - `<br>`
48
+ - `<embed>`
49
+ - `<hr>`
50
+ - `<img>`
51
+ - `<input>`
52
+ - `<li>`
53
+ - `<link>`
54
+ - `<option>`
55
+ - `<p>`
56
+ - `<param>`
57
+ - `<source>`
58
+ - `<track>`
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "ilib-lint",
3
- "version": "2.2.1",
3
+ "version": "2.3.0",
4
4
  "module": "./src/index.js",
5
5
  "type": "module",
6
6
  "bin": "./src/index.js",
@@ -61,16 +61,16 @@
61
61
  },
62
62
  "devDependencies": {
63
63
  "@tsconfig/node14": "^14.1.2",
64
- "@types/node": "^20.14.10",
64
+ "@types/node": "^14.0.0",
65
65
  "docdash": "^2.0.2",
66
- "ilib-lint-plugin-test": "file:test/ilib-lint-plugin-test",
67
- "ilib-lint-plugin-obsolete": "file:test/ilib-lint-plugin-obsolete",
68
66
  "i18nlint-plugin-test-old": "file:test/i18nlint-plugin-test-old",
67
+ "ilib-lint-plugin-obsolete": "file:test/ilib-lint-plugin-obsolete",
68
+ "ilib-lint-plugin-test": "file:test/ilib-lint-plugin-test",
69
69
  "jest": "^29.7.0",
70
70
  "jsdoc": "^4.0.3",
71
- "jsdoc-to-markdown": "^8.0.1",
71
+ "jsdoc-to-markdown": "^8.0.3",
72
72
  "npm-run-all": "^4.1.5",
73
- "typescript": "^5.5.3"
73
+ "typescript": "^5.5.4"
74
74
  },
75
75
  "dependencies": {
76
76
  "@formatjs/intl": "^2.10.4",
@@ -78,11 +78,12 @@
78
78
  "ilib-lint-common": "^3.0.0",
79
79
  "ilib-locale": "^1.2.2",
80
80
  "ilib-localeinfo": "^1.1.0",
81
- "ilib-tools-common": "^1.10.0",
81
+ "ilib-tools-common": "^1.11.0",
82
82
  "intl-messageformat": "^10.5",
83
83
  "json5": "^2.2.3",
84
84
  "log4js": "^6.9.1",
85
85
  "micromatch": "^4.0.7",
86
- "options-parser": "^0.4.0"
86
+ "options-parser": "^0.4.0",
87
+ "xml-js": "^1.6.11"
87
88
  }
88
89
  }
@@ -61,7 +61,8 @@ class AnsiConsoleFormatter extends Formatter {
61
61
  `;
62
62
 
63
63
  // output ascii terminal escape sequences
64
- output = output.replace(/<e\d><\/e\d>/g, "\u001B[91m \u001B[0m");
64
+ output = output.replace(/<e\d><\/e\d>/g, "\u001B[91m␣\u001B[0m");
65
+ output = output.replace(/<e\d\/>/g, "\u001B[91m␣\u001B[0m");
65
66
  output = output.replace(/<e\d>/g, "\u001B[91m");
66
67
  output = output.replace(/<\/e\d>/g, "\u001B[0m");
67
68
  if (typeof(result.rule.getLink) === 'function' && result.rule.getLink()) {
@@ -36,6 +36,7 @@ import ResourceSourceICUPluralSyntax from '../rules/ResourceSourceICUPluralSynta
36
36
  import ResourceSourceICUPluralParams from '../rules/ResourceSourceICUPluralParams.js';
37
37
  import ResourceSourceICUPluralCategories from '../rules/ResourceSourceICUPluralCategories.js';
38
38
  import ResourceSourceICUUnexplainedParams from '../rules/ResourceSourceICUUnexplainedParams.js';
39
+ import ResourceXML from '../rules/ResourceXML.js';
39
40
 
40
41
  // built-in declarative rules
41
42
  export const regexRules = [
@@ -241,6 +242,7 @@ export const builtInRulesets = {
241
242
  "resource-completeness": true,
242
243
  "resource-no-translation": true,
243
244
  "resource-icu-plurals-translated": true,
245
+ "resource-xml": true,
244
246
 
245
247
  // declarative rules from above
246
248
  "resource-url-match": true,
@@ -252,7 +254,7 @@ export const builtInRulesets = {
252
254
  "resource-no-space-between-double-and-single-byte-character": true,
253
255
  "resource-no-halfwidth-kana-characters": true,
254
256
  "resource-no-double-byte-space": true,
255
- "resource-no-space-with-fullwidth-punctuation": true,
257
+ "resource-no-space-with-fullwidth-punctuation": true
256
258
  },
257
259
 
258
260
  source: {
@@ -310,6 +312,7 @@ class BuiltinPlugin extends Plugin {
310
312
  ResourceSourceICUPluralParams,
311
313
  ResourceSourceICUPluralCategories,
312
314
  ResourceSourceICUUnexplainedParams,
315
+ ResourceXML,
313
316
  ...regexRules
314
317
  ];
315
318
  }
@@ -0,0 +1,244 @@
1
+ /*
2
+ * ResourceXML.js - rule to check that XML in the translations match
3
+ * XML in the source
4
+ *
5
+ * Copyright © 2024 JEDLSoft
6
+ *
7
+ * Licensed under the Apache License, Version 2.0 (the "License");
8
+ * you may not use this file except in compliance with the License.
9
+ * You may obtain a copy of the License at
10
+ *
11
+ * http://www.apache.org/licenses/LICENSE-2.0
12
+ *
13
+ * Unless required by applicable law or agreed to in writing, software
14
+ * distributed under the License is distributed on an "AS IS" BASIS,
15
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16
+ *
17
+ * See the License for the specific language governing permissions and
18
+ * limitations under the License.
19
+ */
20
+
21
+ import { Result } from 'ilib-lint-common';
22
+ import { xml2js } from 'xml-js';
23
+ import { selfClosingTags } from 'ilib-tools-common';
24
+ import ResourceRule from './ResourceRule.js';
25
+
26
// Names of the HTML tags that may appear without a closing tag: every key of
// selfClosingTags, plus "p" and "li".
const htmlTags = [...Object.keys(selfClosingTags), "p", "li"];

// The tag names as a regular expression alternation, shared by the two
// expressions below.
const htmlTagNames = htmlTags.join('|');

// Matches every bare opening tag for one of the names above (e.g. "<br>") and
// captures the tag name. Global so that a replace() rewrites all occurrences.
const selfClosingRe = new RegExp(`<(${htmlTagNames})>`, "g");

// Matches a closing tag for one of the names above (e.g. "</p>").
const endTagRe = new RegExp(`</(${htmlTagNames})>`);

// Matches a tag that has no name at all: "<>" or "</>".
const unnamedTagRe = /<\/?>/;
30
+
31
/**
 * @class Rule that checks that the XML tags found in a translated resource
 * match the XML tags found in its source string, and that the XML in the
 * translation is well-formed.
 */
class ResourceXML extends ResourceRule {
    /**
     * Make a new rule instance.
     * @constructor
     * @param {Object} [options] options for this rule; passed through to the
     * ResourceRule constructor
     * @param {string} [options.sourceLocale] locale of the source strings.
     * Falls back to "en-US" when not given.
     */
    constructor(options) {
        super(options);
        this.name = "resource-xml";
        this.description = "Ensure that XML in translated resources match the source";
        this.sourceLocale = (options && options.sourceLocale) || "en-US";
        this.link = "https://github.com/ilib-js/ilib-lint/blob/main/docs/resource-xml.md";
    }

    /**
     * Recursively walk a node (or an array of nodes) and tally how many times
     * each element name occurs. The tallies are written into `elements`.
     *
     * @private
     * @param {Node|Array.<Node>} node a node in the AST, or an array of them
     * @param {Object} elements an object that maps each element found to the
     * number of times it has been found. This object is modified in place.
     */
    countElements(node, elements) {
        if (Array.isArray(node)) {
            for (const child of node) {
                this.countElements(child, elements);
            }
            return;
        }

        if (node.type === "element") {
            elements[node.name] = (elements[node.name] ?? 0) + 1;
        }
        if (node.elements) {
            this.countElements(node.elements, elements);
        }
    }

    /**
     * Compare the elements of the source tree with those of the target tree.
     * A problem is produced for each element name whose count differs between
     * source and target, and for each element name that appears in the target
     * but not in the source.
     *
     * @private
     * @param {Node} sourceAst the root of the AST of the source string
     * @param {Node} targetAst the root of the AST of the target string
     * @param {Resource} resource the resource instance where the source
     * and target strings came from
     * @returns {Array.<Result>} the problems found, or an empty array if the
     * source has no elements or everything matches
     */
    matchElements(sourceAst, targetAst, resource) {
        // Use prototype-less objects as the tally maps so that a tag which
        // happens to be named like an Object.prototype member (for example
        // <constructor> or <toString>) is counted like any other tag.
        const sourceElements = Object.create(null);
        const targetElements = Object.create(null);
        const problems = [];

        if (!(sourceAst?.elements?.length > 0)) {
            // nothing in the source to compare against
            return problems;
        }

        this.countElements(sourceAst.elements, sourceElements);
        if (targetAst?.elements?.length > 0) {
            this.countElements(targetAst.elements, targetElements);
        }

        // every element of the source must occur the same number of times in the target
        for (const element in sourceElements) {
            if (!targetElements[element] || sourceElements[element] !== targetElements[element]) {
                problems.push(new Result({
                    severity: "error",
                    rule: this,
                    description: `The number of XML <${element}> elements in the target (${targetElements[element] ?? 0}) does not match the number in the source (${sourceElements[element]}).`,
                    id: resource.getKey(),
                    highlight: `Target: ${resource.getTarget()}<e0/>`,
                    pathName: resource.getPath(),
                    source: resource.getSource(),
                    locale: resource.getTargetLocale()
                }));
            }
        }

        // the target must not introduce elements that the source does not have
        for (const element in targetElements) {
            if (!sourceElements[element]) {
                // wrap each attribute-less open, close, or self-closing form of the
                // extra tag in highlight markers
                const re = new RegExp(`<(?<tag>\/?${element}\/?)>`, "g");
                const highlight =
                    resource.getTarget().replace(re, "<e0><$<tag>></e0>");
                problems.push(new Result({
                    severity: "error",
                    rule: this,
                    description: `The XML element <${element}> in the target does not appear in the source.`,
                    id: resource.getKey(),
                    highlight: `Target: ${highlight}`,
                    pathName: resource.getPath(),
                    source: resource.getSource(),
                    locale: resource.getTargetLocale()
                }));
            }
        }

        return problems;
    }

    /**
     * Sometimes, the xml tags are really html, which has notorious problems
     * with unclosed tags being considered valid, such as the <p> or
     * <br> tags. The xml parser we are using does not recognize html,
     * so we have to convert the unclosed html tags into valid xml before we
     * attempt to parse them. This function does that by making those tags into
     * self-closing tags. <p> becomes <p/>
     *
     * Note that the conversion is all-or-nothing: if the string contains a
     * closing tag for any of the known html tags (such as </p>), the string
     * is returned unmodified. Only attribute-less opening tags are converted.
     *
     * @private
     * @param {string} string the string to convert
     * @returns {string} the converted string, or the original string if it
     * already contains a closing tag for one of the known html tags
     */
    convertUnclosedTags(string) {
        if (endTagRe.test(string)) {
            return string;
        }
        return string.replace(selfClosingRe, "<$1/>");
    }

    /**
     * Check the XML in one source/target string pair.
     *
     * @override
     * @param {Object} params the strings to check
     * @param {string} params.source the source string
     * @param {string} params.target the translated string
     * @param {Resource} params.resource the resource the strings came from
     * @returns {Result|Array.<Result>|undefined} a single Result if there is
     * exactly one problem, an array if there are more, and undefined if there
     * are none, there is no target, or the source itself is not well-formed
     */
    matchString({source, target, resource}) {
        if (!target) return; // can't check "nothing" !

        const prefix = '<?xml version="1.0" encoding="UTF-8"?><root>';
        const suffix = '</root>';
        const problems = [];
        let srcObj, tgtObj;

        // convert html tags to valid xml tags and wrap the strings with a prefix
        // and suffix so that it forms a whole xml document before we attempt to
        // call the parser on them
        const wrappedSource = `${prefix}${this.convertUnclosedTags(source)}${suffix}`;
        const wrappedTarget = `${prefix}${this.convertUnclosedTags(target)}${suffix}`;

        // First, check the source string for problems. If there are any,
        // don't even bother checking the target string for problems because
        // we don't even know if they are valid problems. The translators may
        // just have echoed the problems already in the source. It is not the
        // job of this rule to report on the well-formedness of the source.
        try {
            srcObj = xml2js(wrappedSource, {
                trim: false
            });
        } catch (e) {
            // source is not well-formed, so don't even
            // attempt to parse the target! Just bail.
            return undefined;
        }

        // Second, tags that have no name are a special type of un-well-formedness
        // that we want to call out separately. If the target contains them, the
        // parser below reports them too, but with a message that does not help
        // the translators fix the real problem, which is the unnamed tag.
        if (unnamedTagRe.test(target)) {
            const highlight =
                target.replace(/(<\/?>)/g, "<e0>$1</e0>");
            problems.push(new Result({
                severity: "error",
                rule: this,
                description: `Empty XML elements <> and </> are not allowed in the target.`,
                id: resource.getKey(),
                highlight: `Target: ${highlight}`,
                pathName: resource.getPath(),
                source: resource.getSource(),
                locale: resource.getTargetLocale()
            }));
        }

        // Third, parse the target string for well-formedness. Only the parse is
        // inside the try so that an unrelated failure elsewhere is never
        // misreported as malformed XML.
        try {
            tgtObj = xml2js(wrappedTarget, {
                trim: false
            });
        } catch (e) {
            const lines = String(e?.message ?? e).split(/\n/g);
            // find the column number in the 3rd line of the exception message and
            // subtract off the length of the prefix text we added in wrappedTarget
            // NOTE(review): the column refers to the converted target, so it can be
            // shifted right when convertUnclosedTags inserted slashes before it
            const column = Number.parseInt((lines[2] ?? "").substring(8), 10) - prefix.length;
            // if the column is unknown or beyond the end of the target, put an
            // empty marker at the end instead of highlighting a character
            const highlight = (!Number.isInteger(column) || column < 0 || column >= target.length) ?
                target + '<e0/>' :
                target.substring(0, column) + '<e0>' + target[column] + '</e0>' + target.substring(column+1);
            problems.push(new Result({
                severity: "error",
                rule: this,
                description: `XML in translation is not well-formed. Error: ${lines[0]}`,
                id: resource.getKey(),
                highlight: `Target: ${highlight}`,
                pathName: resource.getPath(),
                source: resource.getSource(),
                locale: resource.getTargetLocale()
            }));
        }

        // And finally match the xml elements/tags from the source to the target,
        // which is only possible when the target parsed successfully
        if (tgtObj !== undefined) {
            problems.push(...this.matchElements(srcObj, tgtObj, resource));
        }

        return problems.length < 2 ? problems[0] : problems;
    }
}
243
+
244
+ export default ResourceXML;