massbank 0.1.0 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/lib/validation/rules/accession-match-rule.d.ts.map +1 -1
- package/lib/validation/rules/accession-match-rule.js +16 -3
- package/lib/validation/rules/accession-match-rule.js.map +1 -1
- package/lib/validation/rules/non-standard-chars-rule.d.ts.map +1 -1
- package/lib/validation/rules/non-standard-chars-rule.js +17 -1
- package/lib/validation/rules/non-standard-chars-rule.js.map +1 -1
- package/lib/validation/rules/serialization-rule.d.ts.map +1 -1
- package/lib/validation/rules/serialization-rule.js +24 -1
- package/lib/validation/rules/serialization-rule.js.map +1 -1
- package/lib/validation/rules/unrecognized-field-rule.d.ts +9 -0
- package/lib/validation/rules/unrecognized-field-rule.d.ts.map +1 -1
- package/lib/validation/rules/unrecognized-field-rule.js +63 -1
- package/lib/validation/rules/unrecognized-field-rule.js.map +1 -1
- package/lib/validator/validator.d.ts.map +1 -1
- package/lib/validator/validator.js +2 -1
- package/lib/validator/validator.js.map +1 -1
- package/package.json +1 -1
- package/src/validation/rules/accession-match-rule.ts +21 -4
- package/src/validation/rules/non-standard-chars-rule.ts +21 -2
- package/src/validation/rules/serialization-rule.ts +29 -2
- package/src/validation/rules/unrecognized-field-rule.ts +71 -1
- package/src/validator/validator.ts +3 -2
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"accession-match-rule.d.ts","sourceRoot":"","sources":["../../../src/validation/rules/accession-match-rule.ts"],"names":[],"mappings":"
|
|
1
|
+
{"version":3,"file":"accession-match-rule.d.ts","sourceRoot":"","sources":["../../../src/validation/rules/accession-match-rule.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,MAAM,EAAE,MAAM,iBAAiB,CAAC;AAC9C,OAAO,KAAK,EAAE,eAAe,EAAE,iBAAiB,EAAE,MAAM,gBAAgB,CAAC;AACzE,OAAO,KAAK,EAAE,eAAe,EAAE,qBAAqB,EAAE,MAAM,kBAAkB,CAAC;AAqB/E;;;;GAIG;AACH,qBAAa,kBAAmB,YAAW,eAAe;IACxD,QAAQ,CACN,MAAM,EAAE,MAAM,EACd,aAAa,EAAE,MAAM,EACrB,QAAQ,EAAE,MAAM,EAEhB,QAAQ,EAAE,qBAAqB,GAC9B,eAAe,EAAE;IAgBpB,WAAW,IAAI,iBAAiB,EAAE;CAGnC"}
|
|
@@ -1,4 +1,17 @@
|
|
|
1
|
-
|
|
1
|
+
/**
|
|
2
|
+
* Get the filename from a path (browser-compatible alternative to path.basename)
|
|
3
|
+
*/
|
|
4
|
+
function getBasename(filepath) {
|
|
5
|
+
const lastSlash = Math.max(filepath.lastIndexOf('/'), filepath.lastIndexOf('\\'));
|
|
6
|
+
return lastSlash >= 0 ? filepath.slice(lastSlash + 1) : filepath;
|
|
7
|
+
}
|
|
8
|
+
/**
|
|
9
|
+
* Remove the extension from a filename (browser-compatible alternative to path.extname)
|
|
10
|
+
*/
|
|
11
|
+
function removeExtension(filename) {
|
|
12
|
+
const lastDot = filename.lastIndexOf('.');
|
|
13
|
+
return lastDot > 0 ? filename.slice(0, lastDot) : filename;
|
|
14
|
+
}
|
|
2
15
|
/**
|
|
3
16
|
* Validates that ACCESSION matches the filename
|
|
4
17
|
* Only validates ACCESSION-filename matching.
|
|
@@ -9,12 +22,12 @@ export class AccessionMatchRule {
|
|
|
9
22
|
// eslint-disable-next-line @typescript-eslint/no-unused-vars
|
|
10
23
|
_options) {
|
|
11
24
|
const errors = [];
|
|
12
|
-
const basename =
|
|
25
|
+
const basename = removeExtension(getBasename(filename));
|
|
13
26
|
if (record.ACCESSION !== basename) {
|
|
14
27
|
errors.push({
|
|
15
28
|
file: filename,
|
|
16
29
|
line: 1,
|
|
17
|
-
message: `ACCESSION ${record.ACCESSION}
|
|
30
|
+
message: `ACCESSION mismatch: File is named '${basename}.txt' but ACCESSION field is '${record.ACCESSION}'. Fix: Either rename the file to '${record.ACCESSION}.txt' or change ACCESSION field to '${basename}'.`,
|
|
18
31
|
type: 'validation',
|
|
19
32
|
});
|
|
20
33
|
}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"accession-match-rule.js","sourceRoot":"","sources":["../../../src/validation/rules/accession-match-rule.ts"],"names":[],"mappings":"
|
|
1
|
+
{"version":3,"file":"accession-match-rule.js","sourceRoot":"","sources":["../../../src/validation/rules/accession-match-rule.ts"],"names":[],"mappings":"AAIA;;GAEG;AACH,SAAS,WAAW,CAAC,QAAgB;IACnC,MAAM,SAAS,GAAG,IAAI,CAAC,GAAG,CACxB,QAAQ,CAAC,WAAW,CAAC,GAAG,CAAC,EACzB,QAAQ,CAAC,WAAW,CAAC,IAAI,CAAC,CAC3B,CAAC;IACF,OAAO,SAAS,IAAI,CAAC,CAAC,CAAC,CAAC,QAAQ,CAAC,KAAK,CAAC,SAAS,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,QAAQ,CAAC;AACnE,CAAC;AAED;;GAEG;AACH,SAAS,eAAe,CAAC,QAAgB;IACvC,MAAM,OAAO,GAAG,QAAQ,CAAC,WAAW,CAAC,GAAG,CAAC,CAAC;IAC1C,OAAO,OAAO,GAAG,CAAC,CAAC,CAAC,CAAC,QAAQ,CAAC,KAAK,CAAC,CAAC,EAAE,OAAO,CAAC,CAAC,CAAC,CAAC,QAAQ,CAAC;AAC7D,CAAC;AAED;;;;GAIG;AACH,MAAM,OAAO,kBAAkB;IAC7B,QAAQ,CACN,MAAc,EACd,aAAqB,EACrB,QAAgB;IAChB,6DAA6D;IAC7D,QAA+B;QAE/B,MAAM,MAAM,GAAsB,EAAE,CAAC;QACrC,MAAM,QAAQ,GAAG,eAAe,CAAC,WAAW,CAAC,QAAQ,CAAC,CAAC,CAAC;QAExD,IAAI,MAAM,CAAC,SAAS,KAAK,QAAQ,EAAE,CAAC;YAClC,MAAM,CAAC,IAAI,CAAC;gBACV,IAAI,EAAE,QAAQ;gBACd,IAAI,EAAE,CAAC;gBACP,OAAO,EAAE,sCAAsC,QAAQ,iCAAiC,MAAM,CAAC,SAAS,sCAAsC,MAAM,CAAC,SAAS,uCAAuC,QAAQ,IAAI;gBACjN,IAAI,EAAE,YAAY;aACnB,CAAC,CAAC;QACL,CAAC;QAED,OAAO,MAAM,CAAC;IAChB,CAAC;IAED,WAAW;QACT,OAAO,EAAE,CAAC;IACZ,CAAC;CACF"}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"non-standard-chars-rule.d.ts","sourceRoot":"","sources":["../../../src/validation/rules/non-standard-chars-rule.ts"],"names":[],"mappings":"AACA,OAAO,KAAK,EAAE,MAAM,EAAE,MAAM,iBAAiB,CAAC;AAC9C,OAAO,KAAK,EAAE,eAAe,EAAE,iBAAiB,EAAE,MAAM,gBAAgB,CAAC;AACzE,OAAO,KAAK,EAAE,eAAe,EAAE,qBAAqB,EAAE,MAAM,kBAAkB,CAAC;AAa/E;;;;GAIG;AACH,qBAAa,oBAAqB,YAAW,eAAe;IAC1D,QAAQ,CACN,OAAO,EAAE,MAAM,EACf,aAAa,EAAE,MAAM,EACrB,SAAS,EAAE,MAAM,EACjB,QAAQ,EAAE,qBAAqB,GAC9B,eAAe,EAAE;IAQpB,WAAW,CACT,MAAM,EAAE,MAAM,EACd,YAAY,EAAE,MAAM,EACpB,QAAQ,EAAE,MAAM,EAEhB,QAAQ,EAAE,qBAAqB,GAC9B,iBAAiB,EAAE;
|
|
1
|
+
{"version":3,"file":"non-standard-chars-rule.d.ts","sourceRoot":"","sources":["../../../src/validation/rules/non-standard-chars-rule.ts"],"names":[],"mappings":"AACA,OAAO,KAAK,EAAE,MAAM,EAAE,MAAM,iBAAiB,CAAC;AAC9C,OAAO,KAAK,EAAE,eAAe,EAAE,iBAAiB,EAAE,MAAM,gBAAgB,CAAC;AACzE,OAAO,KAAK,EAAE,eAAe,EAAE,qBAAqB,EAAE,MAAM,kBAAkB,CAAC;AAa/E;;;;GAIG;AACH,qBAAa,oBAAqB,YAAW,eAAe;IAC1D,QAAQ,CACN,OAAO,EAAE,MAAM,EACf,aAAa,EAAE,MAAM,EACrB,SAAS,EAAE,MAAM,EACjB,QAAQ,EAAE,qBAAqB,GAC9B,eAAe,EAAE;IAQpB,WAAW,CACT,MAAM,EAAE,MAAM,EACd,YAAY,EAAE,MAAM,EACpB,QAAQ,EAAE,MAAM,EAEhB,QAAQ,EAAE,qBAAqB,GAC9B,iBAAiB,EAAE;CA4CvB"}
|
|
@@ -32,11 +32,27 @@ export class NonStandardCharsRule {
|
|
|
32
32
|
const match = NON_STANDARD_CHARS_PATTERN.exec(originalText);
|
|
33
33
|
if (match?.index !== undefined) {
|
|
34
34
|
const { line, column } = PositionUtils.getLineColumn(originalText, match.index);
|
|
35
|
+
const char = match[0] || '';
|
|
36
|
+
const codePoint = char.codePointAt(0);
|
|
37
|
+
const hex = codePoint
|
|
38
|
+
? `U+${codePoint.toString(16).toUpperCase().padStart(4, '0')}`
|
|
39
|
+
: '';
|
|
40
|
+
// Common replacements
|
|
41
|
+
const suggestions = new Map([
|
|
42
|
+
['\u2014', 'Replace em-dash with hyphen'],
|
|
43
|
+
['\u201C', 'Replace fancy opening quote with straight quote (")'],
|
|
44
|
+
['\u201D', 'Replace fancy closing quote with straight quote (")'],
|
|
45
|
+
['\u2018', "Replace fancy opening apostrophe with straight quote (')"],
|
|
46
|
+
['\u2019', "Replace fancy closing apostrophe with straight quote (')"],
|
|
47
|
+
['\u2022', 'Replace bullet point with hyphen (-)'],
|
|
48
|
+
['\u00AE', 'Replace registered symbol with (R)'],
|
|
49
|
+
]);
|
|
50
|
+
const suggestion = suggestions.get(char) || 'Replace with standard ASCII character';
|
|
35
51
|
warnings.push({
|
|
36
52
|
file: filename,
|
|
37
53
|
line,
|
|
38
54
|
column,
|
|
39
|
-
message:
|
|
55
|
+
message: `Non-standard character '${char}' (${hex}) found. ${suggestion}`,
|
|
40
56
|
});
|
|
41
57
|
}
|
|
42
58
|
return warnings;
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"non-standard-chars-rule.js","sourceRoot":"","sources":["../../../src/validation/rules/non-standard-chars-rule.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,aAAa,EAAE,MAAM,uBAAuB,CAAC;AAKtD;;;;;;;GAOG;AACH,MAAM,0BAA0B,GAC9B,yDAAyD,CAAC;AAE5D;;;;GAIG;AACH,MAAM,OAAO,oBAAoB;IAC/B,QAAQ,CACN,OAAe,EACf,aAAqB,EACrB,SAAiB,EACjB,QAA+B;QAE/B,KAAK,OAAO,CAAC;QACb,KAAK,aAAa,CAAC;QACnB,KAAK,SAAS,CAAC;QACf,KAAK,QAAQ,CAAC;QACd,OAAO,EAAE,CAAC;IACZ,CAAC;IAED,WAAW,CACT,MAAc,EACd,YAAoB,EACpB,QAAgB;IAChB,6DAA6D;IAC7D,QAA+B;QAE/B,MAAM,QAAQ,GAAwB,EAAE,CAAC;QAEzC,0BAA0B;QAC1B,IAAI,MAAM,CAAC,UAAU,EAAE,CAAC;YACtB,OAAO,QAAQ,CAAC;QAClB,CAAC;QAED,MAAM,KAAK,GAAG,0BAA0B,CAAC,IAAI,CAAC,YAAY,CAAC,CAAC;QAC5D,IAAI,KAAK,EAAE,KAAK,KAAK,SAAS,EAAE,CAAC;YAC/B,MAAM,EAAE,IAAI,EAAE,MAAM,EAAE,GAAG,aAAa,CAAC,aAAa,CAClD,YAAY,EACZ,KAAK,CAAC,KAAK,CACZ,CAAC;YACF,QAAQ,CAAC,IAAI,CAAC;gBACZ,IAAI,EAAE,QAAQ;gBACd,IAAI;gBACJ,MAAM;gBACN,OAAO,
|
|
1
|
+
{"version":3,"file":"non-standard-chars-rule.js","sourceRoot":"","sources":["../../../src/validation/rules/non-standard-chars-rule.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,aAAa,EAAE,MAAM,uBAAuB,CAAC;AAKtD;;;;;;;GAOG;AACH,MAAM,0BAA0B,GAC9B,yDAAyD,CAAC;AAE5D;;;;GAIG;AACH,MAAM,OAAO,oBAAoB;IAC/B,QAAQ,CACN,OAAe,EACf,aAAqB,EACrB,SAAiB,EACjB,QAA+B;QAE/B,KAAK,OAAO,CAAC;QACb,KAAK,aAAa,CAAC;QACnB,KAAK,SAAS,CAAC;QACf,KAAK,QAAQ,CAAC;QACd,OAAO,EAAE,CAAC;IACZ,CAAC;IAED,WAAW,CACT,MAAc,EACd,YAAoB,EACpB,QAAgB;IAChB,6DAA6D;IAC7D,QAA+B;QAE/B,MAAM,QAAQ,GAAwB,EAAE,CAAC;QAEzC,0BAA0B;QAC1B,IAAI,MAAM,CAAC,UAAU,EAAE,CAAC;YACtB,OAAO,QAAQ,CAAC;QAClB,CAAC;QAED,MAAM,KAAK,GAAG,0BAA0B,CAAC,IAAI,CAAC,YAAY,CAAC,CAAC;QAC5D,IAAI,KAAK,EAAE,KAAK,KAAK,SAAS,EAAE,CAAC;YAC/B,MAAM,EAAE,IAAI,EAAE,MAAM,EAAE,GAAG,aAAa,CAAC,aAAa,CAClD,YAAY,EACZ,KAAK,CAAC,KAAK,CACZ,CAAC;YACF,MAAM,IAAI,GAAG,KAAK,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC;YAC5B,MAAM,SAAS,GAAG,IAAI,CAAC,WAAW,CAAC,CAAC,CAAC,CAAC;YACtC,MAAM,GAAG,GAAG,SAAS;gBACnB,CAAC,CAAC,KAAK,SAAS,CAAC,QAAQ,CAAC,EAAE,CAAC,CAAC,WAAW,EAAE,CAAC,QAAQ,CAAC,CAAC,EAAE,GAAG,CAAC,EAAE;gBAC9D,CAAC,CAAC,EAAE,CAAC;YAEP,sBAAsB;YACtB,MAAM,WAAW,GAAG,IAAI,GAAG,CAAiB;gBAC1C,CAAC,QAAQ,EAAE,6BAA6B,CAAC;gBACzC,CAAC,QAAQ,EAAE,qDAAqD,CAAC;gBACjE,CAAC,QAAQ,EAAE,qDAAqD,CAAC;gBACjE,CAAC,QAAQ,EAAE,0DAA0D,CAAC;gBACtE,CAAC,QAAQ,EAAE,0DAA0D,CAAC;gBACtE,CAAC,QAAQ,EAAE,sCAAsC,CAAC;gBAClD,CAAC,QAAQ,EAAE,oCAAoC,CAAC;aACjD,CAAC,CAAC;YAEH,MAAM,UAAU,GACd,WAAW,CAAC,GAAG,CAAC,IAAI,CAAC,IAAI,uCAAuC,CAAC;YAEnE,QAAQ,CAAC,IAAI,CAAC;gBACZ,IAAI,EAAE,QAAQ;gBACd,IAAI;gBACJ,MAAM;gBACN,OAAO,EAAE,2BAA2B,IAAI,MAAM,GAAG,YAAY,UAAU,EAAE;aAC1E,CAAC,CAAC;QACL,CAAC;QAED,OAAO,QAAQ,CAAC;IAClB,CAAC;CACF"}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"serialization-rule.d.ts","sourceRoot":"","sources":["../../../src/validation/rules/serialization-rule.ts"],"names":[],"mappings":"AACA,OAAO,KAAK,EAAE,MAAM,EAAE,MAAM,iBAAiB,CAAC;AAE9C,OAAO,KAAK,EAAE,eAAe,EAAE,iBAAiB,EAAE,MAAM,gBAAgB,CAAC;AACzE,OAAO,KAAK,EAAE,eAAe,EAAE,qBAAqB,EAAE,MAAM,kBAAkB,CAAC;AAE/E;;;;GAIG;AACH,qBAAa,iBAAkB,YAAW,eAAe;IACvD,QAAQ,CACN,MAAM,EAAE,MAAM,EACd,YAAY,EAAE,MAAM,EACpB,QAAQ,EAAE,MAAM,EAEhB,QAAQ,EAAE,qBAAqB,GAC9B,eAAe,EAAE;
|
|
1
|
+
{"version":3,"file":"serialization-rule.d.ts","sourceRoot":"","sources":["../../../src/validation/rules/serialization-rule.ts"],"names":[],"mappings":"AACA,OAAO,KAAK,EAAE,MAAM,EAAE,MAAM,iBAAiB,CAAC;AAE9C,OAAO,KAAK,EAAE,eAAe,EAAE,iBAAiB,EAAE,MAAM,gBAAgB,CAAC;AACzE,OAAO,KAAK,EAAE,eAAe,EAAE,qBAAqB,EAAE,MAAM,kBAAkB,CAAC;AAE/E;;;;GAIG;AACH,qBAAa,iBAAkB,YAAW,eAAe;IACvD,QAAQ,CACN,MAAM,EAAE,MAAM,EACd,YAAY,EAAE,MAAM,EACpB,QAAQ,EAAE,MAAM,EAEhB,QAAQ,EAAE,qBAAqB,GAC9B,eAAe,EAAE;IAkEpB,WAAW,IAAI,iBAAiB,EAAE;IAIlC,OAAO,CAAC,mBAAmB;CAY5B"}
|
|
@@ -18,11 +18,34 @@ export class SerializationRule {
|
|
|
18
18
|
const diffPosition = this.findFirstDifference(normalizedOriginal, serialized);
|
|
19
19
|
if (diffPosition !== -1) {
|
|
20
20
|
const { line, column } = PositionUtils.getLineColumn(normalizedOriginal, diffPosition);
|
|
21
|
+
// Get context around the difference for better error message
|
|
22
|
+
const originalLine = normalizedOriginal.split('\n')[line - 1] || '';
|
|
23
|
+
const serializedLine = serialized.split('\n')[line - 1] || '';
|
|
24
|
+
let message = 'File formatting issue detected (round-trip validation failed).';
|
|
25
|
+
// Provide specific guidance based on common issues
|
|
26
|
+
if (originalLine !== serializedLine) {
|
|
27
|
+
if (originalLine.includes(' ') && serializedLine.includes(' ')) {
|
|
28
|
+
message += ' Check for extra spaces or inconsistent spacing.';
|
|
29
|
+
}
|
|
30
|
+
else if (!serializedLine.trim()) {
|
|
31
|
+
message +=
|
|
32
|
+
' This line may contain unrecognized fields that were ignored during parsing.';
|
|
33
|
+
}
|
|
34
|
+
else {
|
|
35
|
+
const expectedPreview = serializedLine.length > 80
|
|
36
|
+
? `${serializedLine.slice(0, 77)}...`
|
|
37
|
+
: serializedLine;
|
|
38
|
+
const foundPreview = originalLine.length > 80
|
|
39
|
+
? `${originalLine.slice(0, 77)}...`
|
|
40
|
+
: originalLine;
|
|
41
|
+
message += ` Expected (${serializedLine.length} chars): "${expectedPreview}" but found (${originalLine.length} chars): "${foundPreview}"`;
|
|
42
|
+
}
|
|
43
|
+
}
|
|
21
44
|
errors.push({
|
|
22
45
|
file: filename,
|
|
23
46
|
line,
|
|
24
47
|
column,
|
|
25
|
-
message
|
|
48
|
+
message,
|
|
26
49
|
type: 'serialization',
|
|
27
50
|
});
|
|
28
51
|
}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"serialization-rule.js","sourceRoot":"","sources":["../../../src/validation/rules/serialization-rule.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,aAAa,EAAE,MAAM,uBAAuB,CAAC;AAEtD,OAAO,EAAE,eAAe,EAAE,MAAM,2BAA2B,CAAC;AAI5D;;;;GAIG;AACH,MAAM,OAAO,iBAAiB;IAC5B,QAAQ,CACN,MAAc,EACd,YAAoB,EACpB,QAAgB;IAChB,6DAA6D;IAC7D,QAA+B;QAE/B,MAAM,MAAM,GAAsB,EAAE,CAAC;QAErC,IAAI,CAAC;YACH,MAAM,UAAU,GAAG,eAAe,CAAC,MAAM,CAAC,CAAC;YAC3C,6DAA6D;YAC7D,MAAM,kBAAkB,GAAG,YAAY,CAAC,OAAO,CAAC,QAAQ,EAAE,IAAI,CAAC,CAAC;YAEhE,wBAAwB;YACxB,MAAM,YAAY,GAAG,IAAI,CAAC,mBAAmB,CAC3C,kBAAkB,EAClB,UAAU,CACX,CAAC;YAEF,IAAI,YAAY,KAAK,CAAC,CAAC,EAAE,CAAC;gBACxB,MAAM,EAAE,IAAI,EAAE,MAAM,EAAE,GAAG,aAAa,CAAC,aAAa,CAClD,kBAAkB,EAClB,YAAY,CACb,CAAC;
|
|
1
|
+
{"version":3,"file":"serialization-rule.js","sourceRoot":"","sources":["../../../src/validation/rules/serialization-rule.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,aAAa,EAAE,MAAM,uBAAuB,CAAC;AAEtD,OAAO,EAAE,eAAe,EAAE,MAAM,2BAA2B,CAAC;AAI5D;;;;GAIG;AACH,MAAM,OAAO,iBAAiB;IAC5B,QAAQ,CACN,MAAc,EACd,YAAoB,EACpB,QAAgB;IAChB,6DAA6D;IAC7D,QAA+B;QAE/B,MAAM,MAAM,GAAsB,EAAE,CAAC;QAErC,IAAI,CAAC;YACH,MAAM,UAAU,GAAG,eAAe,CAAC,MAAM,CAAC,CAAC;YAC3C,6DAA6D;YAC7D,MAAM,kBAAkB,GAAG,YAAY,CAAC,OAAO,CAAC,QAAQ,EAAE,IAAI,CAAC,CAAC;YAEhE,wBAAwB;YACxB,MAAM,YAAY,GAAG,IAAI,CAAC,mBAAmB,CAC3C,kBAAkB,EAClB,UAAU,CACX,CAAC;YAEF,IAAI,YAAY,KAAK,CAAC,CAAC,EAAE,CAAC;gBACxB,MAAM,EAAE,IAAI,EAAE,MAAM,EAAE,GAAG,aAAa,CAAC,aAAa,CAClD,kBAAkB,EAClB,YAAY,CACb,CAAC;gBAEF,6DAA6D;gBAC7D,MAAM,YAAY,GAAG,kBAAkB,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC,IAAI,GAAG,CAAC,CAAC,IAAI,EAAE,CAAC;gBACpE,MAAM,cAAc,GAAG,UAAU,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC,IAAI,GAAG,CAAC,CAAC,IAAI,EAAE,CAAC;gBAE9D,IAAI,OAAO,GACT,gEAAgE,CAAC;gBAEnE,mDAAmD;gBACnD,IAAI,YAAY,KAAK,cAAc,EAAE,CAAC;oBACpC,IAAI,YAAY,CAAC,QAAQ,CAAC,IAAI,CAAC,IAAI,cAAc,CAAC,QAAQ,CAAC,GAAG,CAAC,EAAE,CAAC;wBAChE,OAAO,IAAI,kDAAkD,CAAC;oBAChE,CAAC;yBAAM,IAAI,CAAC,cAAc,CAAC,IAAI,EAAE,EAAE,CAAC;wBAClC,OAAO;4BACL,8EAA8E,CAAC;oBACnF,CAAC;yBAAM,CAAC;wBACN,MAAM,eAAe,GACnB,cAAc,CAAC,MAAM,GAAG,EAAE;4BACxB,CAAC,CAAC,GAAG,cAAc,CAAC,KAAK,CAAC,CAAC,EAAE,EAAE,CAAC,KAAK;4BACrC,CAAC,CAAC,cAAc,CAAC;wBACrB,MAAM,YAAY,GAChB,YAAY,CAAC,MAAM,GAAG,EAAE;4BACtB,CAAC,CAAC,GAAG,YAAY,CAAC,KAAK,CAAC,CAAC,EAAE,EAAE,CAAC,KAAK;4BACnC,CAAC,CAAC,YAAY,CAAC;wBACnB,OAAO,IAAI,cAAc,cAAc,CAAC,MAAM,aAAa,eAAe,gBAAgB,YAAY,CAAC,MAAM,aAAa,YAAY,GAAG,CAAC;oBAC5I,CAAC;gBACH,CAAC;gBAED,MAAM,CAAC,IAAI,CAAC;oBACV,IAAI,EAAE,QAAQ;oBACd,IAAI;oBACJ,MAAM;oBACN,OAAO;oBACP,IAAI,EAAE,eAAe;iBACtB,CAAC,CAAC;YACL,CAAC;QACH,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACf,MAAM,CAAC,IAAI,CAAC;gBACV,IAAI,EAAE,QAAQ;gBACd,OAAO,EAAE,yBAAyB,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,eAAe,EAAE;gBAC5F,IAAI,EAAE,eAAe;aACtB,CAAC,CAAC;QACL,CAAC;QAED,OAAO,MAAM,CAAC;IAChB,CAAC;IAED,WAAW;QACT,OAAO,EAAE,CAAC;IACZ,CAAC;IAEO,mBAAmB,CAAC,IAAY,EAAE,IAAY;QACpD,MAAM,SAAS,GAAG,IAAI,CAAC,GAAG,CAAC,IAAI,CAAC,MAAM,EAAE,IAAI,CAAC,MAAM,CAAC,CAAC;QACrD,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,SAAS,EAAE,CAAC,EAAE,EAAE,CAAC;YACnC,IAAI,IAAI,CAAC,CAAC,CAAC,KAAK,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC;gBACxB,OAAO,CAAC,CAAC;YACX,CAAC;QACH,CAAC;QACD,IAAI,IAAI,CAAC,MAAM,KAAK,IAAI,CAAC,MAAM,EAAE,CAAC;YAChC,OAAO,SAAS,CAAC;QACnB,CAAC;QACD,OAAO,CAAC,CAAC,CAAC;IACZ,CAAC;CACF"}
|
|
@@ -12,5 +12,14 @@ export declare class UnrecognizedFieldRule implements IValidationRule {
|
|
|
12
12
|
private readonly recognizedFields;
|
|
13
13
|
validate(_record: Record, _originalText: string, _filename: string, _options?: ValidationRuleOptions): never[];
|
|
14
14
|
getWarnings(_record: Record, originalText: string, filename: string, _options?: ValidationRuleOptions): ValidationWarning[];
|
|
15
|
+
/**
|
|
16
|
+
* Find a similar field name for typo suggestions
|
|
17
|
+
* Uses simple Levenshtein distance for similarity
|
|
18
|
+
*/
|
|
19
|
+
private findSimilarField;
|
|
20
|
+
/**
|
|
21
|
+
* Calculate Levenshtein distance between two strings
|
|
22
|
+
*/
|
|
23
|
+
private levenshteinDistance;
|
|
15
24
|
}
|
|
16
25
|
//# sourceMappingURL=unrecognized-field-rule.d.ts.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"unrecognized-field-rule.d.ts","sourceRoot":"","sources":["../../../src/validation/rules/unrecognized-field-rule.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,MAAM,EAAE,MAAM,iBAAiB,CAAC;AAC9C,OAAO,KAAK,EAAE,iBAAiB,EAAE,MAAM,gBAAgB,CAAC;AACxD,OAAO,KAAK,EAAE,eAAe,EAAE,qBAAqB,EAAE,MAAM,kBAAkB,CAAC;AAE/E;;;GAGG;AACH,qBAAa,qBAAsB,YAAW,eAAe;IAC3D;;OAEG;IACH,OAAO,CAAC,QAAQ,CAAC,gBAAgB,CAsC9B;IAEH,QAAQ,CAEN,OAAO,EAAE,MAAM,EAEf,aAAa,EAAE,MAAM,EAErB,SAAS,EAAE,MAAM,EAEjB,QAAQ,CAAC,EAAE,qBAAqB,GAC/B,KAAK,EAAE;IAKV,WAAW,CACT,OAAO,EAAE,MAAM,EACf,YAAY,EAAE,MAAM,EACpB,QAAQ,EAAE,MAAM,EAEhB,QAAQ,CAAC,EAAE,qBAAqB,GAC/B,iBAAiB,EAAE;
|
|
1
|
+
{"version":3,"file":"unrecognized-field-rule.d.ts","sourceRoot":"","sources":["../../../src/validation/rules/unrecognized-field-rule.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,MAAM,EAAE,MAAM,iBAAiB,CAAC;AAC9C,OAAO,KAAK,EAAE,iBAAiB,EAAE,MAAM,gBAAgB,CAAC;AACxD,OAAO,KAAK,EAAE,eAAe,EAAE,qBAAqB,EAAE,MAAM,kBAAkB,CAAC;AAE/E;;;GAGG;AACH,qBAAa,qBAAsB,YAAW,eAAe;IAC3D;;OAEG;IACH,OAAO,CAAC,QAAQ,CAAC,gBAAgB,CAsC9B;IAEH,QAAQ,CAEN,OAAO,EAAE,MAAM,EAEf,aAAa,EAAE,MAAM,EAErB,SAAS,EAAE,MAAM,EAEjB,QAAQ,CAAC,EAAE,qBAAqB,GAC/B,KAAK,EAAE;IAKV,WAAW,CACT,OAAO,EAAE,MAAM,EACf,YAAY,EAAE,MAAM,EACpB,QAAQ,EAAE,MAAM,EAEhB,QAAQ,CAAC,EAAE,qBAAqB,GAC/B,iBAAiB,EAAE;IA0CtB;;;OAGG;IACH,OAAO,CAAC,gBAAgB;IAgBxB;;OAEG;IACH,OAAO,CAAC,mBAAmB;CAkC5B"}
|
|
@@ -74,14 +74,76 @@ export class UnrecognizedFieldRule {
|
|
|
74
74
|
const key = line.slice(0, colonIndex).trim();
|
|
75
75
|
// Check if this is an unrecognized field
|
|
76
76
|
if (!this.recognizedFields.has(key)) {
|
|
77
|
+
// Suggest similar field names for common typos
|
|
78
|
+
// Normalize to uppercase for comparison to catch case errors
|
|
79
|
+
const suggestion = this.findSimilarField(key.toUpperCase());
|
|
80
|
+
let message = `Unrecognized field '${key}'. Not a valid MassBank 2.6.0 field.`;
|
|
81
|
+
if (suggestion) {
|
|
82
|
+
message += ` Did you mean '${suggestion}'?`;
|
|
83
|
+
}
|
|
84
|
+
else {
|
|
85
|
+
message +=
|
|
86
|
+
' Remove this line or check the MassBank format specification.';
|
|
87
|
+
}
|
|
77
88
|
warnings.push({
|
|
78
89
|
file: filename,
|
|
79
90
|
line: i + 1,
|
|
80
|
-
message
|
|
91
|
+
message,
|
|
81
92
|
});
|
|
82
93
|
}
|
|
83
94
|
}
|
|
84
95
|
return warnings;
|
|
85
96
|
}
|
|
97
|
+
/**
|
|
98
|
+
* Find a similar field name for typo suggestions
|
|
99
|
+
* Uses simple Levenshtein distance for similarity
|
|
100
|
+
*/
|
|
101
|
+
findSimilarField(input) {
|
|
102
|
+
let bestMatch = null;
|
|
103
|
+
let bestDistance = Number.POSITIVE_INFINITY;
|
|
104
|
+
for (const field of this.recognizedFields) {
|
|
105
|
+
const distance = this.levenshteinDistance(input, field);
|
|
106
|
+
// Only suggest if very similar (distance <= 2)
|
|
107
|
+
if (distance <= 2 && distance < bestDistance) {
|
|
108
|
+
bestDistance = distance;
|
|
109
|
+
bestMatch = field;
|
|
110
|
+
}
|
|
111
|
+
}
|
|
112
|
+
return bestMatch;
|
|
113
|
+
}
|
|
114
|
+
/**
|
|
115
|
+
* Calculate Levenshtein distance between two strings
|
|
116
|
+
*/
|
|
117
|
+
levenshteinDistance(a, b) {
|
|
118
|
+
const matrix = [];
|
|
119
|
+
for (let i = 0; i <= b.length; i++) {
|
|
120
|
+
matrix[i] = [i];
|
|
121
|
+
}
|
|
122
|
+
const firstRow = matrix[0];
|
|
123
|
+
if (firstRow) {
|
|
124
|
+
for (let j = 0; j <= a.length; j++) {
|
|
125
|
+
firstRow[j] = j;
|
|
126
|
+
}
|
|
127
|
+
}
|
|
128
|
+
for (let i = 1; i <= b.length; i++) {
|
|
129
|
+
const currentRow = matrix[i];
|
|
130
|
+
const prevRow = matrix[i - 1];
|
|
131
|
+
if (!currentRow || !prevRow)
|
|
132
|
+
continue;
|
|
133
|
+
for (let j = 1; j <= a.length; j++) {
|
|
134
|
+
if (b.charAt(i - 1) === a.charAt(j - 1)) {
|
|
135
|
+
currentRow[j] = prevRow[j - 1] ?? 0;
|
|
136
|
+
}
|
|
137
|
+
else {
|
|
138
|
+
const substitution = (prevRow[j - 1] ?? 0) + 1;
|
|
139
|
+
const insertion = (currentRow[j - 1] ?? 0) + 1;
|
|
140
|
+
const deletion = (prevRow[j] ?? 0) + 1;
|
|
141
|
+
currentRow[j] = Math.min(substitution, insertion, deletion);
|
|
142
|
+
}
|
|
143
|
+
}
|
|
144
|
+
}
|
|
145
|
+
const lastRow = matrix[b.length];
|
|
146
|
+
return lastRow?.[a.length] ?? Number.POSITIVE_INFINITY;
|
|
147
|
+
}
|
|
86
148
|
}
|
|
87
149
|
//# sourceMappingURL=unrecognized-field-rule.js.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"unrecognized-field-rule.js","sourceRoot":"","sources":["../../../src/validation/rules/unrecognized-field-rule.ts"],"names":[],"mappings":"AAIA;;;GAGG;AACH,MAAM,OAAO,qBAAqB;IAChC;;OAEG;IACc,gBAAgB,GAAG,IAAI,GAAG,CAAC;QAC1C,gBAAgB;QAChB,WAAW;QACX,cAAc;QACd,MAAM;QACN,SAAS;QACT,SAAS;QACT,WAAW;QACX,aAAa;QACb,SAAS;QACT,SAAS;QACT,YAAY;QACZ,2BAA2B;QAC3B,SAAS;QACT,mBAAmB;QACnB,YAAY;QACZ,eAAe;QACf,WAAW;QACX,UAAU;QACV,SAAS;QACT,8BAA8B;QAC9B,eAAe;QACf,oBAAoB;QACpB,sBAAsB;QACtB,mBAAmB;QACnB,2BAA2B;QAC3B,gBAAgB;QAChB,oBAAoB;QACpB,mBAAmB;QACnB,WAAW;QACX,aAAa;QACb,SAAS;QACT,eAAe;QACf,wBAAwB;QACxB,oBAAoB;QACpB,YAAY;QACZ,SAAS;QACT,WAAW;KACZ,CAAC,CAAC;IAEH,QAAQ;IACN,6DAA6D;IAC7D,OAAe;IACf,6DAA6D;IAC7D,aAAqB;IACrB,6DAA6D;IAC7D,SAAiB;IACjB,6DAA6D;IAC7D,QAAgC;QAEhC,+CAA+C;QAC/C,OAAO,EAAE,CAAC;IACZ,CAAC;IAED,WAAW,CACT,OAAe,EACf,YAAoB,EACpB,QAAgB;IAChB,6DAA6D;IAC7D,QAAgC;QAEhC,MAAM,QAAQ,GAAwB,EAAE,CAAC;QACzC,MAAM,KAAK,GAAG,YAAY,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;QAEvC,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,KAAK,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;YACtC,MAAM,IAAI,GAAG,KAAK,CAAC,CAAC,CAAC,EAAE,IAAI,EAAE,CAAC;YAC9B,IAAI,CAAC,IAAI,IAAI,IAAI,KAAK,IAAI,IAAI,IAAI,CAAC,UAAU,CAAC,IAAI,CAAC,EAAE,CAAC;gBACpD,SAAS;YACX,CAAC;YAED,MAAM,UAAU,GAAG,IAAI,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC;YACrC,IAAI,UAAU,KAAK,CAAC,CAAC,EAAE,CAAC;gBACtB,SAAS;YACX,CAAC;YAED,MAAM,GAAG,GAAG,IAAI,CAAC,KAAK,CAAC,CAAC,EAAE,UAAU,CAAC,CAAC,IAAI,EAAE,CAAC;YAE7C,yCAAyC;YACzC,IAAI,CAAC,IAAI,CAAC,gBAAgB,CAAC,GAAG,CAAC,GAAG,CAAC,EAAE,CAAC;gBACpC,QAAQ,CAAC,IAAI,CAAC;oBACZ,IAAI,EAAE,QAAQ;oBACd,IAAI,EAAE,CAAC,GAAG,CAAC;oBACX,OAAO,EAAE,
|
|
1
|
+
{"version":3,"file":"unrecognized-field-rule.js","sourceRoot":"","sources":["../../../src/validation/rules/unrecognized-field-rule.ts"],"names":[],"mappings":"AAIA;;;GAGG;AACH,MAAM,OAAO,qBAAqB;IAChC;;OAEG;IACc,gBAAgB,GAAG,IAAI,GAAG,CAAC;QAC1C,gBAAgB;QAChB,WAAW;QACX,cAAc;QACd,MAAM;QACN,SAAS;QACT,SAAS;QACT,WAAW;QACX,aAAa;QACb,SAAS;QACT,SAAS;QACT,YAAY;QACZ,2BAA2B;QAC3B,SAAS;QACT,mBAAmB;QACnB,YAAY;QACZ,eAAe;QACf,WAAW;QACX,UAAU;QACV,SAAS;QACT,8BAA8B;QAC9B,eAAe;QACf,oBAAoB;QACpB,sBAAsB;QACtB,mBAAmB;QACnB,2BAA2B;QAC3B,gBAAgB;QAChB,oBAAoB;QACpB,mBAAmB;QACnB,WAAW;QACX,aAAa;QACb,SAAS;QACT,eAAe;QACf,wBAAwB;QACxB,oBAAoB;QACpB,YAAY;QACZ,SAAS;QACT,WAAW;KACZ,CAAC,CAAC;IAEH,QAAQ;IACN,6DAA6D;IAC7D,OAAe;IACf,6DAA6D;IAC7D,aAAqB;IACrB,6DAA6D;IAC7D,SAAiB;IACjB,6DAA6D;IAC7D,QAAgC;QAEhC,+CAA+C;QAC/C,OAAO,EAAE,CAAC;IACZ,CAAC;IAED,WAAW,CACT,OAAe,EACf,YAAoB,EACpB,QAAgB;IAChB,6DAA6D;IAC7D,QAAgC;QAEhC,MAAM,QAAQ,GAAwB,EAAE,CAAC;QACzC,MAAM,KAAK,GAAG,YAAY,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;QAEvC,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,KAAK,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;YACtC,MAAM,IAAI,GAAG,KAAK,CAAC,CAAC,CAAC,EAAE,IAAI,EAAE,CAAC;YAC9B,IAAI,CAAC,IAAI,IAAI,IAAI,KAAK,IAAI,IAAI,IAAI,CAAC,UAAU,CAAC,IAAI,CAAC,EAAE,CAAC;gBACpD,SAAS;YACX,CAAC;YAED,MAAM,UAAU,GAAG,IAAI,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC;YACrC,IAAI,UAAU,KAAK,CAAC,CAAC,EAAE,CAAC;gBACtB,SAAS;YACX,CAAC;YAED,MAAM,GAAG,GAAG,IAAI,CAAC,KAAK,CAAC,CAAC,EAAE,UAAU,CAAC,CAAC,IAAI,EAAE,CAAC;YAE7C,yCAAyC;YACzC,IAAI,CAAC,IAAI,CAAC,gBAAgB,CAAC,GAAG,CAAC,GAAG,CAAC,EAAE,CAAC;gBACpC,+CAA+C;gBAC/C,6DAA6D;gBAC7D,MAAM,UAAU,GAAG,IAAI,CAAC,gBAAgB,CAAC,GAAG,CAAC,WAAW,EAAE,CAAC,CAAC;gBAC5D,IAAI,OAAO,GAAG,uBAAuB,GAAG,sCAAsC,CAAC;gBAE/E,IAAI,UAAU,EAAE,CAAC;oBACf,OAAO,IAAI,kBAAkB,UAAU,IAAI,CAAC;gBAC9C,CAAC;qBAAM,CAAC;oBACN,OAAO;wBACL,+DAA+D,CAAC;gBACpE,CAAC;gBAED,QAAQ,CAAC,IAAI,CAAC;oBACZ,IAAI,EAAE,QAAQ;oBACd,IAAI,EAAE,CAAC,GAAG,CAAC;oBACX,OAAO;iBACR,CAAC,CAAC;YACL,CAAC;QACH,CAAC;QAED,OAAO,QAAQ,CAAC;IAClB,CAAC;IAED;;;OAGG;IACK,gBAAgB,CAAC,KAAa;QACpC,IAAI,SAAS,GAAkB,IAAI,CAAC;QACpC,IAAI,YAAY,GAAG,MAAM,CAAC,iBAAiB,CAAC;QAE5C,KAAK,MAAM,KAAK,IAAI,IAAI,CAAC,gBAAgB,EAAE,CAAC;YAC1C,MAAM,QAAQ,GAAG,IAAI,CAAC,mBAAmB,CAAC,KAAK,EAAE,KAAK,CAAC,CAAC;YACxD,+CAA+C;YAC/C,IAAI,QAAQ,IAAI,CAAC,IAAI,QAAQ,GAAG,YAAY,EAAE,CAAC;gBAC7C,YAAY,GAAG,QAAQ,CAAC;gBACxB,SAAS,GAAG,KAAK,CAAC;YACpB,CAAC;QACH,CAAC;QAED,OAAO,SAAS,CAAC;IACnB,CAAC;IAED;;OAEG;IACK,mBAAmB,CAAC,CAAS,EAAE,CAAS;QAC9C,MAAM,MAAM,GAAe,EAAE,CAAC;QAE9B,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,IAAI,CAAC,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;YACnC,MAAM,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC;QAClB,CAAC;QAED,MAAM,QAAQ,GAAG,MAAM,CAAC,CAAC,CAAC,CAAC;QAC3B,IAAI,QAAQ,EAAE,CAAC;YACb,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,IAAI,CAAC,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;gBACnC,QAAQ,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC;YAClB,CAAC;QACH,CAAC;QAED,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,IAAI,CAAC,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;YACnC,MAAM,UAAU,GAAG,MAAM,CAAC,CAAC,CAAC,CAAC;YAC7B,MAAM,OAAO,GAAG,MAAM,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC;YAC9B,IAAI,CAAC,UAAU,IAAI,CAAC,OAAO;gBAAE,SAAS;YAEtC,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,IAAI,CAAC,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;gBACnC,IAAI,CAAC,CAAC,MAAM,CAAC,CAAC,GAAG,CAAC,CAAC,KAAK,CAAC,CAAC,MAAM,CAAC,CAAC,GAAG,CAAC,CAAC,EAAE,CAAC;oBACxC,UAAU,CAAC,CAAC,CAAC,GAAG,OAAO,CAAC,CAAC,GAAG,CAAC,CAAC,IAAI,CAAC,CAAC;gBACtC,CAAC;qBAAM,CAAC;oBACN,MAAM,YAAY,GAAG,CAAC,OAAO,CAAC,CAAC,GAAG,CAAC,CAAC,IAAI,CAAC,CAAC,GAAG,CAAC,CAAC;oBAC/C,MAAM,SAAS,GAAG,CAAC,UAAU,CAAC,CAAC,GAAG,CAAC,CAAC,IAAI,CAAC,CAAC,GAAG,CAAC,CAAC;oBAC/C,MAAM,QAAQ,GAAG,CAAC,OAAO,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,GAAG,CAAC,CAAC;oBACvC,UAAU,CAAC,CAAC,CAAC,GAAG,IAAI,CAAC,GAAG,CAAC,YAAY,EAAE,SAAS,EAAE,QAAQ,CAAC,CAAC;gBAC9D,CAAC;YACH,CAAC;QACH,CAAC;QAED,MAAM,OAAO,GAAG,MAAM,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC;QACjC,OAAO,OAAO,EAAE,CAAC,CAAC,CAAC,MAAM,CAAC,IAAI,MAAM,CAAC,iBAAiB,CAAC;IACzD,CAAC;CACF"}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"validator.d.ts","sourceRoot":"","sources":["../../src/validator/validator.ts"],"names":[],"mappings":"AACA,OAAO,KAAK,EAEV,iBAAiB,EACjB,gBAAgB,EAEjB,MAAM,aAAa,CAAC;
|
|
1
|
+
{"version":3,"file":"validator.d.ts","sourceRoot":"","sources":["../../src/validator/validator.ts"],"names":[],"mappings":"AACA,OAAO,KAAK,EAEV,iBAAiB,EACjB,gBAAgB,EAEjB,MAAM,aAAa,CAAC;AAGrB;;;;;GAKG;AACH,wBAAsB,QAAQ,CAC5B,QAAQ,EAAE,MAAM,EAChB,OAAO,GAAE,iBAAsB,GAC9B,OAAO,CAAC,gBAAgB,CAAC,CAiG3B;AAED;;;;;;;;GAQG;AACH,wBAAsB,eAAe,CACnC,OAAO,EAAE,MAAM,EACf,QAAQ,EAAE,MAAM,EAChB,OAAO,GAAE,iBAAsB,GAC9B,OAAO,CAAC,gBAAgB,CAAC,CA0D3B"}
|
|
@@ -1,6 +1,5 @@
|
|
|
1
1
|
import { ParseException, parseRecord } from '../parser/index.js';
|
|
2
2
|
import { RecordValidator } from '../validation/index.js';
|
|
3
|
-
import { FileUtils } from './file-utils.js';
|
|
4
3
|
/**
|
|
5
4
|
* Validate a single MassBank record file
|
|
6
5
|
* @param filePath - Path to the .txt file
|
|
@@ -15,6 +14,8 @@ export async function validate(filePath, options = {}) {
|
|
|
15
14
|
if (options.logger) {
|
|
16
15
|
options.logger.info(`Validating file: ${filePath}`);
|
|
17
16
|
}
|
|
17
|
+
// Dynamic import to avoid pulling node:fs/promises into browser bundles
|
|
18
|
+
const { FileUtils } = await import('./file-utils.js');
|
|
18
19
|
// Read file
|
|
19
20
|
let fileContent;
|
|
20
21
|
try {
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"validator.js","sourceRoot":"","sources":["../../src/validator/validator.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,cAAc,EAAE,WAAW,EAAE,MAAM,oBAAoB,CAAC;AAOjE,OAAO,EAAE,eAAe,EAAE,MAAM,wBAAwB,CAAC;AAEzD
|
|
1
|
+
{"version":3,"file":"validator.js","sourceRoot":"","sources":["../../src/validator/validator.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,cAAc,EAAE,WAAW,EAAE,MAAM,oBAAoB,CAAC;AAOjE,OAAO,EAAE,eAAe,EAAE,MAAM,wBAAwB,CAAC;AAEzD;;;;;GAKG;AACH,MAAM,CAAC,KAAK,UAAU,QAAQ,CAC5B,QAAgB,EAChB,UAA6B,EAAE;IAE/B,MAAM,MAAM,GAAsB,EAAE,CAAC;IACrC,MAAM,QAAQ,GAAwB,EAAE,CAAC;IACzC,MAAM,UAAU,GAAa,EAAE,CAAC;IAEhC,yBAAyB;IACzB,IAAI,OAAO,CAAC,MAAM,EAAE,CAAC;QACnB,OAAO,CAAC,MAAM,CAAC,IAAI,CAAC,oBAAoB,QAAQ,EAAE,CAAC,CAAC;IACtD,CAAC;IAED,wEAAwE;IACxE,MAAM,EAAE,SAAS,EAAE,GAAG,MAAM,MAAM,CAAC,iBAAiB,CAAC,CAAC;IAEtD,YAAY;IACZ,IAAI,WAAmB,CAAC;IACxB,IAAI,CAAC;QACH,WAAW,GAAG,MAAM,SAAS,CAAC,QAAQ,CAAC,QAAQ,CAAC,CAAC;IACnD,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QACf,MAAM,OAAO,GAAG,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,eAAe,CAAC;QACzE,OAAO;YACL,OAAO,EAAE,KAAK;YACd,MAAM,EAAE;gBACN;oBACE,IAAI,EAAE,QAAQ;oBACd,OAAO,EAAE,wBAAwB,OAAO,EAAE;oBAC1C,IAAI,EAAE,OAAO;iBACd;aACF;YACD,QAAQ,EAAE,EAAE;YACZ,UAAU,EAAE,EAAE;YACd,cAAc,EAAE,CAAC;SAClB,CAAC;IACJ,CAAC;IAED,mBAAmB;IACnB,IAAI,MAAM,CAAC;IACX,IAAI,CAAC;QACH,MAAM,GAAG,WAAW,CAAC,WAAW,CAAC,CAAC;IACpC,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QACf,IAAI,KAAK,YAAY,cAAc,EAAE,CAAC;YACpC,MAAM,CAAC,IAAI,CAAC;gBACV,IAAI,EAAE,QAAQ;gBACd,IAAI,EAAE,KAAK,CAAC,UAAU,CAAC,IAAI;gBAC3B,MAAM,EAAE,KAAK,CAAC,UAAU,CAAC,MAAM;gBAC/B,OAAO,EAAE,KAAK,CAAC,UAAU,CAAC,OAAO;gBACjC,IAAI,EAAE,OAAO;aACd,CAAC,CAAC;YACH,OAAO;gBACL,OAAO,EAAE,KAAK;gBACd,MAAM;gBACN,QAAQ;gBACR,UAAU;gBACV,cAAc,EAAE,CAAC;aAClB,CAAC;QACJ,CAAC;QACD,MAAM,KAAK,CAAC;IACd,CAAC;IAED,yBAAyB;IACzB,MAAM,eAAe,GAAG,IAAI,eAAe,EAAE,CAAC;IAC9C,MAAM,KAAK,GAAG,eAAe,CAAC,QAAQ,EAAE,CAAC;IAEzC,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;QACzB,MAAM,UAAU,GAAG,IAAI,CAAC,QAAQ,CAAC,MAAM,EAAE,WAAW,EAAE,QAAQ,EAAE;YAC9D,MAAM,EAAE,OAAO,CAAC,MAAM;SACvB,CAAC,CAAC;QACH,MAAM,CAAC,IAAI,CAAC,GAAG,UAAU,CAAC,CAAC;QAE3B,MAAM,YAAY,GAAG,IAAI,CAAC,WAAW,CAAC,MAAM,EAAE,WAAW,EAAE,QAAQ,EAAE;YACnE,MAAM,EAAE,OAAO,CAAC,MAAM;SACvB,CAAC,CAAC;QACH,QAAQ,CAAC,IAAI,CAAC,GAAG,YAAY,CAAC,CAAC;IACjC,CAAC;IAED,IAAI,MAAM,CAAC,SAAS,EAAE,CAAC;QACrB,UAAU,CAAC,IAAI,CAAC,MAAM,CAAC,SAAS,CAAC,CAAC;IACpC,CAAC;IAED,MAAM,OAAO,GAAG,MAAM,CAAC,MAAM,KAAK,CAAC,CAAC;IAEpC,IAAI,OAAO,CAAC,MAAM,EAAE,CAAC;QACnB,IAAI,OAAO,EAAE,CAAC;YACZ,OAAO,CAAC,MAAM,CAAC,IAAI,CAAC,2BAA2B,QAAQ,EAAE,CAAC,CAAC;QAC7D,CAAC;aAAM,CAAC;YACN,OAAO,CAAC,MAAM,CAAC,KAAK,CAClB,2BAA2B,QAAQ,SAAS,MAAM,CAAC,MAAM,WAAW,CACrE,CAAC;QACJ,CAAC;IACH,CAAC;IAED,OAAO;QACL,OAAO;QACP,MAAM;QACN,QAAQ;QACR,UAAU;QACV,cAAc,EAAE,CAAC;KAClB,CAAC;AACJ,CAAC;AAED;;;;;;;;GAQG;AACH,MAAM,CAAC,KAAK,UAAU,eAAe,CACnC,OAAe,EACf,QAAgB,EAChB,UAA6B,EAAE;IAE/B,MAAM,MAAM,GAAsB,EAAE,CAAC;IACrC,MAAM,QAAQ,GAAwB,EAAE,CAAC;IACzC,MAAM,UAAU,GAAa,EAAE,CAAC;IAEhC,mBAAmB;IACnB,IAAI,MAAM,CAAC;IACX,IAAI,CAAC;QACH,MAAM,GAAG,WAAW,CAAC,OAAO,CAAC,CAAC;IAChC,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QACf,IAAI,KAAK,YAAY,cAAc,EAAE,CAAC;YACpC,MAAM,CAAC,IAAI,CAAC;gBACV,IAAI,EAAE,QAAQ;gBACd,IAAI,EAAE,KAAK,CAAC,UAAU,CAAC,IAAI;gBAC3B,MAAM,EAAE,KAAK,CAAC,UAAU,CAAC,MAAM;gBAC/B,OAAO,EAAE,KAAK,CAAC,UAAU,CAAC,OAAO;gBACjC,IAAI,EAAE,OAAO;aACd,CAAC,CAAC;YACH,OAAO;gBACL,OAAO,EAAE,KAAK;gBACd,MAAM;gBACN,QAAQ;gBACR,UAAU;gBACV,cAAc,EAAE,CAAC;aAClB,CAAC;QACJ,CAAC;QACD,MAAM,KAAK,CAAC;IACd,CAAC;IAED,yBAAyB;IACzB,MAAM,eAAe,GAAG,IAAI,eAAe,EAAE,CAAC;IAC9C,MAAM,KAAK,GAAG,eAAe,CAAC,QAAQ,EAAE,CAAC;IAEzC,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;QACzB,MAAM,UAAU,GAAG,IAAI,CAAC,QAAQ,CAAC,MAAM,EAAE,OAAO,EAAE,QAAQ,EAAE;YAC1D,MAAM,EAAE,OAAO,CAAC,MAAM;SACvB,CAAC,CAAC;QACH,MAAM,CAAC,IAAI,CAAC,GAAG,UAAU,CAAC,CAAC;QAE3B,MAAM,YAAY,GAAG,IAAI,CAAC,WAAW,CAAC,MAAM,EAAE,OAAO,EAAE,QAAQ,EAAE;YAC/D,MAAM,EAAE,OAAO,CAAC,MAAM;SACvB,CAAC,CAAC;QACH,QAAQ,CAAC,IAAI,CAAC,GAAG,YAAY,CAAC,CAAC;IACjC,CAAC;IAED,IAAI,MAAM,CAAC,SAAS,EAAE,CAAC;QACrB,UAAU,CAAC,IAAI,CAAC,MAAM,CAAC,SAAS,CAAC,CAAC;IACpC,CAAC;IAED,MAAM,OAAO,GAAG,MAAM,CAAC,MAAM,KAAK,CAAC,CAAC;IAEpC,OAAO;QACL,OAAO;QACP,MAAM;QACN,QAAQ;QACR,UAAU;QACV,cAAc,EAAE,CAAC;KAClB,CAAC;AACJ,CAAC"}
|
package/package.json
CHANGED
|
@@ -1,9 +1,26 @@
|
|
|
1
|
-
import path from 'node:path';
|
|
2
|
-
|
|
3
1
|
import type { Record } from '../../record.js';
|
|
4
2
|
import type { ValidationError, ValidationWarning } from '../../types.js';
|
|
5
3
|
import type { IValidationRule, ValidationRuleOptions } from '../interfaces.js';
|
|
6
4
|
|
|
5
|
+
/**
|
|
6
|
+
* Get the filename from a path (browser-compatible alternative to path.basename)
|
|
7
|
+
*/
|
|
8
|
+
function getBasename(filepath: string): string {
|
|
9
|
+
const lastSlash = Math.max(
|
|
10
|
+
filepath.lastIndexOf('/'),
|
|
11
|
+
filepath.lastIndexOf('\\'),
|
|
12
|
+
);
|
|
13
|
+
return lastSlash >= 0 ? filepath.slice(lastSlash + 1) : filepath;
|
|
14
|
+
}
|
|
15
|
+
|
|
16
|
+
/**
|
|
17
|
+
* Remove the extension from a filename (browser-compatible alternative to path.extname)
|
|
18
|
+
*/
|
|
19
|
+
function removeExtension(filename: string): string {
|
|
20
|
+
const lastDot = filename.lastIndexOf('.');
|
|
21
|
+
return lastDot > 0 ? filename.slice(0, lastDot) : filename;
|
|
22
|
+
}
|
|
23
|
+
|
|
7
24
|
/**
|
|
8
25
|
* Validates that ACCESSION matches the filename
|
|
9
26
|
* Only validates ACCESSION-filename matching.
|
|
@@ -18,13 +35,13 @@ export class AccessionMatchRule implements IValidationRule {
|
|
|
18
35
|
_options: ValidationRuleOptions,
|
|
19
36
|
): ValidationError[] {
|
|
20
37
|
const errors: ValidationError[] = [];
|
|
21
|
-
const basename =
|
|
38
|
+
const basename = removeExtension(getBasename(filename));
|
|
22
39
|
|
|
23
40
|
if (record.ACCESSION !== basename) {
|
|
24
41
|
errors.push({
|
|
25
42
|
file: filename,
|
|
26
43
|
line: 1,
|
|
27
|
-
message: `ACCESSION ${record.ACCESSION}
|
|
44
|
+
message: `ACCESSION mismatch: File is named '${basename}.txt' but ACCESSION field is '${record.ACCESSION}'. Fix: Either rename the file to '${record.ACCESSION}.txt' or change ACCESSION field to '${basename}'.`,
|
|
28
45
|
type: 'validation',
|
|
29
46
|
});
|
|
30
47
|
}
|
|
@@ -53,12 +53,31 @@ export class NonStandardCharsRule implements IValidationRule {
|
|
|
53
53
|
originalText,
|
|
54
54
|
match.index,
|
|
55
55
|
);
|
|
56
|
+
const char = match[0] || '';
|
|
57
|
+
const codePoint = char.codePointAt(0);
|
|
58
|
+
const hex = codePoint
|
|
59
|
+
? `U+${codePoint.toString(16).toUpperCase().padStart(4, '0')}`
|
|
60
|
+
: '';
|
|
61
|
+
|
|
62
|
+
// Common replacements
|
|
63
|
+
const suggestions = new Map<string, string>([
|
|
64
|
+
['\u2014', 'Replace em-dash with hyphen'],
|
|
65
|
+
['\u201C', 'Replace fancy opening quote with straight quote (")'],
|
|
66
|
+
['\u201D', 'Replace fancy closing quote with straight quote (")'],
|
|
67
|
+
['\u2018', "Replace fancy opening apostrophe with straight quote (')"],
|
|
68
|
+
['\u2019', "Replace fancy closing apostrophe with straight quote (')"],
|
|
69
|
+
['\u2022', 'Replace bullet point with hyphen (-)'],
|
|
70
|
+
['\u00AE', 'Replace registered symbol with (R)'],
|
|
71
|
+
]);
|
|
72
|
+
|
|
73
|
+
const suggestion =
|
|
74
|
+
suggestions.get(char) || 'Replace with standard ASCII character';
|
|
75
|
+
|
|
56
76
|
warnings.push({
|
|
57
77
|
file: filename,
|
|
58
78
|
line,
|
|
59
79
|
column,
|
|
60
|
-
message:
|
|
61
|
-
'Non standard ASCII character found. This might be an error. Please check carefully.',
|
|
80
|
+
message: `Non-standard character '${char}' (${hex}) found. ${suggestion}`,
|
|
62
81
|
});
|
|
63
82
|
}
|
|
64
83
|
|
|
@@ -35,12 +35,39 @@ export class SerializationRule implements IValidationRule {
|
|
|
35
35
|
normalizedOriginal,
|
|
36
36
|
diffPosition,
|
|
37
37
|
);
|
|
38
|
+
|
|
39
|
+
// Get context around the difference for better error message
|
|
40
|
+
const originalLine = normalizedOriginal.split('\n')[line - 1] || '';
|
|
41
|
+
const serializedLine = serialized.split('\n')[line - 1] || '';
|
|
42
|
+
|
|
43
|
+
let message =
|
|
44
|
+
'File formatting issue detected (round-trip validation failed).';
|
|
45
|
+
|
|
46
|
+
// Provide specific guidance based on common issues
|
|
47
|
+
if (originalLine !== serializedLine) {
|
|
48
|
+
if (originalLine.includes(' ') && serializedLine.includes(' ')) {
|
|
49
|
+
message += ' Check for extra spaces or inconsistent spacing.';
|
|
50
|
+
} else if (!serializedLine.trim()) {
|
|
51
|
+
message +=
|
|
52
|
+
' This line may contain unrecognized fields that were ignored during parsing.';
|
|
53
|
+
} else {
|
|
54
|
+
const expectedPreview =
|
|
55
|
+
serializedLine.length > 80
|
|
56
|
+
? `${serializedLine.slice(0, 77)}...`
|
|
57
|
+
: serializedLine;
|
|
58
|
+
const foundPreview =
|
|
59
|
+
originalLine.length > 80
|
|
60
|
+
? `${originalLine.slice(0, 77)}...`
|
|
61
|
+
: originalLine;
|
|
62
|
+
message += ` Expected (${serializedLine.length} chars): "${expectedPreview}" but found (${originalLine.length} chars): "${foundPreview}"`;
|
|
63
|
+
}
|
|
64
|
+
}
|
|
65
|
+
|
|
38
66
|
errors.push({
|
|
39
67
|
file: filename,
|
|
40
68
|
line,
|
|
41
69
|
column,
|
|
42
|
-
message
|
|
43
|
-
'File content differs from generated record string. This might be a code problem. Please Report!',
|
|
70
|
+
message,
|
|
44
71
|
type: 'serialization',
|
|
45
72
|
});
|
|
46
73
|
}
|
|
@@ -89,14 +89,84 @@ export class UnrecognizedFieldRule implements IValidationRule {
|
|
|
89
89
|
|
|
90
90
|
// Check if this is an unrecognized field
|
|
91
91
|
if (!this.recognizedFields.has(key)) {
|
|
92
|
+
// Suggest similar field names for common typos
|
|
93
|
+
// Normalize to uppercase for comparison to catch case errors
|
|
94
|
+
const suggestion = this.findSimilarField(key.toUpperCase());
|
|
95
|
+
let message = `Unrecognized field '${key}'. Not a valid MassBank 2.6.0 field.`;
|
|
96
|
+
|
|
97
|
+
if (suggestion) {
|
|
98
|
+
message += ` Did you mean '${suggestion}'?`;
|
|
99
|
+
} else {
|
|
100
|
+
message +=
|
|
101
|
+
' Remove this line or check the MassBank format specification.';
|
|
102
|
+
}
|
|
103
|
+
|
|
92
104
|
warnings.push({
|
|
93
105
|
file: filename,
|
|
94
106
|
line: i + 1,
|
|
95
|
-
message
|
|
107
|
+
message,
|
|
96
108
|
});
|
|
97
109
|
}
|
|
98
110
|
}
|
|
99
111
|
|
|
100
112
|
return warnings;
|
|
101
113
|
}
|
|
114
|
+
|
|
115
|
+
/**
|
|
116
|
+
* Find a similar field name for typo suggestions
|
|
117
|
+
* Uses simple Levenshtein distance for similarity
|
|
118
|
+
*/
|
|
119
|
+
private findSimilarField(input: string): string | null {
|
|
120
|
+
let bestMatch: string | null = null;
|
|
121
|
+
let bestDistance = Number.POSITIVE_INFINITY;
|
|
122
|
+
|
|
123
|
+
for (const field of this.recognizedFields) {
|
|
124
|
+
const distance = this.levenshteinDistance(input, field);
|
|
125
|
+
// Only suggest if very similar (distance <= 2)
|
|
126
|
+
if (distance <= 2 && distance < bestDistance) {
|
|
127
|
+
bestDistance = distance;
|
|
128
|
+
bestMatch = field;
|
|
129
|
+
}
|
|
130
|
+
}
|
|
131
|
+
|
|
132
|
+
return bestMatch;
|
|
133
|
+
}
|
|
134
|
+
|
|
135
|
+
/**
|
|
136
|
+
* Calculate Levenshtein distance between two strings
|
|
137
|
+
*/
|
|
138
|
+
private levenshteinDistance(a: string, b: string): number {
|
|
139
|
+
const matrix: number[][] = [];
|
|
140
|
+
|
|
141
|
+
for (let i = 0; i <= b.length; i++) {
|
|
142
|
+
matrix[i] = [i];
|
|
143
|
+
}
|
|
144
|
+
|
|
145
|
+
const firstRow = matrix[0];
|
|
146
|
+
if (firstRow) {
|
|
147
|
+
for (let j = 0; j <= a.length; j++) {
|
|
148
|
+
firstRow[j] = j;
|
|
149
|
+
}
|
|
150
|
+
}
|
|
151
|
+
|
|
152
|
+
for (let i = 1; i <= b.length; i++) {
|
|
153
|
+
const currentRow = matrix[i];
|
|
154
|
+
const prevRow = matrix[i - 1];
|
|
155
|
+
if (!currentRow || !prevRow) continue;
|
|
156
|
+
|
|
157
|
+
for (let j = 1; j <= a.length; j++) {
|
|
158
|
+
if (b.charAt(i - 1) === a.charAt(j - 1)) {
|
|
159
|
+
currentRow[j] = prevRow[j - 1] ?? 0;
|
|
160
|
+
} else {
|
|
161
|
+
const substitution = (prevRow[j - 1] ?? 0) + 1;
|
|
162
|
+
const insertion = (currentRow[j - 1] ?? 0) + 1;
|
|
163
|
+
const deletion = (prevRow[j] ?? 0) + 1;
|
|
164
|
+
currentRow[j] = Math.min(substitution, insertion, deletion);
|
|
165
|
+
}
|
|
166
|
+
}
|
|
167
|
+
}
|
|
168
|
+
|
|
169
|
+
const lastRow = matrix[b.length];
|
|
170
|
+
return lastRow?.[a.length] ?? Number.POSITIVE_INFINITY;
|
|
171
|
+
}
|
|
102
172
|
}
|
|
@@ -7,8 +7,6 @@ import type {
|
|
|
7
7
|
} from '../types.js';
|
|
8
8
|
import { RecordValidator } from '../validation/index.js';
|
|
9
9
|
|
|
10
|
-
import { FileUtils } from './file-utils.js';
|
|
11
|
-
|
|
12
10
|
/**
|
|
13
11
|
* Validate a single MassBank record file
|
|
14
12
|
* @param filePath - Path to the .txt file
|
|
@@ -28,6 +26,9 @@ export async function validate(
|
|
|
28
26
|
options.logger.info(`Validating file: ${filePath}`);
|
|
29
27
|
}
|
|
30
28
|
|
|
29
|
+
// Dynamic import to avoid pulling node:fs/promises into browser bundles
|
|
30
|
+
const { FileUtils } = await import('./file-utils.js');
|
|
31
|
+
|
|
31
32
|
// Read file
|
|
32
33
|
let fileContent: string;
|
|
33
34
|
try {
|