massbank 0.0.2 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +108 -5
- package/lib/index.d.ts +4 -3
- package/lib/index.d.ts.map +1 -1
- package/lib/index.js +4 -5
- package/lib/index.js.map +1 -1
- package/lib/parser/exceptions.d.ts +9 -0
- package/lib/parser/exceptions.d.ts.map +1 -0
- package/lib/parser/exceptions.js +17 -0
- package/lib/parser/exceptions.js.map +1 -0
- package/lib/parser/field-parsers.d.ts +53 -0
- package/lib/parser/field-parsers.d.ts.map +1 -0
- package/lib/parser/field-parsers.js +218 -0
- package/lib/parser/field-parsers.js.map +1 -0
- package/lib/parser/index.d.ts +5 -0
- package/lib/parser/index.d.ts.map +1 -0
- package/lib/parser/index.js +4 -0
- package/lib/parser/index.js.map +1 -0
- package/lib/parser/interfaces.d.ts +46 -0
- package/lib/parser/interfaces.d.ts.map +1 -0
- package/lib/parser/interfaces.js +2 -0
- package/lib/parser/interfaces.js.map +1 -0
- package/lib/parser/position-utils.d.ts +39 -0
- package/lib/parser/position-utils.d.ts.map +1 -0
- package/lib/parser/position-utils.js +102 -0
- package/lib/parser/position-utils.js.map +1 -0
- package/lib/parser/record-parser.d.ts +29 -0
- package/lib/parser/record-parser.d.ts.map +1 -0
- package/lib/parser/record-parser.js +104 -0
- package/lib/parser/record-parser.js.map +1 -0
- package/lib/parser/table-parsers.d.ts +27 -0
- package/lib/parser/table-parsers.d.ts.map +1 -0
- package/lib/parser/table-parsers.js +178 -0
- package/lib/parser/table-parsers.js.map +1 -0
- package/lib/record.d.ts +55 -0
- package/lib/record.d.ts.map +1 -0
- package/lib/record.js +5 -0
- package/lib/record.js.map +1 -0
- package/lib/serializer/index.d.ts +3 -0
- package/lib/serializer/index.d.ts.map +1 -0
- package/lib/serializer/index.js +2 -0
- package/lib/serializer/index.js.map +1 -0
- package/lib/serializer/interfaces.d.ts +14 -0
- package/lib/serializer/interfaces.d.ts.map +1 -0
- package/lib/serializer/interfaces.js +2 -0
- package/lib/serializer/interfaces.js.map +1 -0
- package/lib/serializer/record-serializer.d.ts +24 -0
- package/lib/serializer/record-serializer.d.ts.map +1 -0
- package/lib/serializer/record-serializer.js +176 -0
- package/lib/serializer/record-serializer.js.map +1 -0
- package/lib/splash/index.d.ts +3 -0
- package/lib/splash/index.d.ts.map +1 -0
- package/lib/splash/index.js +2 -0
- package/lib/splash/index.js.map +1 -0
- package/lib/splash/interfaces.d.ts +22 -0
- package/lib/splash/interfaces.d.ts.map +1 -0
- package/lib/splash/interfaces.js +2 -0
- package/lib/splash/interfaces.js.map +1 -0
- package/lib/splash/splash-validator.d.ts +31 -0
- package/lib/splash/splash-validator.d.ts.map +1 -0
- package/lib/splash/splash-validator.js +79 -0
- package/lib/splash/splash-validator.js.map +1 -0
- package/lib/types.d.ts +92 -0
- package/lib/types.d.ts.map +1 -0
- package/lib/types.js +2 -0
- package/lib/types.js.map +1 -0
- package/lib/validation/index.d.ts +4 -0
- package/lib/validation/index.d.ts.map +1 -0
- package/lib/validation/index.js +3 -0
- package/lib/validation/index.js.map +1 -0
- package/lib/validation/interfaces.d.ts +24 -0
- package/lib/validation/interfaces.d.ts.map +1 -0
- package/lib/validation/interfaces.js +2 -0
- package/lib/validation/interfaces.js.map +1 -0
- package/lib/validation/rules/accession-match-rule.d.ts +13 -0
- package/lib/validation/rules/accession-match-rule.d.ts.map +1 -0
- package/lib/validation/rules/accession-match-rule.js +40 -0
- package/lib/validation/rules/accession-match-rule.js.map +1 -0
- package/lib/validation/rules/index.d.ts +5 -0
- package/lib/validation/rules/index.d.ts.map +1 -0
- package/lib/validation/rules/index.js +5 -0
- package/lib/validation/rules/index.js.map +1 -0
- package/lib/validation/rules/non-standard-chars-rule.d.ts +13 -0
- package/lib/validation/rules/non-standard-chars-rule.d.ts.map +1 -0
- package/lib/validation/rules/non-standard-chars-rule.js +61 -0
- package/lib/validation/rules/non-standard-chars-rule.js.map +1 -0
- package/lib/validation/rules/serialization-rule.d.ts +14 -0
- package/lib/validation/rules/serialization-rule.d.ts.map +1 -0
- package/lib/validation/rules/serialization-rule.js +78 -0
- package/lib/validation/rules/serialization-rule.js.map +1 -0
- package/lib/validation/rules/unrecognized-field-rule.d.ts +25 -0
- package/lib/validation/rules/unrecognized-field-rule.d.ts.map +1 -0
- package/lib/validation/rules/unrecognized-field-rule.js +149 -0
- package/lib/validation/rules/unrecognized-field-rule.js.map +1 -0
- package/lib/validation/validator.d.ts +18 -0
- package/lib/validation/validator.d.ts.map +1 -0
- package/lib/validation/validator.js +30 -0
- package/lib/validation/validator.js.map +1 -0
- package/lib/validator/file-utils.d.ts +13 -0
- package/lib/validator/file-utils.d.ts.map +1 -0
- package/lib/validator/file-utils.js +24 -0
- package/lib/validator/file-utils.js.map +1 -0
- package/lib/validator/index.d.ts +2 -0
- package/lib/validator/index.d.ts.map +1 -0
- package/lib/validator/index.js +2 -0
- package/lib/validator/index.js.map +1 -0
- package/lib/validator/validator.d.ts +19 -0
- package/lib/validator/validator.d.ts.map +1 -0
- package/lib/validator/validator.js +159 -0
- package/lib/validator/validator.js.map +1 -0
- package/package.json +1 -1
- package/src/index.ts +13 -5
- package/src/parser/exceptions.ts +24 -0
- package/src/parser/field-parsers.ts +237 -0
- package/src/parser/index.ts +8 -0
- package/src/parser/interfaces.ts +56 -0
- package/src/parser/position-utils.ts +130 -0
- package/src/parser/record-parser.ts +155 -0
- package/src/parser/table-parsers.ts +217 -0
- package/src/record.ts +71 -0
- package/src/serializer/index.ts +6 -0
- package/src/serializer/interfaces.ts +14 -0
- package/src/serializer/record-serializer.ts +192 -0
- package/src/splash/index.ts +2 -0
- package/src/splash/interfaces.ts +20 -0
- package/src/splash/splash-validator.ts +95 -0
- package/src/types.ts +96 -0
- package/src/validation/index.ts +3 -0
- package/src/validation/interfaces.ts +36 -0
- package/src/validation/rules/accession-match-rule.ts +55 -0
- package/src/validation/rules/index.ts +4 -0
- package/src/validation/rules/non-standard-chars-rule.ts +86 -0
- package/src/validation/rules/serialization-rule.ts +101 -0
- package/src/validation/rules/unrecognized-field-rule.ts +172 -0
- package/src/validation/validator.ts +39 -0
- package/src/validator/file-utils.ts +25 -0
- package/src/validator/index.ts +1 -0
- package/src/validator/validator.ts +189 -0
- package/lib/isValid.d.ts +0 -12
- package/lib/isValid.d.ts.map +0 -1
- package/lib/isValid.js +0 -15
- package/lib/isValid.js.map +0 -1
- package/src/isValid.ts +0 -22
|
@@ -0,0 +1,159 @@
|
|
|
1
|
+
import { ParseException, parseRecord } from '../parser/index.js';
|
|
2
|
+
import { RecordValidator } from '../validation/index.js';
|
|
3
|
+
/**
 * Validate one MassBank record stored on disk.
 *
 * The file is read, parsed, and then checked by every rule exposed by a new
 * RecordValidator. A read failure or a ParseException is reported through the
 * returned result instead of being thrown; any other exception raised while
 * parsing is rethrown unchanged.
 *
 * @param filePath - Path to the .txt file
 * @param options - Validation options (legacy mode, logger)
 * @returns ValidationResult with errors, warnings, and accession;
 *   `filesProcessed` is 0 when the file could not be read, otherwise 1
 */
export async function validate(filePath, options = {}) {
    const errors = [];
    const warnings = [];
    const accessions = [];
    // Every outcome after a successful read reports exactly one processed file.
    const buildResult = (success) => ({
        success,
        errors,
        warnings,
        accessions,
        filesProcessed: 1,
    });
    const logger = options.logger;
    if (logger) {
        logger.info(`Validating file: ${filePath}`);
    }
    // Dynamic import to avoid pulling node:fs/promises into browser bundles
    const { FileUtils } = await import('./file-utils.js');
    // Step 1: read the file; a failure here ends validation immediately.
    let fileContent;
    try {
        fileContent = await FileUtils.readFile(filePath);
    }
    catch (readFailure) {
        const reason = readFailure instanceof Error ? readFailure.message : 'Unknown error';
        return {
            success: false,
            errors: [
                {
                    file: filePath,
                    message: `Failed to read file: ${reason}`,
                    type: 'other',
                },
            ],
            warnings: [],
            accessions: [],
            filesProcessed: 0,
        };
    }
    // Step 2: parse; only ParseException is translated into a validation error.
    let record;
    try {
        record = parseRecord(fileContent);
    }
    catch (parseFailure) {
        if (!(parseFailure instanceof ParseException)) {
            throw parseFailure;
        }
        const { line, column, message } = parseFailure.parseError;
        errors.push({ file: filePath, line, column, message, type: 'parse' });
        return buildResult(false);
    }
    // Step 3: run every rule, collecting its errors first and then its warnings.
    for (const rule of new RecordValidator().getRules()) {
        errors.push(...rule.validate(record, fileContent, filePath, {
            legacy: options.legacy,
        }));
        warnings.push(...rule.getWarnings(record, fileContent, filePath, {
            legacy: options.legacy,
        }));
    }
    if (record.ACCESSION) {
        accessions.push(record.ACCESSION);
    }
    const success = errors.length === 0;
    if (logger) {
        if (!success) {
            logger.error(`✗ Validation failed for ${filePath} with ${errors.length} error(s)`);
        }
        else {
            logger.info(`✓ Validation passed for ${filePath}`);
        }
    }
    return buildResult(success);
}
|
|
97
|
+
/**
 * Validate MassBank record text that is already in memory.
 *
 * Intended for browser or API use-cases where the record does not come from
 * the filesystem. A ParseException is reported through the returned result;
 * any other exception raised while parsing is rethrown unchanged.
 *
 * @param content - Full record text to validate
 * @param filename - Logical filename for error reporting (e.g. 'user-upload.txt')
 * @param options - Validation options (legacy mode, logger)
 * @returns ValidationResult with errors, warnings, and accession
 */
export async function validateContent(content, filename, options = {}) {
    const errors = [];
    const warnings = [];
    const accessions = [];
    // All exits of this function share the same result shape.
    const buildResult = (success) => ({
        success,
        errors,
        warnings,
        accessions,
        filesProcessed: 1,
    });
    // Parse first; only ParseException is translated into a validation error.
    let record;
    try {
        record = parseRecord(content);
    }
    catch (parseFailure) {
        if (!(parseFailure instanceof ParseException)) {
            throw parseFailure;
        }
        const { line, column, message } = parseFailure.parseError;
        errors.push({ file: filename, line, column, message, type: 'parse' });
        return buildResult(false);
    }
    // Run every rule, collecting its errors first and then its warnings.
    for (const rule of new RecordValidator().getRules()) {
        errors.push(...rule.validate(record, content, filename, {
            legacy: options.legacy,
        }));
        warnings.push(...rule.getWarnings(record, content, filename, {
            legacy: options.legacy,
        }));
    }
    if (record.ACCESSION) {
        accessions.push(record.ACCESSION);
    }
    return buildResult(errors.length === 0);
}
|
|
159
|
+
//# sourceMappingURL=validator.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"validator.js","sourceRoot":"","sources":["../../src/validator/validator.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,cAAc,EAAE,WAAW,EAAE,MAAM,oBAAoB,CAAC;AAOjE,OAAO,EAAE,eAAe,EAAE,MAAM,wBAAwB,CAAC;AAEzD;;;;;GAKG;AACH,MAAM,CAAC,KAAK,UAAU,QAAQ,CAC5B,QAAgB,EAChB,UAA6B,EAAE;IAE/B,MAAM,MAAM,GAAsB,EAAE,CAAC;IACrC,MAAM,QAAQ,GAAwB,EAAE,CAAC;IACzC,MAAM,UAAU,GAAa,EAAE,CAAC;IAEhC,yBAAyB;IACzB,IAAI,OAAO,CAAC,MAAM,EAAE,CAAC;QACnB,OAAO,CAAC,MAAM,CAAC,IAAI,CAAC,oBAAoB,QAAQ,EAAE,CAAC,CAAC;IACtD,CAAC;IAED,wEAAwE;IACxE,MAAM,EAAE,SAAS,EAAE,GAAG,MAAM,MAAM,CAAC,iBAAiB,CAAC,CAAC;IAEtD,YAAY;IACZ,IAAI,WAAmB,CAAC;IACxB,IAAI,CAAC;QACH,WAAW,GAAG,MAAM,SAAS,CAAC,QAAQ,CAAC,QAAQ,CAAC,CAAC;IACnD,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QACf,MAAM,OAAO,GAAG,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,eAAe,CAAC;QACzE,OAAO;YACL,OAAO,EAAE,KAAK;YACd,MAAM,EAAE;gBACN;oBACE,IAAI,EAAE,QAAQ;oBACd,OAAO,EAAE,wBAAwB,OAAO,EAAE;oBAC1C,IAAI,EAAE,OAAO;iBACd;aACF;YACD,QAAQ,EAAE,EAAE;YACZ,UAAU,EAAE,EAAE;YACd,cAAc,EAAE,CAAC;SAClB,CAAC;IACJ,CAAC;IAED,mBAAmB;IACnB,IAAI,MAAM,CAAC;IACX,IAAI,CAAC;QACH,MAAM,GAAG,WAAW,CAAC,WAAW,CAAC,CAAC;IACpC,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QACf,IAAI,KAAK,YAAY,cAAc,EAAE,CAAC;YACpC,MAAM,CAAC,IAAI,CAAC;gBACV,IAAI,EAAE,QAAQ;gBACd,IAAI,EAAE,KAAK,CAAC,UAAU,CAAC,IAAI;gBAC3B,MAAM,EAAE,KAAK,CAAC,UAAU,CAAC,MAAM;gBAC/B,OAAO,EAAE,KAAK,CAAC,UAAU,CAAC,OAAO;gBACjC,IAAI,EAAE,OAAO;aACd,CAAC,CAAC;YACH,OAAO;gBACL,OAAO,EAAE,KAAK;gBACd,MAAM;gBACN,QAAQ;gBACR,UAAU;gBACV,cAAc,EAAE,CAAC;aAClB,CAAC;QACJ,CAAC;QACD,MAAM,KAAK,CAAC;IACd,CAAC;IAED,yBAAyB;IACzB,MAAM,eAAe,GAAG,IAAI,eAAe,EAAE,CAAC;IAC9C,MAAM,KAAK,GAAG,eAAe,CAAC,QAAQ,EAAE,CAAC;IAEzC,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;QACzB,MAAM,UAAU,GAAG,IAAI,CAAC,QAAQ,CAAC,MAAM,EAAE,WAAW,EAAE,QAAQ,EAAE;YAC9D,MAAM,EAAE,OAAO,CAAC,MAAM;SACvB,CAAC,CAAC;QACH,MAAM,CAAC,IAAI,CAAC,GAAG,UAAU,CAAC,CAAC;QAE3B,MAAM,YAAY,GAAG,IAAI,CAAC,WAAW,CAAC,MAAM,EAAE,WAAW,EAAE,QAAQ,EAAE;YACnE,MAAM,EAAE,OAAO,CAAC,MAAM;SACvB,CAAC,CAAC;QACH,QAAQ,CAAC,IAAI,CAAC,GAAG,YAAY,CAAC,CAA
C;IACjC,CAAC;IAED,IAAI,MAAM,CAAC,SAAS,EAAE,CAAC;QACrB,UAAU,CAAC,IAAI,CAAC,MAAM,CAAC,SAAS,CAAC,CAAC;IACpC,CAAC;IAED,MAAM,OAAO,GAAG,MAAM,CAAC,MAAM,KAAK,CAAC,CAAC;IAEpC,IAAI,OAAO,CAAC,MAAM,EAAE,CAAC;QACnB,IAAI,OAAO,EAAE,CAAC;YACZ,OAAO,CAAC,MAAM,CAAC,IAAI,CAAC,2BAA2B,QAAQ,EAAE,CAAC,CAAC;QAC7D,CAAC;aAAM,CAAC;YACN,OAAO,CAAC,MAAM,CAAC,KAAK,CAClB,2BAA2B,QAAQ,SAAS,MAAM,CAAC,MAAM,WAAW,CACrE,CAAC;QACJ,CAAC;IACH,CAAC;IAED,OAAO;QACL,OAAO;QACP,MAAM;QACN,QAAQ;QACR,UAAU;QACV,cAAc,EAAE,CAAC;KAClB,CAAC;AACJ,CAAC;AAED;;;;;;;;GAQG;AACH,MAAM,CAAC,KAAK,UAAU,eAAe,CACnC,OAAe,EACf,QAAgB,EAChB,UAA6B,EAAE;IAE/B,MAAM,MAAM,GAAsB,EAAE,CAAC;IACrC,MAAM,QAAQ,GAAwB,EAAE,CAAC;IACzC,MAAM,UAAU,GAAa,EAAE,CAAC;IAEhC,mBAAmB;IACnB,IAAI,MAAM,CAAC;IACX,IAAI,CAAC;QACH,MAAM,GAAG,WAAW,CAAC,OAAO,CAAC,CAAC;IAChC,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QACf,IAAI,KAAK,YAAY,cAAc,EAAE,CAAC;YACpC,MAAM,CAAC,IAAI,CAAC;gBACV,IAAI,EAAE,QAAQ;gBACd,IAAI,EAAE,KAAK,CAAC,UAAU,CAAC,IAAI;gBAC3B,MAAM,EAAE,KAAK,CAAC,UAAU,CAAC,MAAM;gBAC/B,OAAO,EAAE,KAAK,CAAC,UAAU,CAAC,OAAO;gBACjC,IAAI,EAAE,OAAO;aACd,CAAC,CAAC;YACH,OAAO;gBACL,OAAO,EAAE,KAAK;gBACd,MAAM;gBACN,QAAQ;gBACR,UAAU;gBACV,cAAc,EAAE,CAAC;aAClB,CAAC;QACJ,CAAC;QACD,MAAM,KAAK,CAAC;IACd,CAAC;IAED,yBAAyB;IACzB,MAAM,eAAe,GAAG,IAAI,eAAe,EAAE,CAAC;IAC9C,MAAM,KAAK,GAAG,eAAe,CAAC,QAAQ,EAAE,CAAC;IAEzC,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;QACzB,MAAM,UAAU,GAAG,IAAI,CAAC,QAAQ,CAAC,MAAM,EAAE,OAAO,EAAE,QAAQ,EAAE;YAC1D,MAAM,EAAE,OAAO,CAAC,MAAM;SACvB,CAAC,CAAC;QACH,MAAM,CAAC,IAAI,CAAC,GAAG,UAAU,CAAC,CAAC;QAE3B,MAAM,YAAY,GAAG,IAAI,CAAC,WAAW,CAAC,MAAM,EAAE,OAAO,EAAE,QAAQ,EAAE;YAC/D,MAAM,EAAE,OAAO,CAAC,MAAM;SACvB,CAAC,CAAC;QACH,QAAQ,CAAC,IAAI,CAAC,GAAG,YAAY,CAAC,CAAC;IACjC,CAAC;IAED,IAAI,MAAM,CAAC,SAAS,EAAE,CAAC;QACrB,UAAU,CAAC,IAAI,CAAC,MAAM,CAAC,SAAS,CAAC,CAAC;IACpC,CAAC;IAED,MAAM,OAAO,GAAG,MAAM,CAAC,MAAM,KAAK,CAAC,CAAC;IAEpC,OAAO;QACL,OAAO;QACP,MAAM;QACN,QAAQ;QACR,UAAU;QACV,cAAc,EAAE,CAAC;KAClB,CAAC;AACJ,CAAC"}
|
package/package.json
CHANGED
package/src/index.ts
CHANGED
|
@@ -1,7 +1,15 @@
|
|
|
1
1
|
/**
|
|
2
|
-
*
|
|
3
|
-
*
|
|
2
|
+
* MassBank validation library
|
|
3
|
+
* Main entry point for the package
|
|
4
4
|
*/
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
}
|
|
5
|
+
|
|
6
|
+
// Validation functions
|
|
7
|
+
export { validate, validateContent } from './validator/index.js';
|
|
8
|
+
|
|
9
|
+
// Types for reading validation results
|
|
10
|
+
export type {
|
|
11
|
+
ValidationError,
|
|
12
|
+
ValidationOptions,
|
|
13
|
+
ValidationResult,
|
|
14
|
+
ValidationWarning,
|
|
15
|
+
} from './types.js';
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
import type { ParseError } from '../types.js';
|
|
2
|
+
|
|
3
|
+
/**
 * Error raised when a record cannot be parsed.
 *
 * Carries the structured {@link ParseError} that describes the failure so
 * callers can report it without re-parsing the message text.
 */
export class ParseException extends Error {
  /** Structured description of the failure this exception was built from. */
  readonly parseError: ParseError;

  /**
   * @param parseError - Structured description of the parse failure
   * @param message - Optional override for the error message; when omitted
   *   (or empty) the message of `parseError` is used
   */
  constructor(parseError: ParseError, message?: string) {
    super(message || parseError.message);
    this.name = 'ParseException';
    this.parseError = parseError;

    // `captureStackTrace` only exists on V8, so it is typed as optional and
    // probed at runtime before being called.
    const v8Error: typeof Error & {
      captureStackTrace?: (
        error: Error,
        // eslint-disable-next-line @typescript-eslint/no-explicit-any
        constructor: new (...args: any[]) => Error,
      ) => void;
    } = Error;
    if (typeof v8Error.captureStackTrace === 'function') {
      v8Error.captureStackTrace(this, ParseException);
    }
  }
}
|
|
@@ -0,0 +1,237 @@
|
|
|
1
|
+
import type { Record } from '../record.js';
|
|
2
|
+
|
|
3
|
+
import type { IFieldParser } from './interfaces.js';
|
|
4
|
+
|
|
5
|
+
/**
 * Common abstract base of the field parsers in this module.
 *
 * It adds no behaviour of its own; subclasses supply both operations of the
 * IFieldParser contract.
 */
abstract class BaseFieldParser implements IFieldParser {
  /** Report whether this parser is responsible for the given field key. */
  abstract canParse(key: string): boolean;

  /** Store the value of the field `key` on `record`. */
  abstract parse(key: string, value: string, record: Record): void;
}
|
|
12
|
+
|
|
13
|
+
/** Routine that stores one header value on the record. */
type HeaderFieldWriter = (record: Record, value: string) => void;

/**
 * Header keys handled by {@link HeaderFieldParser}, each paired with the
 * routine that writes its value onto the record.
 */
const HEADER_FIELD_WRITERS: ReadonlyMap<string, HeaderFieldWriter> = new Map<
  string,
  HeaderFieldWriter
>([
  [
    'ACCESSION',
    (record, value) => {
      record.ACCESSION = value;
    },
  ],
  [
    'RECORD_TITLE',
    (record, value) => {
      record.RECORD_TITLE = value;
    },
  ],
  [
    'DATE',
    (record, value) => {
      record.DATE = value;
    },
  ],
  [
    'AUTHORS',
    (record, value) => {
      record.AUTHORS = value;
    },
  ],
  [
    'LICENSE',
    (record, value) => {
      record.LICENSE = value;
    },
  ],
  [
    'COPYRIGHT',
    (record, value) => {
      record.COPYRIGHT = value;
    },
  ],
  [
    'PUBLICATION',
    (record, value) => {
      record.PUBLICATION = value;
    },
  ],
  [
    'PROJECT',
    (record, value) => {
      record.PROJECT = value;
    },
  ],
  [
    'COMMENT',
    (record, value) => {
      // COMMENT accumulates: each occurrence is appended in encounter order.
      if (!record.COMMENT) {
        record.COMMENT = [];
      }
      record.COMMENT.push(value);
    },
  ],
  [
    'DEPRECATED',
    (record, value) => {
      record.DEPRECATED = value;
    },
  ],
]);

/**
 * Parser for the record header fields (ACCESSION, RECORD_TITLE, DATE, ...).
 */
export class HeaderFieldParser extends BaseFieldParser {
  /** True when `key` is one of the known header field names. */
  canParse(key: string): boolean {
    return HEADER_FIELD_WRITERS.has(key);
  }

  /**
   * Store a header value on the record. Keys that are not header fields are
   * ignored without error.
   */
  parse(key: string, value: string, record: Record): void {
    const write = HEADER_FIELD_WRITERS.get(key);
    if (write !== undefined) {
      write(record, value);
    }
  }
}
|
|
73
|
+
|
|
74
|
+
/** Routine that stores one CH$ value on the record. */
type CompoundFieldWriter = (record: Record, value: string) => void;

/**
 * CH$ keys that {@link CompoundFieldParser} stores, each paired with the
 * routine that writes its value onto the record.
 */
const COMPOUND_FIELD_WRITERS: ReadonlyMap<string, CompoundFieldWriter> =
  new Map<string, CompoundFieldWriter>([
    [
      'CH$NAME',
      (record, value) => {
        // CH$NAME accumulates: each occurrence is appended in encounter order.
        if (!record.CH$NAME) {
          record.CH$NAME = [];
        }
        record.CH$NAME.push(value);
      },
    ],
    [
      'CH$COMPOUND_CLASS',
      (record, value) => {
        record.CH$COMPOUND_CLASS = value;
      },
    ],
    [
      'CH$FORMULA',
      (record, value) => {
        record.CH$FORMULA = value;
      },
    ],
    [
      'CH$EXACT_MASS',
      (record, value) => {
        record.CH$EXACT_MASS = value;
      },
    ],
    [
      'CH$SMILES',
      (record, value) => {
        record.CH$SMILES = value;
      },
    ],
    [
      'CH$IUPAC',
      (record, value) => {
        record.CH$IUPAC = value;
      },
    ],
    [
      'CH$LINK',
      (record, value) => {
        // CH$LINK accumulates as well.
        if (!record.CH$LINK) {
          record.CH$LINK = [];
        }
        record.CH$LINK.push(value);
      },
    ],
  ]);

/**
 * Parser for compound (CH$) fields.
 */
export class CompoundFieldParser extends BaseFieldParser {
  /** True for every key carrying the `CH$` prefix, known or not. */
  canParse(key: string): boolean {
    return key.startsWith('CH$');
  }

  /**
   * Store a CH$ value on the record. CH$ keys without a dedicated slot are
   * ignored without error.
   */
  parse(key: string, value: string, record: Record): void {
    const write = COMPOUND_FIELD_WRITERS.get(key);
    if (write !== undefined) {
      write(record, value);
    }
  }
}
|
|
116
|
+
|
|
117
|
+
/**
 * Parser for analytical-conditions (AC$) fields.
 */
export class AnalyticalConditionsFieldParser extends BaseFieldParser {
  /** True for every key carrying the `AC$` prefix, known or not. */
  canParse(key: string): boolean {
    return key.startsWith('AC$');
  }

  /**
   * Store an AC$ value on the record.
   *
   * AC$INSTRUMENT and AC$INSTRUMENT_TYPE hold a single value (a later
   * occurrence overwrites an earlier one); AC$MASS_SPECTROMETRY and
   * AC$CHROMATOGRAPHY accumulate values in encounter order. Any other AC$ key
   * is ignored without error.
   */
  parse(key: string, value: string, record: Record): void {
    if (key === 'AC$INSTRUMENT') {
      record.AC$INSTRUMENT = value;
      return;
    }
    if (key === 'AC$INSTRUMENT_TYPE') {
      record.AC$INSTRUMENT_TYPE = value;
      return;
    }
    if (key === 'AC$MASS_SPECTROMETRY') {
      // Create the list lazily on the first occurrence.
      const entries =
        record.AC$MASS_SPECTROMETRY || (record.AC$MASS_SPECTROMETRY = []);
      entries.push(value);
      return;
    }
    if (key === 'AC$CHROMATOGRAPHY') {
      const entries =
        record.AC$CHROMATOGRAPHY || (record.AC$CHROMATOGRAPHY = []);
      entries.push(value);
    }
  }
}
|
|
150
|
+
|
|
151
|
+
/**
 * Parser for mass-spectrometry (MS$) fields.
 */
export class MassSpectrometryFieldParser extends BaseFieldParser {
  /** True for every key carrying the `MS$` prefix, known or not. */
  canParse(key: string): boolean {
    return key.startsWith('MS$');
  }

  /**
   * Append an MS$ value to the matching list on the record.
   *
   * Both MS$FOCUSED_ION and MS$DATA_PROCESSING accumulate values in encounter
   * order; the list is created on first use. Any other MS$ key is ignored
   * without error.
   */
  parse(key: string, value: string, record: Record): void {
    if (key === 'MS$FOCUSED_ION') {
      const entries = record.MS$FOCUSED_ION || (record.MS$FOCUSED_ION = []);
      entries.push(value);
    } else if (key === 'MS$DATA_PROCESSING') {
      const entries =
        record.MS$DATA_PROCESSING || (record.MS$DATA_PROCESSING = []);
      entries.push(value);
    }
  }
}
|
|
178
|
+
|
|
179
|
+
/**
 * Parser for the single-line peak (PK$) fields; the table-valued PK$PEAK and
 * PK$ANNOTATION keys are deliberately excluded.
 */
export class PeakFieldParser extends BaseFieldParser {
  /** True for `PK$`-prefixed keys other than PK$PEAK and PK$ANNOTATION. */
  canParse(key: string): boolean {
    if (key === 'PK$PEAK' || key === 'PK$ANNOTATION') {
      return false;
    }
    return key.startsWith('PK$');
  }

  /**
   * Store a PK$ value on the record.
   *
   * PK$SPLASH is stored verbatim; PK$NUM_PEAK is converted with a base-10
   * `Number.parseInt`. Any other key is ignored without error.
   *
   * @throws {Error} when the PK$NUM_PEAK value does not parse to a number
   */
  parse(key: string, value: string, record: Record): void {
    if (key === 'PK$SPLASH') {
      record.PK$SPLASH = value;
      return;
    }
    if (key !== 'PK$NUM_PEAK') {
      return;
    }

    const peakCount = Number.parseInt(value, 10);
    if (Number.isNaN(peakCount)) {
      throw new Error(`Invalid PK$NUM_PEAK value: ${value}`);
    }
    record.PK$NUM_PEAK = peakCount;
  }
}
|
|
207
|
+
|
|
208
|
+
/** Routine that stores one SP$ value on the record. */
type SpeciesFieldWriter = (record: Record, value: string) => void;

/**
 * SP$ keys that {@link SpeciesFieldParser} stores, each paired with the
 * routine that writes its value onto the record.
 */
const SPECIES_FIELD_WRITERS: ReadonlyMap<string, SpeciesFieldWriter> = new Map<
  string,
  SpeciesFieldWriter
>([
  [
    'SP$SCIENTIFIC_NAME',
    (record, value) => {
      record.SP$SCIENTIFIC_NAME = value;
    },
  ],
  [
    'SP$LINEAGE',
    (record, value) => {
      record.SP$LINEAGE = value;
    },
  ],
  [
    'SP$LINK',
    (record, value) => {
      // SP$LINK accumulates: each occurrence is appended in encounter order.
      if (!record.SP$LINK) {
        record.SP$LINK = [];
      }
      record.SP$LINK.push(value);
    },
  ],
  [
    'SP$SAMPLE',
    (record, value) => {
      record.SP$SAMPLE = value;
    },
  ],
]);

/**
 * Parser for species (SP$) fields.
 */
export class SpeciesFieldParser extends BaseFieldParser {
  /** True for every key carrying the `SP$` prefix, known or not. */
  canParse(key: string): boolean {
    return key.startsWith('SP$');
  }

  /**
   * Store an SP$ value on the record. SP$ keys without a dedicated slot are
   * ignored without error.
   */
  parse(key: string, value: string, record: Record): void {
    const write = SPECIES_FIELD_WRITERS.get(key);
    if (write !== undefined) {
      write(record, value);
    }
  }
}
|
|
@@ -0,0 +1,8 @@
|
|
|
1
|
+
export { RecordParser, createParser, parseRecord } from './record-parser.js';
|
|
2
|
+
export { ParseException } from './exceptions.js';
|
|
3
|
+
export type {
|
|
4
|
+
IFieldParser,
|
|
5
|
+
IRecordParser,
|
|
6
|
+
ITableParser,
|
|
7
|
+
} from './interfaces.js';
|
|
8
|
+
export { PositionUtils } from './position-utils.js';
|
|
@@ -0,0 +1,56 @@
|
|
|
1
|
+
import type { Record } from '../record.js';
|
|
2
|
+
|
|
3
|
+
/**
 * Contract for a component that turns MassBank record text into a Record.
 */
export interface IRecordParser {
  /**
   * Convert the text of one MassBank record into a Record object.
   *
   * @param text - The MassBank record text
   * @returns The parsed Record object
   * @throws {import('./exceptions.js').ParseException} if parsing fails
   */
  parse(text: string): Record;
}
|
|
15
|
+
|
|
16
|
+
/**
 * Contract for a parser of single field values.
 */
export interface IFieldParser {
  /**
   * Report whether this parser is able to handle the given key.
   *
   * @param key - The field key
   */
  canParse(key: string): boolean;

  /**
   * Parse the value of a field; the target `record` is passed in and nothing
   * is returned.
   *
   * @param key - The field key
   * @param value - The field value
   * @param record - Record to populate
   */
  parse(key: string, value: string, record: Record): void;
}
|
|
30
|
+
|
|
31
|
+
/**
 * Contract for a parser of multi-line table data (peaks, annotations).
 */
export interface ITableParser {
  /**
   * Report whether this parser is able to handle the given key.
   *
   * @param key - The field key
   */
  canParse(key: string): boolean;

  /**
   * Parse the rows of a table, beginning at the given line index.
   *
   * @param key - The field key
   * @param lines - All lines
   * @param startIndex - Index of first data line (header is at startIndex - 1)
   * @param record - Record to populate
   * @param headerLine - The full header line (e.g., "PK$ANNOTATION: m/z ion")
   * @returns The number of lines consumed
   */
  parse(
    key: string,
    lines: string[],
    startIndex: number,
    record: Record,
    headerLine?: string,
  ): number;
}
|
|
@@ -0,0 +1,130 @@
|
|
|
1
|
+
import type { ParseError } from '../types.js';
|
|
2
|
+
|
|
3
|
+
/**
 * Helpers for converting between 0-based character offsets and 1-based
 * line/column coordinates.
 *
 * A line ends at CRLF, a lone CR, or a lone LF. Other characters that
 * JavaScript regular expressions treat as line terminators (U+2028, U+2029)
 * are counted as ordinary line content, so they cannot cut the scan short.
 */
export const PositionUtils = {
  /**
   * Get character position from line and column (both 1-based).
   * Converts 1-based line/column to a 0-based character offset in the text.
   *
   * When the requested line does not exist (including line numbers below 1),
   * the offset of the end of the text is used as the line start.
   *
   * @param text - The full text being parsed (used to determine actual newline lengths)
   * @param lineIndex - Line number (1-based)
   * @param column - Column number (1-based)
   * @returns 0-based character position in the original text
   */
  getPosition(text: string, lineIndex: number, column: number): number {
    const targetLine = lineIndex - 1;
    // Only real terminators are matched, so every match consumes at least one
    // character and the loop cannot stall on an empty match.
    const terminator = /\r\n|\r|\n/g;
    let lineStart = 0;
    let currentLine = 0;

    while (currentLine !== targetLine) {
      const match = terminator.exec(text);
      if (match === null) {
        // Ran out of lines before reaching the target: anchor at end of text.
        lineStart = text.length;
        break;
      }
      lineStart = match.index + match[0].length;
      currentLine++;
    }

    return lineStart + (column - 1);
  },

  /**
   * Create a ParseError from a 0-based character position.
   * Converts the position to 1-based line and column numbers.
   * Clamps position to valid range [0, text.length]; a NaN position is
   * treated as 0.
   *
   * @param text - The full text being parsed
   * @param position - 0-based character offset in the text
   * @param message - Error message describing the problem
   * @returns ParseError with the clamped position and 1-based line and column
   */
  createParseError(
    text: string,
    position: number,
    message: string,
  ): ParseError {
    const clampedPosition = Number.isNaN(position)
      ? 0
      : Math.max(0, Math.min(position, text.length));

    // Delegate to getLineColumn to avoid duplicating logic
    const { line, column } = this.getLineColumn(text, clampedPosition);

    return {
      position: clampedPosition,
      message,
      line,
      column,
    };
  },

  /**
   * Get line and column from a 0-based character position.
   * Converts the position to 1-based line and column numbers.
   * Clamps position to valid range [0, text.length]; a NaN position is
   * treated as 0.
   *
   * A position on (or inside) a line terminator is reported on the line that
   * the terminator ends, at the column just past that line's content. A
   * position at the very end of text that ends with a terminator is reported
   * as column 1 of the following (empty) line.
   *
   * @param text - The full text being parsed
   * @param position - 0-based character offset in the text
   * @returns Object with 1-based line and column
   */
  getLineColumn(
    text: string,
    position: number,
  ): { line: number; column: number } {
    const clampedPosition = Number.isNaN(position)
      ? 0
      : Math.max(0, Math.min(position, text.length));

    const terminator = /\r\n|\r|\n/g;
    let lineStart = 0;
    let line = 1;
    let match: RegExpExecArray | null;

    while ((match = terminator.exec(text)) !== null) {
      const terminatorEnd = match.index + match[0].length;
      if (terminatorEnd > clampedPosition) {
        // The position lies in this line's content or on its terminator.
        // Clamping to the terminator start keeps a position between CR and LF
        // from yielding a column past the end of the line.
        const column = Math.min(clampedPosition, match.index) - lineStart + 1;
        return { line, column };
      }
      lineStart = terminatorEnd;
      line++;
    }

    // No terminator at or after the position: it is on the final line, which
    // may be the empty line following a trailing newline.
    return { line, column: clampedPosition - lineStart + 1 };
  },
};
|