massbank 0.0.2 → 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +108 -5
- package/lib/index.d.ts +4 -3
- package/lib/index.d.ts.map +1 -1
- package/lib/index.js +4 -5
- package/lib/index.js.map +1 -1
- package/lib/parser/exceptions.d.ts +9 -0
- package/lib/parser/exceptions.d.ts.map +1 -0
- package/lib/parser/exceptions.js +17 -0
- package/lib/parser/exceptions.js.map +1 -0
- package/lib/parser/field-parsers.d.ts +53 -0
- package/lib/parser/field-parsers.d.ts.map +1 -0
- package/lib/parser/field-parsers.js +218 -0
- package/lib/parser/field-parsers.js.map +1 -0
- package/lib/parser/index.d.ts +5 -0
- package/lib/parser/index.d.ts.map +1 -0
- package/lib/parser/index.js +4 -0
- package/lib/parser/index.js.map +1 -0
- package/lib/parser/interfaces.d.ts +46 -0
- package/lib/parser/interfaces.d.ts.map +1 -0
- package/lib/parser/interfaces.js +2 -0
- package/lib/parser/interfaces.js.map +1 -0
- package/lib/parser/position-utils.d.ts +39 -0
- package/lib/parser/position-utils.d.ts.map +1 -0
- package/lib/parser/position-utils.js +102 -0
- package/lib/parser/position-utils.js.map +1 -0
- package/lib/parser/record-parser.d.ts +29 -0
- package/lib/parser/record-parser.d.ts.map +1 -0
- package/lib/parser/record-parser.js +104 -0
- package/lib/parser/record-parser.js.map +1 -0
- package/lib/parser/table-parsers.d.ts +27 -0
- package/lib/parser/table-parsers.d.ts.map +1 -0
- package/lib/parser/table-parsers.js +178 -0
- package/lib/parser/table-parsers.js.map +1 -0
- package/lib/record.d.ts +55 -0
- package/lib/record.d.ts.map +1 -0
- package/lib/record.js +5 -0
- package/lib/record.js.map +1 -0
- package/lib/serializer/index.d.ts +3 -0
- package/lib/serializer/index.d.ts.map +1 -0
- package/lib/serializer/index.js +2 -0
- package/lib/serializer/index.js.map +1 -0
- package/lib/serializer/interfaces.d.ts +14 -0
- package/lib/serializer/interfaces.d.ts.map +1 -0
- package/lib/serializer/interfaces.js +2 -0
- package/lib/serializer/interfaces.js.map +1 -0
- package/lib/serializer/record-serializer.d.ts +24 -0
- package/lib/serializer/record-serializer.d.ts.map +1 -0
- package/lib/serializer/record-serializer.js +176 -0
- package/lib/serializer/record-serializer.js.map +1 -0
- package/lib/splash/index.d.ts +3 -0
- package/lib/splash/index.d.ts.map +1 -0
- package/lib/splash/index.js +2 -0
- package/lib/splash/index.js.map +1 -0
- package/lib/splash/interfaces.d.ts +22 -0
- package/lib/splash/interfaces.d.ts.map +1 -0
- package/lib/splash/interfaces.js +2 -0
- package/lib/splash/interfaces.js.map +1 -0
- package/lib/splash/splash-validator.d.ts +31 -0
- package/lib/splash/splash-validator.d.ts.map +1 -0
- package/lib/splash/splash-validator.js +79 -0
- package/lib/splash/splash-validator.js.map +1 -0
- package/lib/types.d.ts +92 -0
- package/lib/types.d.ts.map +1 -0
- package/lib/types.js +2 -0
- package/lib/types.js.map +1 -0
- package/lib/validation/index.d.ts +4 -0
- package/lib/validation/index.d.ts.map +1 -0
- package/lib/validation/index.js +3 -0
- package/lib/validation/index.js.map +1 -0
- package/lib/validation/interfaces.d.ts +24 -0
- package/lib/validation/interfaces.d.ts.map +1 -0
- package/lib/validation/interfaces.js +2 -0
- package/lib/validation/interfaces.js.map +1 -0
- package/lib/validation/rules/accession-match-rule.d.ts +13 -0
- package/lib/validation/rules/accession-match-rule.d.ts.map +1 -0
- package/lib/validation/rules/accession-match-rule.js +27 -0
- package/lib/validation/rules/accession-match-rule.js.map +1 -0
- package/lib/validation/rules/index.d.ts +5 -0
- package/lib/validation/rules/index.d.ts.map +1 -0
- package/lib/validation/rules/index.js +5 -0
- package/lib/validation/rules/index.js.map +1 -0
- package/lib/validation/rules/non-standard-chars-rule.d.ts +13 -0
- package/lib/validation/rules/non-standard-chars-rule.d.ts.map +1 -0
- package/lib/validation/rules/non-standard-chars-rule.js +45 -0
- package/lib/validation/rules/non-standard-chars-rule.js.map +1 -0
- package/lib/validation/rules/serialization-rule.d.ts +14 -0
- package/lib/validation/rules/serialization-rule.d.ts.map +1 -0
- package/lib/validation/rules/serialization-rule.js +55 -0
- package/lib/validation/rules/serialization-rule.js.map +1 -0
- package/lib/validation/rules/unrecognized-field-rule.d.ts +16 -0
- package/lib/validation/rules/unrecognized-field-rule.d.ts.map +1 -0
- package/lib/validation/rules/unrecognized-field-rule.js +87 -0
- package/lib/validation/rules/unrecognized-field-rule.js.map +1 -0
- package/lib/validation/validator.d.ts +18 -0
- package/lib/validation/validator.d.ts.map +1 -0
- package/lib/validation/validator.js +30 -0
- package/lib/validation/validator.js.map +1 -0
- package/lib/validator/file-utils.d.ts +13 -0
- package/lib/validator/file-utils.d.ts.map +1 -0
- package/lib/validator/file-utils.js +24 -0
- package/lib/validator/file-utils.js.map +1 -0
- package/lib/validator/index.d.ts +2 -0
- package/lib/validator/index.d.ts.map +1 -0
- package/lib/validator/index.js +2 -0
- package/lib/validator/index.js.map +1 -0
- package/lib/validator/validator.d.ts +19 -0
- package/lib/validator/validator.d.ts.map +1 -0
- package/lib/validator/validator.js +158 -0
- package/lib/validator/validator.js.map +1 -0
- package/package.json +1 -1
- package/src/index.ts +13 -5
- package/src/parser/exceptions.ts +24 -0
- package/src/parser/field-parsers.ts +237 -0
- package/src/parser/index.ts +8 -0
- package/src/parser/interfaces.ts +56 -0
- package/src/parser/position-utils.ts +130 -0
- package/src/parser/record-parser.ts +155 -0
- package/src/parser/table-parsers.ts +217 -0
- package/src/record.ts +71 -0
- package/src/serializer/index.ts +6 -0
- package/src/serializer/interfaces.ts +14 -0
- package/src/serializer/record-serializer.ts +192 -0
- package/src/splash/index.ts +2 -0
- package/src/splash/interfaces.ts +20 -0
- package/src/splash/splash-validator.ts +95 -0
- package/src/types.ts +96 -0
- package/src/validation/index.ts +3 -0
- package/src/validation/interfaces.ts +36 -0
- package/src/validation/rules/accession-match-rule.ts +38 -0
- package/src/validation/rules/index.ts +4 -0
- package/src/validation/rules/non-standard-chars-rule.ts +67 -0
- package/src/validation/rules/serialization-rule.ts +74 -0
- package/src/validation/rules/unrecognized-field-rule.ts +102 -0
- package/src/validation/validator.ts +39 -0
- package/src/validator/file-utils.ts +25 -0
- package/src/validator/index.ts +1 -0
- package/src/validator/validator.ts +188 -0
- package/lib/isValid.d.ts +0 -12
- package/lib/isValid.d.ts.map +0 -1
- package/lib/isValid.js +0 -15
- package/lib/isValid.js.map +0 -1
- package/src/isValid.ts +0 -22
|
@@ -0,0 +1,237 @@
|
|
|
1
|
+
import type { Record } from '../record.js';
|
|
2
|
+
|
|
3
|
+
import type { IFieldParser } from './interfaces.js';
|
|
4
|
+
|
|
5
|
+
/**
 * Shared abstract ancestor of the concrete field parsers in this module.
 *
 * It carries no behaviour of its own: it restates the `IFieldParser`
 * contract as abstract members so that every subclass has to provide both.
 */
abstract class BaseFieldParser implements IFieldParser {
  /** Reports whether this parser is responsible for the given field key. */
  public abstract canParse(key: string): boolean;

  /** Stores `value` for the field named `key` on `record`. */
  public abstract parse(key: string, value: string, record: Record): void;
}
|
|
12
|
+
|
|
13
|
+
/**
 * Field parser for the record header keys: `ACCESSION`, `RECORD_TITLE`,
 * `DATE`, `AUTHORS`, `LICENSE`, `COPYRIGHT`, `PUBLICATION`, `PROJECT`,
 * `COMMENT` and `DEPRECATED`.
 */
export class HeaderFieldParser extends BaseFieldParser {
  /**
   * One store routine per supported header key. The key set of this map is
   * also the list of keys `canParse` accepts.
   */
  private static readonly writers: ReadonlyMap<
    string,
    (value: string, record: Record) => void
  > = new Map<string, (value: string, record: Record) => void>([
    [
      'ACCESSION',
      (value, record) => {
        record.ACCESSION = value;
      },
    ],
    [
      'RECORD_TITLE',
      (value, record) => {
        record.RECORD_TITLE = value;
      },
    ],
    [
      'DATE',
      (value, record) => {
        record.DATE = value;
      },
    ],
    [
      'AUTHORS',
      (value, record) => {
        record.AUTHORS = value;
      },
    ],
    [
      'LICENSE',
      (value, record) => {
        record.LICENSE = value;
      },
    ],
    [
      'COPYRIGHT',
      (value, record) => {
        record.COPYRIGHT = value;
      },
    ],
    [
      'PUBLICATION',
      (value, record) => {
        record.PUBLICATION = value;
      },
    ],
    [
      'PROJECT',
      (value, record) => {
        record.PROJECT = value;
      },
    ],
    [
      'COMMENT',
      (value, record) => {
        // COMMENT may occur several times; each occurrence is appended.
        const comments = record.COMMENT;
        if (comments) {
          comments.push(value);
        } else {
          record.COMMENT = [value];
        }
      },
    ],
    [
      'DEPRECATED',
      (value, record) => {
        record.DEPRECATED = value;
      },
    ],
  ]);

  /**
   * @param key - Field key to test
   * @returns `true` when `key` is one of the supported header keys
   */
  canParse(key: string): boolean {
    return HeaderFieldParser.writers.has(key);
  }

  /**
   * Stores a header value on the record. Keys outside the supported set are
   * ignored.
   * @param key - Header field key
   * @param value - Field value to store
   * @param record - Record that receives the value
   */
  parse(key: string, value: string, record: Record): void {
    const write = HeaderFieldParser.writers.get(key);
    if (write !== undefined) {
      write(value, record);
    }
  }
}
|
|
73
|
+
|
|
74
|
+
/**
 * Field parser for compound information lines (keys starting with `CH$`).
 */
export class CompoundFieldParser extends BaseFieldParser {
  /** Store routines for the `CH$` keys this parser knows how to record. */
  private static readonly assigners: ReadonlyMap<
    string,
    (value: string, record: Record) => void
  > = new Map<string, (value: string, record: Record) => void>([
    [
      'CH$NAME',
      (value, record) => {
        // Repeatable field: collect every occurrence.
        const names = record.CH$NAME;
        if (names) {
          names.push(value);
        } else {
          record.CH$NAME = [value];
        }
      },
    ],
    [
      'CH$COMPOUND_CLASS',
      (value, record) => {
        record.CH$COMPOUND_CLASS = value;
      },
    ],
    [
      'CH$FORMULA',
      (value, record) => {
        record.CH$FORMULA = value;
      },
    ],
    [
      'CH$EXACT_MASS',
      (value, record) => {
        record.CH$EXACT_MASS = value;
      },
    ],
    [
      'CH$SMILES',
      (value, record) => {
        record.CH$SMILES = value;
      },
    ],
    [
      'CH$IUPAC',
      (value, record) => {
        record.CH$IUPAC = value;
      },
    ],
    [
      'CH$LINK',
      (value, record) => {
        // Repeatable field: collect every occurrence.
        const links = record.CH$LINK;
        if (links) {
          links.push(value);
        } else {
          record.CH$LINK = [value];
        }
      },
    ],
  ]);

  /**
   * @param key - Field key to test
   * @returns `true` for every key that begins with `CH$`
   */
  canParse(key: string): boolean {
    return key.startsWith('CH$');
  }

  /**
   * Stores a compound value on the record. `CH$` keys without a store
   * routine are accepted by `canParse` but leave the record untouched.
   * @param key - Compound field key
   * @param value - Field value to store
   * @param record - Record that receives the value
   */
  parse(key: string, value: string, record: Record): void {
    const assign = CompoundFieldParser.assigners.get(key);
    if (assign !== undefined) {
      assign(value, record);
    }
  }
}
|
|
116
|
+
|
|
117
|
+
/**
 * Field parser for analytical-condition lines (keys starting with `AC$`).
 */
export class AnalyticalConditionsFieldParser extends BaseFieldParser {
  /**
   * @param key - Field key to test
   * @returns `true` for every key that begins with `AC$`
   */
  canParse(key: string): boolean {
    return key.startsWith('AC$');
  }

  /**
   * Stores an analytical-condition value on the record. The instrument
   * fields hold a single value; the mass-spectrometry and chromatography
   * fields accumulate one entry per line. Other `AC$` keys are ignored.
   * @param key - Analytical-condition field key
   * @param value - Field value to store
   * @param record - Record that receives the value
   */
  parse(key: string, value: string, record: Record): void {
    if (key === 'AC$INSTRUMENT') {
      record.AC$INSTRUMENT = value;
      return;
    }

    if (key === 'AC$INSTRUMENT_TYPE') {
      record.AC$INSTRUMENT_TYPE = value;
      return;
    }

    if (key === 'AC$MASS_SPECTROMETRY') {
      const entries = record.AC$MASS_SPECTROMETRY;
      if (entries) {
        entries.push(value);
      } else {
        record.AC$MASS_SPECTROMETRY = [value];
      }
      return;
    }

    if (key === 'AC$CHROMATOGRAPHY') {
      const entries = record.AC$CHROMATOGRAPHY;
      if (entries) {
        entries.push(value);
      } else {
        record.AC$CHROMATOGRAPHY = [value];
      }
    }
  }
}
|
|
150
|
+
|
|
151
|
+
/**
 * Field parser for mass-spectrometry lines (keys starting with `MS$`).
 */
export class MassSpectrometryFieldParser extends BaseFieldParser {
  /**
   * @param key - Field key to test
   * @returns `true` for every key that begins with `MS$`
   */
  canParse(key: string): boolean {
    return key.startsWith('MS$');
  }

  /**
   * Appends the value to the list kept for `MS$FOCUSED_ION` or
   * `MS$DATA_PROCESSING`, creating the list on first use. Other `MS$` keys
   * are ignored.
   * @param key - Mass-spectrometry field key
   * @param value - Field value to append
   * @param record - Record that receives the value
   */
  parse(key: string, value: string, record: Record): void {
    if (key === 'MS$FOCUSED_ION') {
      const ions = record.MS$FOCUSED_ION;
      if (ions) {
        ions.push(value);
      } else {
        record.MS$FOCUSED_ION = [value];
      }
    } else if (key === 'MS$DATA_PROCESSING') {
      const steps = record.MS$DATA_PROCESSING;
      if (steps) {
        steps.push(value);
      } else {
        record.MS$DATA_PROCESSING = [value];
      }
    }
  }
}
|
|
178
|
+
|
|
179
|
+
/**
 * Field parser for the single-line peak fields (keys starting with `PK$`).
 *
 * `PK$PEAK` and `PK$ANNOTATION` are deliberately rejected by `canParse`
 * because they introduce multi-line tables rather than single values.
 */
export class PeakFieldParser extends BaseFieldParser {
  /**
   * @param key - Field key to test
   * @returns `true` for `PK$` keys other than `PK$PEAK` and `PK$ANNOTATION`
   */
  canParse(key: string): boolean {
    return (
      key.startsWith('PK$') && key !== 'PK$PEAK' && key !== 'PK$ANNOTATION'
    );
  }

  /**
   * Stores a peak field on the record. Other `PK$` keys are ignored.
   * @param key - Peak field key
   * @param value - Field value; for `PK$NUM_PEAK` it must consist solely of
   *   decimal digits (surrounding whitespace is tolerated)
   * @param record - Record that receives the value
   * @throws {Error} if the `PK$NUM_PEAK` value is not a non-negative integer
   */
  parse(key: string, value: string, record: Record): void {
    switch (key) {
      case 'PK$SPLASH':
        record.PK$SPLASH = value;
        break;
      case 'PK$NUM_PEAK': {
        // Number.parseInt would silently accept "12abc", "3.7" or "-4";
        // a peak count has to be a plain non-negative integer.
        const digits = value.trim();
        const numPeak = Number(digits);
        if (!/^\d+$/.test(digits) || !Number.isSafeInteger(numPeak)) {
          throw new Error(`Invalid PK$NUM_PEAK value: ${value}`);
        }
        record.PK$NUM_PEAK = numPeak;
        break;
      }
      default:
        break;
    }
  }
}
|
|
207
|
+
|
|
208
|
+
/**
 * Field parser for species lines (keys starting with `SP$`).
 */
export class SpeciesFieldParser extends BaseFieldParser {
  /**
   * @param key - Field key to test
   * @returns `true` for every key that begins with `SP$`
   */
  canParse(key: string): boolean {
    return key.startsWith('SP$');
  }

  /**
   * Stores a species value on the record. `SP$LINK` accumulates one entry
   * per line; the other supported keys hold a single value. Unsupported
   * `SP$` keys are ignored.
   * @param key - Species field key
   * @param value - Field value to store
   * @param record - Record that receives the value
   */
  parse(key: string, value: string, record: Record): void {
    if (key === 'SP$SCIENTIFIC_NAME') {
      record.SP$SCIENTIFIC_NAME = value;
      return;
    }

    if (key === 'SP$LINEAGE') {
      record.SP$LINEAGE = value;
      return;
    }

    if (key === 'SP$LINK') {
      const links = record.SP$LINK;
      if (links) {
        links.push(value);
      } else {
        record.SP$LINK = [value];
      }
      return;
    }

    if (key === 'SP$SAMPLE') {
      record.SP$SAMPLE = value;
    }
  }
}
|
|
@@ -0,0 +1,8 @@
|
|
|
1
|
+
export { RecordParser, createParser, parseRecord } from './record-parser.js';
|
|
2
|
+
export { ParseException } from './exceptions.js';
|
|
3
|
+
export type {
|
|
4
|
+
IFieldParser,
|
|
5
|
+
IRecordParser,
|
|
6
|
+
ITableParser,
|
|
7
|
+
} from './interfaces.js';
|
|
8
|
+
export { PositionUtils } from './position-utils.js';
|
|
@@ -0,0 +1,56 @@
|
|
|
1
|
+
import type { Record } from '../record.js';
|
|
2
|
+
|
|
3
|
+
/**
 * Contract for components that turn MassBank record text into a `Record`.
 */
export interface IRecordParser {
  /**
   * Parses the text of one MassBank record.
   * @param text - Raw text of the record
   * @returns The `Record` built from `text`
   * @throws {import('./exceptions.js').ParseException} when the text cannot be parsed
   */
  parse(text: string): Record;
}
|
|
15
|
+
|
|
16
|
+
/**
 * Contract for parsers that handle a single `KEY: value` field.
 */
export interface IFieldParser {
  /**
   * Tells the caller whether this parser is responsible for `key`.
   * @param key - Field key taken from the record line
   * @returns `true` when this parser accepts the key
   */
  canParse(key: string): boolean;

  /**
   * Interprets a field value and writes the result onto `record`.
   * @param key - Field key taken from the record line
   * @param value - Field value taken from the record line
   * @param record - Record to populate
   */
  parse(key: string, value: string, record: Record): void;
}
|
|
30
|
+
|
|
31
|
+
/**
 * Contract for parsers that handle multi-line table sections such as peak
 * lists and annotations.
 */
export interface ITableParser {
  /**
   * Tells the caller whether this parser is responsible for `key`.
   * @param key - Field key taken from the table's header line
   * @returns `true` when this parser accepts the key
   */
  canParse(key: string): boolean;

  /**
   * Reads the rows of a table and writes the result onto `record`.
   * @param key - Field key taken from the table's header line
   * @param lines - Every line of the record text
   * @param startIndex - Index in `lines` of the first data row; the header
   *   line sits at `startIndex - 1`
   * @param record - Record to populate
   * @param headerLine - Complete header line, e.g. "PK$ANNOTATION: m/z ion"
   * @returns How many lines were consumed, so the caller can skip past them
   */
  parse(
    key: string,
    lines: string[],
    startIndex: number,
    record: Record,
    headerLine?: string,
  ): number;
}
|
|
@@ -0,0 +1,130 @@
|
|
|
1
|
+
import type { ParseError } from '../types.js';
|
|
2
|
+
|
|
3
|
+
/**
 * Conversions between 0-based character offsets and 1-based line/column
 * pairs.
 *
 * All three helpers recognise exactly `\r\n`, `\r` and `\n` as line
 * separators, so they always agree with one another. The methods do not rely
 * on `this` and may be destructured from the object.
 */
export const PositionUtils = {
  /**
   * Converts a 1-based line/column pair into a 0-based character offset.
   *
   * When the requested line does not exist (too large, zero, negative or not
   * an integer) the result is measured from the end of the text, i.e.
   * `text.length + column - 1`.
   * @param text - The full text being parsed (used to measure the real separator lengths)
   * @param lineIndex - Line number (1-based)
   * @param column - Column number (1-based)
   * @returns 0-based character position in the original text
   */
  getPosition(text: string, lineIndex: number, column: number): number {
    const targetLine = lineIndex - 1;
    const columnOffset = column - 1;

    const lineBreak = /\r\n|\r|\n/g;
    let lineStart = 0;
    let currentLine = 0;

    // Hop from separator to separator until the target line starts.
    while (currentLine < targetLine) {
      const match = lineBreak.exec(text);
      if (match === null) {
        break;
      }
      lineStart = match.index + match[0].length;
      currentLine++;
    }

    // Target line not reached: fall back to the end of the text.
    if (currentLine !== targetLine) {
      lineStart = text.length;
    }

    return lineStart + columnOffset;
  },

  /**
   * Builds a ParseError for a 0-based character offset.
   *
   * The offset is clamped to `[0, text.length]` before it is stored and
   * translated into a 1-based line and column.
   * @param text - The full text being parsed
   * @param position - 0-based character offset in the text
   * @param message - Error message describing the problem
   * @returns ParseError with 1-based line and column
   */
  createParseError(
    text: string,
    position: number,
    message: string,
  ): ParseError {
    const clampedPosition = Math.max(0, Math.min(position, text.length));

    // Referenced through the constant rather than `this` so the method keeps
    // working when it is destructured or passed around as a callback.
    const { line, column } = PositionUtils.getLineColumn(text, clampedPosition);

    return {
      position: clampedPosition,
      message,
      line,
      column,
    };
  },

  /**
   * Converts a 0-based character offset into a 1-based line/column pair.
   *
   * The offset is clamped to `[0, text.length]` (NaN is treated as 0). An
   * offset that points at a separator character is reported on the line the
   * separator terminates; an offset just past a trailing separator is
   * reported as column 1 of the following (empty) line.
   * @param text - The full text being parsed
   * @param position - 0-based character offset in the text
   * @returns Object with 1-based line and column
   */
  getLineColumn(
    text: string,
    position: number,
  ): { line: number; column: number } {
    const clampedPosition = Number.isNaN(position)
      ? 0
      : Math.max(0, Math.min(position, text.length));

    const lineBreak = /\r\n|\r|\n/g;
    let match: RegExpExecArray | null;
    let lineStart = 0;
    let line = 1;

    while ((match = lineBreak.exec(text)) !== null) {
      const nextLineStart = match.index + match[0].length;

      // The offset lies before the next line begins, so it belongs to the
      // current line (possibly on its separator).
      if (clampedPosition < nextLineStart) {
        break;
      }

      lineStart = nextLineStart;
      line++;
    }

    return { line, column: clampedPosition - lineStart + 1 };
  },
};
|
|
@@ -0,0 +1,155 @@
|
|
|
1
|
+
import type { Record } from '../record.js';
|
|
2
|
+
|
|
3
|
+
import { ParseException } from './exceptions.js';
|
|
4
|
+
import {
|
|
5
|
+
AnalyticalConditionsFieldParser,
|
|
6
|
+
CompoundFieldParser,
|
|
7
|
+
HeaderFieldParser,
|
|
8
|
+
MassSpectrometryFieldParser,
|
|
9
|
+
PeakFieldParser,
|
|
10
|
+
SpeciesFieldParser,
|
|
11
|
+
} from './field-parsers.js';
|
|
12
|
+
import type {
|
|
13
|
+
IFieldParser,
|
|
14
|
+
IRecordParser,
|
|
15
|
+
ITableParser,
|
|
16
|
+
} from './interfaces.js';
|
|
17
|
+
import { PositionUtils } from './position-utils.js';
|
|
18
|
+
import { AnnotationTableParser, PeakTableParser } from './table-parsers.js';
|
|
19
|
+
|
|
20
|
+
/**
 * Line-oriented parser for MassBank records.
 *
 * Every `KEY: value` line is offered first to the table parsers and then to
 * the field parsers; the first one whose `canParse` accepts the key handles
 * it. Lines whose key no parser accepts are skipped.
 */
export class RecordParser implements IRecordParser {
  /** Handlers for single-line fields, consulted in this order. */
  private readonly fieldParsers: IFieldParser[] = [
    new HeaderFieldParser(),
    new CompoundFieldParser(),
    new AnalyticalConditionsFieldParser(),
    new MassSpectrometryFieldParser(),
    new PeakFieldParser(),
    new SpeciesFieldParser(),
  ];

  /** Handlers for multi-line tables; they take precedence over field handlers. */
  private readonly tableParsers: ITableParser[] = [
    new PeakTableParser(),
    new AnnotationTableParser(),
  ];

  /**
   * Parse a MassBank record string into a Record object.
   *
   * Parsing stops at the first line that is exactly `//` after trimming.
   * @param text - The MassBank record text
   * @returns The populated record
   * @throws {ParseException} if a non-blank line has no colon, if a field
   *   parser rejects a value, or if no ACCESSION value was found
   */
  parse(text: string): Record {
    const lines = text.split(/\r?\n/);
    const record: Record = { ACCESSION: '' };

    let cursor = 0;
    while (cursor < lines.length) {
      const line = lines[cursor];

      // Blank and whitespace-only lines carry no data.
      if (line === undefined || line.trim() === '') {
        cursor += 1;
        continue;
      }

      // "//" terminates the record; anything after it is ignored.
      if (line.trim() === '//') {
        break;
      }

      const separatorAt = line.indexOf(':');
      if (separatorAt < 0) {
        // Table rows are consumed by the table parsers, so a colon-less line
        // reaching this point cannot be valid.
        throw new ParseException(
          this.locate(
            text,
            cursor + 1,
            1,
            'Invalid line format: expected "KEY: value" or table data',
          ),
        );
      }

      const key = line.slice(0, separatorAt).trim();

      const tableHandler = this.tableParsers.find((candidate) =>
        candidate.canParse(key),
      );
      if (tableHandler) {
        // Data rows start on the line after the header; resume after the
        // rows the table parser reports as consumed.
        const firstDataLine = cursor + 1;
        cursor =
          firstDataLine +
          tableHandler.parse(key, lines, firstDataLine, record, line);
        continue;
      }

      const fieldHandler = this.fieldParsers.find((candidate) =>
        candidate.canParse(key),
      );
      if (fieldHandler) {
        const value = line.slice(separatorAt + 1).trim();
        try {
          fieldHandler.parse(key, value, record);
        } catch (cause) {
          const message =
            cause instanceof Error ? cause.message : 'Unknown parsing error';
          // Point at the column right after the colon.
          throw new ParseException(
            this.locate(text, cursor + 1, separatorAt + 2, message),
          );
        }
      }

      cursor += 1;
    }

    if (!record.ACCESSION) {
      throw new ParseException(
        PositionUtils.createParseError(text, 0, 'ACCESSION field is required'),
      );
    }

    return record;
  }

  /** Builds the parse-error payload for a 1-based line/column in `text`. */
  private locate(
    text: string,
    lineNumber: number,
    column: number,
    message: string,
  ) {
    return PositionUtils.createParseError(
      text,
      PositionUtils.getPosition(text, lineNumber, column),
      message,
    );
  }
}
|
|
139
|
+
|
|
140
|
+
/**
 * Factory for the default record parser.
 * @returns A new `RecordParser`, exposed through the `IRecordParser` contract
 */
export function createParser(): IRecordParser {
  const parser: IRecordParser = new RecordParser();
  return parser;
}
|
|
147
|
+
|
|
148
|
+
/**
 * One-shot helper: creates a parser via `createParser` and parses `text`
 * with it.
 * @param text - The MassBank record text
 * @returns The parsed record
 */
export function parseRecord(text: string): Record {
  return createParser().parse(text);
}
|