sdf-parser 7.0.5 → 8.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,20 @@
1
+ /**
2
+ * A Web Streams API `TransformStream` that splits an incoming text stream on
3
+ * the `$$$$` SDF record delimiter and emits individual molfile strings.
4
+ *
5
+ * Entries of 40 characters or fewer are discarded.
6
+ * @example
7
+ * ```ts
8
+ * const stream = readStream.pipeThrough(new MolfileStream());
9
+ * for await (const molfile of stream) {
10
+ * console.log(molfile);
11
+ * }
12
+ * ```
13
+ */
14
+ export declare class MolfileStream extends TransformStream<string, string> {
15
+ #private;
16
+ constructor(_options?: {
17
+ eol?: string;
18
+ });
19
+ }
20
+ //# sourceMappingURL=MolfileStream.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"MolfileStream.d.ts","sourceRoot":"","sources":["../src/MolfileStream.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;GAYG;AACH,qBAAa,aAAc,SAAQ,eAAe,CAAC,MAAM,EAAE,MAAM,CAAC;;gBAIpD,QAAQ,CAAC,EAAE;QAAE,GAAG,CAAC,EAAE,MAAM,CAAA;KAAE;CAmCxC"}
@@ -0,0 +1,52 @@
1
+ /**
2
+ * A Web Streams API `TransformStream` that splits an incoming text stream on
3
+ * the `$$$$` SDF record delimiter and emits individual molfile strings.
4
+ *
5
+ * Entries shorter than 40 characters are discarded.
6
+ * @example
7
+ * ```ts
8
+ * const stream = readStream.pipeThrough(new MolfileStream());
9
+ * for await (const molfile of stream) {
10
+ * console.log(molfile);
11
+ * }
12
+ * ```
13
+ */
14
+ export class MolfileStream extends TransformStream {
15
+ #buffer = [];
16
+ // eslint-disable-next-line @typescript-eslint/no-unused-vars
17
+ constructor(_options) {
18
+ super({
19
+ transform: (chunk, controller) => {
20
+ this.#buffer.push(chunk);
21
+ const combined = this.#buffer.join('');
22
+ this.#buffer.length = 0;
23
+ let begin = 0;
24
+ let index = 0;
25
+ while ((index = combined.indexOf('$$$$', index)) !== -1) {
26
+ const endOfDelimiter = combined.indexOf('\n', index);
27
+ if (endOfDelimiter === -1) {
28
+ index = begin;
29
+ break;
30
+ }
31
+ const eolLength = combined[endOfDelimiter - 1] === '\r' ? 2 : 1;
32
+ // Remove the last eol before enqueuing
33
+ if (index - eolLength - begin > 40) {
34
+ controller.enqueue(combined.slice(begin, index - eolLength));
35
+ }
36
+ index = endOfDelimiter + eolLength;
37
+ begin = index;
38
+ }
39
+ if (begin < combined.length) {
40
+ this.#buffer.push(combined.slice(begin));
41
+ }
42
+ },
43
+ flush: (controller) => {
44
+ const remaining = this.#buffer.join('');
45
+ if (remaining && remaining.length > 40) {
46
+ controller.enqueue(remaining);
47
+ }
48
+ },
49
+ });
50
+ }
51
+ }
52
+ //# sourceMappingURL=MolfileStream.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"MolfileStream.js","sourceRoot":"","sources":["../src/MolfileStream.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;GAYG;AACH,MAAM,OAAO,aAAc,SAAQ,eAA+B;IACvD,OAAO,GAAa,EAAE,CAAC;IAEhC,6DAA6D;IAC7D,YAAY,QAA2B;QACrC,KAAK,CAAC;YACJ,SAAS,EAAE,CAAC,KAAK,EAAE,UAAU,EAAE,EAAE;gBAC/B,IAAI,CAAC,OAAO,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;gBACzB,MAAM,QAAQ,GAAG,IAAI,CAAC,OAAO,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;gBACvC,IAAI,CAAC,OAAO,CAAC,MAAM,GAAG,CAAC,CAAC;gBAExB,IAAI,KAAK,GAAG,CAAC,CAAC;gBACd,IAAI,KAAK,GAAG,CAAC,CAAC;gBACd,OAAO,CAAC,KAAK,GAAG,QAAQ,CAAC,OAAO,CAAC,MAAM,EAAE,KAAK,CAAC,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC;oBACxD,MAAM,cAAc,GAAG,QAAQ,CAAC,OAAO,CAAC,IAAI,EAAE,KAAK,CAAC,CAAC;oBACrD,IAAI,cAAc,KAAK,CAAC,CAAC,EAAE,CAAC;wBAC1B,KAAK,GAAG,KAAK,CAAC;wBACd,MAAM;oBACR,CAAC;oBACD,MAAM,SAAS,GAAG,QAAQ,CAAC,cAAc,GAAG,CAAC,CAAC,KAAK,IAAI,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;oBAChE,uCAAuC;oBACvC,IAAI,KAAK,GAAG,SAAS,GAAG,KAAK,GAAG,EAAE,EAAE,CAAC;wBACnC,UAAU,CAAC,OAAO,CAAC,QAAQ,CAAC,KAAK,CAAC,KAAK,EAAE,KAAK,GAAG,SAAS,CAAC,CAAC,CAAC;oBAC/D,CAAC;oBACD,KAAK,GAAG,cAAc,GAAG,SAAS,CAAC;oBACnC,KAAK,GAAG,KAAK,CAAC;gBAChB,CAAC;gBACD,IAAI,KAAK,GAAG,QAAQ,CAAC,MAAM,EAAE,CAAC;oBAC5B,IAAI,CAAC,OAAO,CAAC,IAAI,CAAC,QAAQ,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC,CAAC;gBAC3C,CAAC;YACH,CAAC;YACD,KAAK,EAAE,CAAC,UAAU,EAAE,EAAE;gBACpB,MAAM,SAAS,GAAG,IAAI,CAAC,OAAO,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;gBACxC,IAAI,SAAS,IAAI,SAAS,CAAC,MAAM,GAAG,EAAE,EAAE,CAAC;oBACvC,UAAU,CAAC,OAAO,CAAC,SAAS,CAAC,CAAC;gBAChC,CAAC;YACH,CAAC;SACF,CAAC,CAAC;IACL,CAAC;CACF"}
@@ -0,0 +1,11 @@
1
+ /**
2
+ * Get the [start, end] boundaries of each SDF entry in a string.
3
+ *
4
+ * Uses `indexOf` for fast splitting without regex overhead.
5
+ * @param string - The full SDF string.
6
+ * @param substring - The delimiter to search for (e.g. `'\n$$$$'`).
7
+ * @param eol - The end-of-line character used to skip past the delimiter line.
8
+ * @returns An array of `[start, end]` index pairs, one per SDF entry.
9
+ */
10
+ export declare function getEntriesBoundaries(string: string, substring: string, eol: string): Array<[number, number]>;
11
+ //# sourceMappingURL=getEntriesBoundaries.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"getEntriesBoundaries.d.ts","sourceRoot":"","sources":["../src/getEntriesBoundaries.ts"],"names":[],"mappings":"AAAA;;;;;;;;GAQG;AACH,wBAAgB,oBAAoB,CAClC,MAAM,EAAE,MAAM,EACd,SAAS,EAAE,MAAM,EACjB,GAAG,EAAE,MAAM,GACV,KAAK,CAAC,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC,CAoBzB"}
@@ -0,0 +1,33 @@
1
/**
 * Compute the `[start, end]` index pair of every SDF entry in a string.
 *
 * The string is scanned with `indexOf` only (no regular expression). Each
 * entry ends where `substring` is found; the next entry starts right after
 * the first `eol` that follows that delimiter. When no further delimiter is
 * found, the remainder of the string (possibly empty) is reported as a last
 * entry. When a delimiter is found but no `eol` follows it, scanning stops
 * without reporting a remainder.
 * @param string - The full SDF string.
 * @param substring - The delimiter to search for (e.g. `'\n$$$$'`).
 * @param eol - The end-of-line character used to skip past the delimiter line.
 * @returns An array of `[start, end]` index pairs, one per SDF entry.
 */
export function getEntriesBoundaries(string, substring, eol) {
  const boundaries = [];
  let start = 0;
  for (;;) {
    const delimiterAt = string.indexOf(substring, start);
    if (delimiterAt === -1) {
      // No more delimiters: whatever is left forms the final entry.
      boundaries.push([start, string.length]);
      break;
    }
    boundaries.push([start, delimiterAt]);
    const eolAt = string.indexOf(eol, delimiterAt + substring.length);
    if (eolAt === -1) {
      // The delimiter line is the end of the string.
      break;
    }
    start = eolAt + eol.length;
  }
  return boundaries;
}
33
+ //# sourceMappingURL=getEntriesBoundaries.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"getEntriesBoundaries.js","sourceRoot":"","sources":["../src/getEntriesBoundaries.ts"],"names":[],"mappings":"AAAA;;;;;;;;GAQG;AACH,MAAM,UAAU,oBAAoB,CAClC,MAAc,EACd,SAAiB,EACjB,GAAW;IAEX,MAAM,GAAG,GAA4B,EAAE,CAAC;IACxC,IAAI,QAAQ,GAAG,CAAC,CAAC;IACjB,IAAI,IAAI,GAAG,CAAC,CAAC;IACb,OAAO,IAAI,KAAK,CAAC,CAAC,EAAE,CAAC;QACnB,IAAI,GAAG,MAAM,CAAC,OAAO,CAAC,SAAS,EAAE,QAAQ,CAAC,CAAC;QAC3C,IAAI,IAAI,KAAK,CAAC,CAAC,EAAE,CAAC;YAChB,GAAG,CAAC,IAAI,CAAC,CAAC,QAAQ,EAAE,IAAI,CAAC,CAAC,CAAC;YAC3B,MAAM,SAAS,GAAG,MAAM,CAAC,OAAO,CAAC,GAAG,EAAE,IAAI,GAAG,SAAS,CAAC,MAAM,CAAC,CAAC;YAC/D,IAAI,SAAS,KAAK,CAAC,CAAC,EAAE,CAAC;gBACrB,IAAI,GAAG,CAAC,CAAC,CAAC;YACZ,CAAC;iBAAM,CAAC;gBACN,QAAQ,GAAG,SAAS,GAAG,GAAG,CAAC,MAAM,CAAC;gBAClC,IAAI,GAAG,QAAQ,CAAC;YAClB,CAAC;QACH,CAAC;aAAM,CAAC;YACN,GAAG,CAAC,IAAI,CAAC,CAAC,QAAQ,EAAE,MAAM,CAAC,MAAM,CAAC,CAAC,CAAC;QACtC,CAAC;IACH,CAAC;IACD,OAAO,GAAG,CAAC;AACb,CAAC"}
package/lib/index.d.ts ADDED
@@ -0,0 +1,4 @@
1
+ export * from './parse.ts';
2
+ export * from './iterator.ts';
3
+ export * from './MolfileStream.ts';
4
+ //# sourceMappingURL=index.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA,cAAc,YAAY,CAAC;AAC3B,cAAc,eAAe,CAAC;AAC9B,cAAc,oBAAoB,CAAC"}
package/lib/index.js CHANGED
@@ -1,302 +1,4 @@
1
- 'use strict';
2
-
3
- var ensureString = require('ensure-string');
4
- var dynamicTyping = require('dynamic-typing');
5
-
6
- /**
7
- *
8
- * @param {*} string
9
- * @param {*} substring
10
- * @param {*} eol
11
- * @returns
12
- */
13
- function getEntriesBoundaries(string, substring, eol) {
14
- const res = [];
15
- let previous = 0;
16
- let next = 0;
17
- while (next !== -1) {
18
- next = string.indexOf(substring, previous);
19
- if (next !== -1) {
20
- res.push([previous, next]);
21
- const nextMatch = string.indexOf(eol, next + substring.length);
22
- if (nextMatch === -1) {
23
- next = -1;
24
- } else {
25
- previous = nextMatch + eol.length;
26
- next = previous;
27
- }
28
- } else {
29
- res.push([previous, string.length]);
30
- }
31
- }
32
- return res;
33
- }
34
-
35
- /**
36
- * Parse the molfile and the properties with > < labels >
37
- * @param {string} sdfPart
38
- * @param {*} labels
39
- * @param {*} currentLabels
40
- * @param {object} options
41
- * @returns
42
- */
43
- function getMolecule$1(sdfPart, labels, currentLabels, options) {
44
- let parts = sdfPart.split(`${options.eol}>`);
45
- if (parts.length === 0 || parts[0].length <= 5) return;
46
- let molecule = { molfile: parts[0] + options.eol };
47
- for (let j = 1; j < parts.length; j++) {
48
- let lines = parts[j].split(options.eol);
49
- let from = lines[0].indexOf('<');
50
- let to = lines[0].indexOf('>');
51
- let label = lines[0].slice(from + 1, to);
52
- currentLabels.push(label);
53
- if (!labels[label]) {
54
- labels[label] = {
55
- counter: 0,
56
- isNumeric: options.dynamicTyping,
57
- keep: false,
58
- };
59
- if (
60
- (!options.exclude || !options.exclude.includes(label)) &&
61
- (!options.include || options.include.includes(label))
62
- ) {
63
- labels[label].keep = true;
64
- if (options.modifiers[label]) {
65
- labels[label].modifier = options.modifiers[label];
66
- }
67
- if (options.forEach[label]) {
68
- labels[label].forEach = options.forEach[label];
69
- }
70
- }
71
- }
72
- if (labels[label].keep) {
73
- for (let k = 1; k < lines.length - 1; k++) {
74
- if (molecule[label]) {
75
- molecule[label] += options.eol + lines[k];
76
- } else {
77
- molecule[label] = lines[k];
78
- }
79
- }
80
- if (labels[label].modifier) {
81
- let modifiedValue = labels[label].modifier(molecule[label]);
82
- if (modifiedValue === undefined || modifiedValue === null) {
83
- delete molecule[label];
84
- } else {
85
- molecule[label] = modifiedValue;
86
- }
87
- }
88
- if (
89
- labels[label].isNumeric &&
90
- (!Number.isFinite(+molecule[label]) || molecule[label].match(/^0[0-9]/))
91
- ) {
92
- labels[label].isNumeric = false;
93
- }
94
- }
95
- }
96
- return molecule;
97
- }
98
-
99
- /**
100
- * Parse a SDF file
101
- * @param {string|ArrayBuffer|Uint8Array} sdf - SDF file to parse
102
- * @param {object} [options={}]
103
- * @param {string[]} [options.include] - List of fields to include
104
- * @param {string[]} [options.exclude] - List of fields to exclude
105
- * @param {Function} [options.filter] - Callback allowing to filter the molecules
106
- * @param {boolean} [options.dynamicTyping] - Dynamically type the data
107
- * @param {object} [options.modifiers] - Object containing callbacks to apply on some specific fields
108
- * @param {boolean} [options.mixedEOL=false] - Set to true if you know there is a mixture between \r\n and \n
109
- * @param {string} [options.eol] - Specify the end of line character. Default will be the one found in the file
110
- * @returns {object} - Object containing the molecules, the labels and the statistics
111
- */
112
- function parse(sdf, options = {}) {
113
- options = { ...options };
114
- if (options.modifiers === undefined) options.modifiers = {};
115
- if (options.forEach === undefined) options.forEach = {};
116
- if (options.dynamicTyping === undefined) options.dynamicTyping = true;
117
-
118
- sdf = ensureString.ensureString(sdf);
119
- if (typeof sdf !== 'string') {
120
- throw new TypeError('Parameter "sdf" must be a string');
121
- }
122
-
123
- if (options.eol === undefined) {
124
- options.eol = '\n';
125
- if (options.mixedEOL) {
126
- sdf = sdf.replaceAll('\r\n', '\n');
127
- sdf = sdf.replaceAll('\r', '\n');
128
- } else {
129
- // we will find the delimiter in order to be much faster and not use regular expression
130
- let header = new Set(sdf.slice(0, 1000));
131
- if (header.has('\r\n')) {
132
- options.eol = '\r\n';
133
- } else if (header.has('\r')) {
134
- options.eol = '\r';
135
- }
136
- }
137
- }
138
-
139
- let entriesBoundaries = getEntriesBoundaries(
140
- sdf,
141
- `${options.eol}$$$$`,
142
- options.eol,
143
- );
144
- let molecules = [];
145
- let labels = {};
146
-
147
- let start = Date.now();
148
-
149
- for (let i = 0; i < entriesBoundaries.length; i++) {
150
- let sdfPart = sdf.slice(...entriesBoundaries[i]);
151
- if (sdfPart.length < 40) continue;
152
- let currentLabels = [];
153
- const molecule = getMolecule$1(sdfPart, labels, currentLabels, options);
154
- if (!molecule) continue;
155
- if (!options.filter || options.filter(molecule)) {
156
- molecules.push(molecule);
157
- // only now we can increase the counter
158
- for (let j = 0; j < currentLabels.length; j++) {
159
- labels[currentLabels[j]].counter++;
160
- }
161
- }
162
- }
163
- // all numeric fields should be converted to numbers
164
- for (let label in labels) {
165
- let currentLabel = labels[label];
166
- if (currentLabel.isNumeric) {
167
- currentLabel.minValue = Infinity;
168
- currentLabel.maxValue = -Infinity;
169
- for (let j = 0; j < molecules.length; j++) {
170
- if (molecules[j][label]) {
171
- let value = Number.parseFloat(molecules[j][label]);
172
- molecules[j][label] = value;
173
- if (value > currentLabel.maxValue) {
174
- currentLabel.maxValue = value;
175
- }
176
- if (value < currentLabel.minValue) {
177
- currentLabel.minValue = value;
178
- }
179
- }
180
- }
181
- }
182
- }
183
-
184
- // we check that a label is in all the records
185
- for (let key in labels) {
186
- if (labels[key].counter === molecules.length) {
187
- labels[key].always = true;
188
- } else {
189
- labels[key].always = false;
190
- }
191
- }
192
-
193
- let statistics = [];
194
- for (let key in labels) {
195
- let statistic = labels[key];
196
- statistic.label = key;
197
- statistics.push(statistic);
198
- }
199
-
200
- return {
201
- time: Date.now() - start,
202
- molecules,
203
- labels: Object.keys(labels),
204
- statistics,
205
- };
206
- }
207
-
208
- class MolfileStream extends TransformStream {
209
- #buffer = '';
210
-
211
- constructor() {
212
- super({
213
- transform: (chunk, controller) => {
214
- this.#buffer += chunk;
215
- let begin = 0;
216
- let index = 0;
217
- while ((index = this.#buffer.indexOf('$$$$', index)) !== -1) {
218
- // we need to check if the delimiter '\n' is in the current buffer
219
- // if it is not we need to wait for the next chunk
220
- const endOfDelimiter = this.#buffer.indexOf('\n', index);
221
- if (endOfDelimiter === -1) {
222
- index = begin;
223
- break;
224
- }
225
- const eolLength = this.#buffer[endOfDelimiter - 1] === '\r' ? 2 : 1;
226
- // need to remove the last eol because we will split on eol+'>' in getMolecule
227
- if (index - eolLength - begin > 40) {
228
- controller.enqueue(this.#buffer.slice(begin, index - eolLength));
229
- }
230
- index = endOfDelimiter + eolLength;
231
- begin = index;
232
- }
233
- this.#buffer = this.#buffer.slice(begin);
234
- },
235
- flush: (controller) => {
236
- if (this.#buffer && this.#buffer.length > 40) {
237
- controller.enqueue(this.#buffer);
238
- }
239
- },
240
- });
241
- }
242
- }
243
-
244
- /**
245
- * Parse a SDF file as an iterator
246
- * @param {ReadableStream} readStream - SDF file to parse
247
- * @param {object} [options={}] - iterator options
248
- * @param {Function} [options.filter] - Callback allowing to filter the molecules
249
- * @param {string} [options.eol='\n'] - End of line character
250
- * @param {boolean} [options.dynamicTyping] - Dynamically type the data
251
- * @yields {object} - Molecule object
252
- */
253
- async function* iterator(readStream, options = {}) {
254
- const { eol = '\n', dynamicTyping = true } = options;
255
-
256
- const moleculeStream = readStream.pipeThrough(new MolfileStream({ eol }));
257
- for await (const entry of moleculeStream) {
258
- const molecule = getMolecule(entry, {
259
- eol,
260
- dynamicTyping,
261
- });
262
- if (!options.filter || options.filter(molecule)) {
263
- yield molecule;
264
- }
265
- }
266
- }
267
-
268
- /**
269
- * Convert a SDF part to an object
270
- * @param {string} sdfPart - text containing the molfile
271
- * @param {object} options - options
272
- * @param {string} options.eol - end of line character
273
- * @param {boolean} options.dynamicTyping - Dynamically type the data (create numbers and booleans)
274
- * @returns
275
- */
276
- function getMolecule(sdfPart, options) {
277
- const { eol, dynamicTyping: dynamicTyping$1 } = options;
278
- let parts = sdfPart.split(`${eol}>`);
279
- if (parts.length === 0 || parts[0].length <= 5) return;
280
- let molecule = { molfile: parts[0] + eol };
281
- for (let j = 1; j < parts.length; j++) {
282
- let lines = parts[j].split(eol);
283
- let from = lines[0].indexOf('<');
284
- let to = lines[0].indexOf('>');
285
- let label = lines[0].slice(from + 1, to);
286
- for (let k = 1; k < lines.length - 1; k++) {
287
- if (molecule[label]) {
288
- molecule[label] += eol + lines[k];
289
- } else {
290
- molecule[label] = lines[k];
291
- }
292
- }
293
- if (dynamicTyping$1) {
294
- molecule[label] = dynamicTyping.parseString(molecule[label]);
295
- }
296
- }
297
- return molecule;
298
- }
299
-
300
- exports.MolfileStream = MolfileStream;
301
- exports.iterator = iterator;
302
- exports.parse = parse;
1
+ export * from "./parse.js";
2
+ export * from "./iterator.js";
3
+ export * from "./MolfileStream.js";
4
+ //# sourceMappingURL=index.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"index.js","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA,cAAc,YAAY,CAAC;AAC3B,cAAc,eAAe,CAAC;AAC9B,cAAc,oBAAoB,CAAC"}
@@ -0,0 +1,49 @@
1
+ /**
2
+ * A molecule entry returned by the {@link iterator} async generator.
3
+ * The `molfile` field contains the raw V2000/V3000 molfile block.
4
+ * Additional fields are populated from the SDF `> <field>` sections.
5
+ */
6
+ export interface IteratorMolecule {
7
+ /** The raw V2000/V3000 molfile block. */
8
+ molfile: string;
9
+ [label: string]: any;
10
+ }
11
+ /**
12
+ * Options for the {@link iterator} async generator.
13
+ */
14
+ export interface IteratorOptions {
15
+ /**
16
+ * End-of-line character used to split field entries.
17
+ * @default '\n'
18
+ */
19
+ eol?: string;
20
+ /**
21
+ * When `true`, numeric string values are automatically converted to numbers.
22
+ * @default true
23
+ */
24
+ dynamicTyping?: boolean;
25
+ /**
26
+ * A predicate function to filter molecules. Only molecules for which this
27
+ * function returns `true` are yielded.
28
+ */
29
+ filter?: (molecule: IteratorMolecule) => boolean;
30
+ }
31
+ /**
32
+ * Asynchronously iterate over molecules from a text-decoded SDF stream.
33
+ * @param readStream - A `ReadableStream<string>` supplying SDF text content.
34
+ * @param options - Iterator options.
35
+ * @yields {IteratorMolecule} Individual molecule objects.
36
+ * @example
37
+ * ```ts
38
+ * import { openAsBlob } from 'node:fs';
39
+ * import { iterator } from 'sdf-parser';
40
+ *
41
+ * const blob = await openAsBlob('compounds.sdf');
42
+ * const textDecoder = new TextDecoderStream();
43
+ * for await (const molecule of iterator(blob.stream().pipeThrough(textDecoder))) {
44
+ * console.log(molecule.molfile);
45
+ * }
46
+ * ```
47
+ */
48
+ export declare function iterator(readStream: ReadableStream<string>, options?: IteratorOptions): AsyncGenerator<IteratorMolecule>;
49
+ //# sourceMappingURL=iterator.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"iterator.d.ts","sourceRoot":"","sources":["../src/iterator.ts"],"names":[],"mappings":"AAIA;;;;GAIG;AACH,MAAM,WAAW,gBAAgB;IAC/B,yCAAyC;IACzC,OAAO,EAAE,MAAM,CAAC;IAEhB,CAAC,KAAK,EAAE,MAAM,GAAG,GAAG,CAAC;CACtB;AAED;;GAEG;AACH,MAAM,WAAW,eAAe;IAC9B;;;OAGG;IACH,GAAG,CAAC,EAAE,MAAM,CAAC;IACb;;;OAGG;IACH,aAAa,CAAC,EAAE,OAAO,CAAC;IACxB;;;OAGG;IACH,MAAM,CAAC,EAAE,CAAC,QAAQ,EAAE,gBAAgB,KAAK,OAAO,CAAC;CAClD;AAED;;;;;;;;;;;;;;;;GAgBG;AACH,wBAAuB,QAAQ,CAC7B,UAAU,EAAE,cAAc,CAAC,MAAM,CAAC,EAClC,OAAO,GAAE,eAAoB,GAC5B,cAAc,CAAC,gBAAgB,CAAC,CASlC"}
@@ -0,0 +1,55 @@
1
+ import { parseString } from 'dynamic-typing';
2
+ import { MolfileStream } from "./MolfileStream.js";
3
/**
 * Async generator yielding one molecule object per SDF record read from a
 * text stream.
 *
 * The stream is piped through a `MolfileStream` to cut it into records; each
 * record is converted to an object and yielded unless `options.filter` is set
 * and returns a falsy value for it.
 * @param readStream - A `ReadableStream<string>` supplying SDF text content.
 * @param options - Iterator options (`eol` defaults to `'\n'`,
 *   `dynamicTyping` defaults to `true`).
 * @yields {IteratorMolecule} Individual molecule objects.
 * @example
 * ```ts
 * import { openAsBlob } from 'node:fs';
 * import { iterator } from 'sdf-parser';
 *
 * const blob = await openAsBlob('compounds.sdf');
 * const textDecoder = new TextDecoderStream();
 * for await (const molecule of iterator(blob.stream().pipeThrough(textDecoder))) {
 *   console.log(molecule.molfile);
 * }
 * ```
 */
export async function* iterator(readStream, options = {}) {
  const eol = options.eol === undefined ? '\n' : options.eol;
  const dynamicTyping =
    options.dynamicTyping === undefined ? true : options.dynamicTyping;
  const entries = readStream.pipeThrough(new MolfileStream({ eol }));
  for await (const entry of entries) {
    const molecule = parseMolecule(entry, { eol, dynamicTyping });
    // Skip molecules rejected by the optional user filter.
    if (options.filter && !options.filter(molecule)) {
      continue;
    }
    yield molecule;
  }
}
30
+ function parseMolecule(sdfPart, options) {
31
+ const { eol, dynamicTyping } = options;
32
+ const parts = sdfPart.split(`${eol}>`);
33
+ const molecule = {
34
+ molfile: parts.length > 0 && parts[0].length > 5 ? parts[0] + eol : '',
35
+ };
36
+ for (let j = 1; j < parts.length; j++) {
37
+ const lines = parts[j].split(eol);
38
+ const from = lines[0].indexOf('<');
39
+ const to = lines[0].indexOf('>');
40
+ const label = lines[0].slice(from + 1, to);
41
+ for (let k = 1; k < lines.length - 1; k++) {
42
+ if (molecule[label]) {
43
+ molecule[label] = `${molecule[label]}${eol}${lines[k]}`;
44
+ }
45
+ else {
46
+ molecule[label] = lines[k];
47
+ }
48
+ }
49
+ if (dynamicTyping) {
50
+ molecule[label] = parseString(molecule[label]);
51
+ }
52
+ }
53
+ return molecule;
54
+ }
55
+ //# sourceMappingURL=iterator.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"iterator.js","sourceRoot":"","sources":["../src/iterator.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,WAAW,EAAE,MAAM,gBAAgB,CAAC;AAE7C,OAAO,EAAE,aAAa,EAAE,MAAM,oBAAoB,CAAC;AAmCnD;;;;;;;;;;;;;;;;GAgBG;AACH,MAAM,CAAC,KAAK,SAAS,CAAC,CAAC,QAAQ,CAC7B,UAAkC,EAClC,UAA2B,EAAE;IAE7B,MAAM,EAAE,GAAG,GAAG,IAAI,EAAE,aAAa,GAAG,IAAI,EAAE,GAAG,OAAO,CAAC;IACrD,MAAM,cAAc,GAAG,UAAU,CAAC,WAAW,CAAC,IAAI,aAAa,CAAC,EAAE,GAAG,EAAE,CAAC,CAAC,CAAC;IAC1E,IAAI,KAAK,EAAE,MAAM,KAAK,IAAI,cAAc,EAAE,CAAC;QACzC,MAAM,QAAQ,GAAG,aAAa,CAAC,KAAK,EAAE,EAAE,GAAG,EAAE,aAAa,EAAE,CAAC,CAAC;QAC9D,IAAI,CAAC,OAAO,CAAC,MAAM,IAAI,OAAO,CAAC,MAAM,CAAC,QAAQ,CAAC,EAAE,CAAC;YAChD,MAAM,QAAQ,CAAC;QACjB,CAAC;IACH,CAAC;AACH,CAAC;AAOD,SAAS,aAAa,CACpB,OAAe,EACf,OAA6B;IAE7B,MAAM,EAAE,GAAG,EAAE,aAAa,EAAE,GAAG,OAAO,CAAC;IACvC,MAAM,KAAK,GAAG,OAAO,CAAC,KAAK,CAAC,GAAG,GAAG,GAAG,CAAC,CAAC;IACvC,MAAM,QAAQ,GAAqB;QACjC,OAAO,EAAE,KAAK,CAAC,MAAM,GAAG,CAAC,IAAI,KAAK,CAAC,CAAC,CAAC,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,GAAG,GAAG,CAAC,CAAC,CAAC,EAAE;KACvE,CAAC;IAEF,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,KAAK,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;QACtC,MAAM,KAAK,GAAG,KAAK,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC;QAClC,MAAM,IAAI,GAAG,KAAK,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC;QACnC,MAAM,EAAE,GAAG,KAAK,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC;QACjC,MAAM,KAAK,GAAG,KAAK,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC,IAAI,GAAG,CAAC,EAAE,EAAE,CAAC,CAAC;QAE3C,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,KAAK,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC;YAC1C,IAAI,QAAQ,CAAC,KAAK,CAAC,EAAE,CAAC;gBACpB,QAAQ,CAAC,KAAK,CAAC,GAAG,GAAG,QAAQ,CAAC,KAAK,CAAW,GAAG,GAAG,GAAG,KAAK,CAAC,CAAC,CAAC,EAAE,CAAC;YACpE,CAAC;iBAAM,CAAC;gBACN,QAAQ,CAAC,KAAK,CAAC,GAAG,KAAK,CAAC,CAAC,CAAC,CAAC;YAC7B,CAAC;QACH,CAAC;QAED,IAAI,aAAa,EAAE,CAAC;YAClB,QAAQ,CAAC,KAAK,CAAC,GAAG,WAAW,CAAC,QAAQ,CAAC,KAAK,CAAC,CAAC,CAAC;QACjD,CAAC;IACH,CAAC;IAED,OAAO,QAAQ,CAAC;AAClB,CAAC"}
package/lib/parse.d.ts ADDED
@@ -0,0 +1,105 @@
1
+ /**
2
+ * A parsed SDF molecule entry. The `molfile` field contains the raw molfile
3
+ * string. Additional fields are populated from the SDF `> <field>` sections.
4
+ */
5
+ export interface Molecule {
6
+ /** The raw V2000/V3000 molfile block. */
7
+ molfile: string;
8
+ [label: string]: any;
9
+ }
10
+ /**
11
+ * Options for the {@link parse} function.
12
+ */
13
+ export interface ParseOptions {
14
+ /**
15
+ * Modifier functions applied to field values after parsing. The function
16
+ * receives the raw string value and may return a transformed value. Returning
17
+ * `undefined` or `null` removes the field from the molecule.
18
+ */
19
+ modifiers?: Record<string, (value: string) => unknown>;
20
+ /**
21
+ * Callback functions called for each field value. The callbacks are stored
22
+ * on the label info and available in statistics.
23
+ */
24
+ forEach?: Record<string, (value: unknown) => void>;
25
+ /**
26
+ * When `true`, numeric string values are automatically converted to numbers.
27
+ * @default true
28
+ */
29
+ dynamicTyping?: boolean;
30
+ /**
31
+ * End-of-line character. Auto-detected from the file content when not set.
32
+ * @default '\n'
33
+ */
34
+ eol?: string;
35
+ /**
36
+ * When `true`, normalises all `\r\n` and `\r` sequences to `\n` before
37
+ * parsing. Useful for SDF files with mixed or Windows-style line endings.
38
+ * @default false
39
+ */
40
+ mixedEOL?: boolean;
41
+ /**
42
+ * Only include fields whose names appear in this list.
43
+ * When combined with `exclude`, the field must satisfy both constraints.
44
+ */
45
+ include?: string[];
46
+ /**
47
+ * Exclude fields whose names appear in this list.
48
+ * When combined with `include`, the field must satisfy both constraints.
49
+ */
50
+ exclude?: string[];
51
+ /**
52
+ * A predicate function to filter molecules. Only molecules for which this
53
+ * function returns `true` are included in the result.
54
+ */
55
+ filter?: (molecule: Molecule) => boolean;
56
+ }
57
+ /**
58
+ * Statistics for a single SDF field label, as returned in
59
+ * {@link ParseResult.statistics}.
60
+ */
61
+ export interface LabelStatistic {
62
+ /** Field label name. */
63
+ label: string;
64
+ /** Number of molecules that contain this field. */
65
+ counter: number;
66
+ /** Whether all parsed values are numeric. */
67
+ isNumeric: boolean;
68
+ /** Whether this field is included in the output (not excluded). */
69
+ keep: boolean;
70
+ /** Minimum numeric value, only set when `isNumeric` is `true`. */
71
+ minValue?: number;
72
+ /** Maximum numeric value, only set when `isNumeric` is `true`. */
73
+ maxValue?: number;
74
+ /** Whether every molecule in the result contains this field. */
75
+ always: boolean;
76
+ }
77
+ /**
78
+ * Return value of the {@link parse} function.
79
+ */
80
+ export interface ParseResult {
81
+ /** Wall-clock time taken to parse, in milliseconds. */
82
+ time: number;
83
+ /** Parsed molecule entries. */
84
+ molecules: Molecule[];
85
+ /** Sorted list of all field label names found in the file. */
86
+ labels: string[];
87
+ /** Per-label statistics. */
88
+ statistics: LabelStatistic[];
89
+ }
90
+ /**
91
+ * Synchronously parse an SDF file into an array of molecule objects.
92
+ * @param sdf - The SDF content as a string, `ArrayBuffer`, or `ArrayBufferView`.
93
+ * @param options - Parsing options.
94
+ * @returns A {@link ParseResult} containing molecules and statistics.
95
+ * @example
96
+ * ```ts
97
+ * import { readFileSync } from 'node:fs';
98
+ * import { parse } from 'sdf-parser';
99
+ *
100
+ * const sdf = readFileSync('compounds.sdf', 'utf8');
101
+ * const { molecules, statistics } = parse(sdf);
102
+ * ```
103
+ */
104
+ export declare function parse(sdf: unknown, options?: ParseOptions): ParseResult;
105
+ //# sourceMappingURL=parse.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"parse.d.ts","sourceRoot":"","sources":["../src/parse.ts"],"names":[],"mappings":"AAMA;;;GAGG;AACH,MAAM,WAAW,QAAQ;IACvB,yCAAyC;IACzC,OAAO,EAAE,MAAM,CAAC;IAEhB,CAAC,KAAK,EAAE,MAAM,GAAG,GAAG,CAAC;CACtB;AAED;;GAEG;AACH,MAAM,WAAW,YAAY;IAC3B;;;;OAIG;IACH,SAAS,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,CAAC,KAAK,EAAE,MAAM,KAAK,OAAO,CAAC,CAAC;IACvD;;;OAGG;IACH,OAAO,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,CAAC,KAAK,EAAE,OAAO,KAAK,IAAI,CAAC,CAAC;IACnD;;;OAGG;IACH,aAAa,CAAC,EAAE,OAAO,CAAC;IACxB;;;OAGG;IACH,GAAG,CAAC,EAAE,MAAM,CAAC;IACb;;;;OAIG;IACH,QAAQ,CAAC,EAAE,OAAO,CAAC;IACnB;;;OAGG;IACH,OAAO,CAAC,EAAE,MAAM,EAAE,CAAC;IACnB;;;OAGG;IACH,OAAO,CAAC,EAAE,MAAM,EAAE,CAAC;IACnB;;;OAGG;IACH,MAAM,CAAC,EAAE,CAAC,QAAQ,EAAE,QAAQ,KAAK,OAAO,CAAC;CAC1C;AAED;;;GAGG;AACH,MAAM,WAAW,cAAc;IAC7B,wBAAwB;IACxB,KAAK,EAAE,MAAM,CAAC;IACd,mDAAmD;IACnD,OAAO,EAAE,MAAM,CAAC;IAChB,6CAA6C;IAC7C,SAAS,EAAE,OAAO,CAAC;IACnB,mEAAmE;IACnE,IAAI,EAAE,OAAO,CAAC;IACd,kEAAkE;IAClE,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,kEAAkE;IAClE,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,gEAAgE;IAChE,MAAM,EAAE,OAAO,CAAC;CACjB;AAED;;GAEG;AACH,MAAM,WAAW,WAAW;IAC1B,uDAAuD;IACvD,IAAI,EAAE,MAAM,CAAC;IACb,+BAA+B;IAC/B,SAAS,EAAE,QAAQ,EAAE,CAAC;IACtB,8DAA8D;IAC9D,MAAM,EAAE,MAAM,EAAE,CAAC;IACjB,4BAA4B;IAC5B,UAAU,EAAE,cAAc,EAAE,CAAC;CAC9B;AAED;;;;;;;;;;;;;GAaG;AACH,wBAAgB,KAAK,CAAC,GAAG,EAAE,OAAO,EAAE,OAAO,GAAE,YAAiB,GAAG,WAAW,CAgH3E"}