sdf-parser 7.0.4 → 8.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,20 @@
1
+ /**
2
+ * A Web Streams API `TransformStream` that splits an incoming text stream on
3
+ * the `$$$$` SDF record delimiter and emits one string per record (the molfile block followed by any `> <field>` data items).
4
+ *
5
+ * Entries of 40 characters or fewer are discarded. The optional constructor argument is accepted but not read by the implementation.
6
+ * @example
7
+ * ```ts
8
+ * const stream = readStream.pipeThrough(new MolfileStream());
9
+ * for await (const molfile of stream) {
10
+ * console.log(molfile);
11
+ * }
12
+ * ```
13
+ */
14
+ export declare class MolfileStream extends TransformStream<string, string> {
15
+ #private;
16
+ constructor(_options?: {
17
+ eol?: string;
18
+ });
19
+ }
20
+ //# sourceMappingURL=MolfileStream.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"MolfileStream.d.ts","sourceRoot":"","sources":["../src/MolfileStream.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;GAYG;AACH,qBAAa,aAAc,SAAQ,eAAe,CAAC,MAAM,EAAE,MAAM,CAAC;;gBAIpD,QAAQ,CAAC,EAAE;QAAE,GAAG,CAAC,EAAE,MAAM,CAAA;KAAE;CAmCxC"}
@@ -0,0 +1,52 @@
1
+ /**
2
+ * A Web Streams API `TransformStream` that splits an incoming text stream on
3
+ * the `$$$$` SDF record delimiter and emits individual molfile strings.
4
+ *
5
+ * Entries shorter than 40 characters are discarded.
6
+ * @example
7
+ * ```ts
8
+ * const stream = readStream.pipeThrough(new MolfileStream());
9
+ * for await (const molfile of stream) {
10
+ * console.log(molfile);
11
+ * }
12
+ * ```
13
+ */
14
+ export class MolfileStream extends TransformStream {
15
+ #buffer = [];
16
+ // eslint-disable-next-line @typescript-eslint/no-unused-vars
17
+ constructor(_options) {
18
+ super({
19
+ transform: (chunk, controller) => {
20
+ this.#buffer.push(chunk);
21
+ const combined = this.#buffer.join('');
22
+ this.#buffer.length = 0;
23
+ let begin = 0;
24
+ let index = 0;
25
+ while ((index = combined.indexOf('$$$$', index)) !== -1) {
26
+ const endOfDelimiter = combined.indexOf('\n', index);
27
+ if (endOfDelimiter === -1) {
28
+ index = begin;
29
+ break;
30
+ }
31
+ const eolLength = combined[endOfDelimiter - 1] === '\r' ? 2 : 1;
32
+ // Remove the last eol before enqueuing
33
+ if (index - eolLength - begin > 40) {
34
+ controller.enqueue(combined.slice(begin, index - eolLength));
35
+ }
36
+ index = endOfDelimiter + eolLength;
37
+ begin = index;
38
+ }
39
+ if (begin < combined.length) {
40
+ this.#buffer.push(combined.slice(begin));
41
+ }
42
+ },
43
+ flush: (controller) => {
44
+ const remaining = this.#buffer.join('');
45
+ if (remaining && remaining.length > 40) {
46
+ controller.enqueue(remaining);
47
+ }
48
+ },
49
+ });
50
+ }
51
+ }
52
+ //# sourceMappingURL=MolfileStream.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"MolfileStream.js","sourceRoot":"","sources":["../src/MolfileStream.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;GAYG;AACH,MAAM,OAAO,aAAc,SAAQ,eAA+B;IACvD,OAAO,GAAa,EAAE,CAAC;IAEhC,6DAA6D;IAC7D,YAAY,QAA2B;QACrC,KAAK,CAAC;YACJ,SAAS,EAAE,CAAC,KAAK,EAAE,UAAU,EAAE,EAAE;gBAC/B,IAAI,CAAC,OAAO,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;gBACzB,MAAM,QAAQ,GAAG,IAAI,CAAC,OAAO,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;gBACvC,IAAI,CAAC,OAAO,CAAC,MAAM,GAAG,CAAC,CAAC;gBAExB,IAAI,KAAK,GAAG,CAAC,CAAC;gBACd,IAAI,KAAK,GAAG,CAAC,CAAC;gBACd,OAAO,CAAC,KAAK,GAAG,QAAQ,CAAC,OAAO,CAAC,MAAM,EAAE,KAAK,CAAC,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC;oBACxD,MAAM,cAAc,GAAG,QAAQ,CAAC,OAAO,CAAC,IAAI,EAAE,KAAK,CAAC,CAAC;oBACrD,IAAI,cAAc,KAAK,CAAC,CAAC,EAAE,CAAC;wBAC1B,KAAK,GAAG,KAAK,CAAC;wBACd,MAAM;oBACR,CAAC;oBACD,MAAM,SAAS,GAAG,QAAQ,CAAC,cAAc,GAAG,CAAC,CAAC,KAAK,IAAI,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;oBAChE,uCAAuC;oBACvC,IAAI,KAAK,GAAG,SAAS,GAAG,KAAK,GAAG,EAAE,EAAE,CAAC;wBACnC,UAAU,CAAC,OAAO,CAAC,QAAQ,CAAC,KAAK,CAAC,KAAK,EAAE,KAAK,GAAG,SAAS,CAAC,CAAC,CAAC;oBAC/D,CAAC;oBACD,KAAK,GAAG,cAAc,GAAG,SAAS,CAAC;oBACnC,KAAK,GAAG,KAAK,CAAC;gBAChB,CAAC;gBACD,IAAI,KAAK,GAAG,QAAQ,CAAC,MAAM,EAAE,CAAC;oBAC5B,IAAI,CAAC,OAAO,CAAC,IAAI,CAAC,QAAQ,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC,CAAC;gBAC3C,CAAC;YACH,CAAC;YACD,KAAK,EAAE,CAAC,UAAU,EAAE,EAAE;gBACpB,MAAM,SAAS,GAAG,IAAI,CAAC,OAAO,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;gBACxC,IAAI,SAAS,IAAI,SAAS,CAAC,MAAM,GAAG,EAAE,EAAE,CAAC;oBACvC,UAAU,CAAC,OAAO,CAAC,SAAS,CAAC,CAAC;gBAChC,CAAC;YACH,CAAC;SACF,CAAC,CAAC;IACL,CAAC;CACF"}
@@ -0,0 +1,11 @@
1
+ /**
2
+ * Get the [start, end] boundaries of each SDF entry in a string.
3
+ *
4
+ * Uses `indexOf` for fast splitting without regex overhead.
5
+ * @param string - The full SDF string.
6
+ * @param substring - The delimiter to search for (e.g. `'\n$$$$'`).
7
+ * @param eol - The end-of-line sequence searched for after each delimiter; the next entry starts right after it, and scanning stops when it is not found.
8
+ * @returns An array of `[start, end]` index pairs (`end` is the index where the delimiter starts, i.e. exclusive), one per SDF entry. When the text ends with a complete delimiter line, the last pair is an empty range at the end of the string.
9
+ */
10
+ export declare function getEntriesBoundaries(string: string, substring: string, eol: string): Array<[number, number]>;
11
+ //# sourceMappingURL=getEntriesBoundaries.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"getEntriesBoundaries.d.ts","sourceRoot":"","sources":["../src/getEntriesBoundaries.ts"],"names":[],"mappings":"AAAA;;;;;;;;GAQG;AACH,wBAAgB,oBAAoB,CAClC,MAAM,EAAE,MAAM,EACd,SAAS,EAAE,MAAM,EACjB,GAAG,EAAE,MAAM,GACV,KAAK,CAAC,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC,CAoBzB"}
@@ -0,0 +1,33 @@
1
/**
 * Compute the `[start, end]` slice boundaries of every SDF entry in a string.
 *
 * The scan relies on plain `indexOf` calls rather than a regular expression.
 * @param string - The full SDF string.
 * @param substring - The delimiter to search for (e.g. `'\n$$$$'`).
 * @param eol - The end-of-line sequence that terminates the delimiter line.
 * @returns An array of `[start, end]` index pairs (`end` exclusive), one per
 *   entry. A final pair running to the end of the string is added when no
 *   further delimiter is found.
 */
export function getEntriesBoundaries(string, substring, eol) {
  const boundaries = [];
  let start = 0;
  for (;;) {
    const delimiterAt = string.indexOf(substring, start);
    if (delimiterAt === -1) {
      // No more delimiters: whatever is left forms the last entry.
      boundaries.push([start, string.length]);
      break;
    }
    boundaries.push([start, delimiterAt]);
    // Skip the rest of the delimiter line; stop if it is not terminated.
    const eolAt = string.indexOf(eol, delimiterAt + substring.length);
    if (eolAt === -1) {
      break;
    }
    start = eolAt + eol.length;
  }
  return boundaries;
}
33
+ //# sourceMappingURL=getEntriesBoundaries.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"getEntriesBoundaries.js","sourceRoot":"","sources":["../src/getEntriesBoundaries.ts"],"names":[],"mappings":"AAAA;;;;;;;;GAQG;AACH,MAAM,UAAU,oBAAoB,CAClC,MAAc,EACd,SAAiB,EACjB,GAAW;IAEX,MAAM,GAAG,GAA4B,EAAE,CAAC;IACxC,IAAI,QAAQ,GAAG,CAAC,CAAC;IACjB,IAAI,IAAI,GAAG,CAAC,CAAC;IACb,OAAO,IAAI,KAAK,CAAC,CAAC,EAAE,CAAC;QACnB,IAAI,GAAG,MAAM,CAAC,OAAO,CAAC,SAAS,EAAE,QAAQ,CAAC,CAAC;QAC3C,IAAI,IAAI,KAAK,CAAC,CAAC,EAAE,CAAC;YAChB,GAAG,CAAC,IAAI,CAAC,CAAC,QAAQ,EAAE,IAAI,CAAC,CAAC,CAAC;YAC3B,MAAM,SAAS,GAAG,MAAM,CAAC,OAAO,CAAC,GAAG,EAAE,IAAI,GAAG,SAAS,CAAC,MAAM,CAAC,CAAC;YAC/D,IAAI,SAAS,KAAK,CAAC,CAAC,EAAE,CAAC;gBACrB,IAAI,GAAG,CAAC,CAAC,CAAC;YACZ,CAAC;iBAAM,CAAC;gBACN,QAAQ,GAAG,SAAS,GAAG,GAAG,CAAC,MAAM,CAAC;gBAClC,IAAI,GAAG,QAAQ,CAAC;YAClB,CAAC;QACH,CAAC;aAAM,CAAC;YACN,GAAG,CAAC,IAAI,CAAC,CAAC,QAAQ,EAAE,MAAM,CAAC,MAAM,CAAC,CAAC,CAAC;QACtC,CAAC;IACH,CAAC;IACD,OAAO,GAAG,CAAC;AACb,CAAC"}
package/lib/index.d.ts ADDED
@@ -0,0 +1,4 @@
1
+ export * from './parse.ts';
2
+ export * from './iterator.ts';
3
+ export * from './MolfileStream.ts';
4
+ //# sourceMappingURL=index.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA,cAAc,YAAY,CAAC;AAC3B,cAAc,eAAe,CAAC;AAC9B,cAAc,oBAAoB,CAAC"}
package/lib/index.js CHANGED
@@ -1,304 +1,4 @@
1
- 'use strict';
2
-
3
- var ensureString = require('ensure-string');
4
- var dynamicTyping = require('dynamic-typing');
5
-
6
/**
 * Locate every SDF entry of a string as a `[start, end]` pair of indices.
 * @param {string} string - Full SDF text
 * @param {string} substring - Delimiter to look for (end of line followed by `$$$$`)
 * @param {string} eol - End of line sequence that closes the delimiter line
 * @returns {Array<[number, number]>} One `[start, end]` pair per entry, `end` exclusive
 */
function getEntriesBoundaries(string, substring, eol) {
  const ranges = [];
  let cursor = 0;
  while (true) {
    const hit = string.indexOf(substring, cursor);
    if (hit < 0) {
      // Remaining text (possibly empty) is reported as the last entry.
      ranges.push([cursor, string.length]);
      return ranges;
    }
    ranges.push([cursor, hit]);
    const lineEnd = string.indexOf(eol, hit + substring.length);
    if (lineEnd < 0) {
      // Delimiter line is not terminated: nothing can follow it.
      return ranges;
    }
    cursor = lineEnd + eol.length;
  }
}
34
-
35
- /**
36
- * Parse the molfile and the properties with > < labels >
37
- * @param {string} sdfPart
38
- * @param {*} labels
39
- * @param {*} currentLabels
40
- * @param {object} options
41
- * @returns
42
- */
43
- function getMolecule$1(sdfPart, labels, currentLabels, options) {
44
- let parts = sdfPart.split(`${options.eol}>`);
45
- if (parts.length === 0 || parts[0].length <= 5) return;
46
- let molecule = {};
47
- molecule.molfile = parts[0] + options.eol;
48
- for (let j = 1; j < parts.length; j++) {
49
- let lines = parts[j].split(options.eol);
50
- let from = lines[0].indexOf('<');
51
- let to = lines[0].indexOf('>');
52
- let label = lines[0].slice(from + 1, to);
53
- currentLabels.push(label);
54
- if (!labels[label]) {
55
- labels[label] = {
56
- counter: 0,
57
- isNumeric: options.dynamicTyping,
58
- keep: false,
59
- };
60
- if (
61
- (!options.exclude || !options.exclude.includes(label)) &&
62
- (!options.include || options.include.includes(label))
63
- ) {
64
- labels[label].keep = true;
65
- if (options.modifiers[label]) {
66
- labels[label].modifier = options.modifiers[label];
67
- }
68
- if (options.forEach[label]) {
69
- labels[label].forEach = options.forEach[label];
70
- }
71
- }
72
- }
73
- if (labels[label].keep) {
74
- for (let k = 1; k < lines.length - 1; k++) {
75
- if (molecule[label]) {
76
- molecule[label] += options.eol + lines[k];
77
- } else {
78
- molecule[label] = lines[k];
79
- }
80
- }
81
- if (labels[label].modifier) {
82
- let modifiedValue = labels[label].modifier(molecule[label]);
83
- if (modifiedValue === undefined || modifiedValue === null) {
84
- delete molecule[label];
85
- } else {
86
- molecule[label] = modifiedValue;
87
- }
88
- }
89
- if (
90
- labels[label].isNumeric &&
91
- (!Number.isFinite(+molecule[label]) || molecule[label].match(/^0[0-9]/))
92
- ) {
93
- labels[label].isNumeric = false;
94
- }
95
- }
96
- }
97
- return molecule;
98
- }
99
-
100
/**
 * Parse a SDF file
 * @param {string|ArrayBuffer|Uint8Array} sdf - SDF file to parse
 * @param {object} [options={}]
 * @param {string[]} [options.include] - List of fields to include
 * @param {string[]} [options.exclude] - List of fields to exclude
 * @param {Function} [options.filter] - Callback allowing to filter the molecules
 * @param {boolean} [options.dynamicTyping] - Dynamically type the data
 * @param {object} [options.modifiers] - Object containing callbacks to apply on some specific fields
 * @param {object} [options.forEach] - Callbacks per field; they are only attached to the label statistics here
 * @param {boolean} [options.mixedEOL=false] - Set to true if you know there is a mixture between \r\n and \n
 * @param {string} [options.eol] - Specify the end of line character. Default will be the one found in the file
 * @returns {object} - Object containing the molecules, the labels and the statistics
 * @throws {TypeError} If `sdf` is still not a string after `ensureString`
 */
function parse(sdf, options = {}) {
  // Work on a shallow copy so the caller's options object is not modified.
  options = { ...options };
  if (options.modifiers === undefined) options.modifiers = {};
  if (options.forEach === undefined) options.forEach = {};
  if (options.dynamicTyping === undefined) options.dynamicTyping = true;

  sdf = ensureString.ensureString(sdf);
  if (typeof sdf !== 'string') {
    throw new TypeError('Parameter "sdf" must be a string');
  }

  if (options.eol === undefined) {
    options.eol = '\n';
    if (options.mixedEOL) {
      sdf = sdf.replaceAll('\r\n', '\n');
      sdf = sdf.replaceAll('\r', '\n');
    } else {
      // we will find the delimiter in order to be much faster and not use regular expression
      // Only the first 1000 characters are inspected. The search is done on
      // the substring itself: a Set built from a string holds single
      // characters and can never contain the two-character '\r\n'.
      const header = sdf.slice(0, 1000);
      if (header.includes('\r\n')) {
        options.eol = '\r\n';
      } else if (header.includes('\r')) {
        options.eol = '\r';
      }
    }
  }

  let entriesBoundaries = getEntriesBoundaries(
    sdf,
    `${options.eol}$$$$`,
    options.eol,
  );
  let molecules = [];
  let labels = {};

  // The reported time covers molecule extraction and statistics only.
  let start = Date.now();

  for (let i = 0; i < entriesBoundaries.length; i++) {
    let sdfPart = sdf.slice(...entriesBoundaries[i]);
    // Entries shorter than 40 characters are ignored.
    if (sdfPart.length < 40) continue;
    let currentLabels = [];
    const molecule = getMolecule$1(sdfPart, labels, currentLabels, options);
    if (!molecule) continue;
    if (!options.filter || options.filter(molecule)) {
      molecules.push(molecule);
      // only now we can increase the counter
      for (let j = 0; j < currentLabels.length; j++) {
        labels[currentLabels[j]].counter++;
      }
    }
  }
  // all numeric fields should be converted to numbers
  for (let label in labels) {
    let currentLabel = labels[label];
    if (currentLabel.isNumeric) {
      currentLabel.minValue = Infinity;
      currentLabel.maxValue = -Infinity;
      for (let j = 0; j < molecules.length; j++) {
        if (molecules[j][label]) {
          let value = Number.parseFloat(molecules[j][label]);
          molecules[j][label] = value;
          if (value > currentLabel.maxValue) {
            currentLabel.maxValue = value;
          }
          if (value < currentLabel.minValue) {
            currentLabel.minValue = value;
          }
        }
      }
    }
  }

  // we check that a label is in all the records
  for (let key in labels) {
    if (labels[key].counter === molecules.length) {
      labels[key].always = true;
    } else {
      labels[key].always = false;
    }
  }

  let statistics = [];
  for (let key in labels) {
    let statistic = labels[key];
    statistic.label = key;
    statistics.push(statistic);
  }

  return {
    time: Date.now() - start,
    molecules,
    labels: Object.keys(labels),
    statistics,
  };
}
208
-
209
- class MolfileStream extends TransformStream {
210
- #buffer = '';
211
-
212
- constructor() {
213
- super({
214
- transform: (chunk, controller) => {
215
- this.#buffer += chunk;
216
- let begin = 0;
217
- let index = 0;
218
- while ((index = this.#buffer.indexOf('$$$$', index)) !== -1) {
219
- // we need to check if the delimiter '\n' is in the current buffer
220
- // if it is not we need to wait for the next chunk
221
- const endOfDelimiter = this.#buffer.indexOf('\n', index);
222
- if (endOfDelimiter === -1) {
223
- index = begin;
224
- break;
225
- }
226
- const eolLength = this.#buffer[endOfDelimiter - 1] === '\r' ? 2 : 1;
227
- // need to remove the last eol because we will split on eol+'>' in getMolecule
228
- if (index - eolLength - begin > 40) {
229
- controller.enqueue(this.#buffer.slice(begin, index - eolLength));
230
- }
231
- index = endOfDelimiter + eolLength;
232
- begin = index;
233
- }
234
- this.#buffer = this.#buffer.slice(begin);
235
- },
236
- flush: (controller) => {
237
- if (this.#buffer && this.#buffer.length > 40) {
238
- controller.enqueue(this.#buffer);
239
- }
240
- },
241
- });
242
- }
243
- }
244
-
245
/**
 * Iterate asynchronously over the molecules of a SDF text stream
 * @param {ReadableStream} readStream - SDF file to parse
 * @param {object} [options={}] - iterator options
 * @param {Function} [options.filter] - Callback deciding whether a molecule is yielded
 * @param {string} [options.eol='\n'] - End of line character
 * @param {boolean} [options.dynamicTyping] - Dynamically type the data (enabled when omitted)
 * @yields {object} - Molecule object
 */
async function* iterator(readStream, options = {}) {
  const { eol: requestedEol, dynamicTyping: requestedTyping } = options;
  const eol = requestedEol === undefined ? '\n' : requestedEol;
  const dynamicTyping = requestedTyping === undefined ? true : requestedTyping;

  // Split the text into entries, then turn every entry into an object.
  const entries = readStream.pipeThrough(new MolfileStream({ eol }));
  for await (const entry of entries) {
    const molecule = getMolecule(entry, { eol, dynamicTyping });
    if (options.filter && !options.filter(molecule)) {
      continue;
    }
    yield molecule;
  }
}
268
-
269
/**
 * Turn the text of one SDF entry into an object
 * @param {string} sdfPart - text containing the molfile
 * @param {object} options - options
 * @param {string} options.eol - end of line character
 * @param {boolean} options.dynamicTyping - Dynamically type the data through `dynamicTyping.parseString`
 * @returns {object|undefined} The molecule, or `undefined` when the text
 *   before the first property is 5 characters or shorter
 */
function getMolecule(sdfPart, options) {
  const eol = options.eol;
  const typed = options.dynamicTyping;
  const [head, ...properties] = sdfPart.split(`${eol}>`);
  if (head === undefined || head.length <= 5) return;

  const molecule = {};
  molecule.molfile = head + eol;
  for (const property of properties) {
    const lines = property.split(eol);
    const header = lines[0];
    // The label is the text between '<' and '>' on the header line.
    const from = header.indexOf('<');
    const to = header.indexOf('>');
    const label = header.slice(from + 1, to);
    // Value lines are everything between the header and the last line.
    for (const line of lines.slice(1, -1)) {
      molecule[label] = molecule[label] ? molecule[label] + (eol + line) : line;
    }
    if (typed) {
      molecule[label] = dynamicTyping.parseString(molecule[label]);
    }
  }
  return molecule;
}
301
-
302
- exports.MolfileStream = MolfileStream;
303
- exports.iterator = iterator;
304
- exports.parse = parse;
1
+ export * from "./parse.js";
2
+ export * from "./iterator.js";
3
+ export * from "./MolfileStream.js";
4
+ //# sourceMappingURL=index.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"index.js","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA,cAAc,YAAY,CAAC;AAC3B,cAAc,eAAe,CAAC;AAC9B,cAAc,oBAAoB,CAAC"}
@@ -0,0 +1,49 @@
1
+ /**
2
+ * A molecule entry returned by the {@link iterator} async generator.
3
+ * The `molfile` field contains the raw V2000/V3000 molfile block.
4
+ * Additional fields are populated from the SDF `> <field>` sections; with `dynamicTyping` enabled each value is whatever `parseString` from `dynamic-typing` returns for it.
5
+ */
6
+ export interface IteratorMolecule {
7
+ /** The raw V2000/V3000 molfile block; an empty string when the text before the first field is 5 characters or shorter. */
8
+ molfile: string;
9
+ [label: string]: any;
10
+ }
11
+ /**
12
+ * Options for the {@link iterator} async generator.
13
+ */
14
+ export interface IteratorOptions {
15
+ /**
16
+ * End-of-line sequence used to separate the molfile from its `> <field>` sections and to split field lines. It is not auto-detected, and record splitting on `$$$$` does not depend on it.
17
+ * @default '\n'
18
+ */
19
+ eol?: string;
20
+ /**
21
+ * When `true`, numeric string values are automatically converted to numbers.
22
+ * @default true
23
+ */
24
+ dynamicTyping?: boolean;
25
+ /**
26
+ * A predicate function to filter molecules. Only molecules for which this
27
+ * function returns a truthy value are yielded.
28
+ */
29
+ filter?: (molecule: IteratorMolecule) => boolean;
30
+ }
31
+ /**
32
+ * Asynchronously iterate over molecules from a text-decoded SDF stream.
33
+ * @param readStream - A `ReadableStream<string>` supplying SDF text content; it is consumed through `pipeThrough(new MolfileStream(...))`.
34
+ * @param options - Iterator options.
35
+ * @yields {IteratorMolecule} Individual molecule objects that pass `options.filter` (every molecule when no filter is given).
36
+ * @example
37
+ * ```ts
38
+ * import { openAsBlob } from 'node:fs';
39
+ * import { iterator } from 'sdf-parser';
40
+ *
41
+ * const blob = await openAsBlob('compounds.sdf');
42
+ * const textDecoder = new TextDecoderStream();
43
+ * for await (const molecule of iterator(blob.stream().pipeThrough(textDecoder))) {
44
+ * console.log(molecule.molfile);
45
+ * }
46
+ * ```
47
+ */
48
+ export declare function iterator(readStream: ReadableStream<string>, options?: IteratorOptions): AsyncGenerator<IteratorMolecule>;
49
+ //# sourceMappingURL=iterator.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"iterator.d.ts","sourceRoot":"","sources":["../src/iterator.ts"],"names":[],"mappings":"AAIA;;;;GAIG;AACH,MAAM,WAAW,gBAAgB;IAC/B,yCAAyC;IACzC,OAAO,EAAE,MAAM,CAAC;IAEhB,CAAC,KAAK,EAAE,MAAM,GAAG,GAAG,CAAC;CACtB;AAED;;GAEG;AACH,MAAM,WAAW,eAAe;IAC9B;;;OAGG;IACH,GAAG,CAAC,EAAE,MAAM,CAAC;IACb;;;OAGG;IACH,aAAa,CAAC,EAAE,OAAO,CAAC;IACxB;;;OAGG;IACH,MAAM,CAAC,EAAE,CAAC,QAAQ,EAAE,gBAAgB,KAAK,OAAO,CAAC;CAClD;AAED;;;;;;;;;;;;;;;;GAgBG;AACH,wBAAuB,QAAQ,CAC7B,UAAU,EAAE,cAAc,CAAC,MAAM,CAAC,EAClC,OAAO,GAAE,eAAoB,GAC5B,cAAc,CAAC,gBAAgB,CAAC,CASlC"}
@@ -0,0 +1,55 @@
1
+ import { parseString } from 'dynamic-typing';
2
+ import { MolfileStream } from "./MolfileStream.js";
3
/**
 * Walk through a text-decoded SDF stream and yield its molecules one by one.
 *
 * The stream is split into entries by a `MolfileStream`; every entry is then
 * converted to an object and yielded unless `options.filter` rejects it.
 * @param readStream - A `ReadableStream<string>` supplying SDF text content.
 * @param options - Iterator options (`eol` defaults to `'\n'`,
 *   `dynamicTyping` defaults to `true`).
 * @yields {IteratorMolecule} Individual molecule objects.
 * @example
 * ```ts
 * import { openAsBlob } from 'node:fs';
 * import { iterator } from 'sdf-parser';
 *
 * const blob = await openAsBlob('compounds.sdf');
 * const textDecoder = new TextDecoderStream();
 * for await (const molecule of iterator(blob.stream().pipeThrough(textDecoder))) {
 *   console.log(molecule.molfile);
 * }
 * ```
 */
export async function* iterator(readStream, options = {}) {
  const { eol: requestedEol, dynamicTyping: requestedTyping } = options;
  const eol = requestedEol === undefined ? '\n' : requestedEol;
  const dynamicTyping = requestedTyping === undefined ? true : requestedTyping;
  const entries = readStream.pipeThrough(new MolfileStream({ eol }));
  for await (const entry of entries) {
    const molecule = parseMolecule(entry, { eol, dynamicTyping });
    // The filter is optional; when present it must accept the molecule.
    if (options.filter && !options.filter(molecule)) {
      continue;
    }
    yield molecule;
  }
}
30
/**
 * Convert the text of one SDF entry into a molecule object.
 *
 * The text before the first `eol + '>'` becomes `molfile` (with `eol`
 * appended) when it is longer than 5 characters, otherwise `molfile` is an
 * empty string. Every following part is a field: its label is read between
 * `<` and `>` on the first line and its value lines are joined with `eol`.
 * @param sdfPart - Text of a single SDF entry.
 * @param options - `eol` is the line separator; when `dynamicTyping` is
 *   truthy each field value is passed through `parseString`.
 * @returns The molecule object.
 */
function parseMolecule(sdfPart, options) {
  const { eol, dynamicTyping } = options;
  const [head, ...fields] = sdfPart.split(`${eol}>`);
  const hasMolfile = head !== undefined && head.length > 5;
  const molecule = {
    molfile: hasMolfile ? head + eol : '',
  };
  for (const field of fields) {
    const lines = field.split(eol);
    const header = lines[0];
    const from = header.indexOf('<');
    const to = header.indexOf('>');
    const label = header.slice(from + 1, to);
    // Value lines are everything between the header and the last line.
    for (const line of lines.slice(1, -1)) {
      molecule[label] = molecule[label]
        ? `${molecule[label]}${eol}${line}`
        : line;
    }
    if (dynamicTyping) {
      molecule[label] = parseString(molecule[label]);
    }
  }
  return molecule;
}
55
+ //# sourceMappingURL=iterator.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"iterator.js","sourceRoot":"","sources":["../src/iterator.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,WAAW,EAAE,MAAM,gBAAgB,CAAC;AAE7C,OAAO,EAAE,aAAa,EAAE,MAAM,oBAAoB,CAAC;AAmCnD;;;;;;;;;;;;;;;;GAgBG;AACH,MAAM,CAAC,KAAK,SAAS,CAAC,CAAC,QAAQ,CAC7B,UAAkC,EAClC,UAA2B,EAAE;IAE7B,MAAM,EAAE,GAAG,GAAG,IAAI,EAAE,aAAa,GAAG,IAAI,EAAE,GAAG,OAAO,CAAC;IACrD,MAAM,cAAc,GAAG,UAAU,CAAC,WAAW,CAAC,IAAI,aAAa,CAAC,EAAE,GAAG,EAAE,CAAC,CAAC,CAAC;IAC1E,IAAI,KAAK,EAAE,MAAM,KAAK,IAAI,cAAc,EAAE,CAAC;QACzC,MAAM,QAAQ,GAAG,aAAa,CAAC,KAAK,EAAE,EAAE,GAAG,EAAE,aAAa,EAAE,CAAC,CAAC;QAC9D,IAAI,CAAC,OAAO,CAAC,MAAM,IAAI,OAAO,CAAC,MAAM,CAAC,QAAQ,CAAC,EAAE,CAAC;YAChD,MAAM,QAAQ,CAAC;QACjB,CAAC;IACH,CAAC;AACH,CAAC;AAOD,SAAS,aAAa,CACpB,OAAe,EACf,OAA6B;IAE7B,MAAM,EAAE,GAAG,EAAE,aAAa,EAAE,GAAG,OAAO,CAAC;IACvC,MAAM,KAAK,GAAG,OAAO,CAAC,KAAK,CAAC,GAAG,GAAG,GAAG,CAAC,CAAC;IACvC,MAAM,QAAQ,GAAqB;QACjC,OAAO,EAAE,KAAK,CAAC,MAAM,GAAG,CAAC,IAAI,KAAK,CAAC,CAAC,CAAC,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,GAAG,GAAG,CAAC,CAAC,CAAC,EAAE;KACvE,CAAC;IAEF,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,KAAK,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;QACtC,MAAM,KAAK,GAAG,KAAK,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC;QAClC,MAAM,IAAI,GAAG,KAAK,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC;QACnC,MAAM,EAAE,GAAG,KAAK,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC;QACjC,MAAM,KAAK,GAAG,KAAK,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC,IAAI,GAAG,CAAC,EAAE,EAAE,CAAC,CAAC;QAE3C,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,KAAK,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC;YAC1C,IAAI,QAAQ,CAAC,KAAK,CAAC,EAAE,CAAC;gBACpB,QAAQ,CAAC,KAAK,CAAC,GAAG,GAAG,QAAQ,CAAC,KAAK,CAAW,GAAG,GAAG,GAAG,KAAK,CAAC,CAAC,CAAC,EAAE,CAAC;YACpE,CAAC;iBAAM,CAAC;gBACN,QAAQ,CAAC,KAAK,CAAC,GAAG,KAAK,CAAC,CAAC,CAAC,CAAC;YAC7B,CAAC;QACH,CAAC;QAED,IAAI,aAAa,EAAE,CAAC;YAClB,QAAQ,CAAC,KAAK,CAAC,GAAG,WAAW,CAAC,QAAQ,CAAC,KAAK,CAAC,CAAC,CAAC;QACjD,CAAC;IACH,CAAC;IAED,OAAO,QAAQ,CAAC;AAClB,CAAC"}
package/lib/parse.d.ts ADDED
@@ -0,0 +1,105 @@
1
+ /**
2
+ * A parsed SDF molecule entry. The `molfile` field contains the raw molfile
3
+ * string. Additional fields are populated from the SDF `> <field>` sections; per the `ParseOptions` documentation their values may be strings, numbers (`dynamicTyping`) or whatever a modifier returned (`modifiers`).
4
+ */
5
+ export interface Molecule {
6
+ /** The raw V2000/V3000 molfile block. */
7
+ molfile: string;
8
+ [label: string]: any;
9
+ }
10
+ /**
11
+ * Options for the {@link parse} function.
12
+ */
13
+ export interface ParseOptions {
14
+ /**
15
+ * Modifier functions applied to field values after parsing. The function
16
+ * receives the raw string value and may return a transformed value. Returning
17
+ * `undefined` or `null` removes the field from the molecule.
18
+ */
19
+ modifiers?: Record<string, (value: string) => unknown>;
20
+ /**
21
+ * Callback functions called for each field value. The callbacks are stored
22
+ * on the label info and available in statistics. NOTE(review): the 7.0.4 implementation only stored these callbacks and never invoked them — confirm that 8.0.0 calls them.
23
+ */
24
+ forEach?: Record<string, (value: unknown) => void>;
25
+ /**
26
+ * When `true`, numeric string values are automatically converted to numbers.
27
+ * @default true
28
+ */
29
+ dynamicTyping?: boolean;
30
+ /**
31
+ * End-of-line character. Auto-detected from the file content when not set. NOTE(review): 7.0.4 inspected only the first 1000 characters and fell back to `'\n'` — confirm for 8.0.0.
32
+ * @default '\n'
33
+ */
34
+ eol?: string;
35
+ /**
36
+ * When `true`, normalises all `\r\n` and `\r` sequences to `\n` before
37
+ * parsing. Useful for SDF files with mixed or Windows-style line endings. NOTE(review): in 7.0.4 this was honoured only when `eol` was not set — confirm for 8.0.0.
38
+ * @default false
39
+ */
40
+ mixedEOL?: boolean;
41
+ /**
42
+ * Only include fields whose names appear in this list.
43
+ * When combined with `exclude`, the field must satisfy both constraints.
44
+ */
45
+ include?: string[];
46
+ /**
47
+ * Exclude fields whose names appear in this list.
48
+ * When combined with `include`, the field must satisfy both constraints.
49
+ */
50
+ exclude?: string[];
51
+ /**
52
+ * A predicate function to filter molecules. Only molecules for which this
53
+ * function returns `true` are included in the result. NOTE(review): 7.0.4 accepted any truthy return value — confirm for 8.0.0.
54
+ */
55
+ filter?: (molecule: Molecule) => boolean;
56
+ }
57
+ /**
58
+ * Statistics for a single SDF field label, as returned in
59
+ * {@link ParseResult.statistics}.
60
+ */
61
+ export interface LabelStatistic {
62
+ /** Field label name. */
63
+ label: string;
64
+ /** Number of molecules that contain this field. NOTE(review): 7.0.4 counted only molecules that passed `filter`, once per occurrence of the label — confirm for 8.0.0. */
65
+ counter: number;
66
+ /** Whether all parsed values are numeric. */
67
+ isNumeric: boolean;
68
+ /** Whether this field is included in the output (allowed by the `include` and `exclude` options). */
69
+ keep: boolean;
70
+ /** Minimum numeric value, only set when `isNumeric` is `true`. */
71
+ minValue?: number;
72
+ /** Maximum numeric value, only set when `isNumeric` is `true`. */
73
+ maxValue?: number;
74
+ /** Whether every molecule in the result contains this field. */
75
+ always: boolean;
76
+ }
77
+ /**
78
+ * Return value of the {@link parse} function.
79
+ */
80
+ export interface ParseResult {
81
+ /** Wall-clock time taken to parse, in milliseconds. */
82
+ time: number;
83
+ /** Parsed molecule entries. */
84
+ molecules: Molecule[];
85
+ /** Sorted list of all field label names found in the file. NOTE(review): 7.0.4 returned `Object.keys(labels)`, i.e. first-seen order rather than sorted — confirm that 8.0.0 really sorts. */
86
+ labels: string[];
87
+ /** Per-label statistics. */
88
+ statistics: LabelStatistic[];
89
+ }
90
+ /**
91
+ * Synchronously parse an SDF file into a {@link ParseResult} (molecule objects plus label statistics).
92
+ * @param sdf - The SDF content as a string, `ArrayBuffer`, or `ArrayBufferView`. The parameter is typed `unknown`; NOTE(review): 7.0.4 threw a `TypeError` when the value could not be converted to a string — confirm for 8.0.0.
93
+ * @param options - Parsing options.
94
+ * @returns A {@link ParseResult} containing molecules and statistics.
95
+ * @example
96
+ * ```ts
97
+ * import { readFileSync } from 'node:fs';
98
+ * import { parse } from 'sdf-parser';
99
+ *
100
+ * const sdf = readFileSync('compounds.sdf', 'utf8');
101
+ * const { molecules, statistics } = parse(sdf);
102
+ * ```
103
+ */
104
+ export declare function parse(sdf: unknown, options?: ParseOptions): ParseResult;
105
+ //# sourceMappingURL=parse.d.ts.map