sdf-parser 7.0.2 → 7.0.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/lib/index.js +20 -12
- package/package.json +1 -1
- package/src/MolfileStream.js +14 -6
- package/src/iterator.js +4 -5
- package/src/parse.js +2 -1
package/lib/index.js
CHANGED
|
@@ -108,6 +108,7 @@ function getMolecule$1(sdfPart, labels, currentLabels, options) {
|
|
|
108
108
|
* @param {object} [options.modifiers] - Object containing callbacks to apply on some specific fields
|
|
109
109
|
* @param {boolean} [options.mixedEOL=false] - Set to true if you know there is a mixture between \r\n and \n
|
|
110
110
|
* @param {string} [options.eol] - Specify the end of line character. Default will be the one found in the file
|
|
111
|
+
* @returns {object} - Object containing the molecules, the labels and the statistics
|
|
111
112
|
*/
|
|
112
113
|
function parse(sdf, options = {}) {
|
|
113
114
|
options = { ...options };
|
|
@@ -148,7 +149,7 @@ function parse(sdf, options = {}) {
|
|
|
148
149
|
|
|
149
150
|
for (let i = 0; i < entriesBoundaries.length; i++) {
|
|
150
151
|
let sdfPart = sdf.slice(...entriesBoundaries[i]);
|
|
151
|
-
|
|
152
|
+
if (sdfPart.length < 40) continue;
|
|
152
153
|
let currentLabels = [];
|
|
153
154
|
const molecule = getMolecule$1(sdfPart, labels, currentLabels, options);
|
|
154
155
|
if (!molecule) continue;
|
|
@@ -214,18 +215,26 @@ class MolfileStream extends TransformStream {
|
|
|
214
215
|
this.#buffer += chunk;
|
|
215
216
|
let begin = 0;
|
|
216
217
|
let index = 0;
|
|
217
|
-
while ((index = this.#buffer.indexOf('
|
|
218
|
-
|
|
219
|
-
|
|
220
|
-
|
|
221
|
-
|
|
218
|
+
while ((index = this.#buffer.indexOf('$$$$', index)) !== -1) {
|
|
219
|
+
// we need to check if the delimiter '\n' is in the current buffer
|
|
220
|
+
// if it is not we need to wait for the next chunk
|
|
221
|
+
const endOfDelimiter = this.#buffer.indexOf('\n', index);
|
|
222
|
+
if (endOfDelimiter === -1) {
|
|
223
|
+
index = begin;
|
|
224
|
+
break;
|
|
225
|
+
}
|
|
226
|
+
const eolLength = this.#buffer[endOfDelimiter - 1] === '\r' ? 2 : 1;
|
|
227
|
+
// need to remove the last eol because we will split on eol+'>' in getMolecule
|
|
228
|
+
if (index - eolLength - begin > 40) {
|
|
229
|
+
controller.enqueue(this.#buffer.slice(begin, index - eolLength));
|
|
222
230
|
}
|
|
231
|
+
index = endOfDelimiter + eolLength;
|
|
223
232
|
begin = index;
|
|
224
233
|
}
|
|
225
234
|
this.#buffer = this.#buffer.slice(begin);
|
|
226
235
|
},
|
|
227
236
|
flush: (controller) => {
|
|
228
|
-
if (this.#buffer) {
|
|
237
|
+
if (this.#buffer && this.#buffer.length > 40) {
|
|
229
238
|
controller.enqueue(this.#buffer);
|
|
230
239
|
}
|
|
231
240
|
},
|
|
@@ -234,7 +243,7 @@ class MolfileStream extends TransformStream {
|
|
|
234
243
|
}
|
|
235
244
|
|
|
236
245
|
/**
|
|
237
|
-
* Parse a SDF file
|
|
246
|
+
* Parse a SDF file as an iterator
|
|
238
247
|
* @param {ReadableStream} readStream - SDF file to parse
|
|
239
248
|
* @param {object} [options={}] - iterator options
|
|
240
249
|
* @param {Function} [options.filter] - Callback allowing to filter the molecules
|
|
@@ -245,10 +254,9 @@ class MolfileStream extends TransformStream {
|
|
|
245
254
|
async function* iterator(readStream, options = {}) {
|
|
246
255
|
const { eol = '\n', dynamicTyping = true } = options;
|
|
247
256
|
|
|
248
|
-
const moleculeStream = readStream.pipeThrough(new MolfileStream());
|
|
249
|
-
for await (const
|
|
250
|
-
|
|
251
|
-
const molecule = getMolecule(molfile, {
|
|
257
|
+
const moleculeStream = readStream.pipeThrough(new MolfileStream({ eol }));
|
|
258
|
+
for await (const entry of moleculeStream) {
|
|
259
|
+
const molecule = getMolecule(entry, {
|
|
252
260
|
eol,
|
|
253
261
|
dynamicTyping,
|
|
254
262
|
});
|
package/package.json
CHANGED
package/src/MolfileStream.js
CHANGED
|
@@ -7,18 +7,26 @@ export class MolfileStream extends TransformStream {
|
|
|
7
7
|
this.#buffer += chunk;
|
|
8
8
|
let begin = 0;
|
|
9
9
|
let index = 0;
|
|
10
|
-
while ((index = this.#buffer.indexOf('
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
10
|
+
while ((index = this.#buffer.indexOf('$$$$', index)) !== -1) {
|
|
11
|
+
// we need to check if the delimiter '\n' is in the current buffer
|
|
12
|
+
// if it is not we need to wait for the next chunk
|
|
13
|
+
const endOfDelimiter = this.#buffer.indexOf('\n', index);
|
|
14
|
+
if (endOfDelimiter === -1) {
|
|
15
|
+
index = begin;
|
|
16
|
+
break;
|
|
15
17
|
}
|
|
18
|
+
const eolLength = this.#buffer[endOfDelimiter - 1] === '\r' ? 2 : 1;
|
|
19
|
+
// need to remove the last eol because we will split on eol+'>' in getMolecule
|
|
20
|
+
if (index - eolLength - begin > 40) {
|
|
21
|
+
controller.enqueue(this.#buffer.slice(begin, index - eolLength));
|
|
22
|
+
}
|
|
23
|
+
index = endOfDelimiter + eolLength;
|
|
16
24
|
begin = index;
|
|
17
25
|
}
|
|
18
26
|
this.#buffer = this.#buffer.slice(begin);
|
|
19
27
|
},
|
|
20
28
|
flush: (controller) => {
|
|
21
|
-
if (this.#buffer) {
|
|
29
|
+
if (this.#buffer && this.#buffer.length > 40) {
|
|
22
30
|
controller.enqueue(this.#buffer);
|
|
23
31
|
}
|
|
24
32
|
},
|
package/src/iterator.js
CHANGED
|
@@ -3,7 +3,7 @@ import { parseString } from 'dynamic-typing';
|
|
|
3
3
|
import { MolfileStream } from './MolfileStream.js';
|
|
4
4
|
|
|
5
5
|
/**
|
|
6
|
-
* Parse a SDF file
|
|
6
|
+
* Parse a SDF file as an iterator
|
|
7
7
|
* @param {ReadableStream} readStream - SDF file to parse
|
|
8
8
|
* @param {object} [options={}] - iterator options
|
|
9
9
|
* @param {Function} [options.filter] - Callback allowing to filter the molecules
|
|
@@ -14,10 +14,9 @@ import { MolfileStream } from './MolfileStream.js';
|
|
|
14
14
|
export async function* iterator(readStream, options = {}) {
|
|
15
15
|
const { eol = '\n', dynamicTyping = true } = options;
|
|
16
16
|
|
|
17
|
-
const moleculeStream = readStream.pipeThrough(new MolfileStream());
|
|
18
|
-
for await (const
|
|
19
|
-
|
|
20
|
-
const molecule = getMolecule(molfile, {
|
|
17
|
+
const moleculeStream = readStream.pipeThrough(new MolfileStream({ eol }));
|
|
18
|
+
for await (const entry of moleculeStream) {
|
|
19
|
+
const molecule = getMolecule(entry, {
|
|
21
20
|
eol,
|
|
22
21
|
dynamicTyping,
|
|
23
22
|
});
|
package/src/parse.js
CHANGED
|
@@ -13,6 +13,7 @@ import { getMolecule } from './util/getMolecule';
|
|
|
13
13
|
* @param {object} [options.modifiers] - Object containing callbacks to apply on some specific fields
|
|
14
14
|
* @param {boolean} [options.mixedEOL=false] - Set to true if you know there is a mixture between \r\n and \n
|
|
15
15
|
* @param {string} [options.eol] - Specify the end of line character. Default will be the one found in the file
|
|
16
|
+
* @returns {object} - Object containing the molecules, the labels and the statistics
|
|
16
17
|
*/
|
|
17
18
|
export function parse(sdf, options = {}) {
|
|
18
19
|
options = { ...options };
|
|
@@ -53,7 +54,7 @@ export function parse(sdf, options = {}) {
|
|
|
53
54
|
|
|
54
55
|
for (let i = 0; i < entriesBoundaries.length; i++) {
|
|
55
56
|
let sdfPart = sdf.slice(...entriesBoundaries[i]);
|
|
56
|
-
|
|
57
|
+
if (sdfPart.length < 40) continue;
|
|
57
58
|
let currentLabels = [];
|
|
58
59
|
const molecule = getMolecule(sdfPart, labels, currentLabels, options);
|
|
59
60
|
if (!molecule) continue;
|