xml-twig 1.3.9 → 1.3.11
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +1 -1
- package/package.json +1 -1
- package/samples/memory-test.js +45 -41
- package/samples/dom-parsers.js +0 -108
package/README.md
CHANGED
|
@@ -9,7 +9,7 @@ When you need to read a XML file, then you have two principles:
|
|
|
9
9
|
|
|
10
10
|
* The **Document Object Model (DOM)** style. These parsers read the entire XML document into memory. Usually, they provide easy methods to navigate the document tree or make modifications.
|
|
11
11
|
|
|
12
|
-
DOM parsers are perfect for rather small files, for example configuration files or (X-)HTML pages. However, for bigger XML files you may run into memory limits. When you parse a XML-File as DOM, then the footprint in RAM can be easily 10-20 times the size of the raw XML-String. If the XML-File is greater than [Buffer.constants.MAX_STRING_LENGTH
|
|
12
|
+
DOM parsers are perfect for rather small files, for example configuration files or (X-)HTML pages. However, for bigger XML files you may run into memory limits. When you parse an XML file as DOM, the footprint in RAM can easily be 10-20 times the size of the raw XML string. If the XML file is larger than [Buffer.constants.MAX_STRING_LENGTH](https://nodejs.org/api/buffer.html#bufferconstantsmax_string_length) (typically 512 MiB), then a DOM parser may throw the error "Cannot create a string longer than 0x1fffffe8 characters".
|
|
13
13
|
|
|
14
14
|
* The **stream** or **event** based parsers. These parsers read the XML file "line by line". The biggest advantage of such a parser is that there is no limit on the size of the XML file. You can read XML files with a size of many terabytes, because you always read just a single node at a time.
|
|
15
15
|
|
package/package.json
CHANGED
package/samples/memory-test.js
CHANGED
|
@@ -2,26 +2,26 @@ const fs = require('fs');
|
|
|
2
2
|
const process = require('process');
|
|
3
3
|
const twig = require('xml-twig');
|
|
4
4
|
|
|
5
|
-
let
|
|
6
|
-
|
|
7
|
-
let parser = twig.createParser([{ tag: '
|
|
8
|
-
|
|
5
|
+
let Entry = 0;
|
|
6
|
+
|
|
7
|
+
let parser = twig.createParser([{ tag: 'Entry', function: EntryHandler }], { method: 'expat' })
|
|
8
|
+
// http://aiweb.cs.washington.edu/research/projects/xmltk/xmldata/data/SwissProt/SwissProt.xml.gz
|
|
9
|
+
let reader = fs.createReadStream(`SwissProt.xml`);
|
|
9
10
|
reader.pipe(parser);
|
|
10
11
|
|
|
11
|
-
function
|
|
12
|
-
|
|
13
|
-
if (
|
|
12
|
+
function EntryHandler(elt) {
|
|
13
|
+
Entry++;
|
|
14
|
+
if (Entry % 10000 === 0) {
|
|
15
|
+
console.log(`Entry ${Entry}`)
|
|
16
|
+
let len = elt.root().writer().toString().length;
|
|
17
|
+
len = Math.round((len / 1024 / 1024 + Number.EPSILON) * 100) / 100;
|
|
18
|
+
console.log(`Size of XML string: ${len} MiB`);
|
|
14
19
|
for (const [key, value] of Object.entries(process.memoryUsage())) {
|
|
15
20
|
console.log(` Memory usage by ${key}, ${Math.round((value / 1024 / 1024 + Number.EPSILON) * 100) / 100} MiB`)
|
|
16
21
|
}
|
|
17
|
-
}
|
|
18
|
-
//elt.purge();
|
|
22
|
+
}
|
|
19
23
|
}
|
|
20
24
|
|
|
21
|
-
reader.on('end', () => {
|
|
22
|
-
console.log(`All done`);
|
|
23
|
-
});
|
|
24
|
-
|
|
25
25
|
|
|
26
26
|
|
|
27
27
|
/*
|
|
@@ -29,44 +29,48 @@ reader.on('end', () => {
|
|
|
29
29
|
* Results
|
|
30
30
|
**********************
|
|
31
31
|
|
|
32
|
+
# Set memory limit to 4GiB for demonstration reasons:
|
|
32
33
|
NODE_OPTIONS=--max-old-space-size=4096
|
|
33
34
|
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
Memory usage by
|
|
37
|
-
Memory usage by
|
|
38
|
-
Memory usage by
|
|
39
|
-
Memory usage by
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
Memory usage by
|
|
45
|
-
Memory usage by
|
|
35
|
+
Entry 10000
|
|
36
|
+
Size of XML string: 21.08 MiB
|
|
37
|
+
Memory usage by rss, 1897.36 MiB
|
|
38
|
+
Memory usage by heapTotal, 1855.68 MiB
|
|
39
|
+
Memory usage by heapUsed, 1807.43 MiB
|
|
40
|
+
Memory usage by external, 1.13 MiB
|
|
41
|
+
Memory usage by arrayBuffers, 0.7 MiB
|
|
42
|
+
|
|
43
|
+
Entry 20000
|
|
44
|
+
Size of XML string: 40.86 MiB
|
|
45
|
+
Memory usage by rss, 3615.75 MiB
|
|
46
|
+
Memory usage by heapTotal, 3562.62 MiB
|
|
47
|
+
Memory usage by heapUsed, 3482.97 MiB
|
|
48
|
+
Memory usage by external, 0.63 MiB
|
|
49
|
+
Memory usage by arrayBuffers, 0.2 MiB
|
|
46
50
|
|
|
47
51
|
<--- Last few GCs --->
|
|
48
52
|
|
|
49
|
-
[
|
|
50
|
-
[
|
|
53
|
+
[18648:000001F930FC8F90] 13906 ms: Scavenge 4047.7 (4123.2) -> 4047.3 (4133.7) MB, 3.4 / 0.0 ms (average mu = 0.332, current mu = 0.185) allocation failure;
|
|
54
|
+
[18648:000001F930FC8F90] 13916 ms: Scavenge 4054.1 (4133.7) -> 4053.8 (4135.2) MB, 4.0 / 0.0 ms (average mu = 0.332, current mu = 0.185) allocation failure;
|
|
55
|
+
[18648:000001F930FC8F90] 13925 ms: Scavenge 4055.1 (4135.2) -> 4054.3 (4156.2) MB, 8.3 / 0.0 ms (average mu = 0.332, current mu = 0.185) allocation failure;
|
|
51
56
|
|
|
52
57
|
|
|
53
58
|
<--- JS stacktrace --->
|
|
54
59
|
|
|
55
60
|
FATAL ERROR: Reached heap limit Allocation failed - JavaScript heap out of memory
|
|
56
|
-
1:
|
|
57
|
-
2:
|
|
58
|
-
3:
|
|
59
|
-
4:
|
|
60
|
-
5:
|
|
61
|
-
6:
|
|
62
|
-
7:
|
|
63
|
-
8:
|
|
64
|
-
9:
|
|
65
|
-
10:
|
|
66
|
-
11:
|
|
67
|
-
12:
|
|
68
|
-
13:
|
|
69
|
-
|
|
61
|
+
1: 00007FF604481C7F node_api_throw_syntax_error+175855
|
|
62
|
+
2: 00007FF604406476 EVP_MD_meth_get_input_blocksize+59654
|
|
63
|
+
3: 00007FF604408160 EVP_MD_meth_get_input_blocksize+67056
|
|
64
|
+
4: 00007FF604EB0434 v8::Isolate::ReportExternalAllocationLimitReached+116
|
|
65
|
+
5: 00007FF604E9B7C2 v8::Isolate::Exit+674
|
|
66
|
+
6: 00007FF604D1D65C v8::internal::EmbedderStackStateScope::ExplicitScopeForTesting+124
|
|
67
|
+
7: 00007FF604D1A87B v8::internal::Heap::CollectGarbage+3963
|
|
68
|
+
8: 00007FF604D30AB3 v8::internal::HeapAllocator::AllocateRawWithLightRetrySlowPath+2099
|
|
69
|
+
9: 00007FF604D3135D v8::internal::HeapAllocator::AllocateRawWithRetryOrFailSlowPath+93
|
|
70
|
+
10: 00007FF604D40B20 v8::internal::Factory::NewFillerObject+816
|
|
71
|
+
11: 00007FF604A31565 v8::internal::DateCache::Weekday+1349
|
|
72
|
+
12: 00007FF604F4D961 v8::internal::SetupIsolateDelegate::SetupHeap+558193
|
|
73
|
+
13: 00007FF5B03D1532
|
|
70
74
|
|
|
71
75
|
*/
|
|
72
76
|
|
package/samples/dom-parsers.js
DELETED
|
@@ -1,108 +0,0 @@
|
|
|
1
|
-
var fs = require('fs');
|
|
2
|
-
var xml = fs.readFileSync('../SwissProt.xml', 'utf8');
|
|
3
|
-
|
|
4
|
-
console.log(`File Size: ${Math.round((xml.length / 1024 / 1024 + Number.EPSILON) * 100) / 100} MiB`);
|
|
5
|
-
for (const [key, value] of Object.entries(process.memoryUsage()))
|
|
6
|
-
console.log(` Memory usage by ${key}, ${Math.round((value / 1024 / 1024 + Number.EPSILON) * 100) / 100} MiB`)
|
|
7
|
-
|
|
8
|
-
/*
|
|
9
|
-
File Size of dblp.xml: 127.66 MiB
|
|
10
|
-
Memory usage by rss, 290.71 MiB
|
|
11
|
-
Memory usage by heapTotal, 137.07 MiB
|
|
12
|
-
Memory usage by heapUsed, 135.74 MiB
|
|
13
|
-
Memory usage by external, 128.53 MiB
|
|
14
|
-
Memory usage by arrayBuffers, 0.02 MiB
|
|
15
|
-
|
|
16
|
-
File Size of SwissProt.xml: 109.5 MiB
|
|
17
|
-
Memory usage by rss, 387.96 MiB
|
|
18
|
-
Memory usage by heapTotal, 257.09 MiB
|
|
19
|
-
Memory usage by heapUsed, 247.7 MiB
|
|
20
|
-
Memory usage by external, 110.37 MiB
|
|
21
|
-
Memory usage by arrayBuffers, 109.52 MiB
|
|
22
|
-
*/
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
/*
|
|
26
|
-
const { XMLParser } = require("fast-xml-parser");
|
|
27
|
-
let parser = new XMLParser();
|
|
28
|
-
let obj = parser.parse(xml);
|
|
29
|
-
for (const [key, value] of Object.entries(process.memoryUsage()))
|
|
30
|
-
console.log(` Memory usage by ${key}, ${Math.round((value / 1024 / 1024 + Number.EPSILON) * 100) / 100} MiB`)
|
|
31
|
-
*/
|
|
32
|
-
/*
|
|
33
|
-
Memory usage by rss, 1482.1 MiB
|
|
34
|
-
Memory usage by heapTotal, 1444.53 MiB
|
|
35
|
-
Memory usage by heapUsed, 1396.38 MiB
|
|
36
|
-
Memory usage by external, 0.87 MiB
|
|
37
|
-
Memory usage by arrayBuffers, 0.02 MiB
|
|
38
|
-
*/
|
|
39
|
-
|
|
40
|
-
/*
|
|
41
|
-
const twig = require('../twig.js');
|
|
42
|
-
let parser = twig.createParser([{ tag: twig.Root, function: rootHandler }], { method: 'expat' })
|
|
43
|
-
let reader = fs.createReadStream(`../SwissProt.xml`);
|
|
44
|
-
reader.pipe(parser);
|
|
45
|
-
function rootHandler(elt) {
|
|
46
|
-
for (const [key, value] of Object.entries(process.memoryUsage()))
|
|
47
|
-
console.log(` Memory usage by ${key}, ${Math.round((value / 1024 / 1024 + Number.EPSILON) * 100) / 100} MiB`)
|
|
48
|
-
}
|
|
49
|
-
*/
|
|
50
|
-
/*
|
|
51
|
-
Memory usage by rss, 8872.57 MiB
|
|
52
|
-
Memory usage by heapTotal, 8779.85 MiB
|
|
53
|
-
Memory usage by heapUsed, 8586.22 MiB
|
|
54
|
-
Memory usage by external, 1.15 MiB
|
|
55
|
-
Memory usage by arrayBuffers, 0.3 MiB
|
|
56
|
-
*/
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
/*
|
|
60
|
-
const parse = require('xml-parser');
|
|
61
|
-
let obj = parse(xml);
|
|
62
|
-
for (const [key, value] of Object.entries(process.memoryUsage()))
|
|
63
|
-
console.log(` Memory usage by ${key}, ${Math.round((value / 1024 / 1024 + Number.EPSILON) * 100) / 100} MiB`)
|
|
64
|
-
/*
|
|
65
|
-
Running for ages...
|
|
66
|
-
*/
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
/*
|
|
70
|
-
const xml2js = require('xml2js');
|
|
71
|
-
var parser = new xml2js.Parser();
|
|
72
|
-
parser.parseString(xml, function (err, result) {
|
|
73
|
-
for (const [key, value] of Object.entries(process.memoryUsage()))
|
|
74
|
-
console.log(` Memory usage by ${key}, ${Math.round((value / 1024 / 1024 + Number.EPSILON) * 100) / 100} MiB`)
|
|
75
|
-
|
|
76
|
-
console.log('Done');
|
|
77
|
-
});
|
|
78
|
-
*/
|
|
79
|
-
/*
|
|
80
|
-
Memory usage by rss, 898.91 MiB
|
|
81
|
-
Memory usage by heapTotal, 863.84 MiB
|
|
82
|
-
Memory usage by heapUsed, 825.19 MiB
|
|
83
|
-
Memory usage by external, 0.87 MiB
|
|
84
|
-
Memory usage by arrayBuffers, 0.02 MiB
|
|
85
|
-
*/
|
|
86
|
-
/*
|
|
87
|
-
const parser = require("xml-parse");
|
|
88
|
-
var parsedXML = parser.parse(xml);
|
|
89
|
-
for (const [key, value] of Object.entries(process.memoryUsage()))
|
|
90
|
-
console.log(` Memory usage by ${key}, ${Math.round((value / 1024 / 1024 + Number.EPSILON) * 100) / 100} MiB`)
|
|
91
|
-
// -> Uncaught RangeError RangeError: Maximum call stack size exceeded
|
|
92
|
-
*/
|
|
93
|
-
|
|
94
|
-
/*
|
|
95
|
-
const { parseXml } = require('@rgrove/parse-xml');
|
|
96
|
-
let r = parseXml(xml);
|
|
97
|
-
for (const [key, value] of Object.entries(process.memoryUsage()))
|
|
98
|
-
console.log(` Memory usage by ${key}, ${Math.round((value / 1024 / 1024 + Number.EPSILON) * 100) / 100} MiB`)
|
|
99
|
-
*/
|
|
100
|
-
/*
|
|
101
|
-
Memory usage by rss, 1943.94 MiB
|
|
102
|
-
Memory usage by heapTotal, 1903.21 MiB
|
|
103
|
-
Memory usage by heapUsed, 1854.36 MiB
|
|
104
|
-
Memory usage by external, 0.87 MiB
|
|
105
|
-
Memory usage by arrayBuffers, 0.02 MiB
|
|
106
|
-
*/
|
|
107
|
-
|
|
108
|
-
|