xml-toolkit 0.0.1 → 0.0.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/index.js +2 -1
- package/lib/AttributesMap.js +37 -0
- package/lib/EntityResolver.js +66 -0
- package/lib/SAXEvent.js +4 -4
- package/lib/SAXEventEmitter.js +3 -30
- package/lib/XMLLexer.js +0 -6
- package/lib/XMLReader.js +87 -0
- package/package.json +4 -2
- package/test/ent.xml +6 -0
- package/test/test.js +22 -4
package/index.js
CHANGED
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
const XMLLexer = require ('./lib/XMLLexer')
|
|
2
2
|
const SAXEventEmitter = require ('./lib/SAXEventEmitter')
|
|
3
3
|
const SAXEvent = require ('./lib/SAXEvent')
|
|
4
|
+
const AttributesMap = require ('./lib/AttributesMap')
|
|
4
5
|
|
|
5
|
-
module.exports = {SAXEventEmitter, XMLLexer, SAXEvent}
|
|
6
|
+
module.exports = {SAXEventEmitter, XMLLexer, SAXEvent, AttributesMap}
|
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
/**
 * A Map of attribute names to values that can expand entity references
 * in every string value it stores.
 *
 * Expansion is enabled when the reader passed to the constructor has a
 * truthy `useEntities`; each string value is then run through
 * `xmlReader.entityResolver.fix ()` before being stored.
 */
const AttributesMap = class extends Map {

	/**
	 * @param {object} [xmlReader] object exposing `useEntities` and
	 *   `entityResolver`; when omitted (or null) values are stored verbatim
	 */
	constructor (xmlReader = null) {

		super ()

		this._xmlReader = xmlReader

		// null means "store values as they are"
		this.fix = null

		// The reader is optional: without this guard a bare `new AttributesMap ()`
		// crashed on `xmlReader.useEntities`
		if (xmlReader !== null && xmlReader.useEntities) {

			const {entityResolver} = xmlReader

			this.fix = s => entityResolver.fix (s)

		}

	}

	/**
	 * Stores `v` under `k`, expanding entity references first when enabled.
	 *
	 * @param {*} k attribute name
	 * @param {*} v attribute value; only strings are passed to the resolver
	 * @returns {AttributesMap} this map, as with Map.prototype.set
	 */
	set (k, v) {

		const {fix} = this

		// The resolver works on strings only, so anything else is kept untouched
		if (fix !== null && typeof v === 'string') v = fix (v)

		return super.set (k, v)

	}

}
|
|
36
|
+
|
|
37
|
+
module.exports = AttributesMap
|
|
@@ -0,0 +1,66 @@
|
|
|
1
|
+
const XMLReader = require ('./XMLReader.js')
|
|
2
|
+
|
|
3
|
+
const CH_HASH = '#'.charCodeAt (0)

// The five entities predefined by XML itself
const PREDEFINED = new Map ([
	['lt'   , '<'],
	['gt'   , '>'],
	['quot' , '"'],
	['amp'  , '&'],
	['apos' , "'"],
])

// Numeric character references as they appear between '&' and ';'
const RE_DEC_REF = /^#[0-9]+$/
const RE_HEX_REF = /^#x[0-9A-Fa-f]+$/

const MAX_CODE_POINT = 0x10FFFF

/**
 * Decodes a numeric character reference.
 *
 * @param {string} key reference body without '&' and ';', e.g. '#65' or '#x41'
 * @returns {string|null} the referenced character, or null when `key` is not
 *   a well-formed numeric reference to a code point in 1..0x10FFFF
 */
const decodeCharRef = key => {

	if (key.charCodeAt (0) !== CH_HASH) return null

	let code

	// '#x…' is hexadecimal, plain '#…' is decimal; the patterns reject
	// trailing garbage that parseInt alone would silently ignore
	if (RE_HEX_REF.test (key)) code = parseInt (key.slice (2), 16)
	else if (RE_DEC_REF.test (key)) code = parseInt (key.slice (1), 10)
	else return null

	if (code === 0 || code > MAX_CODE_POINT) return null

	// fromCodePoint, unlike fromCharCode, handles code points above U+FFFF
	return String.fromCodePoint (code)

}

/**
 * Replaces entity and character references ('&name;', '&#NNN;', '&#xHHH;')
 * in strings with the text they stand for.
 *
 * `body` maps reference bodies to replacement text. It starts with the
 * predefined XML entities and is extended with every numeric reference
 * decoded so far.
 */
const EntityResolver = class {

	constructor () {

		this.body = new Map (PREDEFINED)

	}

	/**
	 * Expands all references in `s`.
	 *
	 * @param {string} s text possibly containing references
	 * @returns {string} `s` itself when it contains no '&', otherwise a new
	 *   string with every reference replaced
	 * @throws {Error} on an '&' with no following ';', or on a reference that
	 *   is neither known to `body` nor a valid numeric character reference
	 */
	fix (s) {

		let start = s.indexOf ('&'); if (start === -1) return s

		let r = s.slice (0, start)

		const {body} = this

		while (true) {

			// step over '&' and look for the terminator
			const end = s.indexOf (';', ++ start); if (end === -1) throw new Error ('Unterminated entity reference in ' + JSON.stringify ([s]))

			const key = s.slice (start, end)

			let c = body.get (key)

			if (c === undefined) {

				c = decodeCharRef (key)

				if (c === null) throw new Error ('Unknown entity reference ' + key + ' in ' + JSON.stringify ([s]))

				// remember the decoded reference for later lookups
				body.set (key, c)

			}

			r += c

			const next = end + 1

			start = s.indexOf ('&', next)

			if (start === -1) return r + s.slice (next)

			r += s.slice (next, start)

		}

	}

}
|
|
65
|
+
|
|
66
|
+
module.exports = EntityResolver
|
package/lib/SAXEvent.js
CHANGED
|
@@ -117,12 +117,12 @@ const SAXEvent = class {
|
|
|
117
117
|
const q = src.indexOf (src.charAt (eq + 1), eq + 2); if (q === -1) break
|
|
118
118
|
|
|
119
119
|
let k = src.slice (start, eq)
|
|
120
|
-
|
|
120
|
+
|
|
121
121
|
if (start === 0) {
|
|
122
|
-
|
|
123
|
-
let p = k.length - 2; while (p !== 0 && k.charCodeAt (p) <= 32) p --
|
|
124
122
|
|
|
125
|
-
|
|
123
|
+
let p = k.length - 1; while (p !== 0 && k.charCodeAt (p) > 32) p --
|
|
124
|
+
|
|
125
|
+
k = k.slice (this._afterName = ++ p)
|
|
126
126
|
|
|
127
127
|
}
|
|
128
128
|
else {
|
package/lib/SAXEventEmitter.js
CHANGED
|
@@ -1,39 +1,12 @@
|
|
|
1
|
-
const
|
|
2
|
-
const {Writable} = require ('stream')
|
|
3
|
-
const SAXEvent = require ('./SAXEvent.js')
|
|
1
|
+
const XMLReader = require ('./XMLReader.js')
|
|
4
2
|
|
|
5
|
-
const SAXEventEmitter = class extends
|
|
3
|
+
const SAXEventEmitter = class extends XMLReader {
|
|
6
4
|
|
|
7
5
|
constructor (options = {}) {
|
|
8
6
|
|
|
9
|
-
options.decodeStrings = false
|
|
10
|
-
|
|
11
7
|
super (options)
|
|
12
8
|
|
|
13
|
-
this.on ('
|
|
14
|
-
|
|
15
|
-
}
|
|
16
|
-
|
|
17
|
-
_write (chunk, encoding, callback) {
|
|
18
|
-
|
|
19
|
-
if (chunk.length !== 0) {
|
|
20
|
-
|
|
21
|
-
let e = new SAXEvent (chunk), {type} = e
|
|
22
|
-
|
|
23
|
-
if (type === SAXEvent.TYPES.CDATA) {
|
|
24
|
-
e = new SAXEvent (e.text)
|
|
25
|
-
type = SAXEvent.TYPES.CHARACTERS
|
|
26
|
-
}
|
|
27
|
-
|
|
28
|
-
this.emit (type, e)
|
|
29
|
-
|
|
30
|
-
if (type === SAXEvent.TYPES.TYPE_START_ELEMENT && e.isSelfEnclosing ()) {
|
|
31
|
-
this.emit (SAXEvent.TYPES.TYPE_END_ELEMENT, e)
|
|
32
|
-
}
|
|
33
|
-
|
|
34
|
-
}
|
|
35
|
-
|
|
36
|
-
callback ()
|
|
9
|
+
this.on ('data', e => this.emit (e.type, e))
|
|
37
10
|
|
|
38
11
|
}
|
|
39
12
|
|
package/lib/XMLLexer.js
CHANGED
|
@@ -38,14 +38,10 @@ const XMLLexer = class extends Transform {
|
|
|
38
38
|
assert (Number.isInteger (maxLength), 'maxLength must be integer, not ' + maxLength)
|
|
39
39
|
assert (maxLength > 0, 'maxLength must be positive, not ' + maxLength)
|
|
40
40
|
|
|
41
|
-
if (!('stripSpace' in options)) options.stripSpace = false
|
|
42
|
-
assert (options.stripSpace === true || options.stripSpace === false, 'options.stripSpace must be boolean, not ' + typeof options.stripSpace)
|
|
43
|
-
|
|
44
41
|
super (options)
|
|
45
42
|
|
|
46
43
|
this.decoder = new StringDecoder ('utf8')
|
|
47
44
|
|
|
48
|
-
this.stripSpace = options.stripSpace
|
|
49
45
|
this.body = ''
|
|
50
46
|
this.start = 0
|
|
51
47
|
this.setState (ST_TEXT)
|
|
@@ -237,8 +233,6 @@ const XMLLexer = class extends Transform {
|
|
|
237
233
|
|
|
238
234
|
this.start = pos
|
|
239
235
|
|
|
240
|
-
if (this.stripSpace) lexeme = lexeme.trim ()
|
|
241
|
-
|
|
242
236
|
if (lexeme.length !== 0) this.push (lexeme)
|
|
243
237
|
|
|
244
238
|
}
|
package/lib/XMLReader.js
ADDED
|
@@ -0,0 +1,87 @@
|
|
|
1
|
+
const assert = require ('assert')
|
|
2
|
+
const {Transform} = require ('stream')
|
|
3
|
+
const SAXEvent = require ('./SAXEvent.js')
|
|
4
|
+
|
|
5
|
+
/**
 * Transform stream that consumes lexemes (strings) and produces SAXEvent
 * objects.
 *
 * Consecutive CHARACTERS and CDATA lexemes are accumulated into one text
 * buffer, which is emitted as a single event just before the next event of
 * any other type (or at end of input).
 *
 * Options (besides the standard stream ones):
 *   stripSpace  {boolean} trim the accumulated text and drop it if it
 *                         becomes empty; default false
 *   useEntities {boolean} expand entity references in CHARACTERS lexemes
 *                         via an EntityResolver; default true
 */
const XMLReader = class extends Transform {

	/**
	 * @param {object} [options] see the class description; the object passed
	 *   in is not modified
	 */
	constructor (options = {}) {

		// Work on a copy: the caller's object must not be altered as a side
		// effect. Both stream sides are forced into object mode and incoming
		// strings are kept as strings.
		const o = {...options, decodeStrings: false, objectMode: true}

		if (!('stripSpace' in o)) o.stripSpace = false
		assert (o.stripSpace === true || o.stripSpace === false, 'options.stripSpace must be boolean, not ' + typeof o.stripSpace)

		if (!('useEntities' in o)) o.useEntities = true
		assert (o.useEntities === true || o.useEntities === false, 'options.useEntities must be boolean, not ' + typeof o.useEntities)

		super (o)

		this.stripSpace = o.stripSpace
		this.useEntities = o.useEntities

		// Loaded here rather than at module level, as in the original
		// (EntityResolver.js itself requires this module)
		if (this.useEntities) this.entityResolver = new (require ('./EntityResolver.js')) ()

		// text accumulated since the last non-text event
		this.text = ''

	}

	/**
	 * Emits any pending text, then the END_DOCUMENT marker.
	 *
	 * @param {function} callback stream callback; receives the error if
	 *   emitting fails
	 */
	_flush (callback) {

		try {

			this.flush_text ()

			this.push ({type: SAXEvent.TYPES.END_DOCUMENT})

		}
		catch (err) {

			return callback (err)

		}

		callback ()

	}

	/**
	 * Pushes the accumulated text as one SAXEvent and resets the buffer.
	 * With `stripSpace` set the text is trimmed first and nothing is pushed
	 * if it ends up empty.
	 */
	flush_text () {

		let {text} = this; if (text.length === 0) return

		if (this.stripSpace) text = text.trim ()

		if (text.length !== 0) this.push (new SAXEvent (text))

		this.text = ''

	}

	/**
	 * Handles one lexeme.
	 *
	 * @param {string} chunk lexeme; empty chunks are ignored
	 * @param {string} encoding unused
	 * @param {function} callback stream callback; receives any error raised
	 *   while handling the lexeme, so that it surfaces as an 'error' event
	 *   instead of an exception thrown out of the stream machinery
	 */
	_transform (chunk, encoding, callback) {

		try {

			if (chunk.length !== 0) {

				let e = new SAXEvent (chunk), {type} = e

				switch (type) {

					case SAXEvent.TYPES.CHARACTERS:
						this.text += this.useEntities ? this.entityResolver.fix (e.text) : e.text
						break

					case SAXEvent.TYPES.CDATA:
						// CDATA content is taken as is, without entity expansion
						this.text += e.text
						break

					default:
						this.flush_text ()
						this.push (e)
						break

				}

				// NOTE(review): presumably '</>' is parsed by SAXEvent as an end
				// element event closing the self-enclosed tag; confirm against SAXEvent
				if (type === SAXEvent.TYPES.START_ELEMENT && e.isSelfEnclosed) this.push (new SAXEvent ('</>'))

			}

		}
		catch (err) {

			return callback (err)

		}

		callback ()

	}

}
|
|
86
|
+
|
|
87
|
+
module.exports = XMLReader
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "xml-toolkit",
|
|
3
|
-
"version": "0.0.
|
|
3
|
+
"version": "0.0.3",
|
|
4
4
|
"description": "Collection of classes for dealing with XML",
|
|
5
5
|
"main": "index.js",
|
|
6
6
|
"scripts": {
|
|
@@ -11,7 +11,9 @@
|
|
|
11
11
|
"url": "git+https://github.com/do-/node-xml-toolkit.git"
|
|
12
12
|
},
|
|
13
13
|
"keywords": [
|
|
14
|
-
"xml
|
|
14
|
+
"xml",
|
|
15
|
+
"sax",
|
|
16
|
+
"stax"
|
|
15
17
|
],
|
|
16
18
|
"author": "Dmitry Ovsyanko",
|
|
17
19
|
"license": "MIT",
|
package/test/ent.xml
ADDED
package/test/test.js
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
const fs = require ('fs')
|
|
2
2
|
const assert = require ('assert')
|
|
3
|
-
const {SAXEventEmitter, SAXEvent, XMLLexer} = require ('../')
|
|
3
|
+
const {SAXEventEmitter, SAXEvent, XMLLexer, AttributesMap} = require ('../')
|
|
4
4
|
|
|
5
5
|
async function test_001_lexer_sync (fn) {
|
|
6
6
|
|
|
@@ -60,13 +60,17 @@ console.log (xml)
|
|
|
60
60
|
const lex = new XMLLexer ({
|
|
61
61
|
// maxLength: 40,
|
|
62
62
|
// encoding: 'ascii',
|
|
63
|
-
stripSpace: true,
|
|
63
|
+
// stripSpace: true,
|
|
64
64
|
})
|
|
65
65
|
|
|
66
|
-
const sax = new SAXEventEmitter ({
|
|
66
|
+
const sax = new SAXEventEmitter ({
|
|
67
|
+
stripSpace: true,
|
|
68
|
+
// useEntities: false,
|
|
69
|
+
})
|
|
67
70
|
|
|
68
71
|
lex.pipe (sax)
|
|
69
72
|
|
|
73
|
+
/*
|
|
70
74
|
for (let event of [
|
|
71
75
|
'StartDocument',
|
|
72
76
|
'ProcessingInstruction',
|
|
@@ -77,6 +81,19 @@ console.log (xml)
|
|
|
77
81
|
'EndElement',
|
|
78
82
|
'EndDocument',
|
|
79
83
|
]) sax.on (event, data => console.log ([event, data]))
|
|
84
|
+
*/
|
|
85
|
+
|
|
86
|
+
sax.on ('StartElement', event => {
|
|
87
|
+
|
|
88
|
+
console.log ({event})
|
|
89
|
+
|
|
90
|
+
const attr = new AttributesMap (sax)
|
|
91
|
+
|
|
92
|
+
event.writeAttributesToMap (attr)
|
|
93
|
+
|
|
94
|
+
console.log ({attr})
|
|
95
|
+
|
|
96
|
+
})
|
|
80
97
|
|
|
81
98
|
// lexer.on ('data', data => console.log ({data}))
|
|
82
99
|
|
|
@@ -92,13 +109,14 @@ async function main () {
|
|
|
92
109
|
// await test_001_lexer_sync ('E05a.xml')
|
|
93
110
|
// await test_001_lexer_sync ('not-sa01.xml')
|
|
94
111
|
// await test_001_lexer_sync ('not-sa02.xml')
|
|
95
|
-
await test_001_lexer_sync ('param_types.xml')
|
|
112
|
+
// await test_001_lexer_sync ('param_types.xml')
|
|
96
113
|
// await test_002_lexer_stream ('E05a.xml')
|
|
97
114
|
// await test_002_lexer_stream ('param_types.xml')
|
|
98
115
|
// await test_002_lexer_stream ('not-sa02.xml')
|
|
99
116
|
// await test_003_emitter_sync ('E05a.xml')
|
|
100
117
|
// await test_003_emitter_sync ('param_types.xml')
|
|
101
118
|
// await test_003_emitter_sync ('not-sa01.xml')
|
|
119
|
+
await test_003_emitter_sync ('ent.xml')
|
|
102
120
|
|
|
103
121
|
}
|
|
104
122
|
|