xml-toolkit 0.0.1 → 0.0.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/index.js CHANGED
@@ -1,5 +1,6 @@
1
1
  const XMLLexer = require ('./lib/XMLLexer')
2
2
  const SAXEventEmitter = require ('./lib/SAXEventEmitter')
3
3
  const SAXEvent = require ('./lib/SAXEvent')
4
+ const AttributesMap = require ('./lib/AttributesMap')
4
5
 
5
- module.exports = {SAXEventEmitter, XMLLexer, SAXEvent}
6
+ module.exports = {SAXEventEmitter, XMLLexer, SAXEvent, AttributesMap}
@@ -0,0 +1,37 @@
1
/**
 * A Map that can pass every value through an entity resolver
 * at the moment the value is stored.
 *
 * Whether values are rewritten is decided once, in the constructor,
 * from the `useEntities` flag of the reader handed in.
 */
const AttributesMap = class extends Map {

	/**
	 * @param {object} xmlReader  object exposing `useEntities` and, when that
	 *                            flag is truthy, an `entityResolver` with a
	 *                            `fix (string)` method
	 */
	constructor (xmlReader) {

		super ()

		this._xmlReader = xmlReader

		// null means: store values exactly as given
		this.fix = null

		if (!xmlReader.useEntities) return

		// The resolver is captured now; replacing it on the reader later has no effect here
		const resolver = xmlReader.entityResolver

		this.fix = value => resolver.fix (value)

	}

	/**
	 * Stores `v` under `k`, first passing it through `this.fix` unless that is null.
	 *
	 * @param {*} k  key
	 * @param {*} v  value as it appears in the source
	 * @returns {AttributesMap} this map, as with Map.prototype.set
	 */
	set (k, v) {

		const decode = this.fix

		const stored = decode === null ? v : decode (v)

		return super.set (k, stored)

	}

}
36
+
37
+ module.exports = AttributesMap
@@ -0,0 +1,66 @@
1
+ const XMLReader = require ('./XMLReader.js')
2
+
3
const CH_HASH = '#'.charCodeAt (0)
const CH_X    = 'x'.charCodeAt (0)

// Largest Unicode code point; String.fromCodePoint raises RangeError above it
const MAX_CODE_POINT = 0x10FFFF

const RE_DEC = /^[0-9]+$/
const RE_HEX = /^[0-9A-Fa-f]+$/

// The five entities every XML document may use without declaring them
const PREDEFINED = new Map ([
	['lt'   , '<'],
	['gt'   , '>'],
	['quot' , '"'],
	['amp'  , '&'],
	['apos' , "'"],
])

/**
 * Reads the text found between `&` and `;` as a numeric character reference.
 *
 * Recognised forms:
 *   `#x` + hex digits   -- hexadecimal reference
 *   `#`  + digits       -- decimal reference
 *   digits only         -- non-standard decimal form (no `#`), kept because
 *                          earlier versions of this class accepted it
 *
 * @param {string} key  reference body, without the `&` and `;` delimiters
 * @returns {number} the code point, or NaN when `key` matches none of the forms
 */
const parseCodePoint = key => {

	let digits = key, radix = 10, re = RE_DEC

	if (key.charCodeAt (0) === CH_HASH) {

		if (key.charCodeAt (1) === CH_X) {
			digits = key.slice (2)
			radix  = 16
			re     = RE_HEX
		}
		else {
			digits = key.slice (1)
		}

	}

	// parseInt alone would silently accept trailing garbage ('12zz' -> 12)
	return re.test (digits) ? parseInt (digits, radix) : NaN

}

/**
 * Replaces entity and character references (`&name;`, `&#NN;`, `&#xHH;`)
 * in strings with the characters they stand for.
 *
 * `body` maps reference bodies to replacement text. It starts with the
 * predefined XML entities; every numeric reference resolved by `fix` is
 * added to it, so repeated references are served from the map.
 */
const EntityResolver = class {

	constructor () {

		this.body = new Map (PREDEFINED)

	}

	/**
	 * Expands all references in `s`.
	 *
	 * @param {string} s  text possibly containing `&...;` references
	 * @returns {string} `s` itself when it contains no `&`, otherwise a new
	 *                   string with every reference replaced
	 * @throws {Error} when an `&` has no closing `;`, or when the reference is
	 *                 neither a key of `body` nor a numeric reference to a
	 *                 code point in 1..0x10FFFF
	 */
	fix (s) {

		let start = s.indexOf ('&'); if (start === -1) return s

		let r = s.slice (0, start)

		const {body} = this

		while (true) {

			// `start` points at '&'; step over it before looking for the terminator
			let end = s.indexOf (';', ++ start); if (end === -1) throw new Error ('Unterminated entity reference in ' + JSON.stringify ([s]))

			const key = s.slice (start, end)

			if (body.has (key)) {

				r += body.get (key)

			}
			else {

				const codePoint = parseCodePoint (key)

				// Written as a negated range test so that NaN is rejected too
				if (!(codePoint > 0 && codePoint <= MAX_CODE_POINT)) throw new Error ('Unknown entity reference ' + key + ' in ' + JSON.stringify ([s]))

				// fromCodePoint, unlike fromCharCode, handles code points above U+FFFF
				const c = String.fromCodePoint (codePoint)

				body.set (key, c)

				r += c

			}

			// `end` points at ';'; step over it before looking for the next reference
			start = s.indexOf ('&', ++ end)

			if (start === -1) return r + s.slice (end)

			r += s.slice (end, start)

		}

	}

}
65
+
66
+ module.exports = EntityResolver
package/lib/SAXEvent.js CHANGED
@@ -117,12 +117,12 @@ const SAXEvent = class {
117
117
  const q = src.indexOf (src.charAt (eq + 1), eq + 2); if (q === -1) break
118
118
 
119
119
  let k = src.slice (start, eq)
120
-
120
+
121
121
  if (start === 0) {
122
-
123
- let p = k.length - 2; while (p !== 0 && k.charCodeAt (p) <= 32) p --
124
122
 
125
- k = k.slice (this._afterName = p)
123
+ let p = k.length - 1; while (p !== 0 && k.charCodeAt (p) > 32) p --
124
+
125
+ k = k.slice (this._afterName = ++ p)
126
126
 
127
127
  }
128
128
  else {
@@ -1,39 +1,12 @@
1
- const assert = require ('assert')
2
- const {Writable} = require ('stream')
3
- const SAXEvent = require ('./SAXEvent.js')
1
+ const XMLReader = require ('./XMLReader.js')
4
2
 
5
- const SAXEventEmitter = class extends Writable {
3
+ const SAXEventEmitter = class extends XMLReader {
6
4
 
7
5
  constructor (options = {}) {
8
6
 
9
- options.decodeStrings = false
10
-
11
7
  super (options)
12
8
 
13
- this.on ('finish', () => this.emit (SAXEvent.TYPES.END_DOCUMENT))
14
-
15
- }
16
-
17
- _write (chunk, encoding, callback) {
18
-
19
- if (chunk.length !== 0) {
20
-
21
- let e = new SAXEvent (chunk), {type} = e
22
-
23
- if (type === SAXEvent.TYPES.CDATA) {
24
- e = new SAXEvent (e.text)
25
- type = SAXEvent.TYPES.CHARACTERS
26
- }
27
-
28
- this.emit (type, e)
29
-
30
- if (type === SAXEvent.TYPES.TYPE_START_ELEMENT && e.isSelfEnclosing ()) {
31
- this.emit (SAXEvent.TYPES.TYPE_END_ELEMENT, e)
32
- }
33
-
34
- }
35
-
36
- callback ()
9
+ this.on ('data', e => this.emit (e.type, e))
37
10
 
38
11
  }
39
12
 
package/lib/XMLLexer.js CHANGED
@@ -38,14 +38,10 @@ const XMLLexer = class extends Transform {
38
38
  assert (Number.isInteger (maxLength), 'maxLength must be integer, not ' + maxLength)
39
39
  assert (maxLength > 0, 'maxLength must be positive, not ' + maxLength)
40
40
 
41
- if (!('stripSpace' in options)) options.stripSpace = false
42
- assert (options.stripSpace === true || options.stripSpace === false, 'options.stripSpace must be boolean, not ' + typeof options.stripSpace)
43
-
44
41
  super (options)
45
42
 
46
43
  this.decoder = new StringDecoder ('utf8')
47
44
 
48
- this.stripSpace = options.stripSpace
49
45
  this.body = ''
50
46
  this.start = 0
51
47
  this.setState (ST_TEXT)
@@ -237,8 +233,6 @@ const XMLLexer = class extends Transform {
237
233
 
238
234
  this.start = pos
239
235
 
240
- if (this.stripSpace) lexeme = lexeme.trim ()
241
-
242
236
  if (lexeme.length !== 0) this.push (lexeme)
243
237
 
244
238
  }
@@ -0,0 +1,87 @@
1
+ const assert = require ('assert')
2
+ const {Transform} = require ('stream')
3
+ const SAXEvent = require ('./SAXEvent.js')
4
+
5
/**
 * Object-mode Transform: each non-empty incoming chunk is wrapped into a
 * SAXEvent and the resulting events are pushed downstream.
 *
 * Consecutive character data and CDATA chunks are accumulated in `this.text`
 * and emitted as a single event as soon as any other kind of event arrives
 * (or the stream ends). A final `{type: END_DOCUMENT}` object is pushed on flush.
 *
 * Options (besides the standard stream ones):
 *   stripSpace  {boolean} default false -- trim accumulated text, drop it if empty
 *   useEntities {boolean} default true  -- expand entity references in character data
 */
const XMLReader = class extends Transform {

	/**
	 * @param {object} [options]  see the class description; the object passed
	 *                            in is read but never modified
	 * @throws {AssertionError} when `stripSpace` or `useEntities` is present
	 *                          but not a boolean
	 */
	constructor (options = {}) {

		const stripSpace = 'stripSpace' in options ? options.stripSpace : false
		assert (stripSpace === true || stripSpace === false, 'options.stripSpace must be boolean, not ' + typeof stripSpace)

		const useEntities = 'useEntities' in options ? options.useEntities : true
		assert (useEntities === true || useEntities === false, 'options.useEntities must be boolean, not ' + typeof useEntities)

		// Hand the base class a copy, so the caller's object stays untouched;
		// decodeStrings and objectMode are always forced
		super (Object.assign ({}, options, {
			decodeStrings: false,
			objectMode: true,
			stripSpace,
			useEntities,
		}))

		this.stripSpace = stripSpace
		this.useEntities = useEntities

		// Loaded on demand: EntityResolver.js itself requires this module
		if (this.useEntities) this.entityResolver = new (require ('./EntityResolver.js')) ()

		// Character data collected since the last non-text event
		this.text = ''

	}

	/**
	 * End of input: emits any pending text, then the END_DOCUMENT marker.
	 *
	 * @param {function} callback  stream callback, invoked without arguments
	 */
	_flush (callback) {

		this.flush_text ()

		this.push ({type: SAXEvent.TYPES.END_DOCUMENT})

		callback ()

	}

	/**
	 * Pushes the accumulated text (trimmed when `stripSpace` is on) as one
	 * SAXEvent and resets the buffer. Does nothing when the buffer is empty.
	 */
	flush_text () {

		let {text} = this; if (text.length === 0) return

		if (this.stripSpace) text = text.trim ()

		// Whitespace-only text vanishes entirely under stripSpace
		if (text.length !== 0) this.push (new SAXEvent (text))

		this.text = ''

	}

	/**
	 * Handles one chunk.
	 *
	 * @param {*}        chunk     passed as is to the SAXEvent constructor;
	 *                             chunks with `length === 0` are ignored
	 * @param {string}   encoding  unused
	 * @param {function} callback  invoked with the Error raised by the entity
	 *                             resolver, if any, otherwise without arguments
	 */
	_transform (chunk, encoding, callback) {

		if (chunk.length !== 0) {

			let e = new SAXEvent (chunk), {type} = e

			switch (type) {

				case SAXEvent.TYPES.CHARACTERS:

					if (this.useEntities) {

						let decoded

						// A malformed reference must reach the stream's error
						// handling via the callback, not as a throw out of write ()
						try {
							decoded = this.entityResolver.fix (e.text)
						}
						catch (err) {
							return callback (err)
						}

						this.text += decoded

					}
					else {

						this.text += e.text

					}

					break

				case SAXEvent.TYPES.CDATA:

					// CDATA content is taken literally: no entity expansion
					this.text += e.text
					break

				default:

					this.flush_text ()
					this.push (e)
					break

			}

			// A self-enclosed start tag is followed by a synthetic end tag event
			if (type === SAXEvent.TYPES.START_ELEMENT && e.isSelfEnclosed) this.push (new SAXEvent ('</>'))

		}

		callback ()

	}

}
86
+
87
+ module.exports = XMLReader
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "xml-toolkit",
3
- "version": "0.0.1",
3
+ "version": "0.0.3",
4
4
  "description": "Collection of classes for dealing with XML",
5
5
  "main": "index.js",
6
6
  "scripts": {
@@ -11,7 +11,9 @@
11
11
  "url": "git+https://github.com/do-/node-xml-toolkit.git"
12
12
  },
13
13
  "keywords": [
14
- "xml sax stax"
14
+ "xml",
15
+ "sax",
16
+ "stax"
15
17
  ],
16
18
  "author": "Dmitry Ovsyanko",
17
19
  "license": "MIT",
package/test/ent.xml ADDED
@@ -0,0 +1,6 @@
1
+ <doc>
2
+ <p>&amp;</p>
3
+ <p>ss&32;ss&#20;ss&33;</p>
4
+ <p>&lt;&gt;&quot;&apos;</p>
5
+ <a name="&lt;"/>
6
+ </doc>
package/test/test.js CHANGED
@@ -1,6 +1,6 @@
1
1
  const fs = require ('fs')
2
2
  const assert = require ('assert')
3
- const {SAXEventEmitter, SAXEvent, XMLLexer} = require ('../')
3
+ const {SAXEventEmitter, SAXEvent, XMLLexer, AttributesMap} = require ('../')
4
4
 
5
5
  async function test_001_lexer_sync (fn) {
6
6
 
@@ -60,13 +60,17 @@ console.log (xml)
60
60
  const lex = new XMLLexer ({
61
61
  // maxLength: 40,
62
62
  // encoding: 'ascii',
63
- stripSpace: true,
63
+ // stripSpace: true,
64
64
  })
65
65
 
66
- const sax = new SAXEventEmitter ({})
66
+ const sax = new SAXEventEmitter ({
67
+ stripSpace: true,
68
+ // useEntities: false,
69
+ })
67
70
 
68
71
  lex.pipe (sax)
69
72
 
73
+ /*
70
74
  for (let event of [
71
75
  'StartDocument',
72
76
  'ProcessingInstruction',
@@ -77,6 +81,19 @@ console.log (xml)
77
81
  'EndElement',
78
82
  'EndDocument',
79
83
  ]) sax.on (event, data => console.log ([event, data]))
84
+ */
85
+
86
+ sax.on ('StartElement', event => {
87
+
88
+ console.log ({event})
89
+
90
+ const attr = new AttributesMap (sax)
91
+
92
+ event.writeAttributesToMap (attr)
93
+
94
+ console.log ({attr})
95
+
96
+ })
80
97
 
81
98
  // lexer.on ('data', data => console.log ({data}))
82
99
 
@@ -92,13 +109,14 @@ async function main () {
92
109
  // await test_001_lexer_sync ('E05a.xml')
93
110
  // await test_001_lexer_sync ('not-sa01.xml')
94
111
  // await test_001_lexer_sync ('not-sa02.xml')
95
- await test_001_lexer_sync ('param_types.xml')
112
+ // await test_001_lexer_sync ('param_types.xml')
96
113
  // await test_002_lexer_stream ('E05a.xml')
97
114
  // await test_002_lexer_stream ('param_types.xml')
98
115
  // await test_002_lexer_stream ('not-sa02.xml')
99
116
  // await test_003_emitter_sync ('E05a.xml')
100
117
  // await test_003_emitter_sync ('param_types.xml')
101
118
  // await test_003_emitter_sync ('not-sa01.xml')
119
+ await test_003_emitter_sync ('ent.xml')
102
120
 
103
121
  }
104
122