@file-type/xml 0.1.1 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (3) hide show
  1. package/README.md +42 -25
  2. package/lib/index.js +66 -38
  3. package/package.json +7 -2
package/README.md CHANGED
@@ -1,25 +1,42 @@
1
- [![NPM version](https://img.shields.io/npm/v/@file-type/xml.svg)](https://npmjs.org/package/@file-type/xml)
2
- [![Node.js CI](https://github.com/Borewit/file-type-xml/actions/workflows/nodejs-ci.yml/badge.svg)](https://github.com/Borewit/file-type-xml/actions/workflows/nodejs-ci.yml)
3
- # @file-type/xml
4
-
5
- Detector plugin for [file-type](https://github.com/sindresorhus/file-type) for XML files.
6
-
7
- ## Installation
8
-
9
- ```bash
10
- npm install @file-type/xml
11
- ```
12
-
13
- ### Usage
14
-
15
- The following example shows how add the XML detector to [file-type](https://github.com/sindresorhus/file-type).
16
- ```js
17
- import {NodeFileTypeParser} from 'file-type';
18
- import {detectXml} from '@file-type/xml';
19
-
20
- const parser = new NodeFileTypeParser({customDetectors: [detectXml]});
21
- const fileType = await parser.fromFile('example.kml');
22
- console.log(fileType);
23
- ```
24
-
25
-
1
+ [![NPM version](https://img.shields.io/npm/v/@file-type/xml.svg)](https://npmjs.org/package/@file-type/xml)
2
+ [![Node.js CI](https://github.com/Borewit/file-type-xml/actions/workflows/nodejs-ci.yml/badge.svg)](https://github.com/Borewit/file-type-xml/actions/workflows/nodejs-ci.yml)
3
+ # @file-type/xml
4
+
5
+ Detector plugin for [file-type](https://github.com/sindresorhus/file-type) for XML files.
6
+
7
+ ## Installation
8
+
9
+ ```bash
10
+ npm install @file-type/xml
11
+ ```
12
+
13
+ ## Usage
14
+
15
+ The following example shows how to add the XML detector to [file-type](https://github.com/sindresorhus/file-type).
16
+ ```js
17
+ import {NodeFileTypeParser} from 'file-type';
18
+ import {detectXml} from '@file-type/xml';
19
+
20
+ const parser = new NodeFileTypeParser({customDetectors: [detectXml]});
21
+ const fileType = await parser.fromFile('example.kml');
22
+ console.log(fileType);
23
+ ```
24
+
25
+ You can also use the XML detector outside file-type:
26
+ ```js
27
+ import {XmlTextDetector} from '@file-type/xml';
28
+ const xmlTextDetector = new XmlTextDetector();
29
+ xmlTextDetector.write('<svg xmlns="http://www.w3.org/2000/svg"><path fill="#00CD9F"/></svg>');
30
+ const fileType = xmlTextDetector.fileType;
31
+ console.log(JSON.stringify(fileType)); // Outputs: {"ext":"svg","mime":"image/svg+xml"}
32
+ ```
33
+
34
+ ## Supported file formats
35
+
36
+ - [XML](https://en.wikipedia.org/wiki/XML) (default for XML, unless more specific format was detected)
37
+ - [GML (Geography Markup Language)](https://en.wikipedia.org/wiki/Geography_Markup_Language)
38
+ - [KML (Keyhole Markup Language)](https://en.wikipedia.org/wiki/Keyhole_Markup_Language)
39
+ - [MusicXML, Uncompressed](https://en.wikipedia.org/wiki/MusicXML)
40
+ - [RSS (RDF Site Summary or Really Simple Syndication)](https://en.wikipedia.org/wiki/RSS)
41
+ - [SVG (Scalable Vector Graphics)](https://en.wikipedia.org/wiki/SVG)
42
+ - [XHTML](https://en.wikipedia.org/wiki/XHTML)
package/lib/index.js CHANGED
@@ -15,13 +15,17 @@ function startsWith(array, prefix) {
15
15
 
16
16
  function isXml(array) {
17
17
  if (startsWith(array,[60, 63, 120, 109, 108, 32])) {
18
- return {xml: true, encoding: 'utf-8'}
18
+ return {xml: true, encoding: 'utf-8', offset: 0}
19
19
  } else if (startsWith(array,[0xEF, 0xBB, 0xBF, 60, 63, 120, 109, 108, 32])) { // UTF-8 BOM
20
- return {xml: true, encoding: 'utf-8'}
20
+ return {xml: true, encoding: 'utf-8', offset: 3}
21
21
  } else if (startsWith(array,[0xFE, 0xFF, 0, 60, 0, 63, 0, 120, 0, 109, 0, 108, 0, 32 ])) {
22
- return {xml: true, encoding: 'utf-16be'}
22
+ return {xml: true, encoding: 'utf-16be', offset: 2}
23
23
  } else if (startsWith(array,[0xFF, 0xFE, 60, 0, 63, 0, 120, 0, 109, 0, 108, 0, 32, 0 ])) {
24
- return {xml: true, encoding: 'utf-16le'}
24
+ return {xml: true, encoding: 'utf-16le', offset: 2}
25
+ } else if (startsWith(array,[0, 60, 0, 63, 0, 120, 0, 109, 0, 108, 0, 32 ])) {
26
+ return {xml: true, encoding: 'utf-16be', offset: 0}
27
+ } else if (startsWith(array,[60, 0, 63, 0, 120, 0, 109, 0, 108, 0, 32, 0 ])) {
28
+ return {xml: true, encoding: 'utf-16le', offset: 0}
25
29
  }
26
30
  return {xml: false, encoding: undefined}
27
31
  }
@@ -58,7 +62,8 @@ const namespaceMapping = {
58
62
  }
59
63
 
60
64
  /**
61
- * Maps the root element name to corresponding file-type
65
+ * Maps the root element name to corresponding file-type.
66
+ * Used for Non-namespaced XML
62
67
  * @type {{rss: {ext: string, mime: string}}}
63
68
  */
64
69
  const rootNameMapping = {
@@ -69,50 +74,73 @@ const rootNameMapping = {
69
74
  'score-partwise': {
70
75
  ext: 'musicxml',
71
76
  mime: 'application/vnd.recordare.musicxml+xml',
72
- }
77
+ },
78
+ svg: {
79
+ ext: 'svg',
80
+ mime: 'image/svg+xml',
81
+ },
73
82
  }
74
83
 
75
- export const detectXml = async tokenizer => {
76
-
77
- const buffer = new Uint8Array(512);
78
-
79
- // Increase sample size from 12 to 256.
80
- await tokenizer.peekBuffer(buffer, {length: 128, mayBeLess: true});
81
-
82
- const {xml, encoding} = isXml(buffer);
84
+ export class XmlTextDetector {
83
85
 
84
- if (xml) {
85
- let fileType;
86
+ constructor() {
87
+ this.firstTag = true;
88
+ this.onEnd = false;
89
+ this.parser = sax.parser(true);
90
+ this.depth = 0;
91
+ this.validClose = false;
86
92
 
87
- const parser = sax.parser(true);
88
-
89
- let firstTag = true;
90
- let onEnd = false;
91
-
92
- parser.onerror = e => {
93
- onEnd = true;
93
+ this.parser.onerror = e => {
94
+ this.onEnd = true;
94
95
  };
95
- parser.onopentag = node => {
96
- if (!firstTag) {
96
+ this.parser.onopentag = node => {
97
+ ++this.depth;
98
+ if (!this.firstTag || this.onEnd) {
97
99
  return;
98
100
  }
99
- firstTag = false;
101
+ this.firstTag = false;
100
102
  const nsNode = extractNsElement(node);
101
103
  if (nsNode.ns) {
102
104
  // Resolve file-type by root element namespace
103
- fileType = namespaceMapping[nsNode.ns];
105
+ this.fileType = namespaceMapping[nsNode.ns.toLowerCase()];
104
106
  } else {
105
107
  // Fall back on element name if there is no namespace
106
- fileType = rootNameMapping[nsNode.name];
108
+ this.fileType = rootNameMapping[nsNode.name?.toLowerCase()];
107
109
  }
108
110
 
109
- if (fileType) {
110
- onEnd = true;
111
+ if (this.fileType) {
112
+ this.onEnd = true;
111
113
  }
112
114
  };
113
- parser.onend = () => {
114
- onEnd = true;
115
+ this.parser.onend = () => {
116
+ this.onEnd = true;
115
117
  };
118
+ }
119
+
120
+ write(text) {
121
+ this.parser.write(text);
122
+ }
123
+
124
+ close() {
125
+ this.parser.close();
126
+ this.onEnd = true;
127
+ }
128
+ }
129
+
130
+ export const detectXml = async tokenizer => {
131
+
132
+ const buffer = new Uint8Array(512);
133
+
134
+ // Peek at up to 128 bytes to check for an XML declaration and its encoding.
135
+ await tokenizer.peekBuffer(buffer, {length: 128, mayBeLess: true});
136
+
137
+ const {xml, encoding, offset} = isXml(buffer);
138
+
139
+ if (xml) {
140
+ await tokenizer.ignore(offset);
141
+ let fileType;
142
+
143
+ const xmlTextDetector = new XmlTextDetector();
116
144
 
117
145
  const textDecoder = new TextDecoder(encoding);
118
146
 
@@ -120,17 +148,17 @@ export const detectXml = async tokenizer => {
120
148
  const len = await tokenizer.readBuffer(buffer, {mayBeLess: true});
121
149
  const portion = buffer.subarray(0, len);
122
150
  const text = textDecoder.decode(portion);
123
- parser.write(text);
151
+ xmlTextDetector.write(text);
124
152
  if (len < buffer.length) {
125
- parser.close();
126
- onEnd = true;
153
+ xmlTextDetector.close();
127
154
  }
128
- } while(!onEnd)
155
+ } while(!xmlTextDetector.onEnd)
129
156
 
130
- return fileType ?? {
157
+ return xmlTextDetector.fileType ?? {
131
158
  ext: 'xml',
132
159
  mime: 'application/xml',
133
160
  }
134
161
  }
135
162
 
136
- };
163
+ };
164
+
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@file-type/xml",
3
- "version": "0.1.1",
3
+ "version": "0.2.0",
4
4
  "description": "XML detection plugin",
5
5
  "type": "module",
6
6
  "exports": "./lib/index.js",
@@ -9,13 +9,18 @@
9
9
  },
10
10
  "keywords": [
11
11
  "file-type",
12
+ "detect",
12
13
  "detection",
13
14
  "detector",
14
15
  "XML",
16
+ "signature",
17
+ "namespace",
15
18
  "SVG",
16
19
  "XHTML",
17
20
  "RSS",
18
- "KML"
21
+ "KML",
22
+ "GML",
23
+ "MusicXML"
19
24
  ],
20
25
  "dependencies": {
21
26
  "sax": "^1.4.1",