@file-type/xml 0.1.1 → 0.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +42 -25
- package/lib/index.js +75 -39
- package/package.json +7 -2
package/README.md
CHANGED
|
@@ -1,25 +1,42 @@
|
|
|
1
|
-
[](https://npmjs.org/package/@file-type/xml)
|
|
2
|
-
[](https://github.com/Borewit/file-type-xml/actions/workflows/nodejs-ci.yml)
|
|
3
|
-
# @file-type/xml
|
|
4
|
-
|
|
5
|
-
Detector plugin for [file-type](https://github.com/sindresorhus/file-type) for XML files.
|
|
6
|
-
|
|
7
|
-
## Installation
|
|
8
|
-
|
|
9
|
-
```bash
|
|
10
|
-
npm install @file-type/xml
|
|
11
|
-
```
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
The following example shows how add the XML detector to [file-type](https://github.com/sindresorhus/file-type).
|
|
16
|
-
```js
|
|
17
|
-
import {NodeFileTypeParser} from 'file-type';
|
|
18
|
-
import {detectXml} from '@file-type/xml';
|
|
19
|
-
|
|
20
|
-
const parser = new NodeFileTypeParser({customDetectors: [detectXml]});
|
|
21
|
-
const fileType = await parser.fromFile('example.kml');
|
|
22
|
-
console.log(fileType);
|
|
23
|
-
```
|
|
24
|
-
|
|
25
|
-
|
|
1
|
+
[](https://npmjs.org/package/@file-type/xml)
|
|
2
|
+
[](https://github.com/Borewit/file-type-xml/actions/workflows/nodejs-ci.yml)
|
|
3
|
+
# @file-type/xml
|
|
4
|
+
|
|
5
|
+
Detector plugin for [file-type](https://github.com/sindresorhus/file-type) for XML files.
|
|
6
|
+
|
|
7
|
+
## Installation
|
|
8
|
+
|
|
9
|
+
```bash
|
|
10
|
+
npm install @file-type/xml
|
|
11
|
+
```
|
|
12
|
+
|
|
13
|
+
## Usage
|
|
14
|
+
|
|
15
|
+
The following example shows how to add the XML detector to [file-type](https://github.com/sindresorhus/file-type).
|
|
16
|
+
```js
|
|
17
|
+
import {NodeFileTypeParser} from 'file-type';
|
|
18
|
+
import {detectXml} from '@file-type/xml';
|
|
19
|
+
|
|
20
|
+
const parser = new NodeFileTypeParser({customDetectors: [detectXml]});
|
|
21
|
+
const fileType = await parser.fromFile('example.kml');
|
|
22
|
+
console.log(fileType);
|
|
23
|
+
```
|
|
24
|
+
|
|
25
|
+
You can also use the XML detector outside file-type:
|
|
26
|
+
```js
|
|
27
|
+
import {XmlTextDetector} from '@file-type/xml';
|
|
28
|
+
|
|
29
|
+
const xmlTextDetector = new XmlTextDetector();
xmlTextDetector.write('<svg xmlns="http://www.w3.org/2000/svg"><path fill="#00CD9F"/></svg>');
|
|
30
|
+
const fileType = xmlTextDetector.fileType;
|
|
31
|
+
console.log(JSON.stringify(fileType)); // Outputs: {"ext":"svg","mime":"image/svg+xml"}
|
|
32
|
+
```
|
|
33
|
+
|
|
34
|
+
## Supported file formats
|
|
35
|
+
|
|
36
|
+
- [XML](https://en.wikipedia.org/wiki/XML) (the default for XML, unless a more specific format is detected)
|
|
37
|
+
- [GML (Geography Markup Language)](https://en.wikipedia.org/wiki/Geography_Markup_Language)
|
|
38
|
+
- [KML (Keyhole Markup Language)](https://en.wikipedia.org/wiki/Keyhole_Markup_Language)
|
|
39
|
+
- [MusicXML, Uncompressed](https://en.wikipedia.org/wiki/MusicXML)
|
|
40
|
+
- [RSS (RDF Site Summary or Really Simple Syndication)](https://en.wikipedia.org/wiki/RSS)
|
|
41
|
+
- [SVG (Scalable Vector Graphics)](https://en.wikipedia.org/wiki/SVG)
|
|
42
|
+
- [XHTML](https://en.wikipedia.org/wiki/XHTML)
|
package/lib/index.js
CHANGED
|
@@ -15,13 +15,17 @@ function startsWith(array, prefix) {
|
|
|
15
15
|
|
|
16
16
|
function isXml(array) {
|
|
17
17
|
if (startsWith(array,[60, 63, 120, 109, 108, 32])) {
|
|
18
|
-
return {xml: true, encoding: 'utf-8'}
|
|
18
|
+
return {xml: true, encoding: 'utf-8', offset: 0}
|
|
19
19
|
} else if (startsWith(array,[0xEF, 0xBB, 0xBF, 60, 63, 120, 109, 108, 32])) { // UTF-8 BOM
|
|
20
|
-
return {xml: true, encoding: 'utf-8'}
|
|
20
|
+
return {xml: true, encoding: 'utf-8', offset: 3}
|
|
21
21
|
} else if (startsWith(array,[0xFE, 0xFF, 0, 60, 0, 63, 0, 120, 0, 109, 0, 108, 0, 32 ])) {
|
|
22
|
-
return {xml: true, encoding: 'utf-16be'}
|
|
22
|
+
return {xml: true, encoding: 'utf-16be', offset: 2}
|
|
23
23
|
} else if (startsWith(array,[0xFF, 0xFE, 60, 0, 63, 0, 120, 0, 109, 0, 108, 0, 32, 0 ])) {
|
|
24
|
-
return {xml: true, encoding: 'utf-16le'}
|
|
24
|
+
return {xml: true, encoding: 'utf-16le', offset: 2}
|
|
25
|
+
} else if (startsWith(array,[0, 60, 0, 63, 0, 120, 0, 109, 0, 108, 0, 32 ])) {
|
|
26
|
+
return {xml: true, encoding: 'utf-16be', offset: 0}
|
|
27
|
+
} else if (startsWith(array,[60, 0, 63, 0, 120, 0, 109, 0, 108, 0, 32, 0 ])) {
|
|
28
|
+
return {xml: true, encoding: 'utf-16le', offset: 0}
|
|
25
29
|
}
|
|
26
30
|
return {xml: false, encoding: undefined}
|
|
27
31
|
}
|
|
@@ -58,7 +62,8 @@ const namespaceMapping = {
|
|
|
58
62
|
}
|
|
59
63
|
|
|
60
64
|
/**
|
|
61
|
-
* Maps the root element name to corresponding file-type
|
|
65
|
+
* Maps the root element name to corresponding file-type.
|
|
66
|
+
* Used for non-namespaced XML.
|
|
62
67
|
* @type {{rss: {ext: string, mime: string}}}
|
|
63
68
|
*/
|
|
64
69
|
const rootNameMapping = {
|
|
@@ -69,50 +74,81 @@ const rootNameMapping = {
|
|
|
69
74
|
'score-partwise': {
|
|
70
75
|
ext: 'musicxml',
|
|
71
76
|
mime: 'application/vnd.recordare.musicxml+xml',
|
|
72
|
-
}
|
|
77
|
+
},
|
|
78
|
+
svg: {
|
|
79
|
+
ext: 'svg',
|
|
80
|
+
mime: 'image/svg+xml',
|
|
81
|
+
},
|
|
73
82
|
}
|
|
74
83
|
|
|
75
|
-
export
|
|
76
|
-
|
|
77
|
-
const buffer = new Uint8Array(512);
|
|
78
|
-
|
|
79
|
-
// Increase sample size from 12 to 256.
|
|
80
|
-
await tokenizer.peekBuffer(buffer, {length: 128, mayBeLess: true});
|
|
81
|
-
|
|
82
|
-
const {xml, encoding} = isXml(buffer);
|
|
83
|
-
|
|
84
|
-
if (xml) {
|
|
85
|
-
let fileType;
|
|
86
|
-
|
|
87
|
-
const parser = sax.parser(true);
|
|
84
|
+
export class XmlTextDetector {
|
|
88
85
|
|
|
89
|
-
|
|
90
|
-
|
|
86
|
+
constructor(options) {
|
|
87
|
+
this.options = options ?? {};
|
|
88
|
+
this.firstTag = true;
|
|
89
|
+
this.onEnd = false;
|
|
90
|
+
this.parser = sax.parser(true);
|
|
91
|
+
this.nesting = 0;
|
|
91
92
|
|
|
92
|
-
parser.onerror = e => {
|
|
93
|
-
|
|
93
|
+
this.parser.onerror = e => {
|
|
94
|
+
if (e.message.startsWith('Invalid character entity')) { // Allow entity reference
|
|
95
|
+
return;
|
|
96
|
+
}
|
|
97
|
+
this.fileType = undefined;
|
|
98
|
+
this.onEnd = true;
|
|
94
99
|
};
|
|
95
|
-
parser.onopentag = node => {
|
|
96
|
-
|
|
100
|
+
this.parser.onopentag = node => {
|
|
101
|
+
++this.nesting;
|
|
102
|
+
if (!this.firstTag || this.onEnd) {
|
|
97
103
|
return;
|
|
98
104
|
}
|
|
99
|
-
firstTag = false;
|
|
105
|
+
this.firstTag = false;
|
|
100
106
|
const nsNode = extractNsElement(node);
|
|
101
107
|
if (nsNode.ns) {
|
|
102
108
|
// Resolve file-type by root element namespace
|
|
103
|
-
fileType = namespaceMapping[nsNode.ns];
|
|
109
|
+
this.fileType = namespaceMapping[nsNode.ns.toLowerCase()];
|
|
104
110
|
} else {
|
|
105
111
|
// Fall back on element name if there is no namespace
|
|
106
|
-
fileType = rootNameMapping[nsNode.name];
|
|
112
|
+
this.fileType = rootNameMapping[nsNode.name?.toLowerCase()];
|
|
107
113
|
}
|
|
108
114
|
|
|
109
|
-
if (fileType) {
|
|
110
|
-
onEnd = true;
|
|
115
|
+
if (this.fileType && !this.options.fullScan) {
|
|
116
|
+
this.onEnd = true;
|
|
111
117
|
}
|
|
112
118
|
};
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
119
|
+
|
|
120
|
+
this.parser.onclosetag = () => {
|
|
121
|
+
--this.nesting;
|
|
122
|
+
}
|
|
123
|
+
}
|
|
124
|
+
|
|
125
|
+
write(text) {
|
|
126
|
+
this.parser.write(text);
|
|
127
|
+
}
|
|
128
|
+
|
|
129
|
+
close() {
|
|
130
|
+
this.parser.close();
|
|
131
|
+
this.onEnd = true;
|
|
132
|
+
}
|
|
133
|
+
|
|
134
|
+
isValid() {
|
|
135
|
+
return this.nesting === 0;
|
|
136
|
+
}
|
|
137
|
+
}
|
|
138
|
+
|
|
139
|
+
export const detectXml = async tokenizer => {
|
|
140
|
+
|
|
141
|
+
const buffer = new Uint8Array(512);
|
|
142
|
+
|
|
143
|
+
// Peek at up to 128 bytes to check for a BOM and the XML declaration.
|
|
144
|
+
await tokenizer.peekBuffer(buffer, {length: 128, mayBeLess: true});
|
|
145
|
+
|
|
146
|
+
const {xml, encoding, offset} = isXml(buffer);
|
|
147
|
+
|
|
148
|
+
if (xml) {
|
|
149
|
+
await tokenizer.ignore(offset);
|
|
150
|
+
|
|
151
|
+
const xmlTextDetector = new XmlTextDetector();
|
|
116
152
|
|
|
117
153
|
const textDecoder = new TextDecoder(encoding);
|
|
118
154
|
|
|
@@ -120,17 +156,17 @@ export const detectXml = async tokenizer => {
|
|
|
120
156
|
const len = await tokenizer.readBuffer(buffer, {mayBeLess: true});
|
|
121
157
|
const portion = buffer.subarray(0, len);
|
|
122
158
|
const text = textDecoder.decode(portion);
|
|
123
|
-
|
|
159
|
+
xmlTextDetector.write(text);
|
|
124
160
|
if (len < buffer.length) {
|
|
125
|
-
|
|
126
|
-
onEnd = true;
|
|
161
|
+
xmlTextDetector.close();
|
|
127
162
|
}
|
|
128
|
-
} while(!onEnd)
|
|
163
|
+
} while(!xmlTextDetector.onEnd)
|
|
129
164
|
|
|
130
|
-
return fileType ?? {
|
|
165
|
+
return xmlTextDetector.fileType ?? {
|
|
131
166
|
ext: 'xml',
|
|
132
167
|
mime: 'application/xml',
|
|
133
168
|
}
|
|
134
169
|
}
|
|
135
170
|
|
|
136
|
-
};
|
|
171
|
+
};
|
|
172
|
+
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@file-type/xml",
|
|
3
|
-
"version": "0.1.1",
|
|
3
|
+
"version": "0.2.1",
|
|
4
4
|
"description": "XML detection plugin",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"exports": "./lib/index.js",
|
|
@@ -9,13 +9,18 @@
|
|
|
9
9
|
},
|
|
10
10
|
"keywords": [
|
|
11
11
|
"file-type",
|
|
12
|
+
"detect",
|
|
12
13
|
"detection",
|
|
13
14
|
"detector",
|
|
14
15
|
"XML",
|
|
16
|
+
"signature",
|
|
17
|
+
"namespace",
|
|
15
18
|
"SVG",
|
|
16
19
|
"XHTML",
|
|
17
20
|
"RSS",
|
|
18
|
-
"KML"
|
|
21
|
+
"KML",
|
|
22
|
+
"GML",
|
|
23
|
+
"MusicXML"
|
|
19
24
|
],
|
|
20
25
|
"dependencies": {
|
|
21
26
|
"sax": "^1.4.1",
|