@file-type/xml 0.2.1 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +1 -1
- package/lib/index.d.ts +26 -0
- package/lib/index.js +133 -151
- package/package.json +55 -46
package/README.md
CHANGED
|
@@ -24,7 +24,7 @@ console.log(fileType);
|
|
|
24
24
|
|
|
25
25
|
You can also use the XML detector outside file-type:
|
|
26
26
|
```js
|
|
27
|
-
import {XmlTextDetector} from '
|
|
27
|
+
import {XmlTextDetector} from '@file-type/xml';
|
|
28
28
|
|
|
29
29
|
xmlTextDetector.write('<svg xmlns="http://www.w3.org/2000/svg"><path fill="#00CD9F"/></svg>');
|
|
30
30
|
const fileType = xmlTextDetector.fileType;
|
package/lib/index.d.ts
ADDED
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
import { ITokenizer } from 'strtok3';
|
|
2
|
+
/**
|
|
3
|
+
* Temporary, should be taken from `file-type`
|
|
4
|
+
*/
|
|
5
|
+
interface FileTypeResult {
|
|
6
|
+
ext: string;
|
|
7
|
+
mime: string;
|
|
8
|
+
}
|
|
9
|
+
export type Detector = (tokenizer: ITokenizer, fileType?: FileTypeResult) => Promise<FileTypeResult | undefined>;
|
|
10
|
+
interface IXmlTextDetectorOptions {
|
|
11
|
+
fullScan?: boolean;
|
|
12
|
+
}
|
|
13
|
+
export declare class XmlTextDetector {
|
|
14
|
+
private options;
|
|
15
|
+
private firstTag;
|
|
16
|
+
private parser;
|
|
17
|
+
private nesting;
|
|
18
|
+
onEnd: boolean;
|
|
19
|
+
fileType?: FileTypeResult;
|
|
20
|
+
constructor(options?: IXmlTextDetectorOptions);
|
|
21
|
+
write(text: string): void;
|
|
22
|
+
close(): void;
|
|
23
|
+
isValid(): boolean;
|
|
24
|
+
}
|
|
25
|
+
export declare const detectXml: Detector;
|
|
26
|
+
export {};
|
package/lib/index.js
CHANGED
|
@@ -1,172 +1,154 @@
|
|
|
1
|
-
|
|
2
1
|
import sax from 'sax';
|
|
3
|
-
|
|
4
2
|
/**
 * Checks whether an indexable sequence begins with the given prefix.
 *
 * Elements are compared pairwise with strict equality, so a `Uint8Array`
 * can be compared against a plain array of byte values.
 *
 * @param array Sequence to inspect (anything with `length` and numeric indexing)
 * @param prefix Values expected at the very start of `array`
 * @returns `true` when every element of `prefix` matches the element of `array`
 *          at the same index; `false` when `prefix` is longer than `array`
 *          or any element differs
 */
function startsWith(array, prefix) {
    const needed = prefix.length;
    if (needed > array.length) {
        return false;
    }
    // Advance while the elements agree; reaching `needed` means a full match.
    let index = 0;
    while (index < needed && array[index] === prefix[index]) {
        index++;
    }
    return index === needed;
}
|
|
15
|
-
|
|
16
13
|
/**
 * Byte signatures of the text `<?xml ` in the supported encodings.
 * `offset` is the number of leading byte-order-mark bytes that precede the
 * declaration. Entries are tested in order; the first match wins.
 */
const xmlSignatures = [
    // Plain UTF-8 (or ASCII compatible)
    { bytes: [60, 63, 120, 109, 108, 32], encoding: 'utf-8', offset: 0 },
    // UTF-8 BOM
    { bytes: [0xEF, 0xBB, 0xBF, 60, 63, 120, 109, 108, 32], encoding: 'utf-8', offset: 3 },
    // UTF-16 big-endian with BOM
    { bytes: [0xFE, 0xFF, 0, 60, 0, 63, 0, 120, 0, 109, 0, 108, 0, 32], encoding: 'utf-16be', offset: 2 },
    // UTF-16 little-endian with BOM
    { bytes: [0xFF, 0xFE, 60, 0, 63, 0, 120, 0, 109, 0, 108, 0, 32, 0], encoding: 'utf-16le', offset: 2 },
    // UTF-16 big-endian without BOM
    { bytes: [0, 60, 0, 63, 0, 120, 0, 109, 0, 108, 0, 32], encoding: 'utf-16be', offset: 0 },
    // UTF-16 little-endian without BOM
    { bytes: [60, 0, 63, 0, 120, 0, 109, 0, 108, 0, 32, 0], encoding: 'utf-16le', offset: 0 }
];
/**
 * Sniffs the start of a byte sequence for an XML declaration.
 *
 * @param array Leading bytes of the input
 * @returns `{ xml: true, encoding, offset }` when one of the known signatures
 *          matches, where `encoding` is a `TextDecoder` label and `offset` is
 *          the BOM length in bytes; otherwise `{ xml: false }`
 */
function isXml(array) {
    for (const { bytes, encoding, offset } of xmlSignatures) {
        if (startsWith(array, bytes)) {
            return { xml: true, encoding, offset };
        }
    }
    return { xml: false };
}
|
|
32
|
-
|
|
33
34
|
/**
 * Splits an element's qualified name and looks up the namespace declared for
 * it on that same element.
 *
 * @param node Opening-tag node exposing `name` and an `attributes` map
 * @returns `{ name, ns }` where `name` is the local name and `ns` is the value
 *          of `xmlns` (unprefixed name) or `xmlns:<prefix>` (prefixed name),
 *          `undefined` when that attribute is absent; returns `undefined`
 *          when the name contains more than one colon
 */
function extractNsElement(node) {
    const segments = node.name.split(':');
    switch (segments.length) {
        case 1: {
            // Unprefixed element: the default namespace applies.
            const [localName] = segments;
            return { name: localName, ns: node.attributes['xmlns'] };
        }
        case 2: {
            // Prefixed element: look for the declaration of that prefix.
            const [prefix, localName] = segments;
            return { name: localName, ns: node.attributes[`xmlns:${prefix}`] };
        }
        default:
            return undefined;
    }
}
|
|
41
|
-
|
|
42
43
|
/**
|
|
43
44
|
* Maps the root element namespace to corresponding file-type
|
|
44
45
|
*/
|
|
45
46
|
const namespaceMapping = Object.assign(
// The table is indexed with namespace strings taken from the scanned document.
// A null prototype guarantees that values such as "constructor" or "__proto__"
// cannot resolve to inherited Object.prototype members.
Object.create(null), {
    'http://www.w3.org/2000/svg': {
        ext: 'svg',
        mime: 'image/svg+xml'
    },
    'http://www.w3.org/1999/xhtml': {
        ext: 'xhtml',
        mime: 'application/xhtml+xml'
    },
    'http://www.opengis.net/kml/2.2': {
        ext: 'kml',
        mime: 'application/vnd.google-earth.kml+xml'
    },
    'http://www.opengis.net/gml': {
        ext: 'gml',
        mime: 'application/gml+xml'
    }
});
|
|
64
64
|
/**
|
|
65
65
|
* Maps the root element name to corresponding file-type.
|
|
66
66
|
* Used for Non-namespaced XML
|
|
67
67
|
* @type {{rss: {ext: string, mime: string}}}
|
|
68
68
|
*/
|
|
69
69
|
const rootNameMapping = Object.assign(
// The table is indexed with the root element name of the scanned document.
// A null prototype guarantees that a root element named e.g. "constructor"
// cannot resolve to an inherited Object.prototype member.
Object.create(null), {
    rss: {
        ext: 'rss',
        mime: 'application/rss+xml'
    },
    'score-partwise': {
        ext: 'musicxml',
        mime: 'application/vnd.recordare.musicxml+xml'
    },
    svg: {
        ext: 'svg',
        mime: 'image/svg+xml'
    }
});
|
|
84
83
|
/**
 * Returns `table[key]` only when `key` is an own property of `table`, so keys
 * taken from document content (e.g. "constructor") never resolve to members
 * inherited from `Object.prototype`.
 */
function lookupOwnMapping(table, key) {
    return Object.prototype.hasOwnProperty.call(table, key) ? table[key] : undefined;
}
/**
 * Incremental XML type detector.
 *
 * Feed decoded text with `write()`; the first opening tag (the root element)
 * is mapped to a file type via its namespace or, if it declares none, via its
 * name. Read the outcome from `fileType` once `onEnd` is `true`.
 */
export class XmlTextDetector {
    /**
     * @param options Optional settings. With `fullScan` set, `onEnd` is not
     *                raised when the root element has been recognised, so the
     *                caller keeps feeding text until an error or `close()`.
     */
    constructor(options) {
        this.options = options ?? {};
        this.firstTag = true;
        this.onEnd = false;
        this.parser = sax.parser(true);
        this.nesting = 0;
        this.parser.onerror = e => {
            // sax stores the reported error on `parser.error` and rethrows it
            // from the next write()/close() unless it is cleared. Clear it here,
            // otherwise a tolerated error still aborts the scan with an exception.
            this.parser.resume();
            if (e.message.startsWith('Invalid character entity')) { // Allow entity reference
                return;
            }
            // Any other parse error: give up on a specific type and stop scanning.
            this.fileType = undefined;
            this.onEnd = true;
        };
        this.parser.onopentag = node => {
            ++this.nesting;
            if (!this.firstTag || this.onEnd) {
                return;
            }
            // Only the root element is used for type resolution.
            this.firstTag = false;
            const nsNode = extractNsElement(node);
            if (nsNode?.ns) {
                // Resolve file-type by root element namespace
                this.fileType = lookupOwnMapping(namespaceMapping, nsNode.ns.toLowerCase());
            }
            else if (nsNode && nsNode.name) {
                // Fall back on element name if there is no namespace
                this.fileType = lookupOwnMapping(rootNameMapping, nsNode.name.toLowerCase());
            }
            if (this.fileType && !this.options.fullScan) {
                // Type is known and no full scan was requested: no more input needed.
                this.onEnd = true;
            }
        };
        this.parser.onclosetag = () => {
            --this.nesting;
        };
    }
    /**
     * Feeds the next portion of decoded XML text to the parser.
     * @param text Text chunk; chunks may split the document at any position
     */
    write(text) {
        this.parser.write(text);
    }
    /**
     * Signals the end of input to the parser and marks the detection as finished.
     */
    close() {
        this.parser.close();
        this.onEnd = true;
    }
    /**
     * @returns `true` when every opening tag seen so far has been matched by a closing tag
     */
    isValid() {
        return this.nesting === 0;
    }
}
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
xmlTextDetector.close();
|
|
162
|
-
}
|
|
163
|
-
} while(!xmlTextDetector.onEnd)
|
|
164
|
-
|
|
165
|
-
return xmlTextDetector.fileType ?? {
|
|
166
|
-
ext: 'xml',
|
|
167
|
-
mime: 'application/xml',
|
|
168
|
-
}
|
|
169
|
-
}
|
|
170
|
-
|
|
131
|
+
/**
 * Detector for XML based formats.
 *
 * Sniffs the start of the stream for an XML declaration (`<?xml `, optionally
 * preceded by a byte order mark). When found, the document is decoded and fed
 * chunk by chunk to an `XmlTextDetector` until that detector reports it is done.
 *
 * @param tokenizer Tokenizer positioned at the start of the data
 * @returns The specific file type derived from the root element, the generic
 *          `{ ext: 'xml', mime: 'application/xml' }` when the root element is
 *          not recognised, or `undefined` when no XML declaration is found
 */
export const detectXml = async (tokenizer) => {
    const buffer = new Uint8Array(512);
    // Peek (without consuming) at up to 128 bytes; only the leading signature
    // bytes are inspected, the rest of the zero-filled buffer is ignored.
    await tokenizer.peekBuffer(buffer, { length: 128, mayBeLess: true });
    const xmlDetection = isXml(buffer);
    if (!xmlDetection.xml) {
        return undefined;
    }
    // Skip the byte order mark, if any.
    await tokenizer.ignore(xmlDetection.offset);
    const xmlTextDetector = new XmlTextDetector();
    const textDecoder = new TextDecoder(xmlDetection.encoding);
    do {
        const len = await tokenizer.readBuffer(buffer, { mayBeLess: true });
        // A short read means the end of the input has been reached.
        const isLastChunk = len < buffer.length;
        // Decode in streaming mode so a multi-byte character split across two
        // chunks is carried over instead of being replaced by U+FFFD; the last
        // chunk is decoded without streaming to flush the decoder.
        const text = textDecoder.decode(buffer.subarray(0, len), { stream: !isLastChunk });
        xmlTextDetector.write(text);
        if (isLastChunk) {
            xmlTextDetector.close();
        }
    } while (!xmlTextDetector.onEnd);
    return xmlTextDetector.fileType ?? {
        ext: 'xml',
        mime: 'application/xml'
    };
};
|
|
172
|
-
|
package/package.json
CHANGED
|
@@ -1,48 +1,57 @@
|
|
|
1
1
|
{
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
2
|
+
"name": "@file-type/xml",
|
|
3
|
+
"version": "0.3.0",
|
|
4
|
+
"description": "XML detection plugin",
|
|
5
|
+
"type": "module",
|
|
6
|
+
"types": "./lib/index.d.ts",
|
|
7
|
+
"exports": "./lib/index.js",
|
|
8
|
+
"scripts": {
|
|
9
|
+
"clean": "del-cli 'lib/**/*.js' 'lib/**/*.js.map' 'lib/**/*.d.ts' 'src/**/*.d.ts'",
|
|
10
|
+
"compile-src": "tsc -p lib",
|
|
11
|
+
"compile": "yarn run compile-src",
|
|
12
|
+
"build": "yarn run clean && yarn compile",
|
|
13
|
+
"test": "mocha"
|
|
14
|
+
},
|
|
15
|
+
"keywords": [
|
|
16
|
+
"file-type",
|
|
17
|
+
"detect",
|
|
18
|
+
"detection",
|
|
19
|
+
"detector",
|
|
20
|
+
"XML",
|
|
21
|
+
"signature",
|
|
22
|
+
"namespace",
|
|
23
|
+
"SVG",
|
|
24
|
+
"XHTML",
|
|
25
|
+
"RSS",
|
|
26
|
+
"KML",
|
|
27
|
+
"GML",
|
|
28
|
+
"MusicXML"
|
|
29
|
+
],
|
|
30
|
+
"dependencies": {
|
|
31
|
+
"sax": "^1.4.1",
|
|
32
|
+
"strtok3": "^10.0.1"
|
|
33
|
+
},
|
|
34
|
+
"devDependencies": {
|
|
35
|
+
"@types/sax": "^1.2.7",
|
|
36
|
+
"chai": "^5.1.2",
|
|
37
|
+
"del-cli": "^6.0.0",
|
|
38
|
+
"file-type": "^19.6.0",
|
|
39
|
+
"mocha": "^11.0.1",
|
|
40
|
+
"typescript": "^5.7.2"
|
|
41
|
+
},
|
|
42
|
+
"files": [
|
|
43
|
+
"lib/**/*.js",
|
|
44
|
+
"lib/**/*.d.ts",
|
|
45
|
+
"lib/*.cjs"
|
|
46
|
+
],
|
|
47
|
+
"author": {
|
|
48
|
+
"name": "Borewit",
|
|
49
|
+
"url": "https://github.com/Borewit"
|
|
50
|
+
},
|
|
51
|
+
"repository": {
|
|
52
|
+
"type": "git",
|
|
53
|
+
"url": "git+https://github.com/Borewit/file-type-xml.git"
|
|
54
|
+
},
|
|
55
|
+
"license": "MIT",
|
|
56
|
+
"packageManager": "yarn@4.5.3"
|
|
48
57
|
}
|