@file-type/xml 0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (3) hide show
  1. package/README.md +25 -0
  2. package/lib/index.js +136 -0
  3. package/package.json +43 -0
package/README.md ADDED
@@ -0,0 +1,25 @@
1
+ [![NPM version](https://img.shields.io/npm/v/@file-type/xml.svg)](https://npmjs.org/package/@file-type/xml)
2
+ [![Node.js CI](https://github.com/Borewit/file-type-xml/actions/workflows/nodejs-ci.yml/badge.svg)](https://github.com/Borewit/file-type-xml/actions/workflows/nodejs-ci.yml)
3
+ # @file-type/xml
4
+
5
+ Detector plugin for [file-type](https://github.com/sindresorhus/file-type) for XML files.
6
+
7
+ ## Installation
8
+
9
+ ```bash
10
+ npm install @file-type/xml
11
+ ```
12
+
13
+ ### Usage
14
+
15
+ The following example shows how to add the XML detector to [file-type](https://github.com/sindresorhus/file-type).
16
+ ```js
17
+ import {NodeFileTypeParser} from 'file-type';
18
+ import {detectXml} from '@file-type/xml';
19
+
20
+ const parser = new NodeFileTypeParser({customDetectors: [detectXml]});
21
+ const fileType = await parser.fromFile('example.kml');
22
+ console.log(fileType);
23
+ ```
24
+
25
+
package/lib/index.js ADDED
@@ -0,0 +1,136 @@
1
+
2
+ import sax from 'sax';
3
+
4
/**
 * Tells whether `array` begins with the elements of `prefix`.
 * Elements are compared position by position using strict equality.
 * @param array - Indexable sequence to inspect (e.g. a Uint8Array)
 * @param prefix - Indexable sequence of expected leading elements
 * @returns {boolean} `true` when every element of `prefix` matches the element at the same index in `array`
 */
function startsWith(array, prefix) {
  const expected = prefix.length;
  // A prefix longer than the data can never match
  if (array.length < expected) {
    return false;
  }
  // Advance while the elements agree; stopping early means a mismatch was found
  let matched = 0;
  while (matched < expected && array[matched] === prefix[matched]) {
    matched++;
  }
  return matched === expected;
}
15
+
16
/**
 * Checks whether the given bytes start with an XML declaration (`<?xml ` followed by a space),
 * optionally preceded by a UTF-8 byte order mark, or encoded as UTF-16 with a byte order mark.
 * @param array - Leading bytes of the data to test
 * @returns {{xml: boolean, encoding: (string|undefined)}} `xml` tells whether a declaration was recognised;
 *   `encoding` is the matching text encoding label, or `undefined` when nothing matched
 */
function isXml(array) {
  // Candidate signatures, tried in order; the first match wins
  const signatures = [
    {encoding: 'utf-8', bytes: [60, 63, 120, 109, 108, 32]},
    {encoding: 'utf-8', bytes: [0xEF, 0xBB, 0xBF, 60, 63, 120, 109, 108, 32]}, // UTF-8 BOM
    {encoding: 'utf-16be', bytes: [0xFE, 0xFF, 0, 60, 0, 63, 0, 120, 0, 109, 0, 108, 0, 32]},
    {encoding: 'utf-16le', bytes: [0xFF, 0xFE, 60, 0, 63, 0, 120, 0, 109, 0, 108, 0, 32, 0]},
  ];

  for (const signature of signatures) {
    if (startsWith(array, signature.bytes)) {
      return {xml: true, encoding: signature.encoding};
    }
  }

  return {xml: false, encoding: undefined};
}
28
+
29
/**
 * Splits a tag name into its local name and looks up the namespace declared for it on the same element.
 *
 * The name is split at the first colon only, so a result object is always returned,
 * even for names containing several colons. Callers read `.ns` / `.name` unconditionally.
 *
 * @param node - Opening tag with a `name` string and an `attributes` map (attribute name to value)
 * @returns {{name: string, ns: (string|undefined)}} Local element name, and the value of the matching
 *   `xmlns` / `xmlns:<prefix>` attribute, or `undefined` when the element does not declare it
 */
function extractNsElement(node) {
  const separator = node.name.indexOf(':');
  if (separator === -1) {
    // Unprefixed element: the default namespace applies
    return {name: node.name, ns: node.attributes['xmlns']};
  }
  const prefix = node.name.slice(0, separator);
  return {
    name: node.name.slice(separator + 1),
    ns: node.attributes[`xmlns:${prefix}`],
  };
}
37
+
38
/**
 * Maps the root element namespace to the corresponding file-type.
 *
 * The table has no prototype: it is indexed with namespace strings taken from the parsed document,
 * and an inherited key such as `constructor` must not resolve to a value.
 * @type {Record<string, {ext: string, mime: string}>}
 */
const namespaceMapping = Object.assign(Object.create(null), {
  'http://www.w3.org/2000/svg': {
    ext: 'svg',
    mime: 'image/svg+xml',
  },
  'http://www.w3.org/1999/xhtml': {
    ext: 'xhtml',
    mime: 'application/xhtml+xml',
  },
  'http://www.opengis.net/kml/2.2': {
    ext: 'kml',
    mime: 'application/vnd.google-earth.kml+xml',
  },
  'http://www.opengis.net/gml': {
    ext: 'gml',
    mime: 'application/gml+xml',
  },
});
59
+
60
/**
 * Maps the root element name to the corresponding file-type.
 * Used as a fallback when the root element declares no namespace.
 *
 * The table has no prototype: it is indexed with element names taken from the parsed document,
 * and an inherited key such as `constructor` must not resolve to a value.
 * @type {Record<string, {ext: string, mime: string}>}
 */
const rootNameMapping = Object.assign(Object.create(null), {
  rss: {
    ext: 'rss',
    mime: 'application/rss+xml',
  },
  'score-partwise': {
    ext: 'musicxml',
    mime: 'application/vnd.recordare.musicxml+xml',
  },
});
74
+
75
/**
 * Custom detector: recognises XML documents and, where possible, the specific XML based format.
 *
 * The data must start with an XML declaration (see `isXml`). The document is then fed to a SAX parser
 * until the root element has been seen; the root element's namespace, or its name when it has no
 * namespace, selects the file-type.
 *
 * @param tokenizer - Tokenizer providing `peekBuffer` and `readBuffer`; its read position is advanced
 *   when the data is recognised as XML
 * @returns {Promise<{ext: string, mime: string}|undefined>} The detected file-type, generic
 *   `application/xml` when the root element is not recognised, or `undefined` when the data is not XML
 */
export const detectXml = async tokenizer => {
  const buffer = new Uint8Array(512);

  // Peek (without consuming) at up to 128 leading bytes to look for the XML declaration
  await tokenizer.peekBuffer(buffer, {length: 128, mayBeLess: true});

  const {xml, encoding} = isXml(buffer);
  if (!xml) {
    return undefined;
  }

  // Own-property lookup: the key comes from the document and must not hit inherited members
  const lookup = (table, key) =>
    Object.prototype.hasOwnProperty.call(table, key) ? table[key] : undefined;

  let fileType;
  let firstTag = true;
  let done = false;

  const parser = sax.parser(true);

  parser.onerror = () => {
    done = true;
  };
  parser.onopentag = node => {
    if (!firstTag) {
      return;
    }
    firstTag = false;
    const nsNode = extractNsElement(node);
    if (nsNode?.ns) {
      // Resolve file-type based on the root element namespace
      fileType = lookup(namespaceMapping, nsNode.ns);
    } else {
      // Fall back on element name if there is no namespace
      fileType = lookup(rootNameMapping, nsNode?.name);
    }
    // Only the root element is inspected, so nothing after it can change the outcome
    done = true;
  };
  parser.onend = () => {
    done = true;
  };

  const textDecoder = new TextDecoder(encoding);

  while (!done) {
    const len = await tokenizer.readBuffer(buffer, {mayBeLess: true});
    const endOfData = len < buffer.length;
    // Streaming mode keeps a character that straddles two chunks intact
    const text = textDecoder.decode(buffer.subarray(0, len), {stream: !endOfData});
    parser.write(text);
    if (endOfData) {
      // Per the sax documentation an error stays on `parser.error` and blocks further parsing
      // until cleared, so only close a parser that has not already finished or failed
      if (!done) {
        parser.close();
      }
      done = true;
    }
  }

  return fileType ?? {
    ext: 'xml',
    mime: 'application/xml',
  };
};
package/package.json ADDED
@@ -0,0 +1,43 @@
1
+ {
2
+ "name": "@file-type/xml",
3
+ "version": "0.1.1",
4
+ "description": "XML detection plugin",
5
+ "type": "module",
6
+ "exports": "./lib/index.js",
7
+ "scripts": {
8
+ "test": "mocha"
9
+ },
10
+ "keywords": [
11
+ "file-type",
12
+ "detection",
13
+ "detector",
14
+ "XML",
15
+ "SVG",
16
+ "XHTML",
17
+ "RSS",
18
+ "KML"
19
+ ],
20
+ "dependencies": {
21
+ "sax": "^1.4.1",
22
+ "strtok3": "^10.0.1"
23
+ },
24
+ "devDependencies": {
25
+ "chai": "^5.1.2",
26
+ "mocha": "^11.0.1"
27
+ },
28
+ "files": [
29
+ "lib/**/*.js",
30
+ "lib/**/*.d.ts",
31
+ "lib/*.cjs"
32
+ ],
33
+ "author": {
34
+ "name": "Borewit",
35
+ "url": "https://github.com/Borewit"
36
+ },
37
+ "repository": {
38
+ "type": "git",
39
+ "url": "git+https://github.com/Borewit/file-type-xml.git"
40
+ },
41
+ "license": "MIT",
42
+ "packageManager": "yarn@4.5.3"
43
+ }