music-metadata 10.1.0 → 10.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,10 +1,8 @@
1
- import { Float32_BE, Float64_BE, StringType, UINT8 } from 'token-types';
2
1
  import initDebug from 'debug';
3
- import { EndOfStreamError } from 'strtok3';
4
2
  import { BasicParser } from '../common/BasicParser.js';
5
- import * as matroskaDtd from './MatroskaDtd.js';
6
- import { DataType, TargetType, TrackType } from './types.js';
7
- import * as Token from 'token-types';
3
+ import { matroskaDtd } from './MatroskaDtd.js';
4
+ import { TargetType, TrackType } from './types.js';
5
+ import { EbmlIterator, ParseAction } from '../ebml/EbmlIterator.js';
8
6
  const debug = initDebug('music-metadata:parser:matroska');
9
7
  /**
10
8
  * Extensible Binary Meta Language (EBML) parser
@@ -15,17 +13,13 @@ const debug = initDebug('music-metadata:parser:matroska');
15
13
  */
16
14
  export class MatroskaParser extends BasicParser {
17
15
  constructor() {
18
- super();
19
- this.padding = 0;
20
- this.parserMap = new Map();
21
- this.ebmlMaxIDLength = 4;
22
- this.ebmlMaxSizeLength = 8;
23
- this.parserMap.set(DataType.uint, e => this.readUint(e));
24
- this.parserMap.set(DataType.string, e => this.readString(e));
25
- this.parserMap.set(DataType.binary, e => this.readBuffer(e));
26
- this.parserMap.set(DataType.uid, async (e) => this.readBuffer(e));
27
- this.parserMap.set(DataType.bool, e => this.readFlag(e));
28
- this.parserMap.set(DataType.float, e => this.readFloat(e));
16
+ super(...arguments);
17
+ this.seekHeadOffset = 0;
18
+ /**
19
+ * Use index to skip multiple segment/cluster elements at once.
20
+ * Significant performance impact
21
+ */
22
+ this.flagUseIndexToSkipClusters = false;
29
23
  }
30
24
  /**
31
25
  * Initialize parser with output (metadata), input (tokenizer) & parsing options (options).
@@ -35,206 +29,127 @@ export class MatroskaParser extends BasicParser {
35
29
  */
36
30
  init(metadata, tokenizer, options) {
37
31
  super.init(metadata, tokenizer, options);
32
+ this.flagUseIndexToSkipClusters = options.mkvUseIndex ?? false;
38
33
  return this;
39
34
  }
40
35
  async parse() {
41
- var _a;
42
- const containerSize = (_a = this.tokenizer.fileInfo.size) !== null && _a !== void 0 ? _a : Number.MAX_SAFE_INTEGER;
43
- const matroska = await this.parseContainer(matroskaDtd.elements, containerSize, []);
44
- this.metadata.setFormat('container', `EBML/${matroska.ebml.docType}`);
45
- if (matroska.segment) {
46
- const info = matroska.segment.info;
47
- if (info) {
48
- const timecodeScale = info.timecodeScale ? info.timecodeScale : 1000000;
49
- if (typeof info.duration === 'number') {
50
- const duration = info.duration * timecodeScale / 1000000000;
51
- await this.addTag('segment:title', info.title);
52
- this.metadata.setFormat('duration', Number(duration));
53
- }
54
- }
55
- const audioTracks = matroska.segment.tracks;
56
- if (audioTracks === null || audioTracks === void 0 ? void 0 : audioTracks.entries) {
57
- audioTracks.entries.forEach(entry => {
58
- const stream = {
59
- codecName: entry.codecID.replace('A_', '').replace('V_', ''),
60
- codecSettings: entry.codecSettings,
61
- flagDefault: entry.flagDefault,
62
- flagLacing: entry.flagLacing,
63
- flagEnabled: entry.flagEnabled,
64
- language: entry.language,
65
- name: entry.name,
66
- type: entry.trackType,
67
- audio: entry.audio,
68
- video: entry.video
69
- };
70
- this.metadata.addStreamInfo(stream);
71
- });
72
- const audioTrack = audioTracks.entries
73
- .filter(entry => entry.trackType === TrackType.audio)
74
- .reduce((acc, cur) => {
75
- if (!acc)
76
- return cur;
77
- if (cur.flagDefault && !acc.flagDefault)
78
- return cur;
79
- if (cur.trackNumber < acc.trackNumber)
80
- return cur;
81
- return acc;
82
- }, null);
83
- if (audioTrack) {
84
- this.metadata.setFormat('codec', audioTrack.codecID.replace('A_', ''));
85
- this.metadata.setFormat('sampleRate', audioTrack.audio.samplingFrequency);
86
- this.metadata.setFormat('numberOfChannels', audioTrack.audio.channels);
87
- }
88
- if (matroska.segment.tags) {
89
- await Promise.all(matroska.segment.tags.tag.map(async (tag) => {
90
- const target = tag.target;
91
- const targetType = (target === null || target === void 0 ? void 0 : target.targetTypeValue) ? TargetType[target.targetTypeValue] : ((target === null || target === void 0 ? void 0 : target.targetType) ? target.targetType : 'track');
92
- await Promise.all(tag.simpleTags.map(async (simpleTag) => {
93
- const value = simpleTag.string ? simpleTag.string : simpleTag.binary;
94
- await this.addTag(`${targetType}:${simpleTag.name}`, value);
95
- }));
96
- }));
97
- }
98
- if (matroska.segment.attachments) {
99
- await Promise.all(matroska.segment.attachments.attachedFiles
100
- .filter(file => file.mimeType.startsWith('image/'))
101
- .map(file => this.addTag('picture', {
102
- data: file.data,
103
- format: file.mimeType,
104
- description: file.description,
105
- name: file.name
106
- })));
107
- }
108
- }
109
- }
110
- }
111
- async parseContainer(container, posDone, path) {
112
- const tree = {};
113
- while (this.tokenizer.position < posDone) {
114
- let element;
115
- try {
116
- element = await this.readElement();
117
- }
118
- catch (error) {
119
- if (error instanceof EndOfStreamError) {
120
- break;
121
- }
122
- throw error;
123
- }
124
- const type = container[element.id];
125
- if (type) {
126
- debug(`Element: name=${type.name}, container=${!!type.container}`);
127
- if (type.container) {
128
- const res = await this.parseContainer(type.container, element.len >= 0 ? this.tokenizer.position + element.len : -1, path.concat([type.name]));
129
- if (type.multiple) {
130
- if (!tree[type.name]) {
131
- tree[type.name] = [];
36
+ const containerSize = this.tokenizer.fileInfo.size ?? Number.MAX_SAFE_INTEGER;
37
+ const matroskaIterator = new EbmlIterator(this.tokenizer);
38
+ debug('Initializing DTD end MatroskaIterator');
39
+ await matroskaIterator.iterate(matroskaDtd, containerSize, {
40
+ startNext: (element) => {
41
+ switch (element.id) {
42
+ // case 0x1f43b675: // cluster
43
+ case 0x1c53bb6b: // Cueing Data
44
+ debug(`Skip element: name=${element.name}, id=0x${element.id.toString(16)}`);
45
+ return ParseAction.IgnoreElement;
46
+ case 0x1f43b675: // cluster
47
+ if (this.flagUseIndexToSkipClusters && this.seekHead) {
48
+ const index = this.seekHead.seek.find(index => index.position + this.seekHeadOffset > this.tokenizer.position);
49
+ if (index) {
50
+ // Go to next index position
51
+ const ignoreSize = index.position + this.seekHeadOffset - this.tokenizer.position;
52
+ debug(`Use index to go to next position, ignoring ${ignoreSize} bytes`);
53
+ this.tokenizer.ignore(ignoreSize);
54
+ return ParseAction.SkipElement;
55
+ }
132
56
  }
133
- tree[type.name].push(res);
134
- }
135
- else {
136
- tree[type.name] = res;
137
- }
138
- }
139
- else {
140
- const parser = this.parserMap.get(type.value);
141
- if (typeof parser === 'function') {
142
- tree[type.name] = await parser(element);
143
- }
57
+ return ParseAction.IgnoreElement;
58
+ default:
59
+ return ParseAction.ReadNext;
144
60
  }
145
- }
146
- else {
61
+ },
62
+ elementValue: async (element, value, offset) => {
63
+ debug(`Received: name=${element.name}, value=${value}`);
147
64
  switch (element.id) {
148
- case 0xec: // void
149
- this.padding += element.len;
150
- await this.tokenizer.ignore(element.len);
65
+ case 0x4282: // docType
66
+ this.metadata.setFormat('container', `EBML/${value}`);
67
+ break;
68
+ case 0x114d9b74:
69
+ this.seekHead = value;
70
+ this.seekHeadOffset = offset;
71
+ break;
72
+ case 0x1549a966:
73
+ { // Info (Segment Information)
74
+ const info = value;
75
+ const timecodeScale = info.timecodeScale ? info.timecodeScale : 1000000;
76
+ if (typeof info.duration === 'number') {
77
+ const duration = info.duration * timecodeScale / 1000000000;
78
+ await this.addTag('segment:title', info.title);
79
+ this.metadata.setFormat('duration', Number(duration));
80
+ }
81
+ }
82
+ break;
83
+ case 0x1654ae6b:
84
+ { // tracks
85
+ const audioTracks = value;
86
+ if (audioTracks?.entries) {
87
+ audioTracks.entries.forEach(entry => {
88
+ const stream = {
89
+ codecName: entry.codecID.replace('A_', '').replace('V_', ''),
90
+ codecSettings: entry.codecSettings,
91
+ flagDefault: entry.flagDefault,
92
+ flagLacing: entry.flagLacing,
93
+ flagEnabled: entry.flagEnabled,
94
+ language: entry.language,
95
+ name: entry.name,
96
+ type: entry.trackType,
97
+ audio: entry.audio,
98
+ video: entry.video
99
+ };
100
+ this.metadata.addStreamInfo(stream);
101
+ });
102
+ const audioTrack = audioTracks.entries
103
+ .filter(entry => entry.trackType === TrackType.audio)
104
+ .reduce((acc, cur) => {
105
+ if (!acc)
106
+ return cur;
107
+ if (cur.flagDefault && !acc.flagDefault)
108
+ return cur;
109
+ if (cur.trackNumber < acc.trackNumber)
110
+ return cur;
111
+ return acc;
112
+ }, null);
113
+ if (audioTrack) {
114
+ this.metadata.setFormat('codec', audioTrack.codecID.replace('A_', ''));
115
+ this.metadata.setFormat('sampleRate', audioTrack.audio.samplingFrequency);
116
+ this.metadata.setFormat('numberOfChannels', audioTrack.audio.channels);
117
+ }
118
+ }
119
+ }
120
+ break;
121
+ case 0x1254c367:
122
+ { // tags
123
+ const tags = value;
124
+ await Promise.all(tags.tag.map(async (tag) => {
125
+ const target = tag.target;
126
+ const targetType = target?.targetTypeValue ? TargetType[target.targetTypeValue] : (target?.targetType ? target.targetType : 'track');
127
+ await Promise.all(tag.simpleTags.map(async (simpleTag) => {
128
+ const value = simpleTag.string ? simpleTag.string : simpleTag.binary;
129
+ await this.addTag(`${targetType}:${simpleTag.name}`, value);
130
+ }));
131
+ }));
132
+ }
133
+ break;
134
+ case 0x1941a469:
135
+ { // attachments
136
+ const attachments = value;
137
+ await Promise.all(attachments.attachedFiles
138
+ .filter(file => file.mimeType.startsWith('image/'))
139
+ .map(file => this.addTag('picture', {
140
+ data: file.data,
141
+ format: file.mimeType,
142
+ description: file.description,
143
+ name: file.name
144
+ })));
145
+ }
151
146
  break;
152
- default:
153
- debug(`parseEbml: path=${path.join('/')}, unknown element: id=${element.id.toString(16)}`);
154
- this.padding += element.len;
155
- await this.tokenizer.ignore(element.len);
156
147
  }
157
148
  }
158
- }
159
- return tree;
160
- }
161
- async readVintData(maxLength) {
162
- const msb = await this.tokenizer.peekNumber(UINT8);
163
- let mask = 0x80;
164
- let oc = 1;
165
- // Calculate VINT_WIDTH
166
- while ((msb & mask) === 0) {
167
- if (oc > maxLength) {
168
- throw new Error('VINT value exceeding maximum size');
169
- }
170
- ++oc;
171
- mask >>= 1;
172
- }
173
- const id = new Uint8Array(oc);
174
- await this.tokenizer.readBuffer(id);
175
- return id;
176
- }
177
- async readElement() {
178
- const id = await this.readVintData(this.ebmlMaxIDLength);
179
- const lenField = await this.readVintData(this.ebmlMaxSizeLength);
180
- lenField[0] ^= 0x80 >> (lenField.length - 1);
181
- return {
182
- id: MatroskaParser.readUIntBE(id, id.length),
183
- len: MatroskaParser.readUIntBE(lenField, lenField.length)
184
- };
185
- }
186
- async readFloat(e) {
187
- switch (e.len) {
188
- case 0:
189
- return 0.0;
190
- case 4:
191
- return this.tokenizer.readNumber(Float32_BE);
192
- case 8:
193
- return this.tokenizer.readNumber(Float64_BE);
194
- case 10:
195
- return this.tokenizer.readNumber(Float64_BE);
196
- default:
197
- throw new Error(`Invalid IEEE-754 float length: ${e.len}`);
198
- }
199
- }
200
- async readFlag(e) {
201
- return (await this.readUint(e)) === 1;
202
- }
203
- async readUint(e) {
204
- const buf = await this.readBuffer(e);
205
- return MatroskaParser.readUIntBE(buf, e.len);
206
- }
207
- async readString(e) {
208
- const rawString = await this.tokenizer.readToken(new StringType(e.len, 'utf-8'));
209
- return rawString.replace(/\x00.*$/g, '');
210
- }
211
- async readBuffer(e) {
212
- const buf = new Uint8Array(e.len);
213
- await this.tokenizer.readBuffer(buf);
214
- return buf;
149
+ });
215
150
  }
216
151
  async addTag(tagId, value) {
217
152
  await this.metadata.addTag('matroska', tagId, value);
218
153
  }
219
- static readUIntBE(buf, len) {
220
- return Number(MatroskaParser.readUIntBeAsBigInt(buf, len));
221
- }
222
- /**
223
- * Reeds an unsigned integer from a big endian buffer of length `len`
224
- * @param buf Buffer to decode from
225
- * @param len Number of bytes
226
- * @private
227
- */
228
- static readUIntBeAsBigInt(buf, len) {
229
- const normalizedNumber = new Uint8Array(8);
230
- const cleanNumber = buf.subarray(0, len);
231
- try {
232
- normalizedNumber.set(cleanNumber, 8 - len);
233
- return Token.UINT64_BE.get(normalizedNumber, 0);
234
- }
235
- catch (error) {
236
- return BigInt(-1);
237
- }
238
- }
239
154
  }
240
155
  //# sourceMappingURL=MatroskaParser.js.map
@@ -1,34 +1,10 @@
1
- export interface IHeader {
2
- id: number;
3
- len: number;
4
- }
5
- export declare enum DataType {
6
- 'string' = 0,
7
- uint = 1,
8
- uid = 2,
9
- bool = 3,
10
- binary = 4,
11
- float = 5
12
- }
13
- export interface IElementType<T> {
14
- readonly name: string;
15
- readonly value?: DataType;
16
- readonly container?: IContainerType;
17
- readonly multiple?: boolean;
18
- }
19
- export interface IContainerType {
20
- [id: number]: IElementType<string | number | boolean | Uint8Array>;
21
- }
22
- export interface ITree {
23
- [name: string]: string | number | boolean | Uint8Array | ITree | ITree[];
24
- }
25
- export type ValueType = string | number | Uint8Array | boolean | ITree | ITree[];
26
- export interface ISeekHead {
27
- id?: Uint8Array;
28
- position?: number;
1
+ import type { IEbmlDoc } from '../ebml/types.js';
2
+ export interface ISeek {
3
+ id: Uint8Array;
4
+ position: number;
29
5
  }
30
- export interface IMetaSeekInformation {
31
- seekHeads: ISeekHead[];
6
+ export interface ISeekHead {
7
+ seek: ISeek[];
32
8
  }
33
9
  export interface ISegmentInformation {
34
10
  uid?: Uint8Array;
@@ -151,26 +127,14 @@ export interface IAttachments {
151
127
  attachedFiles: IAttachmedFile[];
152
128
  }
153
129
  export interface IMatroskaSegment {
154
- metaSeekInfo?: IMetaSeekInformation;
155
- seekHeads?: ISeekHead[];
130
+ metaSeekInfo?: ISeekHead;
131
+ seekHeads?: ISeek[];
156
132
  info?: ISegmentInformation;
157
133
  tracks?: ITrackElement;
158
134
  tags?: ITags;
159
135
  cues?: ICuePoint[];
160
136
  attachments?: IAttachments;
161
137
  }
162
- export interface IEbmlElements {
163
- version?: number;
164
- readVersion?: number;
165
- maxIDWidth?: number;
166
- maxSizeWidth?: number;
167
- docType?: string;
168
- docTypeVersion?: number;
169
- docTypeReadVersion?: number;
170
- }
171
- export interface IEbmlDoc {
172
- ebml: IEbmlElements;
173
- }
174
138
  export interface IMatroskaDoc extends IEbmlDoc {
175
139
  segment: IMatroskaSegment;
176
140
  }
@@ -1,12 +1,3 @@
1
- export var DataType;
2
- (function (DataType) {
3
- DataType[DataType["string"] = 0] = "string";
4
- DataType[DataType["uint"] = 1] = "uint";
5
- DataType[DataType["uid"] = 2] = "uid";
6
- DataType[DataType["bool"] = 3] = "bool";
7
- DataType[DataType["binary"] = 4] = "binary";
8
- DataType[DataType["float"] = 5] = "float";
9
- })(DataType || (DataType = {}));
10
1
  export var TargetType;
11
2
  (function (TargetType) {
12
3
  TargetType[TargetType["shot"] = 10] = "shot";
package/lib/type.d.ts CHANGED
@@ -544,6 +544,15 @@ export interface IOptions {
544
544
  * Set observer for async callbacks to common or format.
545
545
  */
546
546
  observer?: Observer;
547
+ /**
548
+ * In Matroska based files, use the _SeekHead_ element index to skip _segment/cluster_ elements.
549
+ * By default, disabled
550
+ * Can have a significant performance impact if enabled.
551
+ * A possible side effect is that certain metadata may be skipped, depending on the index.
552
+ * If there is no _SeekHead_ element present in the Matroska file, this flag has no effect
553
+ * Ref: https://www.matroska.org/technical/diagram.html
554
+ */
555
+ mkvUseIndex?: boolean;
547
556
  }
548
557
  export interface IApeHeader extends IOptions {
549
558
  /**
package/package.json CHANGED
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "name": "music-metadata",
3
3
  "description": "Music metadata parser for Node.js, supporting virtual any audio and tag format.",
4
- "version": "10.1.0",
4
+ "version": "10.2.0",
5
5
  "author": {
6
6
  "name": "Borewit",
7
7
  "url": "https://github.com/Borewit"
@@ -85,7 +85,7 @@
85
85
  "compile-doc": "tsc -p doc-gen",
86
86
  "compile": "yarn run compile-src && yarn compile-test && yarn compile-doc",
87
87
  "lint-ts": "biome check",
88
- "lint-md": "yarn run remark -u preset-lint-markdown-style-guide .",
88
+ "lint-md": "yarn run remark -u remark-preset-lint-consistent .",
89
89
  "lint": "yarn run lint-ts && yarn run lint-md",
90
90
  "test": "mocha",
91
91
  "build": "yarn run clean && yarn compile && yarn run doc-gen",
@@ -97,9 +97,9 @@
97
97
  "@tokenizer/token": "^0.3.0",
98
98
  "content-type": "^1.0.5",
99
99
  "debug": "^4.3.4",
100
- "file-type": "^19.4.0",
100
+ "file-type": "^19.4.1",
101
101
  "media-typer": "^1.1.0",
102
- "strtok3": "^8.0.5",
102
+ "strtok3": "^8.1.0",
103
103
  "token-types": "^6.0.0",
104
104
  "uint8array-extras": "^1.4.0"
105
105
  },
@@ -111,7 +111,7 @@
111
111
  "@types/debug": "^4.1.12",
112
112
  "@types/media-typer": "^1.1.3",
113
113
  "@types/mocha": "^10.0.7",
114
- "@types/node": "^22.2.0",
114
+ "@types/node": "^22.3.0",
115
115
  "c8": "^10.1.2",
116
116
  "chai": "^5.1.1",
117
117
  "chai-as-promised": "^8.0.0",
@@ -120,7 +120,7 @@
120
120
  "mocha": "^10.7.3",
121
121
  "prettier": "^3.3.3",
122
122
  "remark-cli": "^12.0.1",
123
- "remark-preset-lint-markdown-style-guide": "^6.0.0",
123
+ "remark-preset-lint-consistent": "^6.0.0",
124
124
  "ts-node": "^10.9.2",
125
125
  "typescript": "^5.5.3"
126
126
  },