podcast-dl 11.6.0 → 11.6.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,3 @@
1
+ # rss-parser (local fork)
2
+
3
+ This is a local fork of [rss-parser](https://github.com/rbren/rss-parser) by Bobby Brennan.
@@ -0,0 +1,66 @@
1
/**
 * Channel-level fields copied from an RSS channel onto the parsed feed object.
 *
 * Each entry is either a plain tag name (copied under the same key) or a
 * `[from, to]` pair (XML tag `from` is stored under key `to`). Entries are
 * applied in order, so a later entry writing to the same key wins
 * (e.g. `dc:creator` overrides `author` for the `creator` key).
 */
export const feed = [
  // Renamed / Dublin Core fields.
  ["author", "creator"],
  ["dc:publisher", "publisher"],
  ["dc:creator", "creator"],
  ["dc:source", "source"],
  ["dc:title", "title"],
  ["dc:type", "type"],
  // Fields copied under their own name. ("generator" used to be listed twice;
  // the second copy only repeated the same assignment.)
  "title",
  "description",
  "author",
  "pubDate",
  "webMaster",
  "managingEditor",
  "generator",
  "link",
  "language",
  "copyright",
  "lastBuildDate",
  "docs",
  "ttl",
  "rating",
  "skipHours",
  "skipDays",
];
26
+
27
/**
 * Item-level fields copied from each RSS `<item>` onto the parsed item.
 *
 * Entries are a tag name, a `[from, to]` pair, or a `[from, to, options]`
 * triple; the options recognised by the copier are `includeSnippet` and
 * `keepArray`. Order matters: later entries targeting the same key win.
 */
export const item = [
  ["author", "creator"],
  // Dublin Core tags stored under their un-prefixed name.
  ...["creator", "date", "language", "rights", "source", "title"].map(
    (name) => [`dc:${name}`, name]
  ),
  // Tags stored under their own name.
  "title",
  "link",
  "pubDate",
  "author",
  "summary",
  ["content:encoded", "content:encoded", { includeSnippet: true }],
  "enclosure",
  "dc:creator",
  "dc:date",
  "comments",
  ["podcast:transcript", "podcastTranscripts", { keepArray: true }],
];
47
+
48
/**
 * Builds the `[from, to]` pair that copies the `itunes:<name>` tag to `<name>`.
 *
 * @param {string} f - Un-prefixed iTunes field name.
 * @returns {[string, string]} Pair of prefixed tag name and destination key.
 */
const mapItunesField = (f) => [`itunes:${f}`, f];

/** iTunes tags copied from the channel into `feed.itunes`. */
export const podcastFeed = ["author", "subtitle", "summary", "explicit"].map(
  mapItunesField
);

/**
 * iTunes tags copied from each item into `item.itunes`.
 * ("image" used to be listed twice; the repeat performed the same copy again.)
 */
export const podcastItem = [
  "author",
  "subtitle",
  "summary",
  "explicit",
  "duration",
  "image",
  "episode",
  "season",
  "keywords",
  "episodeType",
].map(mapItunesField);
@@ -0,0 +1,387 @@
1
+ import http from "http";
2
+ import https from "https";
3
+ import xml2js from "xml2js";
4
+
5
+ import * as fields from "./fields.js";
6
+ import * as utils from "./utils.js";
7
+
8
// Request headers sent with every feed download; caller-supplied headers are
// merged over these.
const DEFAULT_HEADERS = {
  "User-Agent": "rss-parser",
  "Accept": "application/rss+xml",
};

// Number of redirects followed when the caller does not configure one.
const DEFAULT_MAX_REDIRECTS = 5;

// Request timeout in milliseconds (one minute).
const DEFAULT_TIMEOUT = 60 * 1000;
14
+
15
/**
 * Parses RSS 0.9x, RSS 1.0 (RDF), RSS 2.0 and Atom documents into plain
 * objects of the form `{ ...feedFields, items: [...] }`, either from an XML
 * string (`parseString`) or by downloading a URL (`parseURL`).
 */
class Parser {
  /**
   * @param {object} [options] - Parser configuration. Missing members are
   *   filled in on the object that was passed, which is then kept as
   *   `this.options`.
   * @param {object} [options.headers] - Request headers merged over the defaults.
   * @param {object} [options.xml2js] - Options handed to `xml2js.Parser`.
   * @param {{item?: Array, feed?: Array}} [options.customFields] - Extra
   *   field descriptors appended to the built-in item / feed field lists.
   * @param {object} [options.requestOptions] - Extra options merged into the
   *   HTTP(S) request options.
   * @param {number} [options.maxRedirects] - Redirect limit; a falsy value
   *   selects the default.
   * @param {number} [options.timeout] - Request timeout in ms; a falsy value
   *   selects the default.
   * @param {number} [options.defaultRSS] - RSS flavour (0.9, 1 or 2) assumed
   *   when an `<rss>` document does not declare a recognised version.
   */
  constructor(options = {}) {
    options.headers = options.headers || {};
    options.xml2js = options.xml2js || {};
    options.customFields = options.customFields || {};
    options.customFields.item = options.customFields.item || [];
    options.customFields.feed = options.customFields.feed || [];
    options.requestOptions = options.requestOptions || {};
    if (!options.maxRedirects) options.maxRedirects = DEFAULT_MAX_REDIRECTS;
    if (!options.timeout) options.timeout = DEFAULT_TIMEOUT;
    this.options = options;
    this.xmlParser = new xml2js.Parser(this.options.xml2js);
  }

  /**
   * Parses an XML string into a feed object.
   *
   * @param {string} xml - Feed document.
   * @param {Function} [callback] - Optional node-style `(err, feed)` callback.
   * @returns {Promise} Resolves with the feed when no callback is given;
   *   rejects when the XML cannot be parsed or the feed type is not recognised.
   */
  parseString(xml, callback) {
    const prom = new Promise((resolve, reject) => {
      this.xmlParser.parseString(xml, (err, result) => {
        if (err) return reject(err);
        if (!result) {
          return reject(new Error("Unable to parse XML."));
        }
        const rssVersion = result.rss && result.rss.$ && result.rss.$.version;
        let feed = null;
        // Builders dereference the expected document structure; turn any
        // failure on a malformed document into a rejection of this promise.
        try {
          if (result.feed) {
            feed = this.buildAtomFeed(result);
          } else if (rssVersion && rssVersion.match(/^2/)) {
            feed = this.buildRSS2(result);
          } else if (result["rdf:RDF"]) {
            feed = this.buildRSS1(result);
          } else if (rssVersion && rssVersion.match(/0\.9/)) {
            feed = this.buildRSS0_9(result);
          } else if (result.rss && this.options.defaultRSS) {
            switch (this.options.defaultRSS) {
              case 0.9:
                feed = this.buildRSS0_9(result);
                break;
              case 1:
                feed = this.buildRSS1(result);
                break;
              case 2:
                feed = this.buildRSS2(result);
                break;
              default:
                return reject(new Error("default RSS version not recognized."));
            }
          } else {
            return reject(new Error("Feed not recognized as RSS 1 or 2."));
          }
        } catch (buildError) {
          return reject(buildError);
        }
        resolve(feed);
      });
    });
    return utils.maybePromisify(callback, prom);
  }

  /**
   * Downloads a feed over HTTP(S), following redirects, and parses it.
   *
   * @param {string} feedUrl - Absolute feed URL.
   * @param {Function} [callback] - Optional node-style `(err, feed)` callback.
   * @param {number} [redirectCount=0] - Redirects followed so far (internal).
   * @returns {Promise} Resolves with the parsed feed when no callback is
   *   given; rejects on network errors, status codes >= 300 that are not
   *   followed, too many redirects, or timeout.
   * @throws {TypeError} Synchronously, when `feedUrl` is not a valid URL.
   */
  parseURL(feedUrl, callback, redirectCount = 0) {
    const parsedUrl = new URL(feedUrl);
    // URL normalises the scheme to lower case, so "HTTPS://…" is handled too.
    const get = parsedUrl.protocol === "https:" ? https.get : http.get;
    const urlParts = {
      hostname: parsedUrl.hostname,
      port: parsedUrl.port || undefined,
      path: parsedUrl.pathname + parsedUrl.search,
      protocol: parsedUrl.protocol,
    };
    const headers = Object.assign({}, DEFAULT_HEADERS, this.options.headers);
    let timeout = null;

    let prom = new Promise((resolve, reject) => {
      const requestOpts = Object.assign(
        { headers },
        urlParts,
        this.options.requestOptions
      );
      const req = get(requestOpts, (res) => {
        const { statusCode } = res;
        const location = res.headers["location"];
        const isRedirect =
          this.options.maxRedirects &&
          statusCode >= 300 &&
          statusCode < 400 &&
          location;

        if (isRedirect) {
          // The body of a redirect is never read; drain it so the socket is
          // released.
          res.resume();
          if (redirectCount === this.options.maxRedirects) {
            return reject(new Error("Too many redirects"));
          }
          let newLocation;
          try {
            newLocation = new URL(location, feedUrl).href;
          } catch (urlError) {
            // A malformed Location header must reject, not throw out of the
            // response callback.
            return reject(urlError);
          }
          return this.parseURL(newLocation, null, redirectCount + 1).then(
            resolve,
            reject
          );
        }
        if (statusCode >= 300) {
          res.resume();
          return reject(new Error("Status code " + statusCode));
        }

        let xml = "";
        res.setEncoding(
          utils.getEncodingFromContentType(res.headers["content-type"])
        );
        res.on("data", (chunk) => {
          xml += chunk;
        });
        // Without this a connection dropped mid-body would leave the promise
        // pending until the timeout fires.
        res.on("error", reject);
        res.on("end", () => this.parseString(xml).then(resolve, reject));
      });
      req.on("error", reject);
      timeout = setTimeout(() => {
        reject(
          new Error("Request timed out after " + this.options.timeout + "ms")
        );
        // Tear the request down so the socket does not outlive the rejection.
        req.destroy();
      }, this.options.timeout);
    }).finally(() => clearTimeout(timeout));

    prom = utils.maybePromisify(callback, prom);
    return prom;
  }

  /**
   * Builds a feed object from a parsed Atom document.
   *
   * @param {object} xmlObj - xml2js result whose root is `feed`.
   * @returns {object} Feed with `items`, plus `link`, `feedUrl`, `title` and
   *   `lastBuildDate` when present.
   */
  buildAtomFeed(xmlObj) {
    const feed = { items: [] };
    utils.copyFromXML(xmlObj.feed, feed, this.options.customFields.feed);
    if (xmlObj.feed.link) {
      feed.link = utils.getLink(xmlObj.feed.link, "alternate", 0);
      feed.feedUrl = utils.getLink(xmlObj.feed.link, "self", 1);
    }
    if (xmlObj.feed.title) {
      let title = xmlObj.feed.title[0] || "";
      // Titles carrying attributes arrive as `{ _: text, $: attrs }`.
      if (title._) title = title._;
      if (title) feed.title = title;
    }
    if (xmlObj.feed.updated) {
      feed.lastBuildDate = xmlObj.feed.updated[0];
    }
    feed.items = (xmlObj.feed.entry || []).map((entry) =>
      this.parseItemAtom(entry)
    );
    return feed;
  }

  /**
   * Converts one Atom `<entry>` into an item object.
   *
   * `pubDate` is taken from `<published>`, falling back to `<updated>`. A
   * value that cannot be parsed as a date is skipped instead of aborting the
   * whole feed with a RangeError.
   *
   * @param {object} entry - xml2js representation of the entry.
   * @returns {object} Parsed item.
   */
  parseItemAtom(entry) {
    // Returns the ISO-8601 form of the first text node, or undefined when the
    // node is missing, empty, or not a parseable date.
    const firstAsISO = (nodes) => {
      if (!(nodes && nodes.length && nodes[0].length)) return undefined;
      const parsed = new Date(nodes[0]);
      return Number.isNaN(parsed.getTime()) ? undefined : parsed.toISOString();
    };

    const item = {};
    utils.copyFromXML(entry, item, this.options.customFields.item);
    if (entry.title) {
      let title = entry.title[0] || "";
      if (title._) title = title._;
      if (title) item.title = title;
    }
    if (entry.link && entry.link.length) {
      item.link = utils.getLink(entry.link, "alternate", 0);
    }
    const published = firstAsISO(entry.published);
    if (published) item.pubDate = published;
    if (!item.pubDate) {
      const updated = firstAsISO(entry.updated);
      if (updated) item.pubDate = updated;
    }
    if (
      entry.author &&
      entry.author.length &&
      entry.author[0].name &&
      entry.author[0].name.length
    ) {
      item.author = entry.author[0].name[0];
    }
    if (entry.content && entry.content.length) {
      item.content = utils.getContent(entry.content[0]);
      item.contentSnippet = utils.getSnippet(item.content);
    }
    if (entry.summary && entry.summary.length) {
      item.summary = utils.getContent(entry.summary[0]);
    }
    if (entry.id) {
      item.id = entry.id[0];
    }
    this.setISODate(item);
    return item;
  }

  /**
   * Builds a feed from an RSS 0.9x document (items live inside the channel).
   *
   * @param {object} xmlObj - xml2js result whose root is `rss`.
   * @returns {object} Parsed feed.
   */
  buildRSS0_9(xmlObj) {
    const channel = xmlObj.rss.channel[0];
    return this.buildRSS(channel, channel.item);
  }

  /**
   * Builds a feed from an RSS 1.0 document (items are siblings of the channel
   * under `rdf:RDF`).
   *
   * @param {object} xmlObj - xml2js result whose root is `rdf:RDF`.
   * @returns {object} Parsed feed.
   */
  buildRSS1(xmlObj) {
    const rdf = xmlObj["rdf:RDF"];
    return this.buildRSS(rdf.channel[0], rdf.item);
  }

  /**
   * Builds a feed from an RSS 2.0 document and, when the root declares the
   * `xmlns:itunes` namespace, adds the iTunes metadata.
   *
   * @param {object} xmlObj - xml2js result whose root is `rss`.
   * @returns {object} Parsed feed.
   */
  buildRSS2(xmlObj) {
    const channel = xmlObj.rss.channel[0];
    const feed = this.buildRSS(channel, channel.item);
    if (xmlObj.rss.$ && xmlObj.rss.$["xmlns:itunes"]) {
      this.decorateItunes(feed, channel);
    }
    return feed;
  }

  /**
   * Shared RSS builder: copies channel fields, image, pagination links and
   * items.
   *
   * @param {object} channel - xml2js representation of the channel.
   * @param {Array<object>} [items] - xml2js item nodes; treated as empty when
   *   missing.
   * @returns {object} Parsed feed.
   */
  buildRSS(channel, items) {
    items = items || [];
    const feed = { items: [] };
    const feedFields = fields.feed.concat(this.options.customFields.feed);
    const itemFields = fields.item.concat(this.options.customFields.item);

    // Prefer the atom:link with rel="self"; fall back to the first one.
    if (channel["atom:link"]) {
      feed.feedUrl = utils.getLink(channel["atom:link"], "self", 0);
    }

    if (channel.image && channel.image[0] && channel.image[0].url) {
      feed.image = {};
      const image = channel.image[0];
      if (image.link) feed.image.link = image.link[0];
      if (image.url) feed.image.url = image.url[0];
      if (image.title) feed.image.title = image.title[0];
      if (image.width) feed.image.width = image.width[0];
      if (image.height) feed.image.height = image.height[0];
    }
    const paginationLinks = this.generatePaginationLinks(channel);
    if (Object.keys(paginationLinks).length) {
      feed.paginationLinks = paginationLinks;
    }
    utils.copyFromXML(channel, feed, feedFields);
    feed.items = items.map((xmlItem) => this.parseItemRss(xmlItem, itemFields));
    return feed;
  }

  /**
   * Converts one RSS `<item>` into an item object.
   *
   * @param {object} xmlItem - xml2js representation of the item.
   * @param {Array} itemFields - Field descriptors to copy.
   * @returns {object} Parsed item.
   */
  parseItemRss(xmlItem, itemFields) {
    const item = {};
    utils.copyFromXML(xmlItem, item, itemFields);
    if (xmlItem.enclosure) {
      // Only the attributes of the first enclosure are kept.
      item.enclosure = xmlItem.enclosure[0].$;
    }
    if (xmlItem.description) {
      item.content = utils.getContent(xmlItem.description[0]);
      item.contentSnippet = utils.getSnippet(item.content);
    }
    if (xmlItem.guid) {
      item.guid = xmlItem.guid[0];
      if (item.guid._) item.guid = item.guid._;
    }
    if (xmlItem.$ && xmlItem.$["rdf:about"]) {
      item["rdf:about"] = xmlItem.$["rdf:about"];
    }
    if (xmlItem.category) item.categories = xmlItem.category;
    this.setISODate(item);
    return item;
  }

  /**
   * Adds an `itunes` object to the feed and to each of its items.
   *
   * @param {object} feed - Feed produced by `buildRSS`; modified in place.
   * @param {object} channel - xml2js representation of the channel.
   */
  decorateItunes(feed, channel) {
    const items = channel.item || [];
    feed.itunes = {};

    if (channel["itunes:owner"]) {
      const ownerNode = channel["itunes:owner"][0];
      const owner = {};
      if (ownerNode["itunes:name"]) {
        owner.name = ownerNode["itunes:name"][0];
      }
      if (ownerNode["itunes:email"]) {
        owner.email = ownerNode["itunes:email"][0];
      }
      feed.itunes.owner = owner;
    }

    if (channel["itunes:image"]) {
      const imageNode = channel["itunes:image"][0];
      const href = imageNode && imageNode.$ && imageNode.$.href;
      if (href) {
        feed.itunes.image = href;
      }
    }

    if (channel["itunes:category"]) {
      const categoriesWithSubs = channel["itunes:category"].map((category) => {
        const subcategories = category && category["itunes:category"];
        return {
          name: category && category.$ && category.$.text,
          subs: subcategories
            ? subcategories.map((subcategory) => ({
                name: subcategory && subcategory.$ && subcategory.$.text,
              }))
            : null,
        };
      });

      feed.itunes.categories = categoriesWithSubs.map(
        (category) => category.name
      );
      feed.itunes.categoriesWithSubs = categoriesWithSubs;
    }

    if (channel["itunes:keywords"]) {
      if (channel["itunes:keywords"].length > 1) {
        feed.itunes.keywords = channel["itunes:keywords"].map(
          (keyword) => keyword && keyword.$ && keyword.$.text
        );
      } else {
        let keywords = channel["itunes:keywords"][0];
        if (keywords && typeof keywords._ === "string") {
          keywords = keywords._;
        }

        if (keywords && keywords.$ && keywords.$.text) {
          feed.itunes.keywords = keywords.$.text.split(",");
        } else if (typeof keywords === "string") {
          feed.itunes.keywords = keywords.split(",");
        }
      }
    }

    utils.copyFromXML(channel, feed.itunes, fields.podcastFeed);
    items.forEach((item, index) => {
      const entry = feed.items[index];
      entry.itunes = {};
      utils.copyFromXML(item, entry.itunes, fields.podcastItem);
      // Replace the raw image node with its href when one is present.
      const image = item["itunes:image"];
      if (image && image[0] && image[0].$ && image[0].$.href) {
        entry.itunes.image = image[0].$.href;
      }
    });
  }

  /**
   * Sets `item.isoDate` from `item.pubDate` (or `item.date`) when that value
   * is a parseable date string; otherwise leaves the item untouched.
   *
   * @param {object} item - Parsed item; modified in place.
   */
  setISODate(item) {
    const date = item.pubDate || item.date;
    if (date) {
      try {
        item.isoDate = new Date(date.trim()).toISOString();
      } catch {
        // Deliberate best effort: an unparseable or non-string date simply
        // yields no isoDate.
      }
    }
  }

  /**
   * Collects the channel's `atom:link` hrefs for the rel values "self",
   * "first", "next", "prev" and "last".
   *
   * @param {object} channel - xml2js representation of the channel.
   * @returns {object} Map of rel to href; empty when there are no such links.
   */
  generatePaginationLinks(channel) {
    if (!channel["atom:link"]) {
      return {};
    }
    const paginationRelAttributes = ["self", "first", "next", "prev", "last"];

    return channel["atom:link"].reduce((paginationLinks, link) => {
      if (!link.$ || !paginationRelAttributes.includes(link.$.rel)) {
        return paginationLinks;
      }
      paginationLinks[link.$.rel] = link.$.href;
      return paginationLinks;
    }, {});
  }
}
386
+
387
+ export default Parser;
@@ -0,0 +1,101 @@
1
+ import entities from "entities";
2
+ import xml2js from "xml2js";
3
+
4
/**
 * Removes markup from an HTML fragment.
 *
 * A tag whose name starts with one of the listed block-level names and that
 * sits directly between two non-newline characters is first turned into a
 * line break; every remaining tag is then deleted.
 *
 * @param {string} str - HTML fragment.
 * @returns {string} Text with all tags removed.
 */
export const stripHtml = (str) => {
  const blockTagBetweenText =
    /([^\n])<\/?(h|br|p|ul|ol|li|blockquote|section|table|tr|div)(?:.|\n)*?>([^\n])/gm;
  const anyTag = /<(?:.|\n)*?>/gm;
  const withLineBreaks = str.replace(blockTagBetweenText, "$1\n$3");
  return withLineBreaks.replace(anyTag, "");
};
12
+
13
/**
 * Produces a plain-text snippet from an HTML string: tags are stripped, HTML
 * entities decoded, and surrounding whitespace trimmed.
 *
 * @param {string} str - HTML fragment.
 * @returns {string} Plain-text snippet.
 */
export const getSnippet = (str) => {
  const withoutTags = stripHtml(str);
  const decoded = entities.decodeHTML(withoutTags);
  return decoded.trim();
};
16
+
17
/**
 * Picks an href out of a list of xml2js link nodes.
 *
 * The first node whose `rel` attribute equals `rel` wins; otherwise the node
 * at `fallbackIdx` is used. Nodes without attributes (no `$`) are skipped
 * rather than dereferenced, so an attribute-less link element cannot throw.
 *
 * @param {Array<object>} [links] - Link nodes; attributes live under `$`.
 * @param {string} rel - Wanted `rel` value.
 * @param {number} fallbackIdx - Index used when no node matches `rel`.
 * @returns {string|undefined} The href, or undefined when none is available.
 */
export const getLink = (links, rel, fallbackIdx) => {
  if (!links) return;
  for (let i = 0; i < links.length; ++i) {
    const attrs = links[i] && links[i].$;
    if (attrs && attrs.rel === rel) return attrs.href;
  }
  const fallback = links[fallbackIdx];
  if (fallback && fallback.$) return fallback.$.href;
};
24
+
25
/**
 * Returns the textual form of an xml2js content node.
 *
 * - A node with a string `_` member yields that text.
 * - Any other object is serialised back to markup under a `div` root.
 * - Anything else (e.g. a plain string) is returned unchanged.
 *
 * @param {object|string} content - xml2js content node.
 * @returns {string} Content text or serialised markup.
 */
export const getContent = (content) => {
  if (typeof content._ === "string") {
    return content._;
  }
  if (typeof content !== "object") {
    return content;
  }
  const builderOptions = {
    headless: true,
    explicitRoot: true,
    rootName: "div",
    renderOpts: { pretty: false },
  };
  return new xml2js.Builder(builderOptions).buildObject(content);
};
40
+
41
/**
 * Copies selected fields from an xml2js node onto a destination object.
 *
 * Each descriptor is a tag name (copied under the same key) or an array
 * `[from, to, options]`. Supported options:
 *   - `keepArray`: store the whole value array instead of its first element.
 *   - `includeSnippet`: also store a plain-text snippet under `to + "Snippet"`
 *     when the copied value is a non-empty string.
 * A value of the form `{ _: text }` is replaced by its text.
 *
 * @param {object} xml - Source xml2js node.
 * @param {object} dest - Destination object; modified in place.
 * @param {Array<string|Array>} fields - Field descriptors, applied in order.
 */
export const copyFromXML = (xml, dest, fields) => {
  fields.forEach((descriptor) => {
    const isTuple = Array.isArray(descriptor);
    const from = isTuple ? descriptor[0] : descriptor;
    const to = isTuple ? descriptor[1] : descriptor;
    const { keepArray, includeSnippet } =
      isTuple && descriptor.length > 2 ? descriptor[2] : {};

    const values = xml[from];
    if (values !== undefined) {
      dest[to] = keepArray ? values : values[0];
    }

    // Unwrap nodes that carry attributes alongside their text.
    const stored = dest[to];
    if (stored && typeof stored._ === "string") {
      dest[to] = stored._;
    }

    const text = dest[to];
    if (includeSnippet && text && typeof text === "string") {
      dest[to + "Snippet"] = getSnippet(text);
    }
  });
};
65
+
66
/**
 * Bridges a promise to an optional node-style callback.
 *
 * Without a callback the promise is returned untouched. With one, the
 * callback is invoked from a timer as `(null, data)` on fulfilment or `(err)`
 * on rejection, and the returned promise settles with the timer handle.
 *
 * @param {Function} [callback] - Node-style callback.
 * @param {Promise} promise - Promise to observe.
 * @returns {Promise} `promise` itself, or the chained promise when a callback
 *   is supplied.
 */
export const maybePromisify = (callback, promise) => {
  if (!callback) {
    return promise;
  }
  const deliverResult = (data) => setTimeout(() => callback(null, data));
  const deliverError = (err) => setTimeout(() => callback(err));
  return promise.then(deliverResult, deliverError);
};
73
+
74
// Encoding used when the header names none, or one that is not supported.
const DEFAULT_ENCODING = "utf8";

// Captures the value of an `encoding=` / `charset=` parameter. The parameter
// name is matched case-insensitively, and the value stops at whitespace, a
// `;` separator, or a quote, so `charset="ISO-8859-1"` and
// `charset=iso-8859-1; x=y` both yield the bare charset name.
const ENCODING_REGEX = /(encoding|charset)\s*=\s*["']?([^\s;"']+)/i;

// Encoding names accepted as-is.
const SUPPORTED_ENCODINGS = [
  "ascii",
  "utf8",
  "utf16le",
  "ucs2",
  "base64",
  "latin1",
  "binary",
  "hex",
];

// Charset spellings mapped onto one of the supported names.
const ENCODING_ALIASES = {
  "utf-8": "utf8",
  "iso-8859-1": "latin1",
};

/**
 * Derives the stream encoding from a Content-Type header value.
 *
 * @param {string} [contentType] - Header value, e.g. `text/xml; charset=utf-8`.
 * @returns {string} One of the supported encoding names; `"utf8"` when the
 *   header is missing, has no charset, or names an unsupported one.
 */
export const getEncodingFromContentType = (contentType) => {
  const match = (contentType || "").match(ENCODING_REGEX);
  const declared = ((match || [])[2] || "").toLowerCase();
  const encoding = ENCODING_ALIASES[declared] || declared;
  return SUPPORTED_ENCODINGS.includes(encoding) ? encoding : DEFAULT_ENCODING;
};
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "podcast-dl",
3
- "version": "11.6.0",
3
+ "version": "11.6.1",
4
4
  "description": "A CLI for downloading podcasts.",
5
5
  "type": "module",
6
6
  "bin": "./bin/bin.js",
@@ -33,7 +33,8 @@
33
33
  "url": "https://github.com/lightpohl/podcast-dl.git"
34
34
  },
35
35
  "files": [
36
- "bin"
36
+ "bin",
37
+ "lib"
37
38
  ],
38
39
  "author": "Joshua Pohl",
39
40
  "license": "MIT",