podcast-dl 7.0.0 → 7.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +5 -4
- package/bin/async.js +24 -13
- package/bin/bin.js +25 -2
- package/bin/util.js +73 -39
- package/package.json +1 -1
package/README.md
CHANGED
|
@@ -1,8 +1,6 @@
|
|
|
1
1
|
# podcast-dl
|
|
2
2
|
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-

|
|
3
|
+
A CLI for downloading podcasts with a focus on archiving.
|
|
6
4
|
|
|
7
5
|
## How to Use
|
|
8
6
|
|
|
@@ -12,6 +10,8 @@
|
|
|
12
10
|
|
|
13
11
|
`npx podcast-dl --url <PODCAST_RSS_URL>`
|
|
14
12
|
|
|
13
|
+
### [More Examples](./docs/examples.md)
|
|
14
|
+
|
|
15
15
|
## Options
|
|
16
16
|
|
|
17
17
|
Type values surrounded in square brackets (`[]`) can be used as boolean options (no argument required).
|
|
@@ -20,6 +20,7 @@ Type values surrounded in square brackets (`[]`) can be used as used as boolean
|
|
|
20
20
|
| ------------------------ | ------------------- | -------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
|
|
21
21
|
| --url | String | true | URL to podcast RSS feed. |
|
|
22
22
|
| --out-dir | String | false | Specify output directory for episodes and metadata. Defaults to "./{{podcast_title}}". See "Templating" for more details. |
|
|
23
|
+
| --threads | Number | false | Determines the number of downloads that will happen concurrently. Default is 1. |
|
|
23
24
|
| --archive | [String] | false | Download or write out items not listed in archive file. Generates archive file at path if not found. Defaults to "./{{podcast_title}}/archive.json" when used as a boolean option. See "Templating" for more details. |
|
|
24
25
|
| --episode-template | String | false | Template for generating episode related filenames. See "Templating" for details. |
|
|
25
26
|
| --include-meta | | false | Write out podcast metadata to JSON. |
|
|
@@ -38,7 +39,7 @@ Type values surrounded in square brackets (`[]`) can be used as used as boolean
|
|
|
38
39
|
| --info | | false | Print retrieved podcast info instead of downloading. |
|
|
39
40
|
| --list | [String] | false | Print episode list instead of downloading. Defaults to "table" when used as a boolean option. "json" is also supported. |
|
|
40
41
|
| --exec | String | false | Execute a command after each episode is downloaded. |
|
|
41
|
-
| --
|
|
42
|
+
| --filter-url-tracking | | false | Attempts to extract the direct download link of an episode if detected (**experimental**). |
|
|
42
43
|
| --version | | false | Output the version number. |
|
|
43
44
|
| --help | | false | Output usage information. |
|
|
44
45
|
|
package/bin/async.js
CHANGED
|
@@ -16,10 +16,12 @@ import { getArchiveFilename, getFilename } from "./naming.js";
|
|
|
16
16
|
import {
|
|
17
17
|
getEpisodeAudioUrlAndExt,
|
|
18
18
|
getArchiveKey,
|
|
19
|
+
getTempPath,
|
|
19
20
|
runFfmpeg,
|
|
20
21
|
runExec,
|
|
21
22
|
writeItemMeta,
|
|
22
23
|
writeToArchive,
|
|
24
|
+
getUrlEmbed,
|
|
23
25
|
} from "./util.js";
|
|
24
26
|
|
|
25
27
|
const pipeline = promisify(stream.pipeline);
|
|
@@ -34,6 +36,7 @@ const download = async ({
|
|
|
34
36
|
archive,
|
|
35
37
|
override,
|
|
36
38
|
onAfterDownload,
|
|
39
|
+
filterUrlTracking,
|
|
37
40
|
}) => {
|
|
38
41
|
const logMessage = getLogMessageWithMarker(marker);
|
|
39
42
|
if (!override && fs.existsSync(outputPath)) {
|
|
@@ -41,7 +44,18 @@ const download = async ({
|
|
|
41
44
|
return;
|
|
42
45
|
}
|
|
43
46
|
|
|
44
|
-
|
|
47
|
+
let embeddedUrl = null;
|
|
48
|
+
if (filterUrlTracking) {
|
|
49
|
+
logMessage("Attempting to find embedded URL...");
|
|
50
|
+
embeddedUrl = await getUrlEmbed(url);
|
|
51
|
+
|
|
52
|
+
if (!embeddedUrl) {
|
|
53
|
+
logMessage("Unable to find embedded URL. Defaulting to full address");
|
|
54
|
+
}
|
|
55
|
+
}
|
|
56
|
+
|
|
57
|
+
const finalUrl = embeddedUrl || url;
|
|
58
|
+
const headResponse = await got(finalUrl, {
|
|
45
59
|
timeout: 5000,
|
|
46
60
|
method: "HEAD",
|
|
47
61
|
responseType: "json",
|
|
@@ -50,9 +64,10 @@ const download = async ({
|
|
|
50
64
|
},
|
|
51
65
|
});
|
|
52
66
|
|
|
67
|
+
const tempOutputPath = getTempPath(outputPath);
|
|
53
68
|
const removeFile = () => {
|
|
54
|
-
if (fs.existsSync(
|
|
55
|
-
fs.unlinkSync(
|
|
69
|
+
if (fs.existsSync(tempOutputPath)) {
|
|
70
|
+
fs.unlinkSync(tempOutputPath);
|
|
56
71
|
}
|
|
57
72
|
};
|
|
58
73
|
|
|
@@ -87,15 +102,15 @@ const download = async ({
|
|
|
87
102
|
});
|
|
88
103
|
|
|
89
104
|
await pipeline(
|
|
90
|
-
got.stream(
|
|
91
|
-
fs.createWriteStream(
|
|
105
|
+
got.stream(finalUrl).on("downloadProgress", onDownloadProgress),
|
|
106
|
+
fs.createWriteStream(tempOutputPath)
|
|
92
107
|
);
|
|
93
108
|
} catch (error) {
|
|
94
109
|
removeFile();
|
|
95
110
|
throw error;
|
|
96
111
|
}
|
|
97
112
|
|
|
98
|
-
const fileSize = fs.statSync(
|
|
113
|
+
const fileSize = fs.statSync(tempOutputPath).size;
|
|
99
114
|
|
|
100
115
|
if (fileSize === 0) {
|
|
101
116
|
removeFile();
|
|
@@ -108,13 +123,7 @@ const download = async ({
|
|
|
108
123
|
return;
|
|
109
124
|
}
|
|
110
125
|
|
|
111
|
-
|
|
112
|
-
logMessage(
|
|
113
|
-
"File size differs from expected content length. Suggestion: verify file works as expected",
|
|
114
|
-
LOG_LEVELS.important
|
|
115
|
-
);
|
|
116
|
-
logMessage(`${outputPath}`, LOG_LEVELS.important);
|
|
117
|
-
}
|
|
126
|
+
fs.renameSync(tempOutputPath, outputPath);
|
|
118
127
|
|
|
119
128
|
logMessage("Download complete!");
|
|
120
129
|
|
|
@@ -140,6 +149,7 @@ let downloadItemsAsync = async ({
|
|
|
140
149
|
episodeTemplate,
|
|
141
150
|
exec,
|
|
142
151
|
feed,
|
|
152
|
+
filterUrlTracking,
|
|
143
153
|
includeEpisodeMeta,
|
|
144
154
|
mono,
|
|
145
155
|
override,
|
|
@@ -177,6 +187,7 @@ let downloadItemsAsync = async ({
|
|
|
177
187
|
archive,
|
|
178
188
|
override,
|
|
179
189
|
marker,
|
|
190
|
+
filterUrlTracking,
|
|
180
191
|
key: getArchiveKey({
|
|
181
192
|
prefix: archiveUrl,
|
|
182
193
|
name: getArchiveFilename({
|
package/bin/bin.js
CHANGED
|
@@ -2,7 +2,6 @@
|
|
|
2
2
|
|
|
3
3
|
import fs from "fs";
|
|
4
4
|
import _path from "path";
|
|
5
|
-
import _url from "url";
|
|
6
5
|
import commander from "commander";
|
|
7
6
|
import { createRequire } from "module";
|
|
8
7
|
import pluralize from "pluralize";
|
|
@@ -119,6 +118,10 @@ commander
|
|
|
119
118
|
createParseNumber({ min: 1, max: 32, name: "threads" }),
|
|
120
119
|
1
|
|
121
120
|
)
|
|
121
|
+
.option(
|
|
122
|
+
"--filter-url-tracking",
|
|
123
|
+
"attempts to extract the direct download link of an episode if detected (experimental)"
|
|
124
|
+
)
|
|
122
125
|
.parse(process.argv);
|
|
123
126
|
|
|
124
127
|
const {
|
|
@@ -140,6 +143,7 @@ const {
|
|
|
140
143
|
exec,
|
|
141
144
|
mono,
|
|
142
145
|
threads,
|
|
146
|
+
filterUrlTracking,
|
|
143
147
|
addMp3Metadata: addMp3MetadataFlag,
|
|
144
148
|
adjustBitrate: bitrate,
|
|
145
149
|
} = commander;
|
|
@@ -151,7 +155,7 @@ const main = async () => {
|
|
|
151
155
|
logErrorAndExit("No URL provided");
|
|
152
156
|
}
|
|
153
157
|
|
|
154
|
-
const { hostname, pathname } =
|
|
158
|
+
const { hostname, pathname } = new URL(url);
|
|
155
159
|
const archiveUrl = `${hostname}${pathname}`;
|
|
156
160
|
const feed = await getFeed(url);
|
|
157
161
|
const basePath = _path.resolve(
|
|
@@ -286,8 +290,27 @@ const main = async () => {
|
|
|
286
290
|
override,
|
|
287
291
|
targetItems,
|
|
288
292
|
threads,
|
|
293
|
+
filterUrlTracking,
|
|
289
294
|
});
|
|
290
295
|
|
|
296
|
+
if (hasErrors && numEpisodesDownloaded !== targetItems.length) {
|
|
297
|
+
logMessage(
|
|
298
|
+
`\n${numEpisodesDownloaded} of ${pluralize(
|
|
299
|
+
"episode",
|
|
300
|
+
targetItems.length,
|
|
301
|
+
true
|
|
302
|
+
)} downloaded\n`
|
|
303
|
+
);
|
|
304
|
+
} else {
|
|
305
|
+
logMessage(
|
|
306
|
+
`\nSuccessfully downloaded ${pluralize(
|
|
307
|
+
"episode",
|
|
308
|
+
numEpisodesDownloaded,
|
|
309
|
+
true
|
|
310
|
+
)}\n`
|
|
311
|
+
);
|
|
312
|
+
}
|
|
313
|
+
|
|
291
314
|
if (numEpisodesDownloaded === 0) {
|
|
292
315
|
process.exit(ERROR_STATUSES.nothingDownloaded);
|
|
293
316
|
}
|
package/bin/util.js
CHANGED
|
@@ -1,8 +1,8 @@
|
|
|
1
|
-
import _url from "url";
|
|
2
1
|
import rssParser from "rss-parser";
|
|
3
2
|
import path from "path";
|
|
4
3
|
import fs from "fs";
|
|
5
4
|
import dayjs from "dayjs";
|
|
5
|
+
import got from "got";
|
|
6
6
|
import util from "util";
|
|
7
7
|
import { exec } from "child_process";
|
|
8
8
|
|
|
@@ -15,6 +15,10 @@ const parser = new rssParser({
|
|
|
15
15
|
defaultRSS: 2.0,
|
|
16
16
|
});
|
|
17
17
|
|
|
18
|
+
const getTempPath = (path) => {
|
|
19
|
+
return `${path}.tmp`;
|
|
20
|
+
};
|
|
21
|
+
|
|
18
22
|
const getArchiveKey = ({ prefix, name }) => {
|
|
19
23
|
return `${prefix}-${name}`;
|
|
20
24
|
};
|
|
@@ -45,6 +49,54 @@ const getIsInArchive = ({ key, archive }) => {
|
|
|
45
49
|
return archiveResult.includes(key);
|
|
46
50
|
};
|
|
47
51
|
|
|
52
|
+
const getPossibleUrlEmbeds = (url, maxAmount = 5) => {
|
|
53
|
+
const fullUrl = new URL(url);
|
|
54
|
+
const possibleStartIndexes = [];
|
|
55
|
+
|
|
56
|
+
for (let i = 0; i < fullUrl.pathname.length; i++) {
|
|
57
|
+
if (fullUrl.pathname[i] === "/") {
|
|
58
|
+
possibleStartIndexes.push(i);
|
|
59
|
+
}
|
|
60
|
+
}
|
|
61
|
+
|
|
62
|
+
const possibleEmbedChoices = possibleStartIndexes.map((startIndex) => {
|
|
63
|
+
let possibleEmbed = fullUrl.pathname.slice(startIndex + 1);
|
|
64
|
+
|
|
65
|
+
if (!possibleEmbed.startsWith("http")) {
|
|
66
|
+
possibleEmbed = `https://${possibleEmbed}`;
|
|
67
|
+
}
|
|
68
|
+
|
|
69
|
+
return decodeURIComponent(possibleEmbed);
|
|
70
|
+
});
|
|
71
|
+
|
|
72
|
+
return possibleEmbedChoices
|
|
73
|
+
.slice(Math.max(possibleEmbedChoices.length - maxAmount, 0))
|
|
74
|
+
.reverse();
|
|
75
|
+
};
|
|
76
|
+
|
|
77
|
+
const getUrlEmbed = async (url) => {
|
|
78
|
+
const possibleUrlEmbeds = getPossibleUrlEmbeds(url);
|
|
79
|
+
for (const possibleUrl of possibleUrlEmbeds) {
|
|
80
|
+
try {
|
|
81
|
+
const embeddedUrl = new URL(possibleUrl);
|
|
82
|
+
await got(embeddedUrl.href, {
|
|
83
|
+
timeout: 3000,
|
|
84
|
+
method: "HEAD",
|
|
85
|
+
responseType: "json",
|
|
86
|
+
headers: {
|
|
87
|
+
accept: "*/*",
|
|
88
|
+
},
|
|
89
|
+
});
|
|
90
|
+
|
|
91
|
+
return embeddedUrl;
|
|
92
|
+
} catch (error) {
|
|
93
|
+
// do nothing
|
|
94
|
+
}
|
|
95
|
+
}
|
|
96
|
+
|
|
97
|
+
return null;
|
|
98
|
+
};
|
|
99
|
+
|
|
48
100
|
const getLoopControls = ({ limit, offset, length, reverse }) => {
|
|
49
101
|
if (reverse) {
|
|
50
102
|
const startIndex = length - 1 - offset;
|
|
@@ -247,28 +299,18 @@ const writeFeedMeta = ({ outputPath, feed, key, archive, override }) => {
|
|
|
247
299
|
return;
|
|
248
300
|
}
|
|
249
301
|
|
|
250
|
-
const
|
|
251
|
-
|
|
252
|
-
|
|
253
|
-
|
|
254
|
-
|
|
302
|
+
const output = {};
|
|
303
|
+
["title", "description", "link", "feedUrl", "managingEditor"].forEach(
|
|
304
|
+
(key) => {
|
|
305
|
+
if (feed[key]) {
|
|
306
|
+
output[key] = feed[key];
|
|
307
|
+
}
|
|
308
|
+
}
|
|
309
|
+
);
|
|
255
310
|
|
|
256
311
|
try {
|
|
257
312
|
if (override || !fs.existsSync(outputPath)) {
|
|
258
|
-
fs.writeFileSync(
|
|
259
|
-
outputPath,
|
|
260
|
-
JSON.stringify(
|
|
261
|
-
{
|
|
262
|
-
title,
|
|
263
|
-
description,
|
|
264
|
-
link,
|
|
265
|
-
feedUrl,
|
|
266
|
-
managingEditor,
|
|
267
|
-
},
|
|
268
|
-
null,
|
|
269
|
-
4
|
|
270
|
-
)
|
|
271
|
-
);
|
|
313
|
+
fs.writeFileSync(outputPath, JSON.stringify(output, null, 4));
|
|
272
314
|
} else {
|
|
273
315
|
logMessage("Feed metadata exists locally. Skipping write...");
|
|
274
316
|
}
|
|
@@ -302,26 +344,16 @@ const writeItemMeta = ({
|
|
|
302
344
|
return;
|
|
303
345
|
}
|
|
304
346
|
|
|
305
|
-
const
|
|
306
|
-
|
|
307
|
-
|
|
308
|
-
|
|
347
|
+
const output = {};
|
|
348
|
+
["title", "contentSnippet", "pubDate", "creator"].forEach((key) => {
|
|
349
|
+
if (item[key]) {
|
|
350
|
+
output[key] = item[key];
|
|
351
|
+
}
|
|
352
|
+
});
|
|
309
353
|
|
|
310
354
|
try {
|
|
311
355
|
if (override || !fs.existsSync(outputPath)) {
|
|
312
|
-
fs.writeFileSync(
|
|
313
|
-
outputPath,
|
|
314
|
-
JSON.stringify(
|
|
315
|
-
{
|
|
316
|
-
title,
|
|
317
|
-
pubDate,
|
|
318
|
-
creator,
|
|
319
|
-
descriptionText,
|
|
320
|
-
},
|
|
321
|
-
null,
|
|
322
|
-
4
|
|
323
|
-
)
|
|
324
|
-
);
|
|
356
|
+
fs.writeFileSync(outputPath, JSON.stringify(output, null, 4));
|
|
325
357
|
} else {
|
|
326
358
|
logMessage(
|
|
327
359
|
`${marker} | Episode metadata exists locally. Skipping write...`
|
|
@@ -341,7 +373,7 @@ const writeItemMeta = ({
|
|
|
341
373
|
};
|
|
342
374
|
|
|
343
375
|
const getUrlExt = (url) => {
|
|
344
|
-
const { pathname } =
|
|
376
|
+
const { pathname } = new URL(url);
|
|
345
377
|
|
|
346
378
|
if (!pathname) {
|
|
347
379
|
return "";
|
|
@@ -408,7 +440,7 @@ const getImageUrl = ({ image, itunes }) => {
|
|
|
408
440
|
};
|
|
409
441
|
|
|
410
442
|
const getFeed = async (url) => {
|
|
411
|
-
const { href } =
|
|
443
|
+
const { href } = new URL(url);
|
|
412
444
|
|
|
413
445
|
let feed;
|
|
414
446
|
try {
|
|
@@ -521,7 +553,9 @@ export {
|
|
|
521
553
|
getFeed,
|
|
522
554
|
getImageUrl,
|
|
523
555
|
getItemsToDownload,
|
|
556
|
+
getTempPath,
|
|
524
557
|
getUrlExt,
|
|
558
|
+
getUrlEmbed,
|
|
525
559
|
logFeedInfo,
|
|
526
560
|
ITEM_LIST_FORMATS,
|
|
527
561
|
logItemsList,
|