podcast-dl 9.3.4 → 9.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +34 -31
- package/bin/async.js +3 -0
- package/bin/bin.js +24 -17
- package/bin/commander.js +44 -1
- package/bin/naming.js +10 -2
- package/bin/util.js +88 -11
- package/bin/validate.js +0 -1
- package/package.json +1 -1
package/README.md
CHANGED
|
@@ -24,37 +24,40 @@ Either `--url` or `--file` must be provided.
|
|
|
24
24
|
|
|
25
25
|
Type values surrounded in square brackets (`[]`) can be used as boolean options (no argument required).
|
|
26
26
|
|
|
27
|
-
| Option
|
|
28
|
-
|
|
|
29
|
-
| --url
|
|
30
|
-
| --file
|
|
31
|
-
| --out-dir
|
|
32
|
-
| --threads
|
|
33
|
-
| --attempts
|
|
34
|
-
| --archive
|
|
35
|
-
| --episode-template
|
|
36
|
-
| --include-meta
|
|
37
|
-
| --include-episode-meta
|
|
38
|
-
| --include-episode-images
|
|
39
|
-
| --
|
|
40
|
-
| --
|
|
41
|
-
| --
|
|
42
|
-
| --
|
|
43
|
-
| --
|
|
44
|
-
| --episode-
|
|
45
|
-
| --episode-
|
|
46
|
-
| --
|
|
47
|
-
| --
|
|
48
|
-
| --
|
|
49
|
-
| --
|
|
50
|
-
| --
|
|
51
|
-
| --
|
|
52
|
-
| --
|
|
53
|
-
| --
|
|
54
|
-
| --
|
|
55
|
-
| --
|
|
56
|
-
| --
|
|
57
|
-
| --
|
|
27
|
+
| Option | Type | Required | Description |
|
|
28
|
+
| ----------------------------- | ------------------- | -------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
|
|
29
|
+
| --url | String | true\* | URL to podcast RSS feed. |
|
|
30
|
+
| --file | String | true\* | Path to local RSS file. |
|
|
31
|
+
| --out-dir | String | false | Specify output directory for episodes and metadata. Defaults to "./{{podcast_title}}". See "Template Options" for more details. |
|
|
32
|
+
| --threads | Number | false | Determines the number of downloads that will happen concurrently. Default is 1. |
|
|
33
|
+
| --attempts | Number | false | Sets the number of download attempts per individual file. Default is 3. |
|
|
34
|
+
| --archive | [String] | false | Download or write out items not listed in archive file. Generates archive file at path if not found. Defaults to "./{{podcast_title}}/archive.json" when used as a boolean option. See "Template Options" for more details. |
|
|
35
|
+
| --episode-template | String | false | Template for generating episode related filenames. See "Template Options" for details. |
|
|
36
|
+
| --include-meta | | false | Write out podcast metadata to JSON. |
|
|
37
|
+
| --include-episode-meta | | false | Write out individual episode metadata **to** JSON. |
|
|
38
|
+
| --include-episode-images | | false | Download found episode images. |
|
|
39
|
+
| --include-episode-transcripts | | false | Download found episode transcripts. |
|
|
40
|
+
| --offset | Number | false | Offset starting download position. Default is 0. |
|
|
41
|
+
| --limit | Number | false | Max number of episodes to download. Downloads all by default. |
|
|
42
|
+
| --after | String | false | Only download episodes after this date (i.e. MM/DD/YYYY, inclusive). |
|
|
43
|
+
| --before | String | false | Only download episodes before this date (i.e. MM/DD/YYYY, inclusive). |
|
|
44
|
+
| --episode-regex | String | false | Match episode title against provided regex before starting download. |
|
|
45
|
+
| --episode-digits | Number | false | Minimum number of digits to use for episode numbering (e.g. 3 would generate "001" instead of "1"). Default is 0. |
|
|
46
|
+
| --episode-num-offset | Number | false | Offset the acquired episode number. Default is 0. |
|
|
47
|
+
| --episode-source-order | String | false | Attempted order to extract episode audio URL from RSS feed. Default is "enclosure,link". |
|
|
48
|
+
| --episode-transcript-types | String | false | List of allowed transcript types in preferred order. Default is "application/json,application/x-subrip,application/srr,application/srt,text/vtt,text/html,text/plain". |
|
|
49
|
+
| --add-mp3-metadata | | false | Attempts to add a base level of episode metadata to each episode. Recommended only in cases where the original metadata is of poor quality. (**ffmpeg required**) |
|
|
50
|
+
| --adjust-bitrate | String (e.g. "48k") | false | Attempts to adjust bitrate of episodes. (**ffmpeg required**) |
|
|
51
|
+
| --mono | | false | Attempts to force episodes into mono. (**ffmpeg required**) |
|
|
52
|
+
| --override | | false | Override local files on collision. |
|
|
53
|
+
| --always-postprocess | | false | Always run additional tasks on the file regardless if the file already exists. This includes --add-mp3-metadata, --adjust-bitrate, --mono, and --exec. |
|
|
54
|
+
| --reverse | | false | Reverse download direction and start at last RSS item. |
|
|
55
|
+
| --info | | false | Print retrieved podcast info instead of downloading. |
|
|
56
|
+
| --list | [String] | false | Print episode list instead of downloading. Defaults to "table" when used as a boolean option. "json" is also supported. |
|
|
57
|
+
| --exec | String | false | Execute a command after each episode is downloaded. See "Template Options" for more details. |
|
|
58
|
+
| --parser-config | String | false | Path to JSON file that will be parsed and used to override the default config passed to [rss-parser](https://github.com/rbren/rss-parser#xml-options). |
|
|
59
|
+
| --proxy | | false | Enable proxy support. Specify environment variables listed by [global-agent](https://github.com/gajus/global-agent#environment-variables). |
|
|
60
|
+
| --help | | false | Output usage information. |
|
|
58
61
|
|
|
59
62
|
## Archive
|
|
60
63
|
|
package/bin/async.js
CHANGED
|
@@ -165,6 +165,7 @@ const downloadItemsAsync = async ({
|
|
|
165
165
|
bitrate,
|
|
166
166
|
episodeTemplate,
|
|
167
167
|
episodeDigits,
|
|
168
|
+
episodeNumOffset,
|
|
168
169
|
episodeSourceOrder,
|
|
169
170
|
exec,
|
|
170
171
|
feed,
|
|
@@ -199,6 +200,7 @@ const downloadItemsAsync = async ({
|
|
|
199
200
|
ext: audioFileExt,
|
|
200
201
|
template: episodeTemplate,
|
|
201
202
|
width: episodeDigits,
|
|
203
|
+
offset: episodeNumOffset,
|
|
202
204
|
});
|
|
203
205
|
const outputPodcastPath = _path.resolve(basePath, episodeFilename);
|
|
204
206
|
|
|
@@ -282,6 +284,7 @@ const downloadItemsAsync = async ({
|
|
|
282
284
|
ext: episodeMetaExt,
|
|
283
285
|
template: episodeTemplate,
|
|
284
286
|
width: episodeDigits,
|
|
287
|
+
offset: episodeNumOffset,
|
|
285
288
|
});
|
|
286
289
|
const outputEpisodeMetaPath = _path.resolve(basePath, episodeMetaName);
|
|
287
290
|
|
package/bin/bin.js
CHANGED
|
@@ -32,31 +32,34 @@ import { downloadItemsAsync } from "./async.js";
|
|
|
32
32
|
setupCommander(commander, process.argv);
|
|
33
33
|
|
|
34
34
|
const {
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
35
|
+
after,
|
|
36
|
+
alwaysPostprocess,
|
|
37
|
+
attempts,
|
|
38
|
+
before,
|
|
39
39
|
episodeDigits,
|
|
40
|
+
episodeNumOffset,
|
|
41
|
+
episodeRegex,
|
|
40
42
|
episodeSourceOrder,
|
|
41
|
-
|
|
42
|
-
|
|
43
|
+
episodeTemplate,
|
|
44
|
+
episodeTranscriptTypes,
|
|
45
|
+
exec,
|
|
46
|
+
file,
|
|
43
47
|
includeEpisodeImages,
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
after,
|
|
48
|
-
before,
|
|
49
|
-
override,
|
|
50
|
-
alwaysPostprocess,
|
|
51
|
-
reverse,
|
|
48
|
+
includeEpisodeMeta,
|
|
49
|
+
includeEpisodeTranscripts,
|
|
50
|
+
includeMeta,
|
|
52
51
|
info,
|
|
52
|
+
limit,
|
|
53
53
|
list,
|
|
54
|
-
exec,
|
|
55
54
|
mono,
|
|
56
|
-
|
|
57
|
-
|
|
55
|
+
offset,
|
|
56
|
+
outDir,
|
|
57
|
+
override,
|
|
58
58
|
parserConfig,
|
|
59
59
|
proxy,
|
|
60
|
+
reverse,
|
|
61
|
+
threads,
|
|
62
|
+
url,
|
|
60
63
|
addMp3Metadata: addMp3MetadataFlag,
|
|
61
64
|
adjustBitrate: bitrate,
|
|
62
65
|
} = commander;
|
|
@@ -206,10 +209,13 @@ const main = async () => {
|
|
|
206
209
|
after,
|
|
207
210
|
before,
|
|
208
211
|
episodeDigits,
|
|
212
|
+
episodeNumOffset,
|
|
209
213
|
episodeRegex,
|
|
210
214
|
episodeSourceOrder,
|
|
211
215
|
episodeTemplate,
|
|
212
216
|
includeEpisodeImages,
|
|
217
|
+
includeEpisodeTranscripts,
|
|
218
|
+
episodeTranscriptTypes,
|
|
213
219
|
});
|
|
214
220
|
|
|
215
221
|
if (!targetItems.length) {
|
|
@@ -229,6 +235,7 @@ const main = async () => {
|
|
|
229
235
|
bitrate,
|
|
230
236
|
episodeTemplate,
|
|
231
237
|
episodeDigits,
|
|
238
|
+
episodeNumOffset,
|
|
232
239
|
episodeSourceOrder,
|
|
233
240
|
exec,
|
|
234
241
|
feed,
|
package/bin/commander.js
CHANGED
|
@@ -1,4 +1,8 @@
|
|
|
1
|
-
import {
|
|
1
|
+
import {
|
|
2
|
+
AUDIO_ORDER_TYPES,
|
|
3
|
+
ITEM_LIST_FORMATS,
|
|
4
|
+
TRANSCRIPT_TYPES,
|
|
5
|
+
} from "./util.js";
|
|
2
6
|
import { createParseNumber, hasFfmpeg } from "./validate.js";
|
|
3
7
|
import { logErrorAndExit } from "./logger.js";
|
|
4
8
|
|
|
@@ -26,6 +30,16 @@ export const setupCommander = (commander, argv) => {
|
|
|
26
30
|
createParseNumber({ min: 0, name: "--episode-digits" }),
|
|
27
31
|
1
|
|
28
32
|
)
|
|
33
|
+
.option(
|
|
34
|
+
"--episode-num-offset <number>",
|
|
35
|
+
"offset the acquired episode number",
|
|
36
|
+
createParseNumber({
|
|
37
|
+
min: Number.MIN_SAFE_INTEGER,
|
|
38
|
+
max: Number.MAX_SAFE_INTEGER,
|
|
39
|
+
name: "--episode-num-offset",
|
|
40
|
+
}),
|
|
41
|
+
0
|
|
42
|
+
)
|
|
29
43
|
.option(
|
|
30
44
|
"--episode-source-order <string>",
|
|
31
45
|
"attempted order to extract episode audio URL from rss feed",
|
|
@@ -48,6 +62,35 @@ export const setupCommander = (commander, argv) => {
|
|
|
48
62
|
"--include-episode-meta",
|
|
49
63
|
"write out individual episode metadata to json"
|
|
50
64
|
)
|
|
65
|
+
.option(
|
|
66
|
+
"--include-episode-transcripts",
|
|
67
|
+
"download found episode transcripts"
|
|
68
|
+
)
|
|
69
|
+
.option(
|
|
70
|
+
"--episode-transcript-types <string>",
|
|
71
|
+
"list of allowed transcript types in preferred order",
|
|
72
|
+
(value) => {
|
|
73
|
+
const parsed = value.split(",").map((type) => type.trim());
|
|
74
|
+
const isValid = parsed.every((type) => !!TRANSCRIPT_TYPES[type]);
|
|
75
|
+
|
|
76
|
+
if (!isValid) {
|
|
77
|
+
logErrorAndExit(
|
|
78
|
+
`Invalid type found in --transcript-types: ${value}\n`
|
|
79
|
+
);
|
|
80
|
+
}
|
|
81
|
+
|
|
82
|
+
return parsed;
|
|
83
|
+
},
|
|
84
|
+
[
|
|
85
|
+
TRANSCRIPT_TYPES["application/json"],
|
|
86
|
+
TRANSCRIPT_TYPES["application/x-subrip"],
|
|
87
|
+
TRANSCRIPT_TYPES["application/srr"],
|
|
88
|
+
TRANSCRIPT_TYPES["application/srt"],
|
|
89
|
+
TRANSCRIPT_TYPES["text/vtt"],
|
|
90
|
+
TRANSCRIPT_TYPES["text/html"],
|
|
91
|
+
TRANSCRIPT_TYPES["text/plain"],
|
|
92
|
+
]
|
|
93
|
+
)
|
|
51
94
|
.option("--include-episode-images", "download found episode images")
|
|
52
95
|
.option(
|
|
53
96
|
"--offset <number>",
|
package/bin/naming.js
CHANGED
|
@@ -18,8 +18,16 @@ const getSimpleFilename = (name, ext = "") => {
|
|
|
18
18
|
return `${getSafeName(name, MAX_LENGTH_FILENAME - (ext?.length ?? 0))}${ext}`;
|
|
19
19
|
};
|
|
20
20
|
|
|
21
|
-
const getItemFilename = ({
|
|
22
|
-
|
|
21
|
+
const getItemFilename = ({
|
|
22
|
+
item,
|
|
23
|
+
ext,
|
|
24
|
+
url,
|
|
25
|
+
feed,
|
|
26
|
+
template,
|
|
27
|
+
width,
|
|
28
|
+
offset = 0,
|
|
29
|
+
}) => {
|
|
30
|
+
const episodeNum = feed.items.length - item._originalIndex + offset;
|
|
23
31
|
const formattedPubDate = item.pubDate
|
|
24
32
|
? dayjs(new Date(item.pubDate)).format("YYYYMMDD")
|
|
25
33
|
: null;
|
package/bin/util.js
CHANGED
|
@@ -11,11 +11,18 @@ import { getArchiveFilename, getItemFilename } from "./naming.js";
|
|
|
11
11
|
const execWithPromise = util.promisify(exec);
|
|
12
12
|
const isWin = process.platform === "win32";
|
|
13
13
|
|
|
14
|
+
const defaultRssParserConfig = {
|
|
15
|
+
defaultRSS: 2.0,
|
|
16
|
+
customFields: {
|
|
17
|
+
item: [["podcast:transcript", "podcastTranscripts", { keepArray: true }]],
|
|
18
|
+
},
|
|
19
|
+
};
|
|
20
|
+
|
|
14
21
|
/*
|
|
15
22
|
Escape arguments for a shell command used with exec.
|
|
16
23
|
Borrowed from shell-escape: https://github.com/xxorax/node-shell-escape/
|
|
17
24
|
Additionally, @see https://www.robvanderwoude.com/escapechars.php for why
|
|
18
|
-
we avoid trying
|
|
25
|
+
we avoid trying to escape complex sequences in Windows.
|
|
19
26
|
*/
|
|
20
27
|
const escapeArgForShell = (arg) => {
|
|
21
28
|
let result = arg;
|
|
@@ -154,10 +161,13 @@ const getItemsToDownload = ({
|
|
|
154
161
|
before,
|
|
155
162
|
after,
|
|
156
163
|
episodeDigits,
|
|
164
|
+
episodeNumOffset,
|
|
157
165
|
episodeRegex,
|
|
158
166
|
episodeSourceOrder,
|
|
159
167
|
episodeTemplate,
|
|
160
168
|
includeEpisodeImages,
|
|
169
|
+
includeEpisodeTranscripts,
|
|
170
|
+
episodeTranscriptTypes,
|
|
161
171
|
}) => {
|
|
162
172
|
const { startIndex, shouldGo, next } = getLoopControls({
|
|
163
173
|
offset,
|
|
@@ -243,6 +253,7 @@ const getItemsToDownload = ({
|
|
|
243
253
|
ext: episodeImageFileExt,
|
|
244
254
|
template: episodeTemplate,
|
|
245
255
|
width: episodeDigits,
|
|
256
|
+
offset: episodeNumOffset,
|
|
246
257
|
});
|
|
247
258
|
|
|
248
259
|
const outputImagePath = path.resolve(basePath, episodeImageName);
|
|
@@ -254,6 +265,46 @@ const getItemsToDownload = ({
|
|
|
254
265
|
}
|
|
255
266
|
}
|
|
256
267
|
|
|
268
|
+
if (includeEpisodeTranscripts) {
|
|
269
|
+
const episodeTranscriptUrl = getTranscriptUrl(
|
|
270
|
+
item,
|
|
271
|
+
episodeTranscriptTypes
|
|
272
|
+
);
|
|
273
|
+
|
|
274
|
+
if (episodeTranscriptUrl) {
|
|
275
|
+
const episodeTranscriptFileExt = getUrlExt(episodeTranscriptUrl);
|
|
276
|
+
const episodeTranscriptArchiveKey = getArchiveKey({
|
|
277
|
+
prefix: archiveUrl,
|
|
278
|
+
name: getArchiveFilename({
|
|
279
|
+
pubDate,
|
|
280
|
+
name: title,
|
|
281
|
+
ext: episodeTranscriptFileExt,
|
|
282
|
+
}),
|
|
283
|
+
});
|
|
284
|
+
|
|
285
|
+
const episodeTranscriptName = getItemFilename({
|
|
286
|
+
item,
|
|
287
|
+
feed,
|
|
288
|
+
url: episodeAudioUrl,
|
|
289
|
+
ext: episodeTranscriptFileExt,
|
|
290
|
+
template: episodeTemplate,
|
|
291
|
+
width: episodeDigits,
|
|
292
|
+
offset: episodeNumOffset,
|
|
293
|
+
});
|
|
294
|
+
|
|
295
|
+
const outputTranscriptPath = path.resolve(
|
|
296
|
+
basePath,
|
|
297
|
+
episodeTranscriptName
|
|
298
|
+
);
|
|
299
|
+
|
|
300
|
+
item._extra_downloads.push({
|
|
301
|
+
url: episodeTranscriptUrl,
|
|
302
|
+
outputPath: outputTranscriptPath,
|
|
303
|
+
key: episodeTranscriptArchiveKey,
|
|
304
|
+
});
|
|
305
|
+
}
|
|
306
|
+
}
|
|
307
|
+
|
|
257
308
|
items.push(item);
|
|
258
309
|
}
|
|
259
310
|
|
|
@@ -472,12 +523,40 @@ const getImageUrl = ({ image, itunes }) => {
|
|
|
472
523
|
return null;
|
|
473
524
|
};
|
|
474
525
|
|
|
475
|
-
const
|
|
476
|
-
|
|
477
|
-
|
|
478
|
-
|
|
526
|
+
export const TRANSCRIPT_TYPES = {
|
|
527
|
+
"application/json": "application/json",
|
|
528
|
+
"application/srr": "application/srr",
|
|
529
|
+
"application/srt": "application/srt",
|
|
530
|
+
"application/x-subrip": "application/x-subrip",
|
|
531
|
+
"text/html": "text/html",
|
|
532
|
+
"text/plain": "text/plain",
|
|
533
|
+
"text/vtt": "text/vtt",
|
|
534
|
+
};
|
|
535
|
+
|
|
536
|
+
// @see https://github.com/Podcastindex-org/podcast-namespace/blob/main/docs/1.0.md#transcript
|
|
537
|
+
const getTranscriptUrl = (item, transcriptTypes = []) => {
|
|
538
|
+
if (!item.podcastTranscripts?.length) {
|
|
539
|
+
return null;
|
|
540
|
+
}
|
|
541
|
+
|
|
542
|
+
for (const transcriptType of transcriptTypes) {
|
|
543
|
+
const matchingTranscriptType = item.podcastTranscripts.find(
|
|
544
|
+
(transcript) =>
|
|
545
|
+
!!transcript?.["$"]?.url && transcript?.["$"]?.type === transcriptType
|
|
546
|
+
);
|
|
547
|
+
|
|
548
|
+
if (matchingTranscriptType) {
|
|
549
|
+
return matchingTranscriptType?.["$"]?.url;
|
|
550
|
+
}
|
|
551
|
+
}
|
|
552
|
+
|
|
553
|
+
return null;
|
|
554
|
+
};
|
|
479
555
|
|
|
480
|
-
|
|
556
|
+
const getFileFeed = async (filePath, parserConfig) => {
|
|
557
|
+
const config = parserConfig
|
|
558
|
+
? getJsonFile(parserConfig)
|
|
559
|
+
: defaultRssParserConfig;
|
|
481
560
|
const rssString = getFileString(filePath);
|
|
482
561
|
|
|
483
562
|
if (parserConfig && !config) {
|
|
@@ -497,11 +576,9 @@ const getFileFeed = async (filePath, parserConfig) => {
|
|
|
497
576
|
};
|
|
498
577
|
|
|
499
578
|
const getUrlFeed = async (url, parserConfig) => {
|
|
500
|
-
const
|
|
501
|
-
|
|
502
|
-
|
|
503
|
-
|
|
504
|
-
const config = parserConfig ? getJsonFile(parserConfig) : defaultConfig;
|
|
579
|
+
const config = parserConfig
|
|
580
|
+
? getJsonFile(parserConfig)
|
|
581
|
+
: defaultRssParserConfig;
|
|
505
582
|
|
|
506
583
|
if (parserConfig && !config) {
|
|
507
584
|
logErrorAndExit(`Unable to load parser config: ${parserConfig}`);
|
package/bin/validate.js
CHANGED