podcast-dl 9.4.0 → 9.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +34 -32
- package/bin/bin.js +4 -0
- package/bin/commander.js +34 -1
- package/bin/util.js +85 -10
- package/package.json +1 -1
package/README.md
CHANGED
|
@@ -24,38 +24,40 @@ Either `--url` or `--file` must be provided.
|
|
|
24
24
|
|
|
25
25
|
Type values surrounded in square brackets (`[]`) can be used as boolean options (no argument required).
|
|
26
26
|
|
|
27
|
-
| Option
|
|
28
|
-
|
|
|
29
|
-
| --url
|
|
30
|
-
| --file
|
|
31
|
-
| --out-dir
|
|
32
|
-
| --threads
|
|
33
|
-
| --attempts
|
|
34
|
-
| --archive
|
|
35
|
-
| --episode-template
|
|
36
|
-
| --include-meta
|
|
37
|
-
| --include-episode-meta
|
|
38
|
-
| --include-episode-images
|
|
39
|
-
| --
|
|
40
|
-
| --
|
|
41
|
-
| --
|
|
42
|
-
| --
|
|
43
|
-
| --
|
|
44
|
-
| --episode-
|
|
45
|
-
| --episode-
|
|
46
|
-
| --episode-
|
|
47
|
-
| --
|
|
48
|
-
| --
|
|
49
|
-
| --
|
|
50
|
-
| --
|
|
51
|
-
| --
|
|
52
|
-
| --
|
|
53
|
-
| --
|
|
54
|
-
| --
|
|
55
|
-
| --
|
|
56
|
-
| --
|
|
57
|
-
| --
|
|
58
|
-
| --
|
|
27
|
+
| Option | Type | Required | Description |
|
|
28
|
+
| ----------------------------- | ------------------- | -------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
|
|
29
|
+
| --url | String | true\* | URL to podcast RSS feed. |
|
|
30
|
+
| --file | String | true\* | Path to local RSS file. |
|
|
31
|
+
| --out-dir | String | false | Specify output directory for episodes and metadata. Defaults to "./{{podcast_title}}". See "Template Options" for more details. |
|
|
32
|
+
| --threads | Number | false | Determines the number of downloads that will happen concurrently. Default is 1. |
|
|
33
|
+
| --attempts | Number | false | Sets the number of download attempts per individual file. Default is 3. |
|
|
34
|
+
| --archive | [String] | false | Download or write out items not listed in archive file. Generates archive file at path if not found. Defaults to "./{{podcast_title}}/archive.json" when used as a boolean option. See "Template Options" for more details. |
|
|
35
|
+
| --episode-template | String | false | Template for generating episode related filenames. See "Template Options" for details. |
|
|
36
|
+
| --include-meta | | false | Write out podcast metadata to JSON. |
|
|
37
|
+
| --include-episode-meta | | false | Write out individual episode metadata to JSON. |
|
|
38
|
+
| --include-episode-images | | false | Download found episode images. |
|
|
39
|
+
| --include-episode-transcripts | | false | Download found episode transcripts. |
|
|
40
|
+
| --offset | Number | false | Offset starting download position. Default is 0. |
|
|
41
|
+
| --limit | Number | false | Max number of episodes to download. Downloads all by default. |
|
|
42
|
+
| --after | String | false | Only download episodes after this date (i.e. MM/DD/YYYY, inclusive). |
|
|
43
|
+
| --before | String | false | Only download episodes before this date (i.e. MM/DD/YYYY, inclusive). |
|
|
44
|
+
| --episode-regex | String | false | Match episode title against provided regex before starting download. |
|
|
45
|
+
| --episode-digits | Number | false | Minimum number of digits to use for episode numbering (e.g. 3 would generate "001" instead of "1"). Default is 0. |
|
|
46
|
+
| --episode-num-offset | Number | false | Offset the acquired episode number. Default is 0. |
|
|
47
|
+
| --episode-source-order | String | false | Attempted order to extract episode audio URL from RSS feed. Default is "enclosure,link". |
|
|
48
|
+
| --episode-transcript-types | String | false | List of allowed transcript types in preferred order. Default is "application/json,application/x-subrip,application/srr,application/srt,text/vtt,text/html,text/plain". |
|
|
49
|
+
| --add-mp3-metadata | | false | Attempts to add a base level of episode metadata to each episode. Recommended only in cases where the original metadata is of poor quality. (**ffmpeg required**) |
|
|
50
|
+
| --adjust-bitrate | String (e.g. "48k") | false | Attempts to adjust bitrate of episodes. (**ffmpeg required**) |
|
|
51
|
+
| --mono | | false | Attempts to force episodes into mono. (**ffmpeg required**) |
|
|
52
|
+
| --override | | false | Override local files on collision. |
|
|
53
|
+
| --always-postprocess | | false | Always run additional tasks on the file regardless if the file already exists. This includes --add-mp3-metadata, --adjust-bitrate, --mono, and --exec. |
|
|
54
|
+
| --reverse | | false | Reverse download direction and start at last RSS item. |
|
|
55
|
+
| --info | | false | Print retrieved podcast info instead of downloading. |
|
|
56
|
+
| --list | [String] | false | Print episode list instead of downloading. Defaults to "table" when used as a boolean option. "json" is also supported. |
|
|
57
|
+
| --exec | String | false | Execute a command after each episode is downloaded. See "Template Options" for more details. |
|
|
58
|
+
| --parser-config | String | false | Path to JSON file that will be parsed and used to override the default config passed to [rss-parser](https://github.com/rbren/rss-parser#xml-options). |
|
|
59
|
+
| --proxy | | false | Enable proxy support. Specify environment variables listed by [global-agent](https://github.com/gajus/global-agent#environment-variables). |
|
|
60
|
+
| --help | | false | Output usage information. |
|
|
59
61
|
|
|
60
62
|
## Archive
|
|
61
63
|
|
package/bin/bin.js
CHANGED
|
@@ -41,10 +41,12 @@ const {
|
|
|
41
41
|
episodeRegex,
|
|
42
42
|
episodeSourceOrder,
|
|
43
43
|
episodeTemplate,
|
|
44
|
+
episodeTranscriptTypes,
|
|
44
45
|
exec,
|
|
45
46
|
file,
|
|
46
47
|
includeEpisodeImages,
|
|
47
48
|
includeEpisodeMeta,
|
|
49
|
+
includeEpisodeTranscripts,
|
|
48
50
|
includeMeta,
|
|
49
51
|
info,
|
|
50
52
|
limit,
|
|
@@ -212,6 +214,8 @@ const main = async () => {
|
|
|
212
214
|
episodeSourceOrder,
|
|
213
215
|
episodeTemplate,
|
|
214
216
|
includeEpisodeImages,
|
|
217
|
+
includeEpisodeTranscripts,
|
|
218
|
+
episodeTranscriptTypes,
|
|
215
219
|
});
|
|
216
220
|
|
|
217
221
|
if (!targetItems.length) {
|
package/bin/commander.js
CHANGED
|
@@ -1,4 +1,8 @@
|
|
|
1
|
-
import {
|
|
1
|
+
import {
|
|
2
|
+
AUDIO_ORDER_TYPES,
|
|
3
|
+
ITEM_LIST_FORMATS,
|
|
4
|
+
TRANSCRIPT_TYPES,
|
|
5
|
+
} from "./util.js";
|
|
2
6
|
import { createParseNumber, hasFfmpeg } from "./validate.js";
|
|
3
7
|
import { logErrorAndExit } from "./logger.js";
|
|
4
8
|
|
|
@@ -58,6 +62,35 @@ export const setupCommander = (commander, argv) => {
|
|
|
58
62
|
"--include-episode-meta",
|
|
59
63
|
"write out individual episode metadata to json"
|
|
60
64
|
)
|
|
65
|
+
.option(
|
|
66
|
+
"--include-episode-transcripts",
|
|
67
|
+
"download found episode transcripts"
|
|
68
|
+
)
|
|
69
|
+
.option(
|
|
70
|
+
"--episode-transcript-types <string>",
|
|
71
|
+
"list of allowed transcript types in preferred order",
|
|
72
|
+
(value) => {
|
|
73
|
+
const parsed = value.split(",").map((type) => type.trim());
|
|
74
|
+
const isValid = parsed.every((type) => !!TRANSCRIPT_TYPES[type]);
|
|
75
|
+
|
|
76
|
+
if (!isValid) {
|
|
77
|
+
logErrorAndExit(
|
|
78
|
+
`Invalid type found in --transcript-types: ${value}\n`
|
|
79
|
+
);
|
|
80
|
+
}
|
|
81
|
+
|
|
82
|
+
return parsed;
|
|
83
|
+
},
|
|
84
|
+
[
|
|
85
|
+
TRANSCRIPT_TYPES["application/json"],
|
|
86
|
+
TRANSCRIPT_TYPES["application/x-subrip"],
|
|
87
|
+
TRANSCRIPT_TYPES["application/srr"],
|
|
88
|
+
TRANSCRIPT_TYPES["application/srt"],
|
|
89
|
+
TRANSCRIPT_TYPES["text/vtt"],
|
|
90
|
+
TRANSCRIPT_TYPES["text/html"],
|
|
91
|
+
TRANSCRIPT_TYPES["text/plain"],
|
|
92
|
+
]
|
|
93
|
+
)
|
|
61
94
|
.option("--include-episode-images", "download found episode images")
|
|
62
95
|
.option(
|
|
63
96
|
"--offset <number>",
|
package/bin/util.js
CHANGED
|
@@ -11,6 +11,13 @@ import { getArchiveFilename, getItemFilename } from "./naming.js";
|
|
|
11
11
|
const execWithPromise = util.promisify(exec);
|
|
12
12
|
const isWin = process.platform === "win32";
|
|
13
13
|
|
|
14
|
+
const defaultRssParserConfig = {
|
|
15
|
+
defaultRSS: 2.0,
|
|
16
|
+
customFields: {
|
|
17
|
+
item: [["podcast:transcript", "podcastTranscripts", { keepArray: true }]],
|
|
18
|
+
},
|
|
19
|
+
};
|
|
20
|
+
|
|
14
21
|
/*
|
|
15
22
|
Escape arguments for a shell command used with exec.
|
|
16
23
|
Borrowed from shell-escape: https://github.com/xxorax/node-shell-escape/
|
|
@@ -159,6 +166,8 @@ const getItemsToDownload = ({
|
|
|
159
166
|
episodeSourceOrder,
|
|
160
167
|
episodeTemplate,
|
|
161
168
|
includeEpisodeImages,
|
|
169
|
+
includeEpisodeTranscripts,
|
|
170
|
+
episodeTranscriptTypes,
|
|
162
171
|
}) => {
|
|
163
172
|
const { startIndex, shouldGo, next } = getLoopControls({
|
|
164
173
|
offset,
|
|
@@ -256,6 +265,46 @@ const getItemsToDownload = ({
|
|
|
256
265
|
}
|
|
257
266
|
}
|
|
258
267
|
|
|
268
|
+
if (includeEpisodeTranscripts) {
|
|
269
|
+
const episodeTranscriptUrl = getTranscriptUrl(
|
|
270
|
+
item,
|
|
271
|
+
episodeTranscriptTypes
|
|
272
|
+
);
|
|
273
|
+
|
|
274
|
+
if (episodeTranscriptUrl) {
|
|
275
|
+
const episodeTranscriptFileExt = getUrlExt(episodeTranscriptUrl);
|
|
276
|
+
const episodeTranscriptArchiveKey = getArchiveKey({
|
|
277
|
+
prefix: archiveUrl,
|
|
278
|
+
name: getArchiveFilename({
|
|
279
|
+
pubDate,
|
|
280
|
+
name: title,
|
|
281
|
+
ext: episodeTranscriptFileExt,
|
|
282
|
+
}),
|
|
283
|
+
});
|
|
284
|
+
|
|
285
|
+
const episodeTranscriptName = getItemFilename({
|
|
286
|
+
item,
|
|
287
|
+
feed,
|
|
288
|
+
url: episodeAudioUrl,
|
|
289
|
+
ext: episodeTranscriptFileExt,
|
|
290
|
+
template: episodeTemplate,
|
|
291
|
+
width: episodeDigits,
|
|
292
|
+
offset: episodeNumOffset,
|
|
293
|
+
});
|
|
294
|
+
|
|
295
|
+
const outputTranscriptPath = path.resolve(
|
|
296
|
+
basePath,
|
|
297
|
+
episodeTranscriptName
|
|
298
|
+
);
|
|
299
|
+
|
|
300
|
+
item._extra_downloads.push({
|
|
301
|
+
url: episodeTranscriptUrl,
|
|
302
|
+
outputPath: outputTranscriptPath,
|
|
303
|
+
key: episodeTranscriptArchiveKey,
|
|
304
|
+
});
|
|
305
|
+
}
|
|
306
|
+
}
|
|
307
|
+
|
|
259
308
|
items.push(item);
|
|
260
309
|
}
|
|
261
310
|
|
|
@@ -474,12 +523,40 @@ const getImageUrl = ({ image, itunes }) => {
|
|
|
474
523
|
return null;
|
|
475
524
|
};
|
|
476
525
|
|
|
477
|
-
const
|
|
478
|
-
|
|
479
|
-
|
|
480
|
-
|
|
526
|
+
export const TRANSCRIPT_TYPES = {
|
|
527
|
+
"application/json": "application/json",
|
|
528
|
+
"application/srr": "application/srr",
|
|
529
|
+
"application/srt": "application/srt",
|
|
530
|
+
"application/x-subrip": "application/x-subrip",
|
|
531
|
+
"text/html": "text/html",
|
|
532
|
+
"text/plain": "text/plain",
|
|
533
|
+
"text/vtt": "text/vtt",
|
|
534
|
+
};
|
|
535
|
+
|
|
536
|
+
// @see https://github.com/Podcastindex-org/podcast-namespace/blob/main/docs/1.0.md#transcript
|
|
537
|
+
const getTranscriptUrl = (item, transcriptTypes = []) => {
|
|
538
|
+
if (!item.podcastTranscripts?.length) {
|
|
539
|
+
return null;
|
|
540
|
+
}
|
|
541
|
+
|
|
542
|
+
for (const transcriptType of transcriptTypes) {
|
|
543
|
+
const matchingTranscriptType = item.podcastTranscripts.find(
|
|
544
|
+
(transcript) =>
|
|
545
|
+
!!transcript?.["$"]?.url && transcript?.["$"]?.type === transcriptType
|
|
546
|
+
);
|
|
547
|
+
|
|
548
|
+
if (matchingTranscriptType) {
|
|
549
|
+
return matchingTranscriptType?.["$"]?.url;
|
|
550
|
+
}
|
|
551
|
+
}
|
|
552
|
+
|
|
553
|
+
return null;
|
|
554
|
+
};
|
|
481
555
|
|
|
482
|
-
|
|
556
|
+
const getFileFeed = async (filePath, parserConfig) => {
|
|
557
|
+
const config = parserConfig
|
|
558
|
+
? getJsonFile(parserConfig)
|
|
559
|
+
: defaultRssParserConfig;
|
|
483
560
|
const rssString = getFileString(filePath);
|
|
484
561
|
|
|
485
562
|
if (parserConfig && !config) {
|
|
@@ -499,11 +576,9 @@ const getFileFeed = async (filePath, parserConfig) => {
|
|
|
499
576
|
};
|
|
500
577
|
|
|
501
578
|
const getUrlFeed = async (url, parserConfig) => {
|
|
502
|
-
const
|
|
503
|
-
|
|
504
|
-
|
|
505
|
-
|
|
506
|
-
const config = parserConfig ? getJsonFile(parserConfig) : defaultConfig;
|
|
579
|
+
const config = parserConfig
|
|
580
|
+
? getJsonFile(parserConfig)
|
|
581
|
+
: defaultRssParserConfig;
|
|
507
582
|
|
|
508
583
|
if (parserConfig && !config) {
|
|
509
584
|
logErrorAndExit(`Unable to load parser config: ${parserConfig}`);
|