podcast-dl 9.4.0 → 9.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -24,38 +24,40 @@ Either `--url` or `--file` must be provided.
24
24
 
25
25
  Type values surrounded in square brackets (`[]`) can be used as boolean options (no argument required).
26
26
 
27
- | Option | Type | Required | Description |
28
- | ------------------------ | ------------------- | -------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
29
- | --url | String | true\* | URL to podcast RSS feed. |
30
- | --file | String | true\* | Path to local RSS file. |
31
- | --out-dir | String | false | Specify output directory for episodes and metadata. Defaults to "./{{podcast_title}}". See "Template Options" for more details. |
32
- | --threads | Number | false | Determines the number of downloads that will happen concurrently. Default is 1. |
33
- | --attempts | Number | false | Sets the number of download attempts per individual file. Default is 3. |
34
- | --archive | [String] | false | Download or write out items not listed in archive file. Generates archive file at path if not found. Defaults to "./{{podcast_title}}/archive.json" when used as a boolean option. See "Template Options" for more details. |
35
- | --episode-template | String | false | Template for generating episode related filenames. See "Template Options" for details. |
36
- | --include-meta | | false | Write out podcast metadata to JSON. |
37
- | --include-episode-meta | | false | Write out individual episode metadata to JSON. |
38
- | --include-episode-images | | false | Download found episode images. |
39
- | --offset | Number | false | Offset starting download position. Default is 0. |
40
- | --limit | Number | false | Max number of episodes to download. Downloads all by default. |
41
- | --after | String | false | Only download episodes after this date (i.e. MM/DD/YYY, inclusive). |
42
- | --before | String | false | Only download episodes before this date (i.e. MM/DD/YYY, inclusive) |
43
- | --episode-regex | String | false | Match episode title against provided regex before starting download. |
44
- | --episode-digits | Number | false | Minimum number of digits to use for episode numbering (e.g. 3 would generate "001" instead of "1"). Default is 0. |
45
- | --episode-num-offset | Number | false | Offset the acquired episode number. Default is 0. |
46
- | --episode-source-order | String | false | Attempted order to extract episode audio URL from RSS feed. Default is "enclosure,link". |
47
- | --add-mp3-metadata | | false | Attempts to add a base level of episode metadata to each episode. Recommended only in cases where the original metadata is of poor quality. (**ffmpeg required**) |
48
- | --adjust-bitrate | String (e.g. "48k") | false | Attempts to adjust bitrate of episodes. (**ffmpeg required**) |
49
- | --mono | | false | Attempts to force episodes into mono. (**ffmpeg required**) |
50
- | --override | | false | Override local files on collision. |
51
- | --always-postprocess | | false | Always run additional tasks on the file regardless if the file already exists. This includes --add-mp3-metadata, --adjust-bitrate, --mono, and --exec. |
52
- | --reverse | | false | Reverse download direction and start at last RSS item. |
53
- | --info | | false | Print retrieved podcast info instead of downloading. |
54
- | --list | [String] | false | Print episode list instead of downloading. Defaults to "table" when used as a boolean option. "json" is also supported. |
55
- | --exec | String | false | Execute a command after each episode is downloaded. See "Template Options" for more details. |
56
- | --parser-config | String | false | Path to JSON file that will be parsed and used to override the default config passed to [rss-parser](https://github.com/rbren/rss-parser#xml-options). |
57
- | --proxy | | false | Enable proxy support. Specify environment variables listed by [global-agent](https://github.com/gajus/global-agent#environment-variables). |
58
- | --help | | false | Output usage information. |
27
+ | Option | Type | Required | Description |
28
+ | ----------------------------- | ------------------- | -------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
29
+ | --url | String | true\* | URL to podcast RSS feed. |
30
+ | --file | String | true\* | Path to local RSS file. |
31
+ | --out-dir | String | false | Specify output directory for episodes and metadata. Defaults to "./{{podcast_title}}". See "Template Options" for more details. |
32
+ | --threads | Number | false | Determines the number of downloads that will happen concurrently. Default is 1. |
33
+ | --attempts | Number | false | Sets the number of download attempts per individual file. Default is 3. |
34
+ | --archive | [String] | false | Download or write out items not listed in archive file. Generates archive file at path if not found. Defaults to "./{{podcast_title}}/archive.json" when used as a boolean option. See "Template Options" for more details. |
35
+ | --episode-template | String | false | Template for generating episode related filenames. See "Template Options" for details. |
36
+ | --include-meta | | false | Write out podcast metadata to JSON. |
37
+ | --include-episode-meta | | false | Write out individual episode metadata to JSON. |
38
+ | --include-episode-images | | false | Download found episode images. |
39
+ | --include-episode-transcripts | | false | Download found episode transcripts. |
40
+ | --offset | Number | false | Offset starting download position. Default is 0. |
41
+ | --limit | Number | false | Max number of episodes to download. Downloads all by default. |
42
+ | --after | String | false | Only download episodes after this date (i.e. MM/DD/YYYY, inclusive). |
43
+ | --before | String | false | Only download episodes before this date (i.e. MM/DD/YYYY, inclusive). |
44
+ | --episode-regex | String | false | Match episode title against provided regex before starting download. |
45
+ | --episode-digits | Number | false | Minimum number of digits to use for episode numbering (e.g. 3 would generate "001" instead of "1"). Default is 0. |
46
+ | --episode-num-offset | Number | false | Offset the acquired episode number. Default is 0. |
47
+ | --episode-source-order | String | false | Attempted order to extract episode audio URL from RSS feed. Default is "enclosure,link". |
48
+ | --episode-transcript-types | String | false | List of allowed transcript types in preferred order. Default is "application/json,application/x-subrip,application/srr,application/srt,text/vtt,text/html,text/plain". |
49
+ | --add-mp3-metadata | | false | Attempts to add a base level of episode metadata to each episode. Recommended only in cases where the original metadata is of poor quality. (**ffmpeg required**) |
50
+ | --adjust-bitrate | String (e.g. "48k") | false | Attempts to adjust bitrate of episodes. (**ffmpeg required**) |
51
+ | --mono | | false | Attempts to force episodes into mono. (**ffmpeg required**) |
52
+ | --override | | false | Override local files on collision. |
53
+ | --always-postprocess | | false | Always run additional tasks on the file regardless if the file already exists. This includes --add-mp3-metadata, --adjust-bitrate, --mono, and --exec. |
54
+ | --reverse | | false | Reverse download direction and start at last RSS item. |
55
+ | --info | | false | Print retrieved podcast info instead of downloading. |
56
+ | --list | [String] | false | Print episode list instead of downloading. Defaults to "table" when used as a boolean option. "json" is also supported. |
57
+ | --exec | String | false | Execute a command after each episode is downloaded. See "Template Options" for more details. |
58
+ | --parser-config | String | false | Path to JSON file that will be parsed and used to override the default config passed to [rss-parser](https://github.com/rbren/rss-parser#xml-options). |
59
+ | --proxy | | false | Enable proxy support. Specify environment variables listed by [global-agent](https://github.com/gajus/global-agent#environment-variables). |
60
+ | --help | | false | Output usage information. |
59
61
 
60
62
  ## Archive
61
63
 
package/bin/bin.js CHANGED
@@ -41,10 +41,12 @@ const {
41
41
  episodeRegex,
42
42
  episodeSourceOrder,
43
43
  episodeTemplate,
44
+ episodeTranscriptTypes,
44
45
  exec,
45
46
  file,
46
47
  includeEpisodeImages,
47
48
  includeEpisodeMeta,
49
+ includeEpisodeTranscripts,
48
50
  includeMeta,
49
51
  info,
50
52
  limit,
@@ -212,6 +214,8 @@ const main = async () => {
212
214
  episodeSourceOrder,
213
215
  episodeTemplate,
214
216
  includeEpisodeImages,
217
+ includeEpisodeTranscripts,
218
+ episodeTranscriptTypes,
215
219
  });
216
220
 
217
221
  if (!targetItems.length) {
package/bin/commander.js CHANGED
@@ -1,4 +1,8 @@
1
- import { AUDIO_ORDER_TYPES, ITEM_LIST_FORMATS } from "./util.js";
1
+ import {
2
+ AUDIO_ORDER_TYPES,
3
+ ITEM_LIST_FORMATS,
4
+ TRANSCRIPT_TYPES,
5
+ } from "./util.js";
2
6
  import { createParseNumber, hasFfmpeg } from "./validate.js";
3
7
  import { logErrorAndExit } from "./logger.js";
4
8
 
@@ -58,6 +62,35 @@ export const setupCommander = (commander, argv) => {
58
62
  "--include-episode-meta",
59
63
  "write out individual episode metadata to json"
60
64
  )
65
+ .option(
66
+ "--include-episode-transcripts",
67
+ "download found episode transcripts"
68
+ )
69
+ .option(
70
+ "--episode-transcript-types <string>",
71
+ "list of allowed transcript types in preferred order",
72
+ (value) => {
73
+ const parsed = value.split(",").map((type) => type.trim());
74
+ const isValid = parsed.every((type) => !!TRANSCRIPT_TYPES[type]);
75
+
76
+ if (!isValid) {
77
+ logErrorAndExit(
78
+ `Invalid type found in --transcript-types: ${value}\n`
79
+ );
80
+ }
81
+
82
+ return parsed;
83
+ },
84
+ [
85
+ TRANSCRIPT_TYPES["application/json"],
86
+ TRANSCRIPT_TYPES["application/x-subrip"],
87
+ TRANSCRIPT_TYPES["application/srr"],
88
+ TRANSCRIPT_TYPES["application/srt"],
89
+ TRANSCRIPT_TYPES["text/vtt"],
90
+ TRANSCRIPT_TYPES["text/html"],
91
+ TRANSCRIPT_TYPES["text/plain"],
92
+ ]
93
+ )
61
94
  .option("--include-episode-images", "download found episode images")
62
95
  .option(
63
96
  "--offset <number>",
package/bin/util.js CHANGED
@@ -11,6 +11,13 @@ import { getArchiveFilename, getItemFilename } from "./naming.js";
11
11
  const execWithPromise = util.promisify(exec);
12
12
  const isWin = process.platform === "win32";
13
13
 
14
+ const defaultRssParserConfig = {
15
+ defaultRSS: 2.0,
16
+ customFields: {
17
+ item: [["podcast:transcript", "podcastTranscripts", { keepArray: true }]],
18
+ },
19
+ };
20
+
14
21
  /*
15
22
  Escape arguments for a shell command used with exec.
16
23
  Borrowed from shell-escape: https://github.com/xxorax/node-shell-escape/
@@ -159,6 +166,8 @@ const getItemsToDownload = ({
159
166
  episodeSourceOrder,
160
167
  episodeTemplate,
161
168
  includeEpisodeImages,
169
+ includeEpisodeTranscripts,
170
+ episodeTranscriptTypes,
162
171
  }) => {
163
172
  const { startIndex, shouldGo, next } = getLoopControls({
164
173
  offset,
@@ -256,6 +265,46 @@ const getItemsToDownload = ({
256
265
  }
257
266
  }
258
267
 
268
+ if (includeEpisodeTranscripts) {
269
+ const episodeTranscriptUrl = getTranscriptUrl(
270
+ item,
271
+ episodeTranscriptTypes
272
+ );
273
+
274
+ if (episodeTranscriptUrl) {
275
+ const episodeTranscriptFileExt = getUrlExt(episodeTranscriptUrl);
276
+ const episodeTranscriptArchiveKey = getArchiveKey({
277
+ prefix: archiveUrl,
278
+ name: getArchiveFilename({
279
+ pubDate,
280
+ name: title,
281
+ ext: episodeTranscriptFileExt,
282
+ }),
283
+ });
284
+
285
+ const episodeTranscriptName = getItemFilename({
286
+ item,
287
+ feed,
288
+ url: episodeAudioUrl,
289
+ ext: episodeTranscriptFileExt,
290
+ template: episodeTemplate,
291
+ width: episodeDigits,
292
+ offset: episodeNumOffset,
293
+ });
294
+
295
+ const outputTranscriptPath = path.resolve(
296
+ basePath,
297
+ episodeTranscriptName
298
+ );
299
+
300
+ item._extra_downloads.push({
301
+ url: episodeTranscriptUrl,
302
+ outputPath: outputTranscriptPath,
303
+ key: episodeTranscriptArchiveKey,
304
+ });
305
+ }
306
+ }
307
+
259
308
  items.push(item);
260
309
  }
261
310
 
@@ -474,12 +523,40 @@ const getImageUrl = ({ image, itunes }) => {
474
523
  return null;
475
524
  };
476
525
 
477
- const getFileFeed = async (filePath, parserConfig) => {
478
- const defaultConfig = {
479
- defaultRSS: 2.0,
480
- };
526
+ export const TRANSCRIPT_TYPES = {
527
+ "application/json": "application/json",
528
+ "application/srr": "application/srr",
529
+ "application/srt": "application/srt",
530
+ "application/x-subrip": "application/x-subrip",
531
+ "text/html": "text/html",
532
+ "text/plain": "text/plain",
533
+ "text/vtt": "text/vtt",
534
+ };
535
+
536
+ // @see https://github.com/Podcastindex-org/podcast-namespace/blob/main/docs/1.0.md#transcript
537
+ const getTranscriptUrl = (item, transcriptTypes = []) => {
538
+ if (!item.podcastTranscripts?.length) {
539
+ return null;
540
+ }
541
+
542
+ for (const transcriptType of transcriptTypes) {
543
+ const matchingTranscriptType = item.podcastTranscripts.find(
544
+ (transcript) =>
545
+ !!transcript?.["$"]?.url && transcript?.["$"]?.type === transcriptType
546
+ );
547
+
548
+ if (matchingTranscriptType) {
549
+ return matchingTranscriptType?.["$"]?.url;
550
+ }
551
+ }
552
+
553
+ return null;
554
+ };
481
555
 
482
- const config = parserConfig ? getJsonFile(parserConfig) : defaultConfig;
556
+ const getFileFeed = async (filePath, parserConfig) => {
557
+ const config = parserConfig
558
+ ? getJsonFile(parserConfig)
559
+ : defaultRssParserConfig;
483
560
  const rssString = getFileString(filePath);
484
561
 
485
562
  if (parserConfig && !config) {
@@ -499,11 +576,9 @@ const getFileFeed = async (filePath, parserConfig) => {
499
576
  };
500
577
 
501
578
  const getUrlFeed = async (url, parserConfig) => {
502
- const defaultConfig = {
503
- defaultRSS: 2.0,
504
- };
505
-
506
- const config = parserConfig ? getJsonFile(parserConfig) : defaultConfig;
579
+ const config = parserConfig
580
+ ? getJsonFile(parserConfig)
581
+ : defaultRssParserConfig;
507
582
 
508
583
  if (parserConfig && !config) {
509
584
  logErrorAndExit(`Unable to load parser config: ${parserConfig}`);
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "podcast-dl",
3
- "version": "9.4.0",
3
+ "version": "9.5.0",
4
4
  "description": "A CLI for downloading podcasts.",
5
5
  "type": "module",
6
6
  "bin": "./bin/bin.js",