podcast-dl 9.4.0 → 10.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -24,38 +24,41 @@ Either `--url` or `--file` must be provided.
24
24
 
25
25
  Type values surrounded in square brackets (`[]`) can be used as boolean options (no argument required).
26
26
 
27
- | Option | Type | Required | Description |
28
- | ------------------------ | ------------------- | -------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
29
- | --url | String | true\* | URL to podcast RSS feed. |
30
- | --file | String | true\* | Path to local RSS file. |
31
- | --out-dir | String | false | Specify output directory for episodes and metadata. Defaults to "./{{podcast_title}}". See "Template Options" for more details. |
32
- | --threads | Number | false | Determines the number of downloads that will happen concurrently. Default is 1. |
33
- | --attempts | Number | false | Sets the number of download attempts per individual file. Default is 3. |
34
- | --archive | [String] | false | Download or write out items not listed in archive file. Generates archive file at path if not found. Defaults to "./{{podcast_title}}/archive.json" when used as a boolean option. See "Template Options" for more details. |
35
- | --episode-template | String | false | Template for generating episode related filenames. See "Template Options" for details. |
36
- | --include-meta | | false | Write out podcast metadata to JSON. |
37
- | --include-episode-meta | | false | Write out individual episode metadata to JSON. |
38
- | --include-episode-images | | false | Download found episode images. |
39
- | --offset | Number | false | Offset starting download position. Default is 0. |
40
- | --limit | Number | false | Max number of episodes to download. Downloads all by default. |
41
- | --after | String | false | Only download episodes after this date (i.e. MM/DD/YYY, inclusive). |
42
- | --before | String | false | Only download episodes before this date (i.e. MM/DD/YYY, inclusive) |
43
- | --episode-regex | String | false | Match episode title against provided regex before starting download. |
44
- | --episode-digits | Number | false | Minimum number of digits to use for episode numbering (e.g. 3 would generate "001" instead of "1"). Default is 0. |
45
- | --episode-num-offset | Number | false | Offset the acquired episode number. Default is 0. |
46
- | --episode-source-order | String | false | Attempted order to extract episode audio URL from RSS feed. Default is "enclosure,link". |
47
- | --add-mp3-metadata | | false | Attempts to add a base level of episode metadata to each episode. Recommended only in cases where the original metadata is of poor quality. (**ffmpeg required**) |
48
- | --adjust-bitrate | String (e.g. "48k") | false | Attempts to adjust bitrate of episodes. (**ffmpeg required**) |
49
- | --mono | | false | Attempts to force episodes into mono. (**ffmpeg required**) |
50
- | --override | | false | Override local files on collision. |
51
- | --always-postprocess | | false | Always run additional tasks on the file regardless if the file already exists. This includes --add-mp3-metadata, --adjust-bitrate, --mono, and --exec. |
52
- | --reverse | | false | Reverse download direction and start at last RSS item. |
53
- | --info | | false | Print retrieved podcast info instead of downloading. |
54
- | --list | [String] | false | Print episode list instead of downloading. Defaults to "table" when used as a boolean option. "json" is also supported. |
55
- | --exec | String | false | Execute a command after each episode is downloaded. See "Template Options" for more details. |
56
- | --parser-config | String | false | Path to JSON file that will be parsed and used to override the default config passed to [rss-parser](https://github.com/rbren/rss-parser#xml-options). |
57
- | --proxy | | false | Enable proxy support. Specify environment variables listed by [global-agent](https://github.com/gajus/global-agent#environment-variables). |
58
- | --help | | false | Output usage information. |
27
+ | Option | Type | Required | Description |
28
+ | --------------------------------- | ------------------- | -------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
29
+ | --url | String | true\* | URL to podcast RSS feed. |
30
+ | --file | String | true\* | Path to local RSS file. |
31
+ | --out-dir | String | false | Specify output directory for episodes and metadata. Defaults to "./{{podcast_title}}". See "Template Options" for more details. |
32
+ | --threads | Number | false | Determines the number of downloads that will happen concurrently. Default is 1. |
33
+ | --attempts | Number | false | Sets the number of download attempts per individual file. Default is 3. |
34
+ | --archive | [String] | false | Download or write out items not listed in archive file. Generates archive file at path if not found. Defaults to "./{{podcast_title}}/archive.json" when used as a boolean option. See "Template Options" for more details. |
35
+ | --episode-template | String | false | Template for generating episode related filenames. See "Template Options" for details. |
36
+ | --episode-custom-template-options | <String...> | false | Provide custom options for the episode template. See "Template Options" for details. |
37
+ | --include-meta | | false | Write out podcast metadata to JSON. |
38
+ | --include-episode-meta | | false | Write out individual episode metadata to JSON. |
39
+ | --include-episode-images | | false | Download found episode images. |
40
+ | --include-episode-transcripts | | false | Download found episode transcripts. |
41
+ | --offset | Number | false | Offset starting download position. Default is 0. |
42
+ | --limit | Number | false | Max number of episodes to download. Downloads all by default. |
43
+ | --after | String | false | Only download episodes after this date (i.e. MM/DD/YYYY, inclusive). |
44
+ | --before | String | false | Only download episodes before this date (i.e. MM/DD/YYYY, inclusive). |
45
+ | --episode-regex | String | false | Match episode title against provided regex before starting download. |
46
+ | --episode-digits | Number | false | Minimum number of digits to use for episode numbering (e.g. 3 would generate "001" instead of "1"). Default is 0. |
47
+ | --episode-num-offset | Number | false | Offset the acquired episode number. Default is 0. |
48
+ | --episode-source-order | String | false | Attempted order to extract episode audio URL from RSS feed. Default is "enclosure,link". |
49
+ | --episode-transcript-types | String | false | List of allowed transcript types in preferred order. Default is "application/json,application/x-subrip,application/srr,application/srt,text/vtt,text/html,text/plain". |
50
+ | --add-mp3-metadata | | false | Attempts to add a base level of episode metadata to each episode. Recommended only in cases where the original metadata is of poor quality. (**ffmpeg required**) |
51
+ | --adjust-bitrate | String (e.g. "48k") | false | Attempts to adjust bitrate of episodes. (**ffmpeg required**) |
52
+ | --mono | | false | Attempts to force episodes into mono. (**ffmpeg required**) |
53
+ | --override | | false | Override local files on collision. |
54
+ | --always-postprocess | | false | Always run additional tasks on the file regardless if the file already exists. This includes --add-mp3-metadata, --adjust-bitrate, --mono, and --exec. |
55
+ | --reverse | | false | Reverse download direction and start at last RSS item. |
56
+ | --info | | false | Print retrieved podcast info instead of downloading. |
57
+ | --list | [String] | false | Print episode list instead of downloading. Defaults to "table" when used as a boolean option. "json" is also supported. |
58
+ | --exec | String | false | Execute a command after each episode is downloaded. See "Template Options" for more details. |
59
+ | --parser-config | String | false | Path to JSON file that will be parsed and used to override the default config passed to [rss-parser](https://github.com/rbren/rss-parser#xml-options). |
60
+ | --proxy | | false | Enable proxy support. Specify environment variables listed by [global-agent](https://github.com/gajus/global-agent#environment-variables). |
61
+ | --help | | false | Output usage information. |
59
62
 
60
63
  ## Archive
61
64
 
@@ -86,6 +89,12 @@ Options that support templates allow users to specify a template for the generat
86
89
  - `podcast_link`: `link` value provided for the podcast feed. Typically the homepage URL.
87
90
  - `guid`: The GUID of the episode.
88
91
 
92
+ #### `--episode-custom-template-options`
93
+
94
+ Each matcher provided will be used to extract a value from the episode `title`. Access these values in the template using the `custom_<n>` keyword where `<n>` is the index of the matcher provided (starting from `0`).
95
+
96
+ If no match is found, the `custom_<n>` keyword will be replaced with an empty string.
97
+
89
98
  ### `--exec`
90
99
 
91
100
  - `episode_path`: The path to the downloaded episode.
package/bin/async.js CHANGED
@@ -70,7 +70,7 @@ const download = async (options) => {
70
70
  },
71
71
  });
72
72
  } catch (error) {
73
- // unable to retrive head response
73
+ // unable to retrieve head response
74
74
  }
75
75
 
76
76
  const tempOutputPath = getTempPath(outputPath);
@@ -164,6 +164,7 @@ const downloadItemsAsync = async ({
164
164
  basePath,
165
165
  bitrate,
166
166
  episodeTemplate,
167
+ episodeCustomTemplateOptions,
167
168
  episodeDigits,
168
169
  episodeNumOffset,
169
170
  episodeSourceOrder,
@@ -199,6 +200,7 @@ const downloadItemsAsync = async ({
199
200
  url: episodeAudioUrl,
200
201
  ext: audioFileExt,
201
202
  template: episodeTemplate,
203
+ customTemplateOptions: episodeCustomTemplateOptions,
202
204
  width: episodeDigits,
203
205
  offset: episodeNumOffset,
204
206
  });
@@ -283,6 +285,7 @@ const downloadItemsAsync = async ({
283
285
  url: episodeAudioUrl,
284
286
  ext: episodeMetaExt,
285
287
  template: episodeTemplate,
288
+ customTemplateOptions: episodeCustomTemplateOptions,
286
289
  width: episodeDigits,
287
290
  offset: episodeNumOffset,
288
291
  });
package/bin/bin.js CHANGED
@@ -2,7 +2,7 @@
2
2
 
3
3
  import fs from "fs";
4
4
  import _path from "path";
5
- import commander from "commander";
5
+ import { program } from "commander";
6
6
  import pluralize from "pluralize";
7
7
  import { bootstrap as bootstrapProxy } from "global-agent";
8
8
 
@@ -29,7 +29,7 @@ import {
29
29
  import { getFolderName, getSimpleFilename } from "./naming.js";
30
30
  import { downloadItemsAsync } from "./async.js";
31
31
 
32
- setupCommander(commander, process.argv);
32
+ const opts = setupCommander(program);
33
33
 
34
34
  const {
35
35
  after,
@@ -41,10 +41,13 @@ const {
41
41
  episodeRegex,
42
42
  episodeSourceOrder,
43
43
  episodeTemplate,
44
+ episodeCustomTemplateOptions,
45
+ episodeTranscriptTypes,
44
46
  exec,
45
47
  file,
46
48
  includeEpisodeImages,
47
49
  includeEpisodeMeta,
50
+ includeEpisodeTranscripts,
48
51
  includeMeta,
49
52
  info,
50
53
  limit,
@@ -60,9 +63,9 @@ const {
60
63
  url,
61
64
  addMp3Metadata: addMp3MetadataFlag,
62
65
  adjustBitrate: bitrate,
63
- } = commander;
66
+ } = opts;
64
67
 
65
- let { archive } = commander;
68
+ let { archive } = opts;
66
69
 
67
70
  const main = async () => {
68
71
  if (!url && !file) {
@@ -211,7 +214,10 @@ const main = async () => {
211
214
  episodeRegex,
212
215
  episodeSourceOrder,
213
216
  episodeTemplate,
217
+ episodeCustomTemplateOptions,
214
218
  includeEpisodeImages,
219
+ includeEpisodeTranscripts,
220
+ episodeTranscriptTypes,
215
221
  });
216
222
 
217
223
  if (!targetItems.length) {
@@ -230,6 +236,7 @@ const main = async () => {
230
236
  basePath,
231
237
  bitrate,
232
238
  episodeTemplate,
239
+ episodeCustomTemplateOptions,
233
240
  episodeDigits,
234
241
  episodeNumOffset,
235
242
  episodeSourceOrder,
package/bin/commander.js CHANGED
@@ -1,9 +1,13 @@
1
- import { AUDIO_ORDER_TYPES, ITEM_LIST_FORMATS } from "./util.js";
1
+ import {
2
+ AUDIO_ORDER_TYPES,
3
+ ITEM_LIST_FORMATS,
4
+ TRANSCRIPT_TYPES,
5
+ } from "./util.js";
2
6
  import { createParseNumber, hasFfmpeg } from "./validate.js";
3
7
  import { logErrorAndExit } from "./logger.js";
4
8
 
5
- export const setupCommander = (commander, argv) => {
6
- commander
9
+ export const setupCommander = (program) => {
10
+ program
7
11
  .option("--url <string>", "url to podcast rss feed")
8
12
  .option("--file <path>", "local path to podcast rss feed")
9
13
  .option(
@@ -20,6 +24,10 @@ export const setupCommander = (commander, argv) => {
20
24
  "template for generating episode related filenames",
21
25
  "{{release_date}}-{{title}}"
22
26
  )
27
+ .option(
28
+ "--episode-custom-template-options <patterns...>",
29
+ "create custom options for the episode template"
30
+ )
23
31
  .option(
24
32
  "--episode-digits <number>",
25
33
  "minimum number of digits to use for episode numbering (leading zeros)",
@@ -58,6 +66,35 @@ export const setupCommander = (commander, argv) => {
58
66
  "--include-episode-meta",
59
67
  "write out individual episode metadata to json"
60
68
  )
69
+ .option(
70
+ "--include-episode-transcripts",
71
+ "download found episode transcripts"
72
+ )
73
+ .option(
74
+ "--episode-transcript-types <string>",
75
+ "list of allowed transcript types in preferred order",
76
+ (value) => {
77
+ const parsed = value.split(",").map((type) => type.trim());
78
+ const isValid = parsed.every((type) => !!TRANSCRIPT_TYPES[type]);
79
+
80
+ if (!isValid) {
81
+ logErrorAndExit(
82
+ `Invalid type found in --transcript-types: ${value}\n`
83
+ );
84
+ }
85
+
86
+ return parsed;
87
+ },
88
+ [
89
+ TRANSCRIPT_TYPES["application/json"],
90
+ TRANSCRIPT_TYPES["application/x-subrip"],
91
+ TRANSCRIPT_TYPES["application/srr"],
92
+ TRANSCRIPT_TYPES["application/srt"],
93
+ TRANSCRIPT_TYPES["text/vtt"],
94
+ TRANSCRIPT_TYPES["text/html"],
95
+ TRANSCRIPT_TYPES["text/plain"],
96
+ ]
97
+ )
61
98
  .option("--include-episode-images", "download found episode images")
62
99
  .option(
63
100
  "--offset <number>",
@@ -147,6 +184,9 @@ export const setupCommander = (commander, argv) => {
147
184
  "--parser-config <string>",
148
185
  "path to JSON config to override RSS parser"
149
186
  )
150
- .option("--proxy", "enable proxy support via global-agent")
151
- .parse(argv);
187
+ .option("--proxy", "enable proxy support via global-agent");
188
+
189
+ program.parse();
190
+
191
+ return program.opts();
152
192
  };
package/bin/naming.js CHANGED
@@ -25,15 +25,24 @@ const getItemFilename = ({
25
25
  feed,
26
26
  template,
27
27
  width,
28
+ customTemplateOptions = [],
28
29
  offset = 0,
29
30
  }) => {
30
31
  const episodeNum = feed.items.length - item._originalIndex + offset;
32
+ const title = item.title || "";
31
33
  const formattedPubDate = item.pubDate
32
34
  ? dayjs(new Date(item.pubDate)).format("YYYYMMDD")
33
35
  : null;
34
36
 
37
+ const customReplacementTuples = customTemplateOptions.map((option, i) => {
38
+ const matchRegex = new RegExp(option);
39
+ const match = title.match(matchRegex);
40
+
41
+ return match && match[0] ? [`custom_${i}`, match[0]] : [`custom_${i}`, ""];
42
+ });
43
+
35
44
  const templateReplacementsTuples = [
36
- ["title", item.title || ""],
45
+ ["title", title],
37
46
  ["release_date", formattedPubDate || ""],
38
47
  ["episode_num", `${episodeNum}`.padStart(width, "0")],
39
48
  ["url", url],
@@ -41,6 +50,7 @@ const getItemFilename = ({
41
50
  ["podcast_link", feed.link || ""],
42
51
  ["duration", item.itunes?.duration || ""],
43
52
  ["guid", item.guid],
53
+ ...customReplacementTuples,
44
54
  ];
45
55
 
46
56
  const templateSegments = template.trim().split(path.sep);
package/bin/util.js CHANGED
@@ -11,6 +11,13 @@ import { getArchiveFilename, getItemFilename } from "./naming.js";
11
11
  const execWithPromise = util.promisify(exec);
12
12
  const isWin = process.platform === "win32";
13
13
 
14
+ const defaultRssParserConfig = {
15
+ defaultRSS: 2.0,
16
+ customFields: {
17
+ item: [["podcast:transcript", "podcastTranscripts", { keepArray: true }]],
18
+ },
19
+ };
20
+
14
21
  /*
15
22
  Escape arguments for a shell command used with exec.
16
23
  Borrowed from shell-escape: https://github.com/xxorax/node-shell-escape/
@@ -158,7 +165,10 @@ const getItemsToDownload = ({
158
165
  episodeRegex,
159
166
  episodeSourceOrder,
160
167
  episodeTemplate,
168
+ episodeCustomTemplateOptions,
161
169
  includeEpisodeImages,
170
+ includeEpisodeTranscripts,
171
+ episodeTranscriptTypes,
162
172
  }) => {
163
173
  const { startIndex, shouldGo, next } = getLoopControls({
164
174
  offset,
@@ -243,6 +253,7 @@ const getItemsToDownload = ({
243
253
  url: episodeAudioUrl,
244
254
  ext: episodeImageFileExt,
245
255
  template: episodeTemplate,
256
+ customTemplateOptions: episodeCustomTemplateOptions,
246
257
  width: episodeDigits,
247
258
  offset: episodeNumOffset,
248
259
  });
@@ -256,6 +267,46 @@ const getItemsToDownload = ({
256
267
  }
257
268
  }
258
269
 
270
+ if (includeEpisodeTranscripts) {
271
+ const episodeTranscriptUrl = getTranscriptUrl(
272
+ item,
273
+ episodeTranscriptTypes
274
+ );
275
+
276
+ if (episodeTranscriptUrl) {
277
+ const episodeTranscriptFileExt = getUrlExt(episodeTranscriptUrl);
278
+ const episodeTranscriptArchiveKey = getArchiveKey({
279
+ prefix: archiveUrl,
280
+ name: getArchiveFilename({
281
+ pubDate,
282
+ name: title,
283
+ ext: episodeTranscriptFileExt,
284
+ }),
285
+ });
286
+
287
+ const episodeTranscriptName = getItemFilename({
288
+ item,
289
+ feed,
290
+ url: episodeAudioUrl,
291
+ ext: episodeTranscriptFileExt,
292
+ template: episodeTemplate,
293
+ width: episodeDigits,
294
+ offset: episodeNumOffset,
295
+ });
296
+
297
+ const outputTranscriptPath = path.resolve(
298
+ basePath,
299
+ episodeTranscriptName
300
+ );
301
+
302
+ item._extra_downloads.push({
303
+ url: episodeTranscriptUrl,
304
+ outputPath: outputTranscriptPath,
305
+ key: episodeTranscriptArchiveKey,
306
+ });
307
+ }
308
+ }
309
+
259
310
  items.push(item);
260
311
  }
261
312
 
@@ -474,12 +525,40 @@ const getImageUrl = ({ image, itunes }) => {
474
525
  return null;
475
526
  };
476
527
 
477
- const getFileFeed = async (filePath, parserConfig) => {
478
- const defaultConfig = {
479
- defaultRSS: 2.0,
480
- };
528
+ export const TRANSCRIPT_TYPES = {
529
+ "application/json": "application/json",
530
+ "application/srr": "application/srr",
531
+ "application/srt": "application/srt",
532
+ "application/x-subrip": "application/x-subrip",
533
+ "text/html": "text/html",
534
+ "text/plain": "text/plain",
535
+ "text/vtt": "text/vtt",
536
+ };
537
+
538
+ // @see https://github.com/Podcastindex-org/podcast-namespace/blob/main/docs/1.0.md#transcript
539
+ const getTranscriptUrl = (item, transcriptTypes = []) => {
540
+ if (!item.podcastTranscripts?.length) {
541
+ return null;
542
+ }
543
+
544
+ for (const transcriptType of transcriptTypes) {
545
+ const matchingTranscriptType = item.podcastTranscripts.find(
546
+ (transcript) =>
547
+ !!transcript?.["$"]?.url && transcript?.["$"]?.type === transcriptType
548
+ );
549
+
550
+ if (matchingTranscriptType) {
551
+ return matchingTranscriptType?.["$"]?.url;
552
+ }
553
+ }
554
+
555
+ return null;
556
+ };
481
557
 
482
- const config = parserConfig ? getJsonFile(parserConfig) : defaultConfig;
558
+ const getFileFeed = async (filePath, parserConfig) => {
559
+ const config = parserConfig
560
+ ? getJsonFile(parserConfig)
561
+ : defaultRssParserConfig;
483
562
  const rssString = getFileString(filePath);
484
563
 
485
564
  if (parserConfig && !config) {
@@ -499,11 +578,9 @@ const getFileFeed = async (filePath, parserConfig) => {
499
578
  };
500
579
 
501
580
  const getUrlFeed = async (url, parserConfig) => {
502
- const defaultConfig = {
503
- defaultRSS: 2.0,
504
- };
505
-
506
- const config = parserConfig ? getJsonFile(parserConfig) : defaultConfig;
581
+ const config = parserConfig
582
+ ? getJsonFile(parserConfig)
583
+ : defaultRssParserConfig;
507
584
 
508
585
  if (parserConfig && !config) {
509
586
  logErrorAndExit(`Unable to load parser config: ${parserConfig}`);
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "podcast-dl",
3
- "version": "9.4.0",
3
+ "version": "10.0.0",
4
4
  "description": "A CLI for downloading podcasts.",
5
5
  "type": "module",
6
6
  "bin": "./bin/bin.js",
@@ -26,7 +26,7 @@
26
26
  "cli"
27
27
  ],
28
28
  "engines": {
29
- "node": ">=14.17.6"
29
+ "node": ">=18.17.0"
30
30
  },
31
31
  "repository": {
32
32
  "type": "git",
@@ -49,7 +49,7 @@
49
49
  },
50
50
  "dependencies": {
51
51
  "command-exists": "^1.2.9",
52
- "commander": "^5.1.0",
52
+ "commander": "^12.1.0",
53
53
  "dayjs": "^1.8.25",
54
54
  "filenamify": "^6.0.0",
55
55
  "global-agent": "^3.0.0",