podcast-dl 9.3.4 → 9.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -24,37 +24,40 @@ Either `--url` or `--file` must be provided.
24
24
 
25
25
  Type values surrounded in square brackets (`[]`) can be used as boolean options (no argument required).
26
26
 
27
- | Option | Type | Required | Description |
28
- | ------------------------ | ------------------- | -------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
29
- | --url | String | true\* | URL to podcast RSS feed. |
30
- | --file | String | true\* | Path to local RSS file. |
31
- | --out-dir | String | false | Specify output directory for episodes and metadata. Defaults to "./{{podcast_title}}". See "Template Options" for more details. |
32
- | --threads | Number | false | Determines the number of downloads that will happen concurrently. Default is 1. |
33
- | --attempts | Number | false | Sets the number of download attempts per individual file. Default is 3. |
34
- | --archive | [String] | false | Download or write out items not listed in archive file. Generates archive file at path if not found. Defaults to "./{{podcast_title}}/archive.json" when used as a boolean option. See "Template Options" for more details. |
35
- | --episode-template | String | false | Template for generating episode related filenames. See "Template Options" for details. |
36
- | --include-meta | | false | Write out podcast metadata to JSON. |
37
- | --include-episode-meta | | false | Write out individual episode metadata to JSON. |
38
- | --include-episode-images | | false | Download found episode images. |
39
- | --offset | Number | false | Offset starting download position. Default is 0. |
40
- | --limit | Number | false | Max number of episodes to download. Downloads all by default. |
41
- | --after | String | false | Only download episodes after this date (i.e. MM/DD/YYY, inclusive). |
42
- | --before | String | false | Only download episodes before this date (i.e. MM/DD/YYY, inclusive) |
43
- | --episode-regex | String | false | Match episode title against provided regex before starting download. |
44
- | --episode-digits | Number | false | Minimum number of digits to use for episode numbering (e.g. 3 would generate "001" instead of "1"). Default is 0. |
45
- | --episode-source-order | String | false | Attempted order to extract episode audio URL from RSS feed. Default is "enclosure,link". |
46
- | --add-mp3-metadata | | false | Attempts to add a base level of episode metadata to each episode. Recommended only in cases where the original metadata is of poor quality. (**ffmpeg required**) |
47
- | --adjust-bitrate | String (e.g. "48k") | false | Attempts to adjust bitrate of episodes. (**ffmpeg required**) |
48
- | --mono | | false | Attempts to force episodes into mono. (**ffmpeg required**) |
49
- | --override | | false | Override local files on collision. |
50
- | --always-postprocess | | false | Always run additional tasks on the file regardless if the file already exists. This includes --add-mp3-metadata, --adjust-bitrate, --mono, and --exec. |
51
- | --reverse | | false | Reverse download direction and start at last RSS item. |
52
- | --info | | false | Print retrieved podcast info instead of downloading. |
53
- | --list | [String] | false | Print episode list instead of downloading. Defaults to "table" when used as a boolean option. "json" is also supported. |
54
- | --exec | String | false | Execute a command after each episode is downloaded. See "Template Options" for more details. |
55
- | --parser-config | String | false | Path to JSON file that will be parsed and used to override the default config passed to [rss-parser](https://github.com/rbren/rss-parser#xml-options). |
56
- | --proxy | | false | Enable proxy support. Specify environment variables listed by [global-agent](https://github.com/gajus/global-agent#environment-variables). |
57
- | --help | | false | Output usage information. |
27
+ | Option | Type | Required | Description |
28
+ | ----------------------------- | ------------------- | -------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
29
+ | --url | String | true\* | URL to podcast RSS feed. |
30
+ | --file | String | true\* | Path to local RSS file. |
31
+ | --out-dir | String | false | Specify output directory for episodes and metadata. Defaults to "./{{podcast_title}}". See "Template Options" for more details. |
32
+ | --threads | Number | false | Determines the number of downloads that will happen concurrently. Default is 1. |
33
+ | --attempts | Number | false | Sets the number of download attempts per individual file. Default is 3. |
34
+ | --archive | [String] | false | Download or write out items not listed in archive file. Generates archive file at path if not found. Defaults to "./{{podcast_title}}/archive.json" when used as a boolean option. See "Template Options" for more details. |
35
+ | --episode-template | String | false | Template for generating episode related filenames. See "Template Options" for details. |
36
+ | --include-meta | | false | Write out podcast metadata to JSON. |
37
+ | --include-episode-meta | | false | Write out individual episode metadata **to** JSON. |
38
+ | --include-episode-images | | false | Download found episode images. |
39
+ | --include-episode-transcripts | | false | Download found episode transcripts. |
40
+ | --offset | Number | false | Offset starting download position. Default is 0. |
41
+ | --limit | Number | false | Max number of episodes to download. Downloads all by default. |
42
+ | --after | String | false | Only download episodes after this date (i.e. MM/DD/YYYY, inclusive). |
43
+ | --before | String | false | Only download episodes before this date (i.e. MM/DD/YYYY, inclusive). |
44
+ | --episode-regex | String | false | Match episode title against provided regex before starting download. |
45
+ | --episode-digits | Number | false | Minimum number of digits to use for episode numbering (e.g. 3 would generate "001" instead of "1"). Default is 0. |
46
+ | --episode-num-offset | Number | false | Offset the acquired episode number. Default is 0. |
47
+ | --episode-source-order | String | false | Attempted order to extract episode audio URL from RSS feed. Default is "enclosure,link". |
48
+ | --episode-transcript-types | String | false | List of allowed transcript types in preferred order. Default is "application/json,application/x-subrip,application/srr,application/srt,text/vtt,text/html,text/plain". |
49
+ | --add-mp3-metadata | | false | Attempts to add a base level of episode metadata to each episode. Recommended only in cases where the original metadata is of poor quality. (**ffmpeg required**) |
50
+ | --adjust-bitrate | String (e.g. "48k") | false | Attempts to adjust bitrate of episodes. (**ffmpeg required**) |
51
+ | --mono | | false | Attempts to force episodes into mono. (**ffmpeg required**) |
52
+ | --override | | false | Override local files on collision. |
53
+ | --always-postprocess | | false | Always run additional tasks on the file regardless if the file already exists. This includes --add-mp3-metadata, --adjust-bitrate, --mono, and --exec. |
54
+ | --reverse | | false | Reverse download direction and start at last RSS item. |
55
+ | --info | | false | Print retrieved podcast info instead of downloading. |
56
+ | --list | [String] | false | Print episode list instead of downloading. Defaults to "table" when used as a boolean option. "json" is also supported. |
57
+ | --exec | String | false | Execute a command after each episode is downloaded. See "Template Options" for more details. |
58
+ | --parser-config | String | false | Path to JSON file that will be parsed and used to override the default config passed to [rss-parser](https://github.com/rbren/rss-parser#xml-options). |
59
+ | --proxy | | false | Enable proxy support. Specify environment variables listed by [global-agent](https://github.com/gajus/global-agent#environment-variables). |
60
+ | --help | | false | Output usage information. |
58
61
 
59
62
  ## Archive
60
63
 
package/bin/async.js CHANGED
@@ -165,6 +165,7 @@ const downloadItemsAsync = async ({
165
165
  bitrate,
166
166
  episodeTemplate,
167
167
  episodeDigits,
168
+ episodeNumOffset,
168
169
  episodeSourceOrder,
169
170
  exec,
170
171
  feed,
@@ -199,6 +200,7 @@ const downloadItemsAsync = async ({
199
200
  ext: audioFileExt,
200
201
  template: episodeTemplate,
201
202
  width: episodeDigits,
203
+ offset: episodeNumOffset,
202
204
  });
203
205
  const outputPodcastPath = _path.resolve(basePath, episodeFilename);
204
206
 
@@ -282,6 +284,7 @@ const downloadItemsAsync = async ({
282
284
  ext: episodeMetaExt,
283
285
  template: episodeTemplate,
284
286
  width: episodeDigits,
287
+ offset: episodeNumOffset,
285
288
  });
286
289
  const outputEpisodeMetaPath = _path.resolve(basePath, episodeMetaName);
287
290
 
package/bin/bin.js CHANGED
@@ -32,31 +32,34 @@ import { downloadItemsAsync } from "./async.js";
32
32
  setupCommander(commander, process.argv);
33
33
 
34
34
  const {
35
- file,
36
- url,
37
- outDir,
38
- episodeTemplate,
35
+ after,
36
+ alwaysPostprocess,
37
+ attempts,
38
+ before,
39
39
  episodeDigits,
40
+ episodeNumOffset,
41
+ episodeRegex,
40
42
  episodeSourceOrder,
41
- includeMeta,
42
- includeEpisodeMeta,
43
+ episodeTemplate,
44
+ episodeTranscriptTypes,
45
+ exec,
46
+ file,
43
47
  includeEpisodeImages,
44
- offset,
45
- limit,
46
- episodeRegex,
47
- after,
48
- before,
49
- override,
50
- alwaysPostprocess,
51
- reverse,
48
+ includeEpisodeMeta,
49
+ includeEpisodeTranscripts,
50
+ includeMeta,
52
51
  info,
52
+ limit,
53
53
  list,
54
- exec,
55
54
  mono,
56
- threads,
57
- attempts,
55
+ offset,
56
+ outDir,
57
+ override,
58
58
  parserConfig,
59
59
  proxy,
60
+ reverse,
61
+ threads,
62
+ url,
60
63
  addMp3Metadata: addMp3MetadataFlag,
61
64
  adjustBitrate: bitrate,
62
65
  } = commander;
@@ -206,10 +209,13 @@ const main = async () => {
206
209
  after,
207
210
  before,
208
211
  episodeDigits,
212
+ episodeNumOffset,
209
213
  episodeRegex,
210
214
  episodeSourceOrder,
211
215
  episodeTemplate,
212
216
  includeEpisodeImages,
217
+ includeEpisodeTranscripts,
218
+ episodeTranscriptTypes,
213
219
  });
214
220
 
215
221
  if (!targetItems.length) {
@@ -229,6 +235,7 @@ const main = async () => {
229
235
  bitrate,
230
236
  episodeTemplate,
231
237
  episodeDigits,
238
+ episodeNumOffset,
232
239
  episodeSourceOrder,
233
240
  exec,
234
241
  feed,
package/bin/commander.js CHANGED
@@ -1,4 +1,8 @@
1
- import { AUDIO_ORDER_TYPES, ITEM_LIST_FORMATS } from "./util.js";
1
+ import {
2
+ AUDIO_ORDER_TYPES,
3
+ ITEM_LIST_FORMATS,
4
+ TRANSCRIPT_TYPES,
5
+ } from "./util.js";
2
6
  import { createParseNumber, hasFfmpeg } from "./validate.js";
3
7
  import { logErrorAndExit } from "./logger.js";
4
8
 
@@ -26,6 +30,16 @@ export const setupCommander = (commander, argv) => {
26
30
  createParseNumber({ min: 0, name: "--episode-digits" }),
27
31
  1
28
32
  )
33
+ .option(
34
+ "--episode-num-offset <number>",
35
+ "offset the acquired episode number",
36
+ createParseNumber({
37
+ min: Number.MIN_SAFE_INTEGER,
38
+ max: Number.MAX_SAFE_INTEGER,
39
+ name: "--episode-num-offset",
40
+ }),
41
+ 0
42
+ )
29
43
  .option(
30
44
  "--episode-source-order <string>",
31
45
  "attempted order to extract episode audio URL from rss feed",
@@ -48,6 +62,35 @@ export const setupCommander = (commander, argv) => {
48
62
  "--include-episode-meta",
49
63
  "write out individual episode metadata to json"
50
64
  )
65
+ .option(
66
+ "--include-episode-transcripts",
67
+ "download found episode transcripts"
68
+ )
69
+ .option(
70
+ "--episode-transcript-types <string>",
71
+ "list of allowed transcript types in preferred order",
72
+ (value) => {
73
+ const parsed = value.split(",").map((type) => type.trim());
74
+ const isValid = parsed.every((type) => !!TRANSCRIPT_TYPES[type]);
75
+
76
+ if (!isValid) {
77
+ logErrorAndExit(
78
+ `Invalid type found in --transcript-types: ${value}\n`
79
+ );
80
+ }
81
+
82
+ return parsed;
83
+ },
84
+ [
85
+ TRANSCRIPT_TYPES["application/json"],
86
+ TRANSCRIPT_TYPES["application/x-subrip"],
87
+ TRANSCRIPT_TYPES["application/srr"],
88
+ TRANSCRIPT_TYPES["application/srt"],
89
+ TRANSCRIPT_TYPES["text/vtt"],
90
+ TRANSCRIPT_TYPES["text/html"],
91
+ TRANSCRIPT_TYPES["text/plain"],
92
+ ]
93
+ )
51
94
  .option("--include-episode-images", "download found episode images")
52
95
  .option(
53
96
  "--offset <number>",
package/bin/naming.js CHANGED
@@ -18,8 +18,16 @@ const getSimpleFilename = (name, ext = "") => {
18
18
  return `${getSafeName(name, MAX_LENGTH_FILENAME - (ext?.length ?? 0))}${ext}`;
19
19
  };
20
20
 
21
- const getItemFilename = ({ item, ext, url, feed, template, width }) => {
22
- const episodeNum = feed.items.length - item._originalIndex;
21
+ const getItemFilename = ({
22
+ item,
23
+ ext,
24
+ url,
25
+ feed,
26
+ template,
27
+ width,
28
+ offset = 0,
29
+ }) => {
30
+ const episodeNum = feed.items.length - item._originalIndex + offset;
23
31
  const formattedPubDate = item.pubDate
24
32
  ? dayjs(new Date(item.pubDate)).format("YYYYMMDD")
25
33
  : null;
package/bin/util.js CHANGED
@@ -11,11 +11,18 @@ import { getArchiveFilename, getItemFilename } from "./naming.js";
11
11
  const execWithPromise = util.promisify(exec);
12
12
  const isWin = process.platform === "win32";
13
13
 
14
+ const defaultRssParserConfig = {
15
+ defaultRSS: 2.0,
16
+ customFields: {
17
+ item: [["podcast:transcript", "podcastTranscripts", { keepArray: true }]],
18
+ },
19
+ };
20
+
14
21
  /*
15
22
  Escape arguments for a shell command used with exec.
16
23
  Borrowed from shell-escape: https://github.com/xxorax/node-shell-escape/
17
24
  Additionally, @see https://www.robvanderwoude.com/escapechars.php for why
18
- we avoid trying tp escape complex sequences in Windows.
25
+ we avoid trying to escape complex sequences in Windows.
19
26
  */
20
27
  const escapeArgForShell = (arg) => {
21
28
  let result = arg;
@@ -154,10 +161,13 @@ const getItemsToDownload = ({
154
161
  before,
155
162
  after,
156
163
  episodeDigits,
164
+ episodeNumOffset,
157
165
  episodeRegex,
158
166
  episodeSourceOrder,
159
167
  episodeTemplate,
160
168
  includeEpisodeImages,
169
+ includeEpisodeTranscripts,
170
+ episodeTranscriptTypes,
161
171
  }) => {
162
172
  const { startIndex, shouldGo, next } = getLoopControls({
163
173
  offset,
@@ -243,6 +253,7 @@ const getItemsToDownload = ({
243
253
  ext: episodeImageFileExt,
244
254
  template: episodeTemplate,
245
255
  width: episodeDigits,
256
+ offset: episodeNumOffset,
246
257
  });
247
258
 
248
259
  const outputImagePath = path.resolve(basePath, episodeImageName);
@@ -254,6 +265,46 @@ const getItemsToDownload = ({
254
265
  }
255
266
  }
256
267
 
268
+ if (includeEpisodeTranscripts) {
269
+ const episodeTranscriptUrl = getTranscriptUrl(
270
+ item,
271
+ episodeTranscriptTypes
272
+ );
273
+
274
+ if (episodeTranscriptUrl) {
275
+ const episodeTranscriptFileExt = getUrlExt(episodeTranscriptUrl);
276
+ const episodeTranscriptArchiveKey = getArchiveKey({
277
+ prefix: archiveUrl,
278
+ name: getArchiveFilename({
279
+ pubDate,
280
+ name: title,
281
+ ext: episodeTranscriptFileExt,
282
+ }),
283
+ });
284
+
285
+ const episodeTranscriptName = getItemFilename({
286
+ item,
287
+ feed,
288
+ url: episodeAudioUrl,
289
+ ext: episodeTranscriptFileExt,
290
+ template: episodeTemplate,
291
+ width: episodeDigits,
292
+ offset: episodeNumOffset,
293
+ });
294
+
295
+ const outputTranscriptPath = path.resolve(
296
+ basePath,
297
+ episodeTranscriptName
298
+ );
299
+
300
+ item._extra_downloads.push({
301
+ url: episodeTranscriptUrl,
302
+ outputPath: outputTranscriptPath,
303
+ key: episodeTranscriptArchiveKey,
304
+ });
305
+ }
306
+ }
307
+
257
308
  items.push(item);
258
309
  }
259
310
 
@@ -472,12 +523,40 @@ const getImageUrl = ({ image, itunes }) => {
472
523
  return null;
473
524
  };
474
525
 
475
- const getFileFeed = async (filePath, parserConfig) => {
476
- const defaultConfig = {
477
- defaultRSS: 2.0,
478
- };
526
+ export const TRANSCRIPT_TYPES = {
527
+ "application/json": "application/json",
528
+ "application/srr": "application/srr",
529
+ "application/srt": "application/srt",
530
+ "application/x-subrip": "application/x-subrip",
531
+ "text/html": "text/html",
532
+ "text/plain": "text/plain",
533
+ "text/vtt": "text/vtt",
534
+ };
535
+
536
+ // @see https://github.com/Podcastindex-org/podcast-namespace/blob/main/docs/1.0.md#transcript
537
+ const getTranscriptUrl = (item, transcriptTypes = []) => {
538
+ if (!item.podcastTranscripts?.length) {
539
+ return null;
540
+ }
541
+
542
+ for (const transcriptType of transcriptTypes) {
543
+ const matchingTranscriptType = item.podcastTranscripts.find(
544
+ (transcript) =>
545
+ !!transcript?.["$"]?.url && transcript?.["$"]?.type === transcriptType
546
+ );
547
+
548
+ if (matchingTranscriptType) {
549
+ return matchingTranscriptType?.["$"]?.url;
550
+ }
551
+ }
552
+
553
+ return null;
554
+ };
479
555
 
480
- const config = parserConfig ? getJsonFile(parserConfig) : defaultConfig;
556
+ const getFileFeed = async (filePath, parserConfig) => {
557
+ const config = parserConfig
558
+ ? getJsonFile(parserConfig)
559
+ : defaultRssParserConfig;
481
560
  const rssString = getFileString(filePath);
482
561
 
483
562
  if (parserConfig && !config) {
@@ -497,11 +576,9 @@ const getFileFeed = async (filePath, parserConfig) => {
497
576
  };
498
577
 
499
578
  const getUrlFeed = async (url, parserConfig) => {
500
- const defaultConfig = {
501
- defaultRSS: 2.0,
502
- };
503
-
504
- const config = parserConfig ? getJsonFile(parserConfig) : defaultConfig;
579
+ const config = parserConfig
580
+ ? getJsonFile(parserConfig)
581
+ : defaultRssParserConfig;
505
582
 
506
583
  if (parserConfig && !config) {
507
584
  logErrorAndExit(`Unable to load parser config: ${parserConfig}`);
package/bin/validate.js CHANGED
@@ -10,7 +10,6 @@ const createParseNumber = ({ min, max, name, required = true }) => {
10
10
 
11
11
  try {
12
12
  let number = parseInt(value);
13
-
14
13
  if (isNaN(number)) {
15
14
  logErrorAndExit(`${name} must be a number`);
16
15
  }
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "podcast-dl",
3
- "version": "9.3.4",
3
+ "version": "9.5.0",
4
4
  "description": "A CLI for downloading podcasts.",
5
5
  "type": "module",
6
6
  "bin": "./bin/bin.js",