podcast-dl 11.2.0 → 11.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +66 -41
- package/bin/async.js +8 -3
- package/bin/bin.js +3 -0
- package/bin/commander.js +3 -2
- package/bin/naming.js +45 -21
- package/package.json +1 -1
package/README.md
CHANGED
|
@@ -22,45 +22,46 @@ A humble CLI for downloading and archiving podcasts.
|
|
|
22
22
|
|
|
23
23
|
Either `--url` or `--file` must be provided.
|
|
24
24
|
|
|
25
|
-
Type values surrounded in square brackets (`[]`) can be used as
|
|
26
|
-
|
|
27
|
-
| Option | Type | Required | Description
|
|
28
|
-
| --------------------------------- | ------------------- | -------- |
|
|
29
|
-
| --url | String | true\* | URL to podcast RSS feed.
|
|
30
|
-
| --file | String | true\* | Path to local RSS file.
|
|
31
|
-
| --out-dir | String | false | Specify output directory for episodes and metadata. Defaults to "./{{podcast_title}}"
|
|
32
|
-
| --threads | Number | false | Determines the number of downloads that will happen concurrently. Default is 1
|
|
33
|
-
| --attempts | Number | false | Sets the number of download attempts per individual file. Default is 3
|
|
34
|
-
| --archive | [String] | false | Download or write out items not listed in archive file. Generates archive file at path if not found. Defaults to "./{{podcast_title}}/archive.json" when used as a boolean option. See "Template Options" for more details. |
|
|
35
|
-
| --episode-template | String | false | Template for generating episode related filenames. See "Template Options" for details.
|
|
36
|
-
| --episode-custom-template-options | <String...> | false | Provide custom options for the episode template. See "Template Options" for details.
|
|
37
|
-
| --include-meta | | false | Write out podcast metadata to JSON.
|
|
38
|
-
| --include-episode-meta | | false | Write out individual episode metadata
|
|
39
|
-
| --include-episode-images | | false | Download found episode images.
|
|
40
|
-
| --include-episode-transcripts | | false | Download found episode transcripts.
|
|
41
|
-
| --offset | Number | false | Offset starting download position. Default is 0
|
|
42
|
-
| --limit | Number | false | Max number of episodes to download. Downloads all by default.
|
|
43
|
-
| --after | String | false | Only download episodes after this date (i.e. MM/DD/
|
|
44
|
-
| --before | String | false | Only download episodes before this date (i.e. MM/DD/
|
|
45
|
-
| --episode-regex | String | false | Match episode title against provided regex before starting download.
|
|
46
|
-
| --episode-regex-exclude | String | false |
|
|
47
|
-
| --episode-digits | Number | false | Minimum number of digits to use for episode numbering (e.g. 3 would generate "001" instead of "1"). Default is
|
|
48
|
-
| --episode-num-offset | Number | false | Offset the acquired episode number. Default is 0
|
|
49
|
-
| --episode-source-order | String | false | Attempted order to extract episode audio URL from RSS feed. Default is "enclosure,link"
|
|
50
|
-
| --episode-transcript-types | String | false | List of allowed transcript types in preferred order. Default is "application/json,application/x-subrip,application/srr,application/srt,text/vtt,text/html,text/plain".
|
|
51
|
-
| --season | Number | false | Only download episodes from
|
|
52
|
-
| --add-mp3-metadata | | false | Attempts to add a base level of episode metadata to each episode. Recommended only in cases where the original metadata is of poor quality. (**ffmpeg required**)
|
|
53
|
-
| --adjust-bitrate | String (e.g. "48k") | false | Attempts to adjust bitrate of episodes. (**ffmpeg required**)
|
|
54
|
-
| --mono | | false | Attempts to force episodes into mono. (**ffmpeg required**)
|
|
55
|
-
| --override | | false | Override local files on collision.
|
|
56
|
-
| --always-postprocess | | false | Always run additional tasks on the file regardless if the file already exists. This includes
|
|
57
|
-
| --reverse | | false | Reverse download direction and start at last RSS item.
|
|
58
|
-
| --info | | false | Print retrieved podcast info instead of downloading.
|
|
59
|
-
| --list | [String] | false | Print episode list instead of downloading. Defaults to "table" when used as a boolean option. "json" is also supported.
|
|
60
|
-
| --exec | String | false | Execute a command after each episode is downloaded. See "Template Options" for more details.
|
|
61
|
-
| --parser-config | String | false | Path to JSON file that will be parsed and used to override the default config passed to [rss-parser](https://github.com/rbren/rss-parser#xml-options).
|
|
62
|
-
| --
|
|
63
|
-
| --
|
|
25
|
+
Type values surrounded in square brackets (`[]`) can be used as boolean options (no argument required).
|
|
26
|
+
|
|
27
|
+
| Option | Type | Required | Description |
|
|
28
|
+
| --------------------------------- | ------------------- | -------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
|
|
29
|
+
| --url | String | true\* | URL to podcast RSS feed. |
|
|
30
|
+
| --file | String | true\* | Path to local RSS file. |
|
|
31
|
+
| --out-dir | String | false | Specify output directory for episodes and metadata. Defaults to `"./{{podcast_title}}"`. See "Template Options" for more details. |
|
|
32
|
+
| --threads | Number | false | Determines the number of downloads that will happen concurrently. Default is `1`. |
|
|
33
|
+
| --attempts | Number | false | Sets the number of download attempts per individual file. Default is `3`. |
|
|
34
|
+
| --archive | [String] | false | Download or write out items not listed in archive file. Generates archive file at path if not found. Defaults to `"./{{podcast_title}}/archive.json"` when used as a boolean option. See "Template Options" for more details. |
|
|
35
|
+
| --episode-template | String | false | Template for generating episode related filenames. See "Template Options" for details. |
|
|
36
|
+
| --episode-custom-template-options | <String...> | false | Provide custom options for the episode template. See "Template Options" for details. |
|
|
37
|
+
| --include-meta | | false | Write out podcast metadata to JSON. |
|
|
38
|
+
| --include-episode-meta | | false | Write out individual episode metadata to JSON. |
|
|
39
|
+
| --include-episode-images | | false | Download found episode images. |
|
|
40
|
+
| --include-episode-transcripts | | false | Download found episode transcripts. |
|
|
41
|
+
| --offset | Number | false | Offset starting download position. Default is `0`. |
|
|
42
|
+
| --limit | Number | false | Max number of episodes to download. Downloads all by default. |
|
|
43
|
+
| --after | String | false | Only download episodes after this date (i.e. MM/DD/YYYY, inclusive). |
|
|
44
|
+
| --before | String | false | Only download episodes before this date (i.e. MM/DD/YYYY, inclusive). |
|
|
45
|
+
| --episode-regex | String | false | Match episode title against provided regex before starting download. |
|
|
46
|
+
| --episode-regex-exclude | String | false | Episode titles matching provided regex will be excluded. |
|
|
47
|
+
| --episode-digits | Number | false | Minimum number of digits to use for episode numbering (e.g. 3 would generate "001" instead of "1"). Default is `1`. |
|
|
48
|
+
| --episode-num-offset | Number | false | Offset the acquired episode number. Default is `0`. |
|
|
49
|
+
| --episode-source-order | String | false | Attempted order to extract episode audio URL from RSS feed. Default is `"enclosure,link"`. |
|
|
50
|
+
| --episode-transcript-types | String | false | List of allowed transcript types in preferred order. Default is "application/json,application/x-subrip,application/srr,application/srt,text/vtt,text/html,text/plain". |
|
|
51
|
+
| --season | Number | false | Only download episodes from specified season. Note: this will only work if the RSS feed includes the `itunes:season` tag on episodes. |
|
|
52
|
+
| --add-mp3-metadata | | false | Attempts to add a base level of episode metadata to each episode. Recommended only in cases where the original metadata is of poor quality. (**ffmpeg required**) |
|
|
53
|
+
| --adjust-bitrate | String (e.g. "48k") | false | Attempts to adjust bitrate of episodes. (**ffmpeg required**) |
|
|
54
|
+
| --mono | | false | Attempts to force episodes into mono. (**ffmpeg required**) |
|
|
55
|
+
| --override | | false | Override local files on collision. |
|
|
56
|
+
| --always-postprocess | | false | Always run additional tasks on the file regardless of whether the file already exists. This includes `--add-mp3-metadata`, `--adjust-bitrate`, `--mono`, and `--exec`. |
|
|
57
|
+
| --reverse | | false | Reverse download direction and start at last RSS item. |
|
|
58
|
+
| --info | | false | Print retrieved podcast info instead of downloading. |
|
|
59
|
+
| --list | [String] | false | Print episode list instead of downloading. Defaults to `"table"` when used as a boolean option. `"json"` is also supported. |
|
|
60
|
+
| --exec | String | false | Execute a command after each episode is downloaded. See "Template Options" for more details. |
|
|
61
|
+
| --parser-config | String | false | Path to JSON file that will be parsed and used to override the default config passed to [rss-parser](https://github.com/rbren/rss-parser#xml-options). |
|
|
62
|
+
| --user-agent | String | false | Specify custom user agent string for HTTP requests. Defaults to a Chrome user agent if not specified. |
|
|
63
|
+
| --proxy | | false | Enable proxy support. Specify environment variables listed by [global-agent](https://github.com/gajus/global-agent#environment-variables). |
|
|
64
|
+
| --help | | false | Output usage information. |
|
|
64
65
|
|
|
65
66
|
## Archive
|
|
66
67
|
|
|
@@ -87,7 +88,7 @@ Options that support templates allow users to specify a template for the generat
|
|
|
87
88
|
- `release_year`: The release year (`YYYY`) of the episode.
|
|
88
89
|
- `release_month`: The release month (`MM`) of the episode.
|
|
89
90
|
- `release_day`: The release day (`DD`) of the episode.
|
|
90
|
-
- `episode_num`: The
|
|
91
|
+
- `episode_num`: The position number of where the episode appears in the feed.
|
|
91
92
|
- `url`: URL of episode audio file.
|
|
92
93
|
- `duration`: Provided `mm:ss` duration (if found).
|
|
93
94
|
- `podcast_title`: Title of the podcast feed.
|
|
@@ -108,13 +109,37 @@ If no match is found, the `custom_<n>` keyword will be replaced with an empty st
|
|
|
108
109
|
- `episode_filename_base`: The filename of the episode without its extension.
|
|
109
110
|
- `url`: URL of episode audio file.
|
|
110
111
|
|
|
112
|
+
### Template Filters
|
|
113
|
+
|
|
114
|
+
Template variables can be transformed using filters. Filters are applied using the pipe (`|`) character and can be chained:
|
|
115
|
+
|
|
116
|
+
`--episode-template "{{podcast_title|underscore}}-{{title|strip_special|camelcase}}"`
|
|
117
|
+
|
|
118
|
+
For example, given `title` = "Serial- S01 E01: The Alibi":
|
|
119
|
+
|
|
120
|
+
- `{{title|strip_special|underscore}}` produces `Serial S01 E01 The Alibi` then `Serial_S01_E01_The_Alibi`
|
|
121
|
+
- `{{title|strip_special|camelcase}}` produces `SerialS01E01TheAlibi`
|
|
122
|
+
|
|
123
|
+
#### Available Filters
|
|
124
|
+
|
|
125
|
+
| Filter | Description | Input | Output |
|
|
126
|
+
| --------------- | --------------------------------------------- | ------------- | ----------- |
|
|
127
|
+
| `strip` | Remove all whitespace | `"foo bar"` | `"foobar"` |
|
|
128
|
+
| `strip_special` | Remove non-alphanumeric chars (except spaces) | `"S01: E01!"` | `"S01 E01"` |
|
|
129
|
+
| `underscore` | Replace whitespace with underscores | `"foo bar"` | `"foo_bar"` |
|
|
130
|
+
| `dash` | Replace whitespace with dashes | `"foo bar"` | `"foo-bar"` |
|
|
131
|
+
| `camelcase` | Convert to UpperCamelCase | `"foo bar"` | `"FooBar"` |
|
|
132
|
+
| `lowercase` | Convert to lowercase | `"FOO Bar"` | `"foo bar"` |
|
|
133
|
+
| `uppercase` | Convert to UPPERCASE | `"foo bar"` | `"FOO BAR"` |
|
|
134
|
+
| `trim` | Remove leading/trailing whitespace | `" foo "` | `"foo"` |
|
|
135
|
+
|
|
111
136
|
## Log Levels
|
|
112
137
|
|
|
113
138
|
By default, all logs and errors are outputted to the console. The amount of logs can be controlled using the environment variable `LOG_LEVEL` with the following options:
|
|
114
139
|
|
|
115
140
|
- `static`: All logs and errors are outputted to the console, but disables any animations.
|
|
116
141
|
- `quiet`: Only important info and non-critical errors will be logged (e.g. episode download started).
|
|
117
|
-
- `silent`: Only critical error messages will be
|
|
142
|
+
- `silent`: Only critical error messages will be logged.
|
|
118
143
|
|
|
119
144
|
## OS Filename Limits
|
|
120
145
|
|
package/bin/async.js
CHANGED
|
@@ -31,7 +31,7 @@ const pipeline = promisify(stream.pipeline);
|
|
|
31
31
|
|
|
32
32
|
const BYTES_IN_MB = 1000000;
|
|
33
33
|
const USER_AGENT =
|
|
34
|
-
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/
|
|
34
|
+
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/142.0.0.0 Safari/537.36";
|
|
35
35
|
|
|
36
36
|
export const download = async (options) => {
|
|
37
37
|
const {
|
|
@@ -45,6 +45,7 @@ export const download = async (options) => {
|
|
|
45
45
|
onAfterDownload,
|
|
46
46
|
attempt = 1,
|
|
47
47
|
maxAttempts = 3,
|
|
48
|
+
userAgent = USER_AGENT,
|
|
48
49
|
} = options;
|
|
49
50
|
|
|
50
51
|
const logMessage = getLogMessageWithMarker(marker);
|
|
@@ -71,7 +72,7 @@ export const download = async (options) => {
|
|
|
71
72
|
responseType: "json",
|
|
72
73
|
headers: {
|
|
73
74
|
accept: "*/*",
|
|
74
|
-
"user-agent":
|
|
75
|
+
"user-agent": userAgent,
|
|
75
76
|
},
|
|
76
77
|
});
|
|
77
78
|
} catch (error) {
|
|
@@ -114,7 +115,7 @@ export const download = async (options) => {
|
|
|
114
115
|
|
|
115
116
|
await pipeline(
|
|
116
117
|
got
|
|
117
|
-
.stream(url, { headers: { "user-agent":
|
|
118
|
+
.stream(url, { headers: { "user-agent": userAgent } })
|
|
118
119
|
.on("downloadProgress", onDownloadProgress),
|
|
119
120
|
fs.createWriteStream(tempOutputPath)
|
|
120
121
|
);
|
|
@@ -184,6 +185,7 @@ export const downloadItemsAsync = async ({
|
|
|
184
185
|
alwaysPostprocess,
|
|
185
186
|
targetItems,
|
|
186
187
|
threads = 1,
|
|
188
|
+
userAgent = USER_AGENT,
|
|
187
189
|
}) => {
|
|
188
190
|
let numEpisodesDownloaded = 0;
|
|
189
191
|
let hasErrors = false;
|
|
@@ -222,6 +224,7 @@ export const downloadItemsAsync = async ({
|
|
|
222
224
|
override,
|
|
223
225
|
alwaysPostprocess,
|
|
224
226
|
marker,
|
|
227
|
+
userAgent,
|
|
225
228
|
key: getArchiveKey({
|
|
226
229
|
prefix: archivePrefix,
|
|
227
230
|
name: getArchiveFilename({
|
|
@@ -239,6 +242,7 @@ export const downloadItemsAsync = async ({
|
|
|
239
242
|
await download({
|
|
240
243
|
archive,
|
|
241
244
|
override,
|
|
245
|
+
userAgent,
|
|
242
246
|
key: item._episodeImage.key,
|
|
243
247
|
marker: item._episodeImage.url,
|
|
244
248
|
maxAttempts: attempts,
|
|
@@ -265,6 +269,7 @@ export const downloadItemsAsync = async ({
|
|
|
265
269
|
maxAttempts: attempts,
|
|
266
270
|
outputPath: item._episodeTranscript.outputPath,
|
|
267
271
|
url: item._episodeTranscript.url,
|
|
272
|
+
userAgent,
|
|
268
273
|
});
|
|
269
274
|
} catch (error) {
|
|
270
275
|
hasErrors = true;
|
package/bin/bin.js
CHANGED
|
@@ -59,6 +59,7 @@ const {
|
|
|
59
59
|
reverse,
|
|
60
60
|
threads,
|
|
61
61
|
url,
|
|
62
|
+
userAgent,
|
|
62
63
|
addMp3Metadata: addMp3MetadataFlag,
|
|
63
64
|
adjustBitrate: bitrate,
|
|
64
65
|
season,
|
|
@@ -158,6 +159,7 @@ const main = async () => {
|
|
|
158
159
|
await download({
|
|
159
160
|
archive,
|
|
160
161
|
override,
|
|
162
|
+
userAgent,
|
|
161
163
|
marker: podcastImageUrl,
|
|
162
164
|
key: getArchiveKey({
|
|
163
165
|
prefix: archivePrefix,
|
|
@@ -260,6 +262,7 @@ const main = async () => {
|
|
|
260
262
|
alwaysPostprocess,
|
|
261
263
|
targetItems,
|
|
262
264
|
threads,
|
|
265
|
+
userAgent,
|
|
263
266
|
});
|
|
264
267
|
|
|
265
268
|
if (hasErrors && numEpisodesDownloaded !== targetItems.length) {
|
package/bin/commander.js
CHANGED
|
@@ -110,7 +110,7 @@ export const setupCommander = (program) => {
|
|
|
110
110
|
)
|
|
111
111
|
.option(
|
|
112
112
|
"--episode-regex-exclude <string>",
|
|
113
|
-
"
|
|
113
|
+
"episode titles matching regex will be excluded"
|
|
114
114
|
)
|
|
115
115
|
.option(
|
|
116
116
|
"--season <number>",
|
|
@@ -190,7 +190,8 @@ export const setupCommander = (program) => {
|
|
|
190
190
|
"--parser-config <string>",
|
|
191
191
|
"path to JSON config to override RSS parser"
|
|
192
192
|
)
|
|
193
|
-
.option("--proxy", "enable proxy support via global-agent")
|
|
193
|
+
.option("--proxy", "enable proxy support via global-agent")
|
|
194
|
+
.option("--user-agent <string>", "specify custom user agent string");
|
|
194
195
|
|
|
195
196
|
program.parse();
|
|
196
197
|
|
package/bin/naming.js
CHANGED
|
@@ -3,6 +3,36 @@ import filenamify from "filenamify";
|
|
|
3
3
|
import path from "path";
|
|
4
4
|
|
|
5
5
|
const INVALID_CHAR_REPLACE = "_";
|
|
6
|
+
|
|
7
|
+
/**
 * Template filters, keyed by the name used after a pipe inside a template
 * placeholder (e.g. `{{title|strip_special|underscore}}`).
 *
 * Each filter takes a string and returns a string.
 */
const FILTER_FUNCTIONS = {
  // Remove every run of whitespace.
  strip: (val) => val.replace(/\s+/g, ""),
  // Remove everything that is not an ASCII letter, a digit, or whitespace.
  strip_special: (val) => val.replace(/[^a-zA-Z0-9\s]/g, ""),
  // Collapse each run of whitespace into a single underscore.
  underscore: (val) => val.replace(/\s+/g, "_"),
  // Collapse each run of whitespace into a single dash.
  dash: (val) => val.replace(/\s+/g, "-"),
  // UpperCamelCase: capitalize the first character of every
  // whitespace-separated word, lowercase the rest, and join without
  // separators.
  camelcase: (val) =>
    val
      .split(/\s+/)
      .map((w) =>
        w ? w.charAt(0).toUpperCase() + w.slice(1).toLowerCase() : ""
      )
      .join(""),
  lowercase: (val) => val.toLowerCase(),
  uppercase: (val) => val.toUpperCase(),
  // Remove leading and trailing whitespace.
  trim: (val) => val.trim(),
};

/**
 * Apply a chain of template filters to a replacement value.
 *
 * @param {*} value - The replacement value. Returned untouched when no
 *   filter string is given; otherwise coerced to a string (`null` and
 *   `undefined` become `""`) so the string-only filters cannot throw.
 * @param {string} [filterStr] - The raw filter part of a placeholder,
 *   including its leading pipe (e.g. `"|strip_special|underscore"`).
 * @returns {*} `value` itself when `filterStr` is empty, otherwise the
 *   filtered string. Unknown filter names are ignored.
 */
const applyFilters = (value, filterStr) => {
  if (!filterStr) {
    return value;
  }

  const initial =
    typeof value === "string" ? value : value == null ? "" : String(value);

  // Drop the leading "|" before splitting the chain into names.
  const filterNames = filterStr.slice(1).split("|");

  return filterNames.reduce((val, rawName) => {
    // Tolerate spaces around names, e.g. "{{title| underscore }}".
    const name = rawName.trim();

    // Own-property check: a plain lookup would also resolve inherited
    // members such as "toString" or "valueOf" and call them as filters.
    if (!Object.prototype.hasOwnProperty.call(FILTER_FUNCTIONS, name)) {
      return val;
    }

    return FILTER_FUNCTIONS[name](val);
  }, initial);
};
|
|
35
|
+
|
|
6
36
|
const MAX_LENGTH_FILENAME = process.env.MAX_LENGTH_FILENAME
|
|
7
37
|
? parseInt(process.env.MAX_LENGTH_FILENAME)
|
|
8
38
|
: 255;
|
|
@@ -69,16 +99,13 @@ export const getItemFilename = ({
|
|
|
69
99
|
...customReplacementTuples,
|
|
70
100
|
];
|
|
71
101
|
|
|
102
|
+
const replacementsMap = Object.fromEntries(templateReplacementsTuples);
|
|
72
103
|
const templateSegments = template.trim().split(path.sep);
|
|
73
104
|
const nameSegments = templateSegments.map((segment) => {
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
const
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
name = replacement
|
|
80
|
-
? name.replace(replaceRegex, replacement)
|
|
81
|
-
: name.replace(replaceRegex, "");
|
|
105
|
+
const replaceRegex = /{{(\w+)(\|[^}]+)?}}/g;
|
|
106
|
+
const name = segment.replace(replaceRegex, (match, varName, filterStr) => {
|
|
107
|
+
const replacement = replacementsMap[varName] || "";
|
|
108
|
+
return applyFilters(replacement, filterStr);
|
|
82
109
|
});
|
|
83
110
|
|
|
84
111
|
return getSimpleFilename(name);
|
|
@@ -93,19 +120,16 @@ export const getItemFilename = ({
|
|
|
93
120
|
};
|
|
94
121
|
|
|
95
122
|
export const getFolderName = ({ feed, template }) => {
|
|
96
|
-
const
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
const
|
|
104
|
-
const
|
|
105
|
-
|
|
106
|
-
name = replacement
|
|
107
|
-
? name.replace(replaceRegex, getSafeName(replacement))
|
|
108
|
-
: name.replace(replaceRegex, "");
|
|
123
|
+
const replacementsMap = {
|
|
124
|
+
podcast_title: feed.title || "",
|
|
125
|
+
podcast_link: feed.link || "",
|
|
126
|
+
};
|
|
127
|
+
|
|
128
|
+
const replaceRegex = /{{(\w+)(\|[^}]+)?}}/g;
|
|
129
|
+
const name = template.replace(replaceRegex, (_, varName, filterStr) => {
|
|
130
|
+
const replacement = replacementsMap[varName] || "";
|
|
131
|
+
const filtered = applyFilters(replacement, filterStr);
|
|
132
|
+
return getSafeName(filtered);
|
|
109
133
|
});
|
|
110
134
|
|
|
111
135
|
return name;
|