podcast-dl 6.1.0 → 7.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -2,18 +2,8 @@
2
2
 
3
3
  ## A CLI for downloading podcasts with a focus on archiving.
4
4
 
5
- ![podcast-dl example gif](./docs/podcast-dl-example.gif)
6
-
7
5
  ## How to Use
8
6
 
9
- ### Binaries
10
-
11
- [Visit the releases page](https://github.com/lightpohl/podcast-dl/releases) and download the latest binary for your system.
12
-
13
- `podcast-dl --url <PODCAST_RSS_URL>`
14
-
15
- `podcast-dl --url "http://friendsatthetable.libsyn.com/rss"`
16
-
17
7
  ### npx
18
8
 
19
9
  **[Node Required](https://nodejs.org/en/)**
@@ -24,30 +14,32 @@
24
14
 
25
15
  Type values surrounded in square brackets (`[]`) can be used as boolean options (no argument required).
26
16
 
27
- | Option | Type | Required | Description |
28
- | ----------------------- | ------------------- | -------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
29
- | --url | String | true | URL to podcast RSS feed. |
30
- | --out-dir | String | false | Specify output directory for episodes and metadata. Defaults to "./{{podcast_title}}". See "Templating" for more details. |
31
- | --archive | [String] | false | Download or write out items not listed in archive file. Generates archive file at path if not found. Defaults to "./{{podcast_title}}/archive.json" when used as a boolean option. See "Templating" for more details. |
32
- | --episode-template | String | false | Template for generating episode related filenames. See "Templating" for details. |
33
- | --include-meta | | false | Write out podcast metadata to JSON. |
34
- | --include-episode-meta | | false | Write out individual episode metadata to JSON. |
35
- | --ignore-episode-images | | false | Ignore downloading found images from --include-episode-meta. |
36
- | --offset | Number | false | Offset starting download position. Default is 0. |
37
- | --limit | Number | false | Max number of episodes to download. Downloads all by default. |
38
- | --after | String | false | Only download episodes after this date (i.e. MM/DD/YYY, inclusive). |
39
- | --before | String | false | Only download episodes before this date (i.e. MM/DD/YYY, inclusive) |
40
- | --episode-regex | String | false | Match episode title against provided regex before starting download. |
41
- | --add-mp3-metadata | | false | Attempts to add a base level of MP3 metadata to each episode. Recommended only in cases where the original metadata is of poor quality. (**ffmpeg required**) |
42
- | --adjust-bitrate | String (e.g. "48k") | false | Attempts to adjust bitrate of MP3s. (**ffmpeg required**) |
43
- | --mono | | false | Attempts to force MP3s into mono. (**ffmpeg required**) |
44
- | --override | | false | Override local files on collision. |
45
- | --reverse | | false | Reverse download direction and start at last RSS item. |
46
- | --info | | false | Print retrieved podcast info instead of downloading. |
47
- | --list | [String] | false | Print episode list instead of downloading. Defaults to "table" when used as a boolean option. "json" is also supported. |
48
- | --exec | String | false | Execute a command after each episode is downloaded. |
49
- | --version | | false | Output the version number. |
50
- | --help | | false | Output usage information. |
17
+ | Option | Type | Required | Description |
18
+ | ------------------------ | ------------------- | -------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
19
+ | --url | String | true | URL to podcast RSS feed. |
20
+ | --out-dir | String | false | Specify output directory for episodes and metadata. Defaults to "./{{podcast_title}}". See "Templating" for more details. |
21
+ | --archive | [String] | false | Download or write out items not listed in archive file. Generates archive file at path if not found. Defaults to "./{{podcast_title}}/archive.json" when used as a boolean option. See "Templating" for more details. |
22
+ | --episode-template | String | false | Template for generating episode related filenames. See "Templating" for details. |
23
+ | --include-meta | | false | Write out podcast metadata to JSON. |
24
+ | --include-episode-meta | | false | Write out individual episode metadata to JSON. |
25
+ | --include-episode-images | | false | Download found episode images. |
26
+ | --offset | Number | false | Offset starting download position. Default is 0. |
27
+ | --limit | Number | false | Max number of episodes to download. Downloads all by default. |
28
+ | --after | String | false | Only download episodes after this date (i.e. MM/DD/YYYY, inclusive). |
29
+ | --before | String | false | Only download episodes before this date (i.e. MM/DD/YYYY, inclusive). |
30
+ | --episode-regex | String | false | Match episode title against provided regex before starting download. |
31
+ | --add-mp3-metadata | | false | Attempts to add a base level of MP3 metadata to each episode. Recommended only in cases where the original metadata is of poor quality. (**ffmpeg required**) |
32
+ | --adjust-bitrate | String (e.g. "48k") | false | Attempts to adjust bitrate of MP3s. (**ffmpeg required**) |
33
+ | --mono | | false | Attempts to force MP3s into mono. (**ffmpeg required**) |
34
+ | --override | | false | Override local files on collision. |
35
+ | --reverse | | false | Reverse download direction and start at last RSS item. |
36
+ | --info | | false | Print retrieved podcast info instead of downloading. |
37
+ | --list | [String] | false | Print episode list instead of downloading. Defaults to "table" when used as a boolean option. "json" is also supported. |
38
+ | --exec | String | false | Execute a command after each episode is downloaded. |
39
+ | --threads | Number | false | Determines the number of downloads that will happen concurrently. Default is 1. |
40
+ | --filter-url-tracking | | false | Attempts to extract the direct download link of an episode if detected (**experimental**). |
41
+ | --version | | false | Output the version number. |
42
+ | --help | | false | Output usage information. |
51
43
 
52
44
  ## Archive
53
45
 
package/bin/async.js ADDED
@@ -0,0 +1,294 @@
1
+ import pLimit from "p-limit";
2
+ import _path from "path";
3
+ import { promisify } from "util";
4
+ import stream from "stream";
5
+ import fs from "fs";
6
+ import got from "got";
7
+ import { throttle } from "throttle-debounce";
8
+
9
+ import {
10
+ logError,
11
+ LOG_LEVELS,
12
+ getLogMessageWithMarker,
13
+ getShouldOutputProgressIndicator,
14
+ } from "./logger.js";
15
+ import { getArchiveFilename, getFilename } from "./naming.js";
16
+ import {
17
+ getEpisodeAudioUrlAndExt,
18
+ getArchiveKey,
19
+ runFfmpeg,
20
+ runExec,
21
+ writeItemMeta,
22
+ writeToArchive,
23
+ getUrlEmbed,
24
+ } from "./util.js";
25
+
26
// Promise-returning form of stream.pipeline so the transfer can be awaited.
const pipeline = promisify(stream.pipeline);

// Divisor used to express byte counts as (decimal) megabytes in log output.
const BYTES_IN_MB = 1000000;

/**
 * Streams a remote resource to `outputPath` and, on success, runs the
 * optional post-download hook and records the item in the archive.
 *
 * @param {object} options
 * @param {string} options.marker - Prefix handed to the logger for every message.
 * @param {string} options.url - Address of the resource to fetch.
 * @param {string} options.outputPath - Destination file path.
 * @param {*} [options.key] - Archive key; forwarded to `writeToArchive`.
 * @param {*} [options.archive] - Archive reference; forwarded to `writeToArchive`.
 * @param {boolean} [options.override] - When falsy, an existing file at
 *   `outputPath` causes the download to be skipped.
 * @param {Function} [options.onAfterDownload] - Awaited after a successful
 *   download, before the archive entry is written.
 * @param {boolean} [options.filterUrlTracking] - When truthy, `getUrlEmbed`
 *   is asked for an embedded URL, which is used instead of `url` if found.
 * @returns {Promise<void>}
 * @throws {Error} When the transfer fails (the partial file is removed
 *   first), when `onAfterDownload` rejects, or when the archive write fails.
 */
const download = async ({
  marker,
  url,
  outputPath,
  key,
  archive,
  override,
  onAfterDownload,
  filterUrlTracking,
}) => {
  const logMessage = getLogMessageWithMarker(marker);

  if (!override && fs.existsSync(outputPath)) {
    logMessage("Download exists locally. Skipping...");
    return;
  }

  let embeddedUrl = null;
  if (filterUrlTracking) {
    logMessage("Attempting to find embedded URL...");
    embeddedUrl = await getUrlEmbed(url);

    if (!embeddedUrl) {
      logMessage("Unable to find embedded URL. Defaulting to full address");
    }
  }

  const finalUrl = embeddedUrl || url;

  // The HEAD probe only supplies the size used for logging and the
  // post-download size comparison, so a failure here (e.g. a server that
  // rejects HEAD, or a timeout) must not prevent the download itself.
  // A value of 0 means "size unknown".
  let expectedSize = 0;
  try {
    const headResponse = await got(finalUrl, {
      timeout: 5000,
      method: "HEAD",
      responseType: "json",
      headers: {
        accept: "*/*",
      },
    });

    const contentLength =
      headResponse &&
      headResponse.headers &&
      headResponse.headers["content-length"];
    const parsedLength = contentLength ? parseInt(contentLength, 10) : 0;

    expectedSize = Number.isNaN(parsedLength) ? 0 : parsedLength;
  } catch (error) {
    logMessage("Unable to determine download size. Continuing without it...");
  }

  const removeFile = () => {
    if (fs.existsSync(outputPath)) {
      fs.unlinkSync(outputPath);
    }
  };

  logMessage(
    `Starting download${
      expectedSize
        ? ` of ${(expectedSize / BYTES_IN_MB).toFixed(2)} MB...`
        : "..."
    }`
  );

  try {
    // Progress events are throttled to one log line per 3000 ms.
    const onDownloadProgress = throttle(3000, (progress) => {
      if (
        getShouldOutputProgressIndicator() &&
        progress.transferred > 0 &&
        progress.percent < 1
      ) {
        logMessage(
          `${(progress.percent * 100).toFixed(0)}% of ${(
            progress.total / BYTES_IN_MB
          ).toFixed(2)} MB...`
        );
      }
    });

    await pipeline(
      got.stream(finalUrl).on("downloadProgress", onDownloadProgress),
      fs.createWriteStream(outputPath)
    );
  } catch (error) {
    // Do not leave a partially written file behind.
    removeFile();
    throw error;
  }

  const fileSize = fs.statSync(outputPath).size;

  if (fileSize === 0) {
    removeFile();

    logMessage(
      "Unable to write to file. Suggestion: verify permissions",
      LOG_LEVELS.important
    );

    return;
  }

  if (expectedSize && expectedSize !== fileSize) {
    logMessage(
      "File size differs from expected content length. Suggestion: verify file works as expected",
      LOG_LEVELS.important
    );
    logMessage(`${outputPath}`, LOG_LEVELS.important);
  }

  logMessage("Download complete!");

  if (onAfterDownload) {
    await onAfterDownload();
  }

  if (key && archive) {
    try {
      writeToArchive({ key, archive });
    } catch (error) {
      // Keep the original failure reachable via `cause` for debugging.
      throw new Error(`Error writing to archive: ${error.toString()}`, {
        cause: error,
      });
    }
  }
};
146
+
147
/**
 * Downloads every item in `targetItems`, running at most `threads` items
 * concurrently via `pLimit`.
 *
 * For each item this downloads the episode audio, then any entries in
 * `item._extra_downloads`, then (when `includeEpisodeMeta` is set) writes the
 * episode metadata. Failures are logged and recorded in `hasErrors`; they do
 * not stop the remaining steps or the remaining items.
 *
 * @param {object} options
 * @param {boolean} [options.addMp3MetadataFlag] - Run ffmpeg after download.
 * @param {*} [options.archive] - Forwarded to `download` / `writeItemMeta`.
 * @param {*} [options.archiveUrl] - Used as the `prefix` of archive keys.
 * @param {string} options.basePath - Directory output filenames resolve against.
 * @param {*} [options.bitrate] - Forwarded to `runFfmpeg`; also triggers it.
 * @param {*} [options.episodeTemplate] - Forwarded to `getFilename`.
 * @param {*} [options.exec] - When truthy, `runExec` is called after download.
 * @param {*} options.feed - Forwarded to `getFilename` and `runFfmpeg`.
 * @param {boolean} [options.filterUrlTracking] - Forwarded to `download` for
 *   the episode audio only.
 * @param {boolean} [options.includeEpisodeMeta] - Write per-episode metadata.
 * @param {*} [options.mono] - Forwarded to `runFfmpeg`; also triggers it.
 * @param {boolean} [options.override] - Forwarded to `download` / `writeItemMeta`.
 * @param {Array<object>} options.targetItems - Items to process.
 * @param {number} [options.threads=1] - Concurrency limit.
 * @returns {Promise<{numEpisodesDownloaded: number, hasErrors: boolean}>}
 */
const downloadItemsAsync = async ({
  addMp3MetadataFlag,
  archive,
  archiveUrl,
  basePath,
  bitrate,
  episodeTemplate,
  exec,
  feed,
  filterUrlTracking,
  includeEpisodeMeta,
  mono,
  override,
  targetItems,
  threads = 1,
}) => {
  let numEpisodesDownloaded = 0;
  let hasErrors = false;

  const limit = pLimit(threads);

  const downloadItem = async (item, index) => {
    // Log label only: derived from the item's position modulo `threads`.
    const threadIndex = index % threads;
    const marker = threads > 1 ? `[${threadIndex}] ${item.title}` : item.title;
    const logMessage = getLogMessageWithMarker(marker);
    const { url: episodeAudioUrl, ext: audioFileExt } =
      getEpisodeAudioUrlAndExt(item);

    if (!episodeAudioUrl) {
      hasErrors = true;
      logError(`${marker} | Unable to find episode download URL`);
      return;
    }

    const episodeFilename = getFilename({
      item,
      feed,
      url: episodeAudioUrl,
      ext: audioFileExt,
      template: episodeTemplate,
    });
    const outputPodcastPath = _path.resolve(basePath, episodeFilename);

    try {
      await download({
        archive,
        override,
        marker,
        filterUrlTracking,
        key: getArchiveKey({
          prefix: archiveUrl,
          name: getArchiveFilename({
            name: item.title,
            pubDate: item.pubDate,
            ext: audioFileExt,
          }),
        }),
        outputPath: outputPodcastPath,
        url: episodeAudioUrl,
        onAfterDownload: async () => {
          if (addMp3MetadataFlag || bitrate || mono) {
            logMessage("Running ffmpeg...");
            await runFfmpeg({
              feed,
              item,
              bitrate,
              mono,
              itemIndex: item._originalIndex,
              outputPath: outputPodcastPath,
            });
          }

          if (exec) {
            logMessage("Running exec...");
            await runExec({ exec, outputPodcastPath, episodeFilename });
          }

          // Counted only once post-processing has finished without throwing.
          numEpisodesDownloaded += 1;
        },
      });
    } catch (error) {
      hasErrors = true;
      logError(`${marker} | Error downloading episode: ${error.toString()}`);
    }

    // An item without `_extra_downloads` simply has no extras. Iterating
    // `undefined` would throw and reject the whole batch via Promise.all.
    const extraDownloads = item._extra_downloads || [];

    for (const extra of extraDownloads) {
      try {
        logMessage("Downloading episode image...");
        await download({
          archive,
          override,
          marker: extra.url,
          key: extra.key,
          outputPath: extra.outputPath,
          url: extra.url,
        });
      } catch (error) {
        hasErrors = true;
        logError(
          `${marker} | Error downloading ${extra.url}: ${error.toString()}`
        );
      }
    }

    // Metadata is written whether or not the audio download above succeeded.
    if (includeEpisodeMeta) {
      const episodeMetaExt = ".meta.json";
      const episodeMetaName = getFilename({
        item,
        feed,
        url: episodeAudioUrl,
        ext: episodeMetaExt,
        template: episodeTemplate,
      });
      const outputEpisodeMetaPath = _path.resolve(basePath, episodeMetaName);

      try {
        logMessage("Saving episode metadata...");
        writeItemMeta({
          marker,
          archive,
          override,
          item,
          key: getArchiveKey({
            prefix: archiveUrl,
            name: getArchiveFilename({
              pubDate: item.pubDate,
              name: item.title,
              ext: episodeMetaExt,
            }),
          }),
          outputPath: outputEpisodeMetaPath,
        });
      } catch (error) {
        hasErrors = true;
        logError(`${marker} | ${error.toString()}`);
      }
    }
  };

  const itemPromises = targetItems.map((item, index) =>
    limit(() => downloadItem(item, index))
  );

  await Promise.all(itemPromises);

  return { numEpisodesDownloaded, hasErrors };
};
293
+
294
+ export { download, downloadItemsAsync };