coomer-downloader 3.3.2 → 3.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -2,9 +2,12 @@
2
2
 
3
3
  [![NPM](https://nodei.co/npm/coomer-downloader.svg?color=lightgrey)](https://www.npmjs.com/package/coomer-downloader)
4
4
 
5
+ <img src="https://github.com/smartacephale/coomer-downloader/blob/main/docs/images/Screenshot%2001.jpg?raw=true" width="500"></img>
5
6
 
6
7
  ### Features
7
- * script keeps track of downloaded files and resume downloading if it's crashed.
8
+ * Script keeps track of downloaded files and resumes downloading if it crashes.
9
+ * Skip files, filter by text and media type
10
+ * Preview images in console. For now, partial downloads are not supported.
8
11
 
9
12
  ### Install:
10
13
  ```
@@ -42,12 +45,20 @@ npx coomer-downloader --u https://random.com/file.mp4
42
45
  ### Options:
43
46
  ```
44
47
  --dir <directory>
45
- --media <video|image|all>
48
+ --media <video|image>
46
49
  --skip <number>
47
50
 
48
- # include/exclude filters files by name
51
+ # Removes duplicates by url and file hash
52
+ --remove-dupilicates <true|false>
53
+
54
+ # Filter by file name
49
55
  --include <keyword1>
50
56
  --exclude <keyword2>
57
+
58
+ # Filter by min/max file size. Example: "1mb" or "500kb"
59
+ --min-size <keyword>
60
+ --max-size <keyword>
61
+
51
62
  ```
52
63
 
53
64
  ```
package/biome.json CHANGED
@@ -1,5 +1,5 @@
1
1
  {
2
- "$schema": "https://biomejs.dev/schemas/2.2.6/schema.json",
2
+ "$schema": "https://biomejs.dev/schemas/2.3.10/schema.json",
3
3
  "assist": {
4
4
  "actions": {
5
5
  "source": {
@@ -14,13 +14,14 @@
14
14
  "indentStyle": "space",
15
15
  "indentWidth": 2,
16
16
  "lineEnding": "lf",
17
- "lineWidth": 100
17
+ "lineWidth": 90
18
18
  },
19
19
  "javascript": {
20
20
  "formatter": {
21
21
  "quoteStyle": "single",
22
22
  "semicolons": "always"
23
- }
23
+ },
24
+ "jsxRuntime": "reactClassic"
24
25
  },
25
26
  "linter": {
26
27
  "enabled": true,
@@ -37,7 +38,8 @@
37
38
  "useNumberNamespace": "off"
38
39
  },
39
40
  "suspicious": {
40
- "noRedundantUseStrict": "off"
41
+ "noRedundantUseStrict": "off",
42
+ "noControlCharactersInRegex": "info"
41
43
  }
42
44
  }
43
45
  }
package/dist/index.js CHANGED
@@ -11,16 +11,41 @@ import { fetch } from "undici";
11
11
  import os from "node:os";
12
12
  import path from "node:path";
13
13
 
14
- // src/utils/filters.ts
15
- function isImage(name) {
16
- return /\.(jpg|jpeg|png|gif|bmp|tiff|webp|avif)$/i.test(name);
17
- }
18
- function isVideo(name) {
19
- return /\.(mp4|m4v|avi|mov|mkv|webm|flv|wmv|mpeg|mpg|3gp)$/i.test(name);
14
+ // src/logger/index.ts
15
+ import pino from "pino";
16
+ var logger = pino(
17
+ {
18
+ level: "debug"
19
+ },
20
+ pino.destination({
21
+ dest: "./debug.log",
22
+ append: false,
23
+ sync: true
24
+ })
25
+ );
26
+ var logger_default = logger;
27
+
28
+ // src/utils/duplicates.ts
29
+ function collectUniquesAndDuplicatesBy(xs, k) {
30
+ const seen = /* @__PURE__ */ new Set();
31
+ return xs.reduce(
32
+ (acc, item) => {
33
+ if (seen.has(item[k])) {
34
+ acc.duplicates.push(item);
35
+ } else {
36
+ seen.add(item[k]);
37
+ acc.uniques.push(item);
38
+ }
39
+ return acc;
40
+ },
41
+ { uniques: [], duplicates: [] }
42
+ );
20
43
  }
21
- function testMediaType(name, type) {
22
- return type === "all" ? true : type === "image" ? isImage(name) : isVideo(name);
44
+ function removeDuplicatesBy(xs, k) {
45
+ return [...new Map(xs.map((x) => [x[k], x])).values()];
23
46
  }
47
+
48
+ // src/utils/filters.ts
24
49
  function includesAllWords(str, words) {
25
50
  if (!words.length) return true;
26
51
  return words.every((w) => str.includes(w));
@@ -35,9 +60,21 @@ function parseQuery(query) {
35
60
  function filterString(text, include, exclude) {
36
61
  return includesAllWords(text, parseQuery(include)) && includesNoWords(text, parseQuery(exclude));
37
62
  }
63
+ function parseSizeValue(s) {
64
+ if (!s) return NaN;
65
+ const m = s.match(/^([0-9]+(?:\.[0-9]+)?)(b|kb|mb|gb)?$/i);
66
+ if (!m) return NaN;
67
+ const val = parseFloat(m[1]);
68
+ const unit = (m[2] || "b").toLowerCase();
69
+ const mult = unit === "kb" ? 1024 : unit === "mb" ? 1024 ** 2 : unit === "gb" ? 1024 ** 3 : 1;
70
+ return Math.floor(val * mult);
71
+ }
38
72
 
39
73
  // src/utils/io.ts
74
+ import { createHash } from "node:crypto";
40
75
  import fs from "node:fs";
76
+ import { access, constants, unlink } from "node:fs/promises";
77
+ import { pipeline } from "node:stream/promises";
41
78
  async function getFileSize(filepath) {
42
79
  let size = 0;
43
80
  if (fs.existsSync(filepath)) {
@@ -45,15 +82,40 @@ async function getFileSize(filepath) {
45
82
  }
46
83
  return size;
47
84
  }
85
+ async function getFileHash(filepath) {
86
+ const hash = createHash("sha256");
87
+ const filestream = fs.createReadStream(filepath);
88
+ await pipeline(filestream, hash);
89
+ return hash.digest("hex");
90
+ }
48
91
  function mkdir(filepath) {
49
92
  if (!fs.existsSync(filepath)) {
50
93
  fs.mkdirSync(filepath, { recursive: true });
51
94
  }
52
95
  }
96
+ async function deleteFile(path2) {
97
+ await access(path2, constants.F_OK);
98
+ await unlink(path2);
99
+ }
100
+ function sanitizeFilename(name) {
101
+ if (!name) return name;
102
+ return name.replace(/[<>"/\\|?*\x00-\x1F]/g, "-").replace(/\s+/g, " ").trim().replace(/[.]+$/, "");
103
+ }
104
+
105
+ // src/utils/mediatypes.ts
106
+ function isImage(name) {
107
+ return /\.(jpg|jpeg|png|gif|bmp|tiff|webp|avif)$/i.test(name);
108
+ }
109
+ function isVideo(name) {
110
+ return /\.(mp4|m4v|avi|mov|mkv|webm|flv|wmv|mpeg|mpg|3gp)$/i.test(name);
111
+ }
112
+ function testMediaType(name, type) {
113
+ return type === "image" ? isImage(name) : isVideo(name);
114
+ }
53
115
 
54
116
  // src/services/file.ts
55
117
  var CoomerFile = class _CoomerFile {
56
- constructor(name, url, filepath, size, downloaded = 0, content) {
118
+ constructor(name, url, filepath = "", size, downloaded = 0, content) {
57
119
  this.name = name;
58
120
  this.url = url;
59
121
  this.filepath = filepath;
@@ -62,6 +124,7 @@ var CoomerFile = class _CoomerFile {
62
124
  this.content = content;
63
125
  }
64
126
  active = false;
127
+ hash;
65
128
  async getDownloadedSize() {
66
129
  this.downloaded = await getFileSize(this.filepath);
67
130
  return this;
@@ -88,7 +151,8 @@ var CoomerFileList = class {
88
151
  this.dirPath = path.join(os.homedir(), path.join(dir, dirName));
89
152
  }
90
153
  this.files.forEach((file) => {
91
- file.filepath = path.join(this.dirPath, file.name);
154
+ const safeName = sanitizeFilename(file.name) || file.name;
155
+ file.filepath = path.join(this.dirPath, safeName);
92
156
  });
93
157
  return this;
94
158
  }
@@ -110,6 +174,7 @@ var CoomerFileList = class {
110
174
  for (const file of this.files) {
111
175
  await file.getDownloadedSize();
112
176
  }
177
+ return this;
113
178
  }
114
179
  getActiveFiles() {
115
180
  return this.files.filter((f) => f.active);
@@ -117,6 +182,21 @@ var CoomerFileList = class {
117
182
  getDownloaded() {
118
183
  return this.files.filter((f) => f.size && f.size <= f.downloaded);
119
184
  }
185
+ async removeDuplicatesByHash() {
186
+ for (const file of this.files) {
187
+ file.hash = await getFileHash(file.filepath);
188
+ }
189
+ const { duplicates } = collectUniquesAndDuplicatesBy(this.files, "hash");
190
+ console.log({ duplicates });
191
+ logger_default.debug(`duplicates: ${JSON.stringify(duplicates)}`);
192
+ duplicates.forEach((f) => {
193
+ deleteFile(f.filepath);
194
+ });
195
+ }
196
+ removeURLDuplicates() {
197
+ this.files = removeDuplicatesBy(this.files, "url");
198
+ return this;
199
+ }
120
200
  };
121
201
 
122
202
  // src/api/bunkr.ts
@@ -225,10 +305,10 @@ async function getUserPostsAPI(user, offset) {
225
305
  async function getUserFiles(user) {
226
306
  const userPosts = [];
227
307
  const offset = 50;
228
- for (let i = 0; i < 1e3; i++) {
308
+ for (let i = 0; i < 1e4; i++) {
229
309
  const posts = await getUserPostsAPI(user, i * offset);
230
310
  userPosts.push(...posts);
231
- if (posts.length < 50) break;
311
+ if (posts.length < offset) break;
232
312
  }
233
313
  const filelist = new CoomerFileList();
234
314
  for (const p of userPosts) {
@@ -239,13 +319,23 @@ async function getUserFiles(user) {
239
319
  const postFiles = [...p.attachments, p.file].filter((f) => f.path).map((f, i) => {
240
320
  const ext = f.name.split(".").pop();
241
321
  const name = `${datentitle} ${i + 1}.${ext}`;
242
- const url = `${user.domain}/${f.path}`;
322
+ const url = getUrl(f, user);
243
323
  return CoomerFile.from({ name, url, content });
244
324
  });
245
325
  filelist.files.push(...postFiles);
246
326
  }
247
327
  return filelist;
248
328
  }
329
+ function getUrl(f, user) {
330
+ const normalizedPath = f.path.replace(/^\/+/, "/");
331
+ let url = "";
332
+ try {
333
+ url = new URL(normalizedPath, user.domain).toString();
334
+ } catch (_) {
335
+ url = `${user.domain}/${normalizedPath.replace(/^\//, "")}`;
336
+ }
337
+ return url;
338
+ }
249
339
  async function parseUser(url) {
250
340
  const [_, domain, service, id] = url.match(
251
341
  /(https:\/\/\w+\.\w+)\/(\w+)\/user\/([\w|.|-]+)/
@@ -320,7 +410,6 @@ async function getUserPage(user, offset) {
320
410
  return fetch4(url).then((r) => r.text());
321
411
  }
322
412
  async function getUserPosts(user) {
323
- console.log("Fetching user posts...");
324
413
  const posts = [];
325
414
  for (let i = 1; i < 1e5; i++) {
326
415
  const page = await getUserPage(user, i);
@@ -332,7 +421,6 @@ async function getUserPosts(user) {
332
421
  return posts;
333
422
  }
334
423
  async function getPostsData(posts) {
335
- console.log("Fetching posts data...");
336
424
  const filelist = new CoomerFileList();
337
425
  for (const post of posts) {
338
426
  const page = await fetch4(post).then((r) => r.text());
@@ -349,7 +437,9 @@ async function getPostsData(posts) {
349
437
  }
350
438
  async function getRedditData(url) {
351
439
  const user = url.match(/u\/(\w+)/)?.[1];
440
+ console.log("Fetching user posts...");
352
441
  const posts = await getUserPosts(user);
442
+ console.log("Fetching posts data...");
353
443
  const filelist = await getPostsData(posts);
354
444
  filelist.dirName = `${user}-reddit`;
355
445
  return filelist;
@@ -400,8 +490,7 @@ function argumentHander() {
400
490
  default: "./"
401
491
  }).option("media", {
402
492
  type: "string",
403
- choices: ["video", "image", "all"],
404
- default: "all",
493
+ choices: ["video", "image"],
405
494
  description: "The type of media to download: 'video' or 'image'."
406
495
  }).option("include", {
407
496
  type: "string",
@@ -411,10 +500,22 @@ function argumentHander() {
411
500
  type: "string",
412
501
  default: "",
413
502
  description: "Filter file names by a comma-separated list of keywords to exclude"
503
+ }).option("min-size", {
504
+ type: "string",
505
+ default: "",
506
+ description: 'Minimum file size to download. Example: "1mb" or "500kb"'
507
+ }).option("max-size", {
508
+ type: "string",
509
+ default: "",
510
+ description: 'Maximum file size to download. Example: "1mb" or "500kb"'
414
511
  }).option("skip", {
415
512
  type: "number",
416
513
  default: 0,
417
514
  description: "Skips the first N files in the download queue"
515
+ }).option("remove-dupilicates", {
516
+ type: "boolean",
517
+ default: true,
518
+ description: "removes duplicates by url and file hash"
418
519
  }).help().alias("help", "h").parseSync();
419
520
  }
420
521
 
@@ -558,7 +659,7 @@ import { Box as Box6, Spacer as Spacer2, Text as Text6 } from "ink";
558
659
  import React7 from "react";
559
660
 
560
661
  // package.json
561
- var version = "3.3.2";
662
+ var version = "3.4.0";
562
663
 
563
664
  // src/cli/ui/components/titlebar.tsx
564
665
  function TitleBar() {
@@ -602,7 +703,7 @@ function App() {
602
703
  const downloader = useInkStore((state) => state.downloader);
603
704
  const filelist = downloader?.filelist;
604
705
  const isFilelist = filelist instanceof CoomerFileList;
605
- return /* @__PURE__ */ React8.createElement(Box7, { borderStyle: "single", flexDirection: "column", borderColor: "blue", width: 80 }, /* @__PURE__ */ React8.createElement(TitleBar, null), !isFilelist ? /* @__PURE__ */ React8.createElement(Loading, null) : /* @__PURE__ */ React8.createElement(React8.Fragment, null, /* @__PURE__ */ React8.createElement(Box7, null, /* @__PURE__ */ React8.createElement(Box7, null, /* @__PURE__ */ React8.createElement(FileListStateBox, { filelist })), /* @__PURE__ */ React8.createElement(Box7, { flexBasis: 29 }, /* @__PURE__ */ React8.createElement(KeyboardControlsInfo, null))), filelist.getActiveFiles().map((file) => {
706
+ return /* @__PURE__ */ React8.createElement(Box7, { borderStyle: "single", flexDirection: "column", borderColor: "blue", width: 80 }, /* @__PURE__ */ React8.createElement(TitleBar, null), !isFilelist ? /* @__PURE__ */ React8.createElement(Loading, null) : /* @__PURE__ */ React8.createElement(React8.Fragment, null, /* @__PURE__ */ React8.createElement(Box7, null, /* @__PURE__ */ React8.createElement(Box7, null, /* @__PURE__ */ React8.createElement(FileListStateBox, { filelist })), /* @__PURE__ */ React8.createElement(Box7, { flexBasis: 30 }, /* @__PURE__ */ React8.createElement(KeyboardControlsInfo, null))), filelist.getActiveFiles().map((file) => {
606
707
  return /* @__PURE__ */ React8.createElement(FileBox, { file, key: file.name });
607
708
  })));
608
709
  }
@@ -615,7 +716,7 @@ function createReactInk() {
615
716
  // src/services/downloader.ts
616
717
  import fs2 from "node:fs";
617
718
  import { Readable, Transform } from "node:stream";
618
- import { pipeline } from "node:stream/promises";
719
+ import { pipeline as pipeline2 } from "node:stream/promises";
619
720
  import { Subject } from "rxjs";
620
721
 
621
722
  // src/utils/promise.ts
@@ -650,7 +751,7 @@ var Timer = class _Timer {
650
751
  this.start();
651
752
  return this;
652
753
  }
653
- static withAbortController(timeout, abortControllerSubject, message = "Timeout") {
754
+ static withAbortController(timeout, abortControllerSubject, message = "TIMEOUT") {
654
755
  const callback = () => {
655
756
  abortControllerSubject.next(message);
656
757
  };
@@ -661,8 +762,10 @@ var Timer = class _Timer {
661
762
 
662
763
  // src/services/downloader.ts
663
764
  var Downloader = class {
664
- constructor(filelist, chunkTimeout = 3e4, chunkFetchRetries = 5, fetchRetries = 7) {
765
+ constructor(filelist, minSize, maxSize, chunkTimeout = 3e4, chunkFetchRetries = 5, fetchRetries = 7) {
665
766
  this.filelist = filelist;
767
+ this.minSize = minSize;
768
+ this.maxSize = maxSize;
666
769
  this.chunkTimeout = chunkTimeout;
667
770
  this.chunkFetchRetries = chunkFetchRetries;
668
771
  this.fetchRetries = fetchRetries;
@@ -680,8 +783,10 @@ var Downloader = class {
680
783
  async fetchStream(file, stream, sizeOld = 0, retries = this.chunkFetchRetries) {
681
784
  const signal = this.abortController.signal;
682
785
  const subject = this.subject;
683
- const { timer } = Timer.withAbortController(this.chunkTimeout, this.abortControllerSubject);
684
- let i;
786
+ const { timer } = Timer.withAbortController(
787
+ this.chunkTimeout,
788
+ this.abortControllerSubject
789
+ );
685
790
  try {
686
791
  const fileStream = fs2.createWriteStream(file.filepath, { flags: "a" });
687
792
  const progressStream = new Transform({
@@ -694,7 +799,7 @@ var Downloader = class {
694
799
  }
695
800
  });
696
801
  subject.next({ type: "CHUNK_DOWNLOADING_START" });
697
- await pipeline(stream, progressStream, fileStream, { signal });
802
+ await pipeline2(stream, progressStream, fileStream, { signal });
698
803
  } catch (error) {
699
804
  if (signal.aborted) {
700
805
  if (signal.reason === "FILE_SKIP") return;
@@ -711,12 +816,22 @@ var Downloader = class {
711
816
  } finally {
712
817
  subject.next({ type: "CHUNK_DOWNLOADING_END" });
713
818
  timer.stop();
714
- clearInterval(i);
715
819
  }
716
820
  }
717
821
  skip() {
718
822
  this.abortControllerSubject.next("FILE_SKIP");
719
823
  }
824
+ filterFileSize(file) {
825
+ if (!file.size) return;
826
+ if (this.minSize && file.size < this.minSize || this.maxSize && file.size > this.maxSize) {
827
+ try {
828
+ deleteFile(file.filepath);
829
+ } catch {
830
+ }
831
+ this.skip();
832
+ return;
833
+ }
834
+ }
720
835
  async downloadFile(file, retries = this.fetchRetries) {
721
836
  const signal = this.abortController.signal;
722
837
  try {
@@ -729,6 +844,7 @@ var Downloader = class {
729
844
  if (!contentLength && file.downloaded > 0) return;
730
845
  const restFileSize = parseInt(contentLength);
731
846
  file.size = restFileSize + file.downloaded;
847
+ this.filterFileSize(file);
732
848
  if (file.size > file.downloaded && response.body) {
733
849
  const stream = Readable.fromWeb(response.body);
734
850
  stream.setMaxListeners(20);
@@ -765,14 +881,29 @@ var Downloader = class {
765
881
  // src/index.ts
766
882
  async function run() {
767
883
  createReactInk();
768
- const { url, dir, media, include, exclude, skip } = argumentHander();
884
+ const { url, dir, media, include, exclude, minSize, maxSize, skip, removeDupilicates } = argumentHander();
769
885
  const filelist = await apiHandler(url);
770
886
  filelist.setDirPath(dir).skip(skip).filterByText(include, exclude).filterByMediaType(media);
887
+ if (removeDupilicates) {
888
+ filelist.removeURLDuplicates();
889
+ }
890
+ const minSizeBytes = minSize ? parseSizeValue(minSize) : void 0;
891
+ const maxSizeBytes = maxSize ? parseSizeValue(maxSize) : void 0;
771
892
  await filelist.calculateFileSizes();
772
893
  setGlobalHeaders({ Referer: url });
773
- const downloader = new Downloader(filelist);
894
+ const downloader = new Downloader(filelist, minSizeBytes, maxSizeBytes);
774
895
  useInkStore.getState().setDownloader(downloader);
775
896
  await downloader.downloadFiles();
776
- process2.kill(process2.pid, "SIGINT");
897
+ if (removeDupilicates) {
898
+ await filelist.removeDuplicatesByHash();
899
+ }
777
900
  }
778
- run();
901
+ (async () => {
902
+ try {
903
+ await run();
904
+ process2.exit(0);
905
+ } catch (err) {
906
+ console.error("Fatal error:", err);
907
+ process2.exit(1);
908
+ }
909
+ })();
Binary file
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "coomer-downloader",
3
- "version": "3.3.2",
3
+ "version": "3.4.0",
4
4
  "author": "smartacephal",
5
5
  "license": "MIT",
6
6
  "description": "Downloads images/videos from Coomer/Kemono, Bunkr, GoFile, Reddit-NSFW user posts",
@@ -58,6 +58,7 @@
58
58
  "eslint-config-xo-react": "^0.27.0",
59
59
  "eslint-plugin-react": "^7.32.2",
60
60
  "eslint-plugin-react-hooks": "^4.6.0",
61
+ "pino": "^10.1.0",
61
62
  "tsx": "^4.20.6"
62
63
  }
63
64
  }
@@ -1,5 +1,5 @@
1
1
  import { CoomerFile, CoomerFileList } from '../services/file';
2
- import { isImage } from '../utils/filters';
2
+ import { isImage } from '../utils/mediatypes';
3
3
  import { fetchWithGlobalHeader, setGlobalHeaders } from '../utils/requests';
4
4
 
5
5
  type CoomerAPIUser = { domain: string; service: string; id: string; name?: string };
@@ -34,7 +34,10 @@ async function getUserProfileData(user: CoomerAPIUser): Promise<CoomerAPIUserDat
34
34
  return result as CoomerAPIUserData;
35
35
  }
36
36
 
37
- async function getUserPostsAPI(user: CoomerAPIUser, offset: number): Promise<CoomerAPIPost[]> {
37
+ async function getUserPostsAPI(
38
+ user: CoomerAPIUser,
39
+ offset: number,
40
+ ): Promise<CoomerAPIPost[]> {
38
41
  const url = `${user.domain}/api/v1/${user.service}/user/${user.id}/posts?o=${offset}`;
39
42
  const posts = await fetchWithGlobalHeader(url).then((r) => r.json());
40
43
  return posts as CoomerAPIPost[];
@@ -44,10 +47,10 @@ export async function getUserFiles(user: CoomerAPIUser): Promise<CoomerFileList>
44
47
  const userPosts = [];
45
48
 
46
49
  const offset = 50;
47
- for (let i = 0; i < 1000; i++) {
50
+ for (let i = 0; i < 10_000; i++) {
48
51
  const posts = await getUserPostsAPI(user, i * offset);
49
52
  userPosts.push(...posts);
50
- if (posts.length < 50) break;
53
+ if (posts.length < offset) break;
51
54
  }
52
55
 
53
56
  const filelist = new CoomerFileList();
@@ -63,7 +66,7 @@ export async function getUserFiles(user: CoomerAPIUser): Promise<CoomerFileList>
63
66
  .map((f, i) => {
64
67
  const ext = f.name.split('.').pop();
65
68
  const name = `${datentitle} ${i + 1}.${ext}`;
66
- const url = `${user.domain}/${f.path}`;
69
+ const url = getUrl(f, user);
67
70
  return CoomerFile.from({ name, url, content });
68
71
  });
69
72
 
@@ -73,6 +76,19 @@ export async function getUserFiles(user: CoomerAPIUser): Promise<CoomerFileList>
73
76
  return filelist;
74
77
  }
75
78
 
79
+ function getUrl(f: CoomerAPIFile, user: CoomerAPIUser) {
80
+ // Normalize f.path to avoid protocol-relative or multiple-leading-slash paths
81
+ const normalizedPath = f.path.replace(/^\/+/, '/');
82
+ let url = '';
83
+ try {
84
+ url = new URL(normalizedPath, user.domain).toString();
85
+ } catch (_) {
86
+ // Fallback: join with a single slash
87
+ url = `${user.domain}/${normalizedPath.replace(/^\//, '')}`;
88
+ }
89
+ return url;
90
+ }
91
+
76
92
  async function parseUser(url: string): Promise<CoomerAPIUser> {
77
93
  const [_, domain, service, id] = url.match(
78
94
  /(https:\/\/\w+\.\w+)\/(\w+)\/user\/([\w|.|-]+)/,
@@ -8,7 +8,6 @@ async function getUserPage(user: string, offset: number) {
8
8
  }
9
9
 
10
10
  async function getUserPosts(user: string): Promise<string[]> {
11
- console.log('Fetching user posts...');
12
11
  const posts = [];
13
12
  for (let i = 1; i < 100000; i++) {
14
13
  const page = await getUserPage(user, i);
@@ -26,7 +25,6 @@ async function getUserPosts(user: string): Promise<string[]> {
26
25
  }
27
26
 
28
27
  async function getPostsData(posts: string[]): Promise<CoomerFileList> {
29
- console.log('Fetching posts data...');
30
28
  const filelist = new CoomerFileList();
31
29
  for (const post of posts) {
32
30
  const page = await fetch(post).then((r) => r.text());
@@ -53,7 +51,9 @@ async function getPostsData(posts: string[]): Promise<CoomerFileList> {
53
51
 
54
52
  export async function getRedditData(url: string): Promise<CoomerFileList> {
55
53
  const user = url.match(/u\/(\w+)/)?.[1] as string;
54
+ console.log('Fetching user posts...');
56
55
  const posts = await getUserPosts(user);
56
+ console.log('Fetching posts data...');
57
57
  const filelist = await getPostsData(posts);
58
58
  filelist.dirName = `${user}-reddit`;
59
59
  return filelist;
@@ -1,16 +1,7 @@
1
1
  import yargs from 'yargs';
2
2
  import { hideBin } from 'yargs/helpers';
3
3
 
4
- type ArgumentHandlerResult = {
5
- url: string;
6
- dir: string;
7
- media: string;
8
- include: string;
9
- exclude: string;
10
- skip: number;
11
- };
12
-
13
- export function argumentHander(): ArgumentHandlerResult {
4
+ export function argumentHander() {
14
5
  return yargs(hideBin(process.argv))
15
6
  .option('url', {
16
7
  alias: 'u',
@@ -26,8 +17,7 @@ export function argumentHander(): ArgumentHandlerResult {
26
17
  })
27
18
  .option('media', {
28
19
  type: 'string',
29
- choices: ['video', 'image', 'all'],
30
- default: 'all',
20
+ choices: ['video', 'image'],
31
21
  description:
32
22
  "The type of media to download: 'video', 'image', or 'all'. 'all' is the default.",
33
23
  })
@@ -41,11 +31,26 @@ export function argumentHander(): ArgumentHandlerResult {
41
31
  default: '',
42
32
  description: 'Filter file names by a comma-separated list of keywords to exclude',
43
33
  })
34
+ .option('min-size', {
35
+ type: 'string',
36
+ default: '',
37
+ description: 'Minimum file size to download. Example: "1mb" or "500kb"',
38
+ })
39
+ .option('max-size', {
40
+ type: 'string',
41
+ default: '',
42
+ description: 'Maximum file size to download. Example: "1mb" or "500kb"',
43
+ })
44
44
  .option('skip', {
45
45
  type: 'number',
46
46
  default: 0,
47
47
  description: 'Skips the first N files in the download queue',
48
48
  })
49
+ .option('remove-dupilicates', {
50
+ type: 'boolean',
51
+ default: true,
52
+ description: 'removes duplicates by url and file hash',
53
+ })
49
54
  .help()
50
55
  .alias('help', 'h')
51
56
  .parseSync();
@@ -25,7 +25,7 @@ export function App() {
25
25
  <Box>
26
26
  <FileListStateBox filelist={filelist} />
27
27
  </Box>
28
- <Box flexBasis={29}>
28
+ <Box flexBasis={30}>
29
29
  <KeyboardControlsInfo />
30
30
  </Box>
31
31
  </Box>
@@ -2,7 +2,7 @@ import { Box } from 'ink';
2
2
  import Image, { TerminalInfoProvider } from 'ink-picture';
3
3
  import React from 'react';
4
4
  import type { CoomerFile } from '../../../services/file';
5
- import { isImage } from '../../../utils/filters';
5
+ import { isImage } from '../../../utils/mediatypes';
6
6
  import { useInkStore } from '../store';
7
7
 
8
8
  interface PreviewProps {
package/src/index.ts CHANGED
@@ -6,27 +6,50 @@ import { argumentHander } from './cli/args-handler';
6
6
  import { createReactInk } from './cli/ui';
7
7
  import { useInkStore } from './cli/ui/store';
8
8
  import { Downloader } from './services/downloader';
9
+ import { parseSizeValue } from './utils/filters';
9
10
  import { setGlobalHeaders } from './utils/requests';
10
11
 
11
12
  async function run() {
12
13
  createReactInk();
13
14
 
14
- const { url, dir, media, include, exclude, skip } = argumentHander();
15
+ const { url, dir, media, include, exclude, minSize, maxSize, skip, removeDupilicates } =
16
+ argumentHander();
15
17
 
16
18
  const filelist = await apiHandler(url);
17
19
 
18
- filelist.setDirPath(dir).skip(skip).filterByText(include, exclude).filterByMediaType(media);
20
+ filelist
21
+ .setDirPath(dir)
22
+ .skip(skip)
23
+ .filterByText(include, exclude)
24
+ .filterByMediaType(media);
25
+
26
+ if (removeDupilicates) {
27
+ filelist.removeURLDuplicates();
28
+ }
29
+
30
+ const minSizeBytes = minSize ? parseSizeValue(minSize) : undefined;
31
+ const maxSizeBytes = maxSize ? parseSizeValue(maxSize) : undefined;
19
32
 
20
33
  await filelist.calculateFileSizes();
21
34
 
22
35
  setGlobalHeaders({ Referer: url });
23
36
 
24
- const downloader = new Downloader(filelist);
37
+ const downloader = new Downloader(filelist, minSizeBytes, maxSizeBytes);
25
38
  useInkStore.getState().setDownloader(downloader);
26
39
 
27
40
  await downloader.downloadFiles();
28
41
 
29
- process.kill(process.pid, 'SIGINT');
42
+ if (removeDupilicates) {
43
+ await filelist.removeDuplicatesByHash();
44
+ }
30
45
  }
31
46
 
32
- run();
47
+ (async () => {
48
+ try {
49
+ await run();
50
+ process.exit(0);
51
+ } catch (err) {
52
+ console.error('Fatal error:', err);
53
+ process.exit(1);
54
+ }
55
+ })();
@@ -0,0 +1,15 @@
1
+ import pino from 'pino';
2
+
3
+ // tail -f debug.log | jq -C .
4
+ const logger = pino(
5
+ {
6
+ level: 'debug',
7
+ },
8
+ pino.destination({
9
+ dest: './debug.log',
10
+ append: false,
11
+ sync: true,
12
+ }),
13
+ );
14
+
15
+ export default logger;
@@ -3,8 +3,8 @@ import { Readable, Transform } from 'node:stream';
3
3
  import { pipeline } from 'node:stream/promises';
4
4
  import { Subject } from 'rxjs';
5
5
  import { tryFixCoomerUrl } from '../api/coomer-api';
6
- import type { DownloaderSubject } from '../types';
7
- import { getFileSize, mkdir } from '../utils/io';
6
+ import type { AbortControllerSubject, DownloaderSubject } from '../types';
7
+ import { deleteFile, getFileSize, mkdir } from '../utils/io';
8
8
  import { sleep } from '../utils/promise';
9
9
  import { fetchByteRange } from '../utils/requests';
10
10
  import { Timer } from '../utils/timer';
@@ -14,7 +14,7 @@ export class Downloader {
14
14
  public subject = new Subject<DownloaderSubject>();
15
15
 
16
16
  private abortController = new AbortController();
17
- public abortControllerSubject = new Subject<string>();
17
+ public abortControllerSubject = new Subject<AbortControllerSubject>();
18
18
 
19
19
  setAbortControllerListener() {
20
20
  this.abortControllerSubject.subscribe((type) => {
@@ -25,6 +25,8 @@ export class Downloader {
25
25
 
26
26
  constructor(
27
27
  public filelist: CoomerFileList,
28
+ public minSize?: number,
29
+ public maxSize?: number,
28
30
  public chunkTimeout = 30_000,
29
31
  public chunkFetchRetries = 5,
30
32
  public fetchRetries = 7,
@@ -40,8 +42,10 @@ export class Downloader {
40
42
  ): Promise<void> {
41
43
  const signal = this.abortController.signal;
42
44
  const subject = this.subject;
43
- const { timer } = Timer.withAbortController(this.chunkTimeout, this.abortControllerSubject);
44
- let i: NodeJS.Timeout | undefined;
45
+ const { timer } = Timer.withAbortController(
46
+ this.chunkTimeout,
47
+ this.abortControllerSubject,
48
+ );
45
49
 
46
50
  try {
47
51
  const fileStream = fs.createWriteStream(file.filepath as string, { flags: 'a' });
@@ -74,7 +78,6 @@ export class Downloader {
74
78
  } finally {
75
79
  subject.next({ type: 'CHUNK_DOWNLOADING_END' });
76
80
  timer.stop();
77
- clearInterval(i);
78
81
  }
79
82
  }
80
83
 
@@ -82,6 +85,20 @@ export class Downloader {
82
85
  this.abortControllerSubject.next('FILE_SKIP');
83
86
  }
84
87
 
88
+ private filterFileSize(file: CoomerFile) {
89
+ if (!file.size) return;
90
+ if (
91
+ (this.minSize && file.size < this.minSize) ||
92
+ (this.maxSize && file.size > this.maxSize)
93
+ ) {
94
+ try {
95
+ deleteFile(file.filepath);
96
+ } catch {}
97
+ this.skip();
98
+ return;
99
+ }
100
+ }
101
+
85
102
  async downloadFile(file: CoomerFile, retries = this.fetchRetries): Promise<void> {
86
103
  const signal = this.abortController.signal;
87
104
  try {
@@ -100,6 +117,8 @@ export class Downloader {
100
117
  const restFileSize = parseInt(contentLength);
101
118
  file.size = restFileSize + file.downloaded;
102
119
 
120
+ this.filterFileSize(file);
121
+
103
122
  if (file.size > file.downloaded && response.body) {
104
123
  const stream = Readable.fromWeb(response.body);
105
124
  stream.setMaxListeners(20);
@@ -124,6 +143,7 @@ export class Downloader {
124
143
  mkdir(this.filelist.dirPath as string);
125
144
 
126
145
  this.subject.next({ type: 'FILES_DOWNLOADING_START' });
146
+
127
147
  for (const file of this.filelist.files) {
128
148
  file.active = true;
129
149
 
@@ -1,25 +1,20 @@
1
1
  import os from 'node:os';
2
2
  import path from 'node:path';
3
+ import logger from '../logger';
3
4
  import type { MediaType } from '../types';
4
- import { filterString, testMediaType } from '../utils/filters';
5
- import { getFileSize } from '../utils/io';
6
-
7
- interface ICoomerFile {
8
- name: string;
9
- url: string;
10
- filepath?: string;
11
- size?: number;
12
- downloaded?: number;
13
- content?: string;
14
- }
5
+ import { collectUniquesAndDuplicatesBy, removeDuplicatesBy } from '../utils/duplicates';
6
+ import { filterString } from '../utils/filters';
7
+ import { deleteFile, getFileHash, getFileSize, sanitizeFilename } from '../utils/io';
8
+ import { testMediaType } from '../utils/mediatypes';
15
9
 
16
10
  export class CoomerFile {
17
11
  public active = false;
12
+ public hash?: string;
18
13
 
19
14
  constructor(
20
15
  public name: string,
21
16
  public url: string,
22
- public filepath?: string,
17
+ public filepath = '',
23
18
  public size?: number,
24
19
  public downloaded = 0,
25
20
  public content?: string,
@@ -35,7 +30,7 @@ export class CoomerFile {
35
30
  return text;
36
31
  }
37
32
 
38
- public static from(f: ICoomerFile) {
33
+ public static from(f: Pick<CoomerFile, 'name' | 'url'> & Partial<CoomerFile>) {
39
34
  return new CoomerFile(f.name, f.url, f.filepath, f.size, f.downloaded, f.content);
40
35
  }
41
36
  }
@@ -56,7 +51,8 @@ export class CoomerFileList {
56
51
  }
57
52
 
58
53
  this.files.forEach((file) => {
59
- file.filepath = path.join(this.dirPath as string, file.name);
54
+ const safeName = sanitizeFilename(file.name) || file.name;
55
+ file.filepath = path.join(this.dirPath as string, safeName);
60
56
  });
61
57
 
62
58
  return this;
@@ -83,6 +79,7 @@ export class CoomerFileList {
83
79
  for (const file of this.files) {
84
80
  await file.getDownloadedSize();
85
81
  }
82
+ return this;
86
83
  }
87
84
 
88
85
  public getActiveFiles() {
@@ -92,4 +89,25 @@ export class CoomerFileList {
92
89
  public getDownloaded() {
93
90
  return this.files.filter((f) => f.size && f.size <= f.downloaded);
94
91
  }
92
+
93
+ public async removeDuplicatesByHash() {
94
+ for (const file of this.files) {
95
+ file.hash = await getFileHash(file.filepath);
96
+ }
97
+
98
+ const { duplicates } = collectUniquesAndDuplicatesBy(this.files, 'hash');
99
+
100
+ console.log({ duplicates });
101
+
102
+ logger.debug(`duplicates: ${JSON.stringify(duplicates)}`);
103
+
104
+ duplicates.forEach((f) => {
105
+ deleteFile(f.filepath);
106
+ });
107
+ }
108
+
109
+ public removeURLDuplicates() {
110
+ this.files = removeDuplicatesBy(this.files, 'url');
111
+ return this;
112
+ }
95
113
  }
@@ -1,15 +1,16 @@
1
/** Media categories the downloader's --media filter accepts. */
export type MediaType = 'video' | 'image';

/** Lifecycle signals published on the Downloader's subject (see subject.next calls). */
export type DownloaderSubjectSignal =
  | 'FILES_DOWNLOADING_START'
  | 'FILES_DOWNLOADING_END'
  | 'FILE_DOWNLOADING_START'
  | 'FILE_DOWNLOADING_END'
  | 'CHUNK_DOWNLOADING_START'
  | 'CHUNK_DOWNLOADING_UPDATE'
  | 'CHUNK_DOWNLOADING_END';

/** Reasons an in-flight download may be aborted via the abort-controller subject. */
export type AbortControllerSubject = 'FILE_SKIP' | 'TIMEOUT';

/** Payload published on the downloader's subject. */
export type DownloaderSubject = {
  type: DownloaderSubjectSignal;
};
@@ -0,0 +1,23 @@
1
+ export function collectUniquesAndDuplicatesBy<T extends {}, K extends keyof T>(
2
+ xs: T[],
3
+ k: K,
4
+ ): { uniques: T[]; duplicates: T[] } {
5
+ const seen = new Set<T[K]>();
6
+
7
+ return xs.reduce(
8
+ (acc, item) => {
9
+ if (seen.has(item[k])) {
10
+ acc.duplicates.push(item);
11
+ } else {
12
+ seen.add(item[k]);
13
+ acc.uniques.push(item);
14
+ }
15
+ return acc;
16
+ },
17
+ { uniques: [] as T[], duplicates: [] as T[] },
18
+ );
19
+ }
20
+
21
+ export function removeDuplicatesBy<T extends {}, K extends keyof T>(xs: T[], k: K) {
22
+ return [...new Map(xs.map((x) => [x[k], x])).values()];
23
+ }
@@ -1,17 +1,3 @@
1
- import type { MediaType } from '../types';
2
-
3
- export function isImage(name: string) {
4
- return /\.(jpg|jpeg|png|gif|bmp|tiff|webp|avif)$/i.test(name);
5
- }
6
-
7
- export function isVideo(name: string) {
8
- return /\.(mp4|m4v|avi|mov|mkv|webm|flv|wmv|mpeg|mpg|3gp)$/i.test(name);
9
- }
10
-
11
- export function testMediaType(name: string, type: MediaType) {
12
- return type === 'all' ? true : type === 'image' ? isImage(name) : isVideo(name);
13
- }
14
-
15
1
  function includesAllWords(str: string, words: string[]) {
16
2
  if (!words.length) return true;
17
3
  return words.every((w) => str.includes(w));
@@ -30,5 +16,19 @@ function parseQuery(query: string) {
30
16
  }
31
17
 
32
18
  export function filterString(text: string, include: string, exclude: string): boolean {
33
- return includesAllWords(text, parseQuery(include)) && includesNoWords(text, parseQuery(exclude));
19
+ return (
20
+ includesAllWords(text, parseQuery(include)) &&
21
+ includesNoWords(text, parseQuery(exclude))
22
+ );
23
+ }
24
+
25
+ export function parseSizeValue(s: string) {
26
+ if (!s) return NaN;
27
+ const m = s.match(/^([0-9]+(?:\.[0-9]+)?)(b|kb|mb|gb)?$/i);
28
+ if (!m) return NaN;
29
+ const val = parseFloat(m[1]);
30
+ const unit = (m[2] || 'b').toLowerCase();
31
+ const mult =
32
+ unit === 'kb' ? 1024 : unit === 'mb' ? 1024 ** 2 : unit === 'gb' ? 1024 ** 3 : 1;
33
+ return Math.floor(val * mult);
34
34
  }
package/src/utils/io.ts CHANGED
@@ -1,4 +1,7 @@
1
+ import { createHash } from 'node:crypto';
1
2
  import fs from 'node:fs';
3
+ import { access, constants, unlink } from 'node:fs/promises';
4
+ import { pipeline } from 'node:stream/promises';
2
5
 
3
6
  export async function getFileSize(filepath: string) {
4
7
  let size = 0;
@@ -8,8 +11,30 @@ export async function getFileSize(filepath: string) {
8
11
  return size;
9
12
  }
10
13
 
14
+ export async function getFileHash(filepath: string) {
15
+ const hash = createHash('sha256');
16
+ const filestream = fs.createReadStream(filepath);
17
+ await pipeline(filestream, hash);
18
+ return hash.digest('hex');
19
+ }
20
+
11
21
/**
 * Ensures a directory exists at filepath, creating parent directories as
 * needed. mkdirSync with recursive: true is already a no-op when the
 * directory exists, so the previous existsSync pre-check was a redundant
 * TOCTOU race (and silently skipped creation when a plain FILE occupied
 * the path — now that case throws EEXIST, failing fast).
 */
export function mkdir(filepath: string) {
  fs.mkdirSync(filepath, { recursive: true });
}
26
+
27
+ export async function deleteFile(path: string) {
28
+ await access(path, constants.F_OK);
29
+ await unlink(path);
30
+ }
31
+
32
+ export function sanitizeFilename(name: string) {
33
+ if (!name) return name;
34
+
35
+ return name
36
+ .replace(/[<>:"/\\|?*\x00-\x1F]/g, '-') // Newlines (\r \n) are caught here
37
+ .replace(/\s+/g, ' ') // Turn tabs/multiple spaces into one space
38
+ .trim()
39
+ .replace(/[.]+$/, ''); // Remove trailing dots
40
+ }
@@ -0,0 +1,13 @@
1
+ import type { MediaType } from '../types';
2
+
3
+ export function isImage(name: string) {
4
+ return /\.(jpg|jpeg|png|gif|bmp|tiff|webp|avif)$/i.test(name);
5
+ }
6
+
7
+ export function isVideo(name: string) {
8
+ return /\.(mp4|m4v|avi|mov|mkv|webm|flv|wmv|mpeg|mpg|3gp)$/i.test(name);
9
+ }
10
+
11
+ export function testMediaType(name: string, type: MediaType) {
12
+ return type === 'image' ? isImage(name) : isVideo(name);
13
+ }
@@ -1,4 +1,5 @@
1
1
  import type { Subject } from 'rxjs';
2
+ import type { AbortControllerSubject } from '../types';
2
3
 
3
4
  export class Timer {
4
5
  private timer: NodeJS.Timeout | undefined;
@@ -34,8 +35,8 @@ export class Timer {
34
35
 
35
36
  static withAbortController(
36
37
  timeout: number,
37
- abortControllerSubject: Subject<string>,
38
- message: string = 'Timeout',
38
+ abortControllerSubject: Subject<AbortControllerSubject>,
39
+ message: AbortControllerSubject = 'TIMEOUT',
39
40
  ) {
40
41
  const callback = () => {
41
42
  abortControllerSubject.next(message);