@nitpicker/crawler 0.4.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (299) hide show
  1. package/CHANGELOG.md +8 -0
  2. package/LICENSE +191 -0
  3. package/README.md +13 -0
  4. package/lib/archive/archive-accessor.d.ts +107 -0
  5. package/lib/archive/archive-accessor.js +264 -0
  6. package/lib/archive/archive.d.ts +174 -0
  7. package/lib/archive/archive.js +331 -0
  8. package/lib/archive/database.d.ts +207 -0
  9. package/lib/archive/database.js +972 -0
  10. package/lib/archive/debug.d.ts +8 -0
  11. package/lib/archive/debug.js +9 -0
  12. package/lib/archive/filesystem/append-text.d.ts +9 -0
  13. package/lib/archive/filesystem/append-text.js +14 -0
  14. package/lib/archive/filesystem/copy-dir-sync.d.ts +6 -0
  15. package/lib/archive/filesystem/copy-dir-sync.js +9 -0
  16. package/lib/archive/filesystem/copy-dir.d.ts +7 -0
  17. package/lib/archive/filesystem/copy-dir.js +13 -0
  18. package/lib/archive/filesystem/exists.d.ts +6 -0
  19. package/lib/archive/filesystem/exists.js +9 -0
  20. package/lib/archive/filesystem/get-file-list.d.ts +8 -0
  21. package/lib/archive/filesystem/get-file-list.js +12 -0
  22. package/lib/archive/filesystem/index.d.ts +17 -0
  23. package/lib/archive/filesystem/index.js +17 -0
  24. package/lib/archive/filesystem/is-dir.d.ts +6 -0
  25. package/lib/archive/filesystem/is-dir.js +10 -0
  26. package/lib/archive/filesystem/mkdir.d.ts +8 -0
  27. package/lib/archive/filesystem/mkdir.js +15 -0
  28. package/lib/archive/filesystem/output-json.d.ts +9 -0
  29. package/lib/archive/filesystem/output-json.js +14 -0
  30. package/lib/archive/filesystem/output-text.d.ts +11 -0
  31. package/lib/archive/filesystem/output-text.js +32 -0
  32. package/lib/archive/filesystem/read-json.d.ts +7 -0
  33. package/lib/archive/filesystem/read-json.js +11 -0
  34. package/lib/archive/filesystem/read-text.d.ts +6 -0
  35. package/lib/archive/filesystem/read-text.js +10 -0
  36. package/lib/archive/filesystem/readline.d.ts +11 -0
  37. package/lib/archive/filesystem/readline.js +26 -0
  38. package/lib/archive/filesystem/remove.d.ts +5 -0
  39. package/lib/archive/filesystem/remove.js +10 -0
  40. package/lib/archive/filesystem/rename.d.ts +11 -0
  41. package/lib/archive/filesystem/rename.js +18 -0
  42. package/lib/archive/filesystem/tar.d.ts +11 -0
  43. package/lib/archive/filesystem/tar.js +22 -0
  44. package/lib/archive/filesystem/untar.d.ts +20 -0
  45. package/lib/archive/filesystem/untar.js +24 -0
  46. package/lib/archive/filesystem/utils.d.ts +109 -0
  47. package/lib/archive/filesystem/utils.js +185 -0
  48. package/lib/archive/filesystem/zip.d.ts +29 -0
  49. package/lib/archive/filesystem/zip.js +53 -0
  50. package/lib/archive/index.d.ts +6 -0
  51. package/lib/archive/index.js +11 -0
  52. package/lib/archive/page.d.ts +263 -0
  53. package/lib/archive/page.js +316 -0
  54. package/lib/archive/resource.d.ts +46 -0
  55. package/lib/archive/resource.js +62 -0
  56. package/lib/archive/safe-path.d.ts +9 -0
  57. package/lib/archive/safe-path.js +17 -0
  58. package/lib/archive/types.d.ts +210 -0
  59. package/lib/archive/types.js +1 -0
  60. package/lib/crawler/clear-destination-cache.d.ts +5 -0
  61. package/lib/crawler/clear-destination-cache.js +8 -0
  62. package/lib/crawler/crawler.d.ts +73 -0
  63. package/lib/crawler/crawler.js +748 -0
  64. package/lib/crawler/decompose-url.d.ts +25 -0
  65. package/lib/crawler/decompose-url.js +71 -0
  66. package/lib/crawler/destination-cache.d.ts +7 -0
  67. package/lib/crawler/destination-cache.js +6 -0
  68. package/lib/crawler/detect-pagination-pattern.d.ts +16 -0
  69. package/lib/crawler/detect-pagination-pattern.js +61 -0
  70. package/lib/crawler/fetch-destination.d.ts +38 -0
  71. package/lib/crawler/fetch-destination.js +208 -0
  72. package/lib/crawler/fetch-robots-txt.d.ts +42 -0
  73. package/lib/crawler/fetch-robots-txt.js +44 -0
  74. package/lib/crawler/find-best-matching-scope.d.ts +12 -0
  75. package/lib/crawler/find-best-matching-scope.js +46 -0
  76. package/lib/crawler/generate-predicted-urls.d.ts +13 -0
  77. package/lib/crawler/generate-predicted-urls.js +27 -0
  78. package/lib/crawler/handle-ignore-and-skip.d.ts +16 -0
  79. package/lib/crawler/handle-ignore-and-skip.js +19 -0
  80. package/lib/crawler/handle-resource-response.d.ts +13 -0
  81. package/lib/crawler/handle-resource-response.js +16 -0
  82. package/lib/crawler/handle-scrape-end.d.ts +24 -0
  83. package/lib/crawler/handle-scrape-end.js +82 -0
  84. package/lib/crawler/handle-scrape-error.d.ts +37 -0
  85. package/lib/crawler/handle-scrape-error.js +38 -0
  86. package/lib/crawler/index.d.ts +2 -0
  87. package/lib/crawler/index.js +2 -0
  88. package/lib/crawler/inject-scope-auth.d.ts +11 -0
  89. package/lib/crawler/inject-scope-auth.js +21 -0
  90. package/lib/crawler/is-external-url.d.ts +11 -0
  91. package/lib/crawler/is-external-url.js +12 -0
  92. package/lib/crawler/is-in-any-lower-layer.d.ts +13 -0
  93. package/lib/crawler/is-in-any-lower-layer.js +15 -0
  94. package/lib/crawler/link-list.d.ts +112 -0
  95. package/lib/crawler/link-list.js +248 -0
  96. package/lib/crawler/link-to-page-data.d.ts +14 -0
  97. package/lib/crawler/link-to-page-data.js +32 -0
  98. package/lib/crawler/net-timeout-error.d.ts +9 -0
  99. package/lib/crawler/net-timeout-error.js +11 -0
  100. package/lib/crawler/network.d.ts +30 -0
  101. package/lib/crawler/network.js +226 -0
  102. package/lib/crawler/protocol-agnostic-key.d.ts +9 -0
  103. package/lib/crawler/protocol-agnostic-key.js +11 -0
  104. package/lib/crawler/reconstruct-url.d.ts +10 -0
  105. package/lib/crawler/reconstruct-url.js +28 -0
  106. package/lib/crawler/result-handler.d.ts +118 -0
  107. package/lib/crawler/result-handler.js +153 -0
  108. package/lib/crawler/robots-checker.d.ts +26 -0
  109. package/lib/crawler/robots-checker.js +62 -0
  110. package/lib/crawler/should-discard-predicted.d.ts +14 -0
  111. package/lib/crawler/should-discard-predicted.js +31 -0
  112. package/lib/crawler/should-skip-url.d.ts +23 -0
  113. package/lib/crawler/should-skip-url.js +15 -0
  114. package/lib/crawler/speculative-pagination.d.ts +52 -0
  115. package/lib/crawler/speculative-pagination.js +215 -0
  116. package/lib/crawler/types.d.ts +119 -0
  117. package/lib/crawler/types.js +1 -0
  118. package/lib/crawler/url-filter.d.ts +56 -0
  119. package/lib/crawler/url-filter.js +110 -0
  120. package/lib/crawler-orchestrator.d.ts +142 -0
  121. package/lib/crawler-orchestrator.js +309 -0
  122. package/lib/debug.d.ts +8 -0
  123. package/lib/debug.js +9 -0
  124. package/lib/index.d.ts +16 -0
  125. package/lib/index.js +18 -0
  126. package/lib/qzilla.d.ts +136 -0
  127. package/lib/qzilla.js +292 -0
  128. package/lib/types.d.ts +27 -0
  129. package/lib/types.js +1 -0
  130. package/lib/utils/array/each-splitted.d.ts +10 -0
  131. package/lib/utils/array/each-splitted.js +14 -0
  132. package/lib/utils/array/index.d.ts +1 -0
  133. package/lib/utils/array/index.js +1 -0
  134. package/lib/utils/async/index.d.ts +1 -0
  135. package/lib/utils/async/index.js +1 -0
  136. package/lib/utils/debug.d.ts +5 -0
  137. package/lib/utils/debug.js +5 -0
  138. package/lib/utils/error/dom-evaluation-error.d.ts +7 -0
  139. package/lib/utils/error/dom-evaluation-error.js +7 -0
  140. package/lib/utils/error/error-emitter.d.ts +18 -0
  141. package/lib/utils/error/error-emitter.js +29 -0
  142. package/lib/utils/error/index.d.ts +3 -0
  143. package/lib/utils/error/index.js +2 -0
  144. package/lib/utils/event-emitter/index.d.ts +6 -0
  145. package/lib/utils/event-emitter/index.js +6 -0
  146. package/lib/utils/index.d.ts +5 -0
  147. package/lib/utils/index.js +5 -0
  148. package/lib/utils/network/index.d.ts +1 -0
  149. package/lib/utils/network/index.js +1 -0
  150. package/lib/utils/object/clean-object.d.ts +8 -0
  151. package/lib/utils/object/clean-object.js +13 -0
  152. package/lib/utils/object/index.d.ts +1 -0
  153. package/lib/utils/object/index.js +1 -0
  154. package/lib/utils/path/index.d.ts +1 -0
  155. package/lib/utils/path/index.js +1 -0
  156. package/lib/utils/path/safe-filepath.d.ts +7 -0
  157. package/lib/utils/path/safe-filepath.js +12 -0
  158. package/lib/utils/regexp/index.d.ts +1 -0
  159. package/lib/utils/regexp/index.js +1 -0
  160. package/lib/utils/retryable/index.d.ts +2 -0
  161. package/lib/utils/retryable/index.js +1 -0
  162. package/lib/utils/sort/index.d.ts +14 -0
  163. package/lib/utils/sort/index.js +61 -0
  164. package/lib/utils/sort/remove-matches.d.ts +9 -0
  165. package/lib/utils/sort/remove-matches.js +23 -0
  166. package/lib/utils/types/index.d.ts +1 -0
  167. package/lib/utils/types/index.js +1 -0
  168. package/lib/utils/types/types.d.ts +46 -0
  169. package/lib/utils/types/types.js +1 -0
  170. package/lib/utils/url/index.d.ts +5 -0
  171. package/lib/utils/url/index.js +5 -0
  172. package/lib/utils/url/is-lower-layer.d.ts +15 -0
  173. package/lib/utils/url/is-lower-layer.js +55 -0
  174. package/lib/utils/url/parse-url.d.ts +11 -0
  175. package/lib/utils/url/parse-url.js +20 -0
  176. package/lib/utils/url/path-match.d.ts +11 -0
  177. package/lib/utils/url/path-match.js +18 -0
  178. package/lib/utils/url/sort-url.d.ts +10 -0
  179. package/lib/utils/url/sort-url.js +24 -0
  180. package/lib/utils/url/url-partial-match.d.ts +11 -0
  181. package/lib/utils/url/url-partial-match.js +32 -0
  182. package/package.json +49 -0
  183. package/src/archive/__mock__/.gitignore +3 -0
  184. package/src/archive/__mock__/mock.sqlite +0 -0
  185. package/src/archive/archive-accessor.ts +337 -0
  186. package/src/archive/archive.ts +408 -0
  187. package/src/archive/database.spec.ts +469 -0
  188. package/src/archive/database.ts +1059 -0
  189. package/src/archive/debug.ts +10 -0
  190. package/src/archive/filesystem/append-text.spec.ts +26 -0
  191. package/src/archive/filesystem/append-text.ts +16 -0
  192. package/src/archive/filesystem/copy-dir-sync.spec.ts +27 -0
  193. package/src/archive/filesystem/copy-dir-sync.ts +10 -0
  194. package/src/archive/filesystem/copy-dir.spec.ts +33 -0
  195. package/src/archive/filesystem/copy-dir.ts +14 -0
  196. package/src/archive/filesystem/exists.spec.ts +33 -0
  197. package/src/archive/filesystem/exists.ts +10 -0
  198. package/src/archive/filesystem/get-file-list.spec.ts +37 -0
  199. package/src/archive/filesystem/get-file-list.ts +13 -0
  200. package/src/archive/filesystem/index.ts +17 -0
  201. package/src/archive/filesystem/is-dir.spec.ts +29 -0
  202. package/src/archive/filesystem/is-dir.ts +11 -0
  203. package/src/archive/filesystem/mkdir.spec.ts +37 -0
  204. package/src/archive/filesystem/mkdir.ts +16 -0
  205. package/src/archive/filesystem/output-json.spec.ts +34 -0
  206. package/src/archive/filesystem/output-json.ts +16 -0
  207. package/src/archive/filesystem/output-text.spec.ts +31 -0
  208. package/src/archive/filesystem/output-text.ts +35 -0
  209. package/src/archive/filesystem/read-json.spec.ts +26 -0
  210. package/src/archive/filesystem/read-json.ts +12 -0
  211. package/src/archive/filesystem/read-text.spec.ts +25 -0
  212. package/src/archive/filesystem/read-text.ts +11 -0
  213. package/src/archive/filesystem/readline.spec.ts +29 -0
  214. package/src/archive/filesystem/readline.ts +30 -0
  215. package/src/archive/filesystem/remove.spec.ts +34 -0
  216. package/src/archive/filesystem/remove.ts +11 -0
  217. package/src/archive/filesystem/rename.spec.ts +46 -0
  218. package/src/archive/filesystem/rename.ts +21 -0
  219. package/src/archive/filesystem/tar.spec.ts +33 -0
  220. package/src/archive/filesystem/tar.ts +27 -0
  221. package/src/archive/filesystem/untar.spec.ts +34 -0
  222. package/src/archive/filesystem/untar.ts +36 -0
  223. package/src/archive/index.ts +13 -0
  224. package/src/archive/page.spec.ts +368 -0
  225. package/src/archive/page.ts +420 -0
  226. package/src/archive/resource.spec.ts +101 -0
  227. package/src/archive/resource.ts +73 -0
  228. package/src/archive/safe-path.spec.ts +44 -0
  229. package/src/archive/safe-path.ts +18 -0
  230. package/src/archive/types.ts +227 -0
  231. package/src/crawler/clear-destination-cache.spec.ts +20 -0
  232. package/src/crawler/clear-destination-cache.ts +9 -0
  233. package/src/crawler/crawler.ts +873 -0
  234. package/src/crawler/decompose-url.spec.ts +48 -0
  235. package/src/crawler/decompose-url.ts +90 -0
  236. package/src/crawler/destination-cache.spec.ts +23 -0
  237. package/src/crawler/destination-cache.ts +8 -0
  238. package/src/crawler/detect-pagination-pattern.spec.ts +169 -0
  239. package/src/crawler/detect-pagination-pattern.ts +66 -0
  240. package/src/crawler/fetch-destination.ts +257 -0
  241. package/src/crawler/fetch-robots-txt.spec.ts +83 -0
  242. package/src/crawler/fetch-robots-txt.ts +91 -0
  243. package/src/crawler/find-best-matching-scope.spec.ts +39 -0
  244. package/src/crawler/find-best-matching-scope.ts +57 -0
  245. package/src/crawler/generate-predicted-urls.spec.ts +42 -0
  246. package/src/crawler/generate-predicted-urls.ts +34 -0
  247. package/src/crawler/handle-ignore-and-skip.spec.ts +66 -0
  248. package/src/crawler/handle-ignore-and-skip.ts +30 -0
  249. package/src/crawler/handle-resource-response.spec.ts +45 -0
  250. package/src/crawler/handle-resource-response.ts +21 -0
  251. package/src/crawler/handle-scrape-end.spec.ts +109 -0
  252. package/src/crawler/handle-scrape-end.ts +115 -0
  253. package/src/crawler/handle-scrape-error.spec.ts +105 -0
  254. package/src/crawler/handle-scrape-error.ts +58 -0
  255. package/src/crawler/index.ts +2 -0
  256. package/src/crawler/inject-scope-auth.spec.ts +36 -0
  257. package/src/crawler/inject-scope-auth.ts +27 -0
  258. package/src/crawler/is-external-url.spec.ts +31 -0
  259. package/src/crawler/is-external-url.ts +17 -0
  260. package/src/crawler/is-in-any-lower-layer.spec.ts +31 -0
  261. package/src/crawler/is-in-any-lower-layer.ts +22 -0
  262. package/src/crawler/link-list.spec.ts +355 -0
  263. package/src/crawler/link-list.ts +275 -0
  264. package/src/crawler/link-to-page-data.spec.ts +133 -0
  265. package/src/crawler/link-to-page-data.ts +34 -0
  266. package/src/crawler/net-timeout-error.spec.ts +25 -0
  267. package/src/crawler/net-timeout-error.ts +11 -0
  268. package/src/crawler/protocol-agnostic-key.spec.ts +40 -0
  269. package/src/crawler/protocol-agnostic-key.ts +11 -0
  270. package/src/crawler/reconstruct-url.spec.ts +37 -0
  271. package/src/crawler/reconstruct-url.ts +37 -0
  272. package/src/crawler/robots-checker.spec.ts +104 -0
  273. package/src/crawler/robots-checker.ts +73 -0
  274. package/src/crawler/should-discard-predicted.spec.ts +125 -0
  275. package/src/crawler/should-discard-predicted.ts +33 -0
  276. package/src/crawler/should-skip-url.spec.ts +77 -0
  277. package/src/crawler/should-skip-url.ts +37 -0
  278. package/src/crawler/types.ts +146 -0
  279. package/src/crawler-orchestrator.ts +401 -0
  280. package/src/debug.ts +10 -0
  281. package/src/index.ts +25 -0
  282. package/src/types.ts +30 -0
  283. package/src/utils/array/each-splitted.spec.ts +38 -0
  284. package/src/utils/array/each-splitted.ts +19 -0
  285. package/src/utils/array/index.ts +1 -0
  286. package/src/utils/debug.ts +6 -0
  287. package/src/utils/error/dom-evaluation-error.spec.ts +20 -0
  288. package/src/utils/error/dom-evaluation-error.ts +6 -0
  289. package/src/utils/error/error-emitter.spec.ts +78 -0
  290. package/src/utils/error/error-emitter.ts +44 -0
  291. package/src/utils/error/index.ts +3 -0
  292. package/src/utils/index.ts +5 -0
  293. package/src/utils/object/clean-object.spec.ts +24 -0
  294. package/src/utils/object/clean-object.ts +13 -0
  295. package/src/utils/object/index.ts +1 -0
  296. package/src/utils/types/index.ts +1 -0
  297. package/src/utils/types/types.ts +65 -0
  298. package/tsconfig.json +11 -0
  299. package/tsconfig.tsbuildinfo +1 -0
@@ -0,0 +1,8 @@
1
+ /** Debug logger for the archive package. Namespace: `Nitpicker:Utils:Archive`. */
2
+ export declare const log: import("debug").Debugger;
3
+ /** Debug logger for archive save operations. Namespace: `Nitpicker:Utils:Archive:Save`. */
4
+ export declare const saveLog: import("debug").Debugger;
5
+ /** Debug logger for database operations. Namespace: `Nitpicker:Utils:Archive:DB`. */
6
+ export declare const dbLog: import("debug").Debugger;
7
+ /** Debug logger for archive errors. Namespace: `Nitpicker:Utils:Archive:Error`. */
8
+ export declare const errorLog: import("debug").Debugger;
@@ -0,0 +1,9 @@
1
+ import { log as globalLog } from '../utils/debug.js';
2
+ /** Debug logger for the archive package. Namespace: `Nitpicker:Utils:Archive`. */
3
+ export const log = globalLog.extend('Archive');
4
+ /** Debug logger for archive save operations. Namespace: `Nitpicker:Utils:Archive:Save`. */
5
+ export const saveLog = log.extend('Save');
6
+ /** Debug logger for database operations. Namespace: `Nitpicker:Utils:Archive:DB`. */
7
+ export const dbLog = log.extend('DB');
8
+ /** Debug logger for archive errors. Namespace: `Nitpicker:Utils:Archive:Error`. */
9
+ export const errorLog = log.extend('Error');
@@ -0,0 +1,9 @@
1
+ /**
2
+ * Appends text data to a file at the specified path.
3
+ *
4
+ * Creates parent directories if they do not exist.
5
+ * A newline character is prepended to the data before appending.
6
+ * @param filePath - The absolute or relative path to the file to append to.
7
+ * @param data - The text content to append to the file.
8
+ */
9
+ export declare function appendText(filePath: string, data: string): Promise<void>;
@@ -0,0 +1,14 @@
1
+ import { promises as fs } from 'node:fs';
2
+ import { mkdir } from './mkdir.js';
3
+ /**
4
+ * Appends text data to a file at the specified path.
5
+ *
6
+ * Creates parent directories if they do not exist.
7
+ * A newline character is prepended to the data before appending.
8
+ * @param filePath - The absolute or relative path to the file to append to.
9
+ * @param data - The text content to append to the file.
10
+ */
11
+ export async function appendText(filePath, data) {
12
+ mkdir(filePath);
13
+ await fs.appendFile(filePath, `\n${data}`, { encoding: 'utf8' });
14
+ }
@@ -0,0 +1,6 @@
1
+ /**
2
+ * Synchronously copies a directory and its contents from one location to another.
3
+ * @param from - The source directory path to copy from.
4
+ * @param to - The destination directory path to copy to.
5
+ */
6
+ export declare function copyDirSync(from: string, to: string): void;
@@ -0,0 +1,9 @@
1
+ import fsx from 'fs-extra';
2
+ /**
3
+ * Synchronously copies a directory and its contents from one location to another.
4
+ * @param from - The source directory path to copy from.
5
+ * @param to - The destination directory path to copy to.
6
+ */
7
+ export function copyDirSync(from, to) {
8
+ fsx.copySync(from, to);
9
+ }
@@ -0,0 +1,7 @@
1
+ /**
2
+ * Recursively copies a directory and its contents from one location to another.
3
+ * @param from - The source directory path to copy from.
4
+ * @param to - The destination directory path to copy to.
5
+ * @returns `true` if the copy succeeded, `false` if an error occurred.
6
+ */
7
+ export declare function copyDir(from: string, to: string): Promise<boolean>;
@@ -0,0 +1,13 @@
1
+ import fsx from 'fs-extra';
2
+ /**
3
+ * Recursively copies a directory and its contents from one location to another.
4
+ * @param from - The source directory path to copy from.
5
+ * @param to - The destination directory path to copy to.
6
+ * @returns `true` if the copy succeeded, `false` if an error occurred.
7
+ */
8
+ export async function copyDir(from, to) {
9
+ return fsx
10
+ .copy(from, to)
11
+ .then(() => true)
12
+ .catch(() => false);
13
+ }
@@ -0,0 +1,6 @@
1
+ /**
2
+ * Checks whether a file or directory exists at the given path.
3
+ * @param filePath - The path to check for existence.
4
+ * @returns `true` if the path exists, `false` otherwise.
5
+ */
6
+ export declare function exists(filePath: string): boolean;
@@ -0,0 +1,9 @@
1
+ import { existsSync } from 'node:fs';
2
/**
 * Reports whether anything (file or directory) is present at a path.
 * @param filePath - Path to probe.
 * @returns `true` when the path exists, `false` when it does not.
 */
export function exists(filePath) {
	const found = existsSync(filePath);
	return found;
}
@@ -0,0 +1,8 @@
1
+ /**
2
+ * Lists the file names in a directory, optionally filtered by a pattern.
3
+ * @param dirPath - The directory path to list files from.
4
+ * @param filter - An optional RegExp or string pattern to filter file names.
5
+ * Only file names matching this pattern are included in the result.
6
+ * @returns An array of file names in the directory that match the filter (or all if no filter is provided).
7
+ */
8
+ export declare function getFileList(dirPath: string, filter?: RegExp | string): Promise<string[]>;
@@ -0,0 +1,12 @@
1
+ import fsx from 'fs-extra';
2
+ /**
3
+ * Lists the file names in a directory, optionally filtered by a pattern.
4
+ * @param dirPath - The directory path to list files from.
5
+ * @param filter - An optional RegExp or string pattern to filter file names.
6
+ * Only file names matching this pattern are included in the result.
7
+ * @returns An array of file names in the directory that match the filter (or all if no filter is provided).
8
+ */
9
+ export async function getFileList(dirPath, filter) {
10
+ const list = await fsx.readdir(dirPath);
11
+ return filter ? list.filter((fileName) => fileName.match(filter)) : list;
12
+ }
@@ -0,0 +1,17 @@
1
+ export { outputJSON } from './output-json.js';
2
+ export { readJSON } from './read-json.js';
3
+ export { outputText } from './output-text.js';
4
+ export { appendText } from './append-text.js';
5
+ export { readText } from './read-text.js';
6
+ export { copyDir } from './copy-dir.js';
7
+ export { copyDirSync } from './copy-dir-sync.js';
8
+ export { isDir } from './is-dir.js';
9
+ export { remove } from './remove.js';
10
+ export { rename } from './rename.js';
11
+ export { getFileList } from './get-file-list.js';
12
+ export { readline } from './readline.js';
13
+ export { mkdir } from './mkdir.js';
14
+ export { exists } from './exists.js';
15
+ export { tar } from './tar.js';
16
+ export { untar } from './untar.js';
17
+ export { zip, unzip, extractZip } from '@d-zero/fs/zip';
@@ -0,0 +1,17 @@
1
+ export { outputJSON } from './output-json.js';
2
+ export { readJSON } from './read-json.js';
3
+ export { outputText } from './output-text.js';
4
+ export { appendText } from './append-text.js';
5
+ export { readText } from './read-text.js';
6
+ export { copyDir } from './copy-dir.js';
7
+ export { copyDirSync } from './copy-dir-sync.js';
8
+ export { isDir } from './is-dir.js';
9
+ export { remove } from './remove.js';
10
+ export { rename } from './rename.js';
11
+ export { getFileList } from './get-file-list.js';
12
+ export { readline } from './readline.js';
13
+ export { mkdir } from './mkdir.js';
14
+ export { exists } from './exists.js';
15
+ export { tar } from './tar.js';
16
+ export { untar } from './untar.js';
17
+ export { zip, unzip, extractZip } from '@d-zero/fs/zip';
@@ -0,0 +1,6 @@
1
+ /**
2
+ * Checks whether the given path points to a directory.
3
+ * @param dirPath - The path to check.
4
+ * @returns `true` if the path is a directory, `false` otherwise.
5
+ */
6
+ export declare function isDir(dirPath: string): Promise<boolean>;
@@ -0,0 +1,10 @@
1
+ import fsx from 'fs-extra';
2
+ /**
3
+ * Checks whether the given path points to a directory.
4
+ * @param dirPath - The path to check.
5
+ * @returns `true` if the path is a directory, `false` otherwise.
6
+ */
7
+ export async function isDir(dirPath) {
8
+ const stat = await fsx.stat(dirPath);
9
+ return stat.isDirectory();
10
+ }
@@ -0,0 +1,8 @@
1
+ /**
2
+ * Ensures the parent directory of the given file path exists.
3
+ *
4
+ * If the parent directory does not exist, it is created recursively
5
+ * with permissions `0o700` (owner-only access).
6
+ * @param filePath - The file path whose parent directory should be created.
7
+ */
8
+ export declare function mkdir(filePath: string): void;
@@ -0,0 +1,15 @@
1
+ import { existsSync, mkdirSync } from 'node:fs';
2
+ import path from 'node:path';
3
/**
 * Makes sure the directory that would contain `filePath` exists.
 *
 * When that directory is missing it is created, together with any missing
 * ancestors, using mode `0o700` (owner-only access). An existing directory
 * is left untouched.
 * @param filePath - File path whose parent directory is ensured.
 */
export function mkdir(filePath) {
	const parentDir = path.parse(filePath).dir;
	if (existsSync(parentDir)) {
		return;
	}
	mkdirSync(path.resolve(parentDir), { mode: 0o700, recursive: true });
}
@@ -0,0 +1,9 @@
1
+ /**
2
+ * Writes data to a JSON file at the specified path.
3
+ *
4
+ * Creates parent directories if they do not exist.
5
+ * The output is formatted with 2-space indentation.
6
+ * @param filePath - The absolute or relative path to the JSON file to write.
7
+ * @param data - The data to serialize as JSON and write to the file.
8
+ */
9
+ export declare function outputJSON(filePath: string, data: unknown): Promise<void>;
@@ -0,0 +1,14 @@
1
+ import { promises as fs } from 'node:fs';
2
+ import { mkdir } from './mkdir.js';
3
+ /**
4
+ * Writes data to a JSON file at the specified path.
5
+ *
6
+ * Creates parent directories if they do not exist.
7
+ * The output is formatted with 2-space indentation.
8
+ * @param filePath - The absolute or relative path to the JSON file to write.
9
+ * @param data - The data to serialize as JSON and write to the file.
10
+ */
11
+ export async function outputJSON(filePath, data) {
12
+ mkdir(filePath);
13
+ await fs.writeFile(filePath, JSON.stringify(data, null, 2), { encoding: 'utf8' });
14
+ }
@@ -0,0 +1,11 @@
1
+ /**
2
+ * Writes text data to a file at the specified path.
3
+ *
4
+ * Creates parent directories if they do not exist.
5
+ * If the file path exceeds the OS limit (ENAMETOOLONG), the file is saved
6
+ * with an auto-generated short name and an accompanying `.meta.txt` file
7
+ * that records the original file path.
8
+ * @param filePath - The absolute or relative path to the text file to write.
9
+ * @param data - The text content to write to the file.
10
+ */
11
+ export declare function outputText(filePath: string, data: string): Promise<void>;
@@ -0,0 +1,32 @@
1
+ import { promises as fs } from 'node:fs';
2
+ import path from 'node:path';
3
+ import { mkdir } from './mkdir.js';
4
+ let filePathTooLongCount = 0;
5
+ /**
6
+ * Writes text data to a file at the specified path.
7
+ *
8
+ * Creates parent directories if they do not exist.
9
+ * If the file path exceeds the OS limit (ENAMETOOLONG), the file is saved
10
+ * with an auto-generated short name and an accompanying `.meta.txt` file
11
+ * that records the original file path.
12
+ * @param filePath - The absolute or relative path to the text file to write.
13
+ * @param data - The text content to write to the file.
14
+ */
15
+ export async function outputText(filePath, data) {
16
+ mkdir(filePath);
17
+ await fs.writeFile(filePath, data, { encoding: 'utf8' }).catch(async (error) => {
18
+ if (error instanceof Error && 'code' in error && error.code === 'ENAMETOOLONG') {
19
+ // eslint-disable-next-line no-console
20
+ console.error(`File path too long: ${filePath}`);
21
+ const dir = path.dirname(filePath);
22
+ const altFileName = `__file_path_too_long_${(filePathTooLongCount++).toString().padStart(4, '0')}`;
23
+ const ext = path.extname(filePath);
24
+ const altFilePath = path.resolve(dir, `${altFileName}${ext}`);
25
+ // eslint-disable-next-line no-console
26
+ console.error(`Try to save to: ${altFilePath}`);
27
+ const altMetaFilePath = path.resolve(dir, `${altFileName}.meta.txt`);
28
+ await outputText(altFilePath, data);
29
+ await outputText(altMetaFilePath, `Original file path: ${filePath}`);
30
+ }
31
+ });
32
+ }
@@ -0,0 +1,7 @@
1
+ /**
2
+ * Reads and parses a JSON file from the specified path.
3
+ * @template T - The expected type of the parsed JSON content. Defaults to `unknown`.
4
+ * @param filePath - The absolute or relative path to the JSON file to read.
5
+ * @returns The parsed JSON content, cast to the specified generic type.
6
+ */
7
+ export declare function readJSON<T = unknown>(filePath: string): Promise<T>;
@@ -0,0 +1,11 @@
1
+ import { promises as fs } from 'node:fs';
2
/**
 * Loads a file as UTF-8 text and parses it as JSON.
 * @template T - The type the caller expects the parsed content to have. Defaults to `unknown`.
 * @param filePath - Path (absolute or relative) of the JSON file to load.
 * @returns The value produced by `JSON.parse`; it is not validated against `T`.
 */
export async function readJSON(filePath) {
	const raw = await fs.readFile(filePath, 'utf8');
	const parsed = JSON.parse(raw);
	return parsed;
}
@@ -0,0 +1,6 @@
1
+ /**
2
+ * Reads the entire contents of a text file as a UTF-8 string.
3
+ * @param filePath - The absolute or relative path to the text file to read.
4
+ * @returns The text content of the file.
5
+ */
6
+ export declare function readText(filePath: string): Promise<string>;
@@ -0,0 +1,10 @@
1
+ import { promises as fs } from 'node:fs';
2
/**
 * Loads a whole file into memory as a UTF-8 string.
 * @param filePath - Path (absolute or relative) of the text file to load.
 * @returns The file's text content.
 */
export async function readText(filePath) {
	return fs.readFile(filePath, { encoding: 'utf8' });
}
@@ -0,0 +1,11 @@
1
+ /**
2
+ * Reads a file line by line and invokes the callback for each line.
3
+ *
4
+ * The callback may return a Promise for asynchronous processing.
5
+ * All callback results are collected and awaited via `Promise.all` before returning.
6
+ * @param filePath - The path to the file to read line by line.
7
+ * @param callback - A function invoked for each line of the file.
8
+ * May return a Promise for asynchronous operations.
9
+ * @returns A promise that resolves when all line callbacks have completed.
10
+ */
11
+ export declare function readline(filePath: string, callback: (line: string) => Promise<void> | void): Promise<void[]>;
@@ -0,0 +1,26 @@
1
+ import { createReadStream } from 'node:fs';
2
+ import Readline from 'node:readline';
3
+ /**
4
+ * Reads a file line by line and invokes the callback for each line.
5
+ *
6
+ * The callback may return a Promise for asynchronous processing.
7
+ * All callback results are collected and awaited via `Promise.all` before returning.
8
+ * @param filePath - The path to the file to read line by line.
9
+ * @param callback - A function invoked for each line of the file.
10
+ * May return a Promise for asynchronous operations.
11
+ * @returns A promise that resolves when all line callbacks have completed.
12
+ */
13
+ export async function readline(filePath, callback) {
14
+ const stream = createReadStream(filePath);
15
+ const rLine = Readline.createInterface(stream);
16
+ const promiseBuffer = [];
17
+ await new Promise((resolve) => {
18
+ rLine.on('line', (line) => {
19
+ promiseBuffer.push(callback(line));
20
+ });
21
+ rLine.on('close', () => {
22
+ resolve();
23
+ });
24
+ });
25
+ return Promise.all(promiseBuffer);
26
+ }
@@ -0,0 +1,5 @@
1
+ /**
2
+ * Recursively removes a file or directory at the specified path.
3
+ * @param dirPath - The path of the file or directory to remove.
4
+ */
5
+ export declare function remove(dirPath: string): Promise<void>;
@@ -0,0 +1,10 @@
1
+ import { promises as fs } from 'node:fs';
2
/**
 * Deletes the entry at the given path via `fs.rm` with the `recursive`
 * option enabled, so a directory is removed together with its contents.
 *
 * Any error raised by `fs.rm` is propagated to the caller.
 * @param dirPath - The path of the file or directory to delete.
 */
export async function remove(dirPath) {
	const rmOptions = { recursive: true };
	await fs.rm(dirPath, rmOptions);
}
@@ -0,0 +1,11 @@
1
+ /**
2
+ * Renames (moves) a file or directory from one path to another.
3
+ *
4
+ * If `override` is `true`, the destination is unconditionally removed
5
+ * before renaming. This avoids a TOCTOU race condition between
6
+ * checking existence and performing the removal.
7
+ * @param oldPath - The current path of the file or directory.
8
+ * @param newPath - The new path for the file or directory.
9
+ * @param override - Whether to overwrite the destination if it already exists. Defaults to `false`.
10
+ */
11
+ export declare function rename(oldPath: string, newPath: string, override?: boolean): Promise<void>;
@@ -0,0 +1,18 @@
1
+ import { promises as fs } from 'node:fs';
2
+ import { remove } from './remove.js';
3
/**
 * Moves a file or directory from `oldPath` to `newPath`.
 *
 * With `override` enabled the destination is removed first, without
 * checking whether it exists; doing the removal unconditionally avoids a
 * TOCTOU race between an existence check and the removal itself.
 * A failure of that removal is ignored on purpose, so only the final
 * `fs.rename` call can reject the returned promise.
 * @param oldPath - The current path of the file or directory.
 * @param newPath - The path the file or directory is moved to.
 * @param override - Whether to clear the destination before renaming. Defaults to `false`.
 */
export async function rename(oldPath, newPath, override = false) {
	if (override) {
		try {
			await remove(newPath);
		} catch {
			// Best effort: the removal result does not matter here.
		}
	}
	await fs.rename(oldPath, newPath);
}
@@ -0,0 +1,11 @@
1
+ /**
2
+ * Creates an uncompressed TAR archive from a directory.
3
+ *
4
+ * The archive preserves the relative directory structure.
5
+ * The `dir` parameter is resolved relative to its parent directory
6
+ * so only the target directory name appears in the archive.
7
+ * @param dir - The absolute path of the directory to archive.
8
+ * @param outputPath - The file path where the TAR archive will be written.
9
+ * @returns A promise that resolves when the TAR archive has been created.
10
+ */
11
+ export declare function tar(dir: string, outputPath: string): Promise<void>;
@@ -0,0 +1,22 @@
1
+ import path from 'node:path';
2
+ import { create } from 'tar';
3
/**
 * Packs a directory into a TAR archive without gzip compression.
 *
 * The archive is created from the parent of `dir`, and `dir` is passed
 * as a path relative to that parent, so entries are stored under the
 * directory's own name rather than its full path.
 * @param dir - The path of the directory to archive.
 * @param outputPath - The file path the TAR archive is written to.
 * @returns Whatever `create` from the `tar` package returns for this call
 * (expected to be a promise that settles once the archive file is written).
 */
export function tar(dir, outputPath) {
	const parentDir = path.dirname(dir);
	const entryName = path.relative(parentDir, dir);
	const createOptions = {
		gzip: false,
		cwd: parentDir,
		file: outputPath,
		preservePaths: false,
	};
	return create(createOptions, [entryName]);
}
@@ -0,0 +1,20 @@
1
+ /**
2
+ * Extracts files from a TAR archive.
3
+ *
4
+ * Only files newer than existing files in the target directory are extracted
5
+ * (uses the `newer` option). Optionally restricts extraction to a specific
6
+ * working directory and/or a subset of files.
7
+ * @param tarFilePath - The path to the TAR archive to extract.
8
+ * @param options - Optional extraction settings.
9
+ * @param options.cwd - The working directory to extract files into.
10
+ * If omitted, the current working directory is used.
11
+ * @param options.fileList - An array of specific file paths within the archive
12
+ * to extract. If omitted, all files in the archive are extracted.
13
+ * @returns A promise that resolves when extraction is complete.
14
+ */
15
+ export declare function untar(tarFilePath: string, options?: {
16
+ /** The working directory to extract files into. */
17
+ cwd?: string;
18
+ /** An array of specific file paths within the archive to extract. */
19
+ fileList?: string[];
20
+ }): Promise<void>;
@@ -0,0 +1,24 @@
1
+ import { extract } from 'tar';
2
/**
 * Unpacks a TAR archive.
 *
 * Extraction runs with the `newer`, `noMtime` and `preservePaths: false`
 * options of the `tar` package. The caller may choose the directory to
 * extract into and may limit extraction to a list of archive entries.
 * @param tarFilePath - The path to the TAR archive to extract.
 * @param options - Optional extraction settings.
 * @param options.cwd - The directory to extract into; passed through as-is,
 * so when omitted the `tar` package's own default applies.
 * @param options.fileList - Paths inside the archive to extract.
 * When omitted (or nullish) an empty list is passed, i.e. no restriction.
 * @returns Whatever `extract` from the `tar` package returns for this call
 * (expected to be a promise that settles when extraction is complete).
 */
export function untar(tarFilePath, options) {
	const extractOptions = {
		file: tarFilePath,
		newer: true,
		cwd: options?.cwd,
		preservePaths: false,
		noMtime: true,
	};
	const entries = options?.fileList ?? [];
	return extract(extractOptions, entries);
}
@@ -0,0 +1,109 @@
1
+ /**
2
+ * Writes data to a JSON file at the specified path.
3
+ *
4
+ * Creates parent directories if they do not exist.
5
+ * The output is formatted with 2-space indentation.
6
+ * @param filePath - The absolute or relative path to the JSON file to write.
7
+ * @param data - The data to serialize as JSON and write to the file.
8
+ */
9
+ export declare function outputJSON(filePath: string, data: unknown): Promise<void>;
10
+ /**
11
+ * Reads and parses a JSON file from the specified path.
12
+ * @template T - The expected type of the parsed JSON content. Defaults to `unknown`.
13
+ * @param filePath - The absolute or relative path to the JSON file to read.
14
+ * @returns The parsed JSON content, cast to the specified generic type.
15
+ */
16
+ export declare function readJSON<T = unknown>(filePath: string): Promise<T>;
17
+ /**
18
+ * Writes text data to a file at the specified path.
19
+ *
20
+ * Creates parent directories if they do not exist.
21
+ * If the file path exceeds the OS limit (ENAMETOOLONG), the file is saved
22
+ * with an auto-generated short name and an accompanying `.meta.txt` file
23
+ * that records the original file path.
24
+ * @param filePath - The absolute or relative path to the text file to write.
25
+ * @param data - The text content to write to the file.
26
+ */
27
+ export declare function outputText(filePath: string, data: string): Promise<void>;
28
+ /**
29
+ * Appends text data to a file at the specified path.
30
+ *
31
+ * Creates parent directories if they do not exist.
32
+ * A newline character is prepended to the data before appending.
33
+ * @param filePath - The absolute or relative path to the file to append to.
34
+ * @param data - The text content to append to the file.
35
+ */
36
+ export declare function appendText(filePath: string, data: string): Promise<void>;
37
+ /**
38
+ * Reads the entire contents of a text file as a UTF-8 string.
39
+ * @param filePath - The absolute or relative path to the text file to read.
40
+ * @returns The text content of the file.
41
+ */
42
+ export declare function readText(filePath: string): Promise<string>;
43
+ /**
44
+ * Recursively copies a directory and its contents from one location to another.
45
+ * @param from - The source directory path to copy from.
46
+ * @param to - The destination directory path to copy to.
47
+ * @returns `true` if the copy succeeded, `false` if an error occurred.
48
+ */
49
+ export declare function copyDir(from: string, to: string): Promise<boolean>;
50
+ /**
51
+ * Synchronously copies a directory and its contents from one location to another.
52
+ * @param from - The source directory path to copy from.
53
+ * @param to - The destination directory path to copy to.
54
+ */
55
+ export declare function copyDirSync(from: string, to: string): void;
56
+ /**
57
+ * Checks whether the given path points to a directory.
58
+ * @param dirPath - The path to check.
59
+ * @returns `true` if the path is a directory, `false` otherwise.
60
+ */
61
+ export declare function isDir(dirPath: string): Promise<boolean>;
62
+ /**
63
+ * Recursively removes a file or directory at the specified path.
64
+ * @param dirPath - The path of the file or directory to remove.
65
+ */
66
+ export declare function remove(dirPath: string): Promise<void>;
67
+ /**
68
+ * Renames (moves) a file or directory from one path to another.
69
+ *
70
+ * If `override` is `true` and the destination already exists,
71
+ * the destination is removed before renaming.
72
+ * @param oldPath - The current path of the file or directory.
73
+ * @param newPath - The new path for the file or directory.
74
+ * @param override - Whether to overwrite the destination if it already exists. Defaults to `false`.
75
+ */
76
+ export declare function rename(oldPath: string, newPath: string, override?: boolean): Promise<void>;
77
+ /**
78
+ * Lists the file names in a directory, optionally filtered by a pattern.
79
+ * @param dirPath - The directory path to list files from.
80
+ * @param filter - An optional RegExp or string pattern to filter file names.
81
+ * Only file names matching this pattern are included in the result.
82
+ * @returns An array of file names in the directory that match the filter (or all if no filter is provided).
83
+ */
84
+ export declare function getFileList(dirPath: string, filter?: RegExp | string): Promise<string[]>;
85
+ /**
86
+ * Reads a file line by line and invokes the callback for each line.
87
+ *
88
+ * The callback may return a Promise for asynchronous processing.
89
+ * All callback results are collected and awaited via `Promise.all` before returning.
90
+ * @param filePath - The path to the file to read line by line.
91
+ * @param callback - A function invoked for each line of the file.
92
+ * May return a Promise for asynchronous operations.
93
+ * @returns A promise that resolves when all line callbacks have completed.
94
+ */
95
+ export declare function readline(filePath: string, callback: (line: string) => Promise<void> | void): Promise<void[]>;
96
+ /**
97
+ * Ensures the parent directory of the given file path exists.
98
+ *
99
+ * If the parent directory does not exist, it is created recursively
100
+ * with permissions `0o755`.
101
+ * @param filePath - The file path whose parent directory should be created.
102
+ */
103
+ export declare function mkdir(filePath: string): void;
104
+ /**
105
+ * Checks whether a file or directory exists at the given path.
106
+ * @param filePath - The path to check for existence.
107
+ * @returns `true` if the path exists, `false` otherwise.
108
+ */
109
+ export declare function exists(filePath: string): boolean;