@transcend-io/utils 0.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.d.mts +210 -0
- package/dist/index.d.mts.map +1 -0
- package/dist/index.mjs +377 -0
- package/dist/index.mjs.map +1 -0
- package/package.json +44 -0
package/dist/index.d.mts
ADDED
|
@@ -0,0 +1,210 @@
|
|
|
1
|
+
import Bluebird from "bluebird";
|
|
2
|
+
|
|
3
|
+
//#region src/logger.d.ts
|
|
4
|
+
interface Logger {
|
|
5
|
+
info(...args: unknown[]): void;
|
|
6
|
+
warn(...args: unknown[]): void;
|
|
7
|
+
error(...args: unknown[]): void;
|
|
8
|
+
debug(...args: unknown[]): void;
|
|
9
|
+
}
|
|
10
|
+
//#endregion
|
|
11
|
+
//#region src/splitInHalf.d.ts
|
|
12
|
+
/**
|
|
13
|
+
* Split an array roughly in half. Stable for even/odd lengths.
|
|
14
|
+
*
|
|
15
|
+
* @param entries - Items to split
|
|
16
|
+
* @returns A tuple [left, right] halves
|
|
17
|
+
*/
|
|
18
|
+
declare function splitInHalf<T>(entries: T[]): [T[], T[]];
|
|
19
|
+
//#endregion
|
|
20
|
+
//#region src/sleepPromise.d.ts
|
|
21
|
+
/**
|
|
22
|
+
* Sleep in a promise
|
|
23
|
+
*
|
|
24
|
+
* @param sleepTime - The time to sleep in milliseconds.
|
|
25
|
+
* @returns Resolves promise
|
|
26
|
+
*/
|
|
27
|
+
declare function sleepPromise(sleepTime: number): Promise<number>;
|
|
28
|
+
//#endregion
|
|
29
|
+
//#region src/extractErrorMessage.d.ts
|
|
30
|
+
/**
|
|
31
|
+
* Extract a human-readable error message from a thrown error.
|
|
32
|
+
*
|
|
33
|
+
* Tries to parse JSON bodies that follow common REST/GraphQL error patterns:
|
|
34
|
+
* { error: { message: string } }
|
|
35
|
+
* { errors: [{ message: string }, ...] }
|
|
36
|
+
*
|
|
37
|
+
* Falls back to `err.message` or 'Unknown error'.
|
|
38
|
+
*
|
|
39
|
+
* @param err - Unknown error thrown by network call
|
|
40
|
+
* @returns A concise error string safe to log/show
|
|
41
|
+
*/
|
|
42
|
+
declare function extractErrorMessage(err: unknown): string;
|
|
43
|
+
//#endregion
|
|
44
|
+
//#region src/getErrorStatus.d.ts
|
|
45
|
+
/**
|
|
46
|
+
* Extract an HTTP status code from a thrown error (got compatible).
|
|
47
|
+
*
|
|
48
|
+
* @param err - Unknown error thrown by network call
|
|
49
|
+
* @returns HTTP status code, if present
|
|
50
|
+
*/
|
|
51
|
+
declare function getErrorStatus(err: unknown): number | undefined;
|
|
52
|
+
//#endregion
|
|
53
|
+
//#region src/limitRecords.d.ts
|
|
54
|
+
/**
|
|
55
|
+
* Limits the number of records in the returned object to a maximum.
|
|
56
|
+
* For entries beyond the max, sets their value to `true`.
|
|
57
|
+
*
|
|
58
|
+
* @param obj - Object
|
|
59
|
+
* @param max - Maximum number of entries to retain original value.
|
|
60
|
+
* @returns Object with keys mapped to their value or `true` if over the limit.
|
|
61
|
+
*/
|
|
62
|
+
declare function limitRecords<T>(obj: Record<string, T>, max: number): Record<string, T | true>;
|
|
63
|
+
//#endregion
|
|
64
|
+
//#region src/RateCounter.d.ts
|
|
65
|
+
/**
|
|
66
|
+
* Tracks counts over time and calculates rates within a specified time window.
|
|
67
|
+
*
|
|
68
|
+
* This class maintains a rolling window of count "buckets" (timestamped values)
|
|
69
|
+
* and provides methods to add new counts and compute the rate of events over a
|
|
70
|
+
* configurable time window.
|
|
71
|
+
*
|
|
72
|
+
* Example usage:
|
|
73
|
+
* ```typescript
|
|
74
|
+
* const counter = new RateCounter();
|
|
75
|
+
* counter.add(5); // Add 5 events
|
|
76
|
+
* const rate = counter.rate(60_000); // Get rate over last 60 seconds
|
|
77
|
+
* ```
|
|
78
|
+
*/
|
|
79
|
+
declare class RateCounter {
|
|
80
|
+
private buckets;
|
|
81
|
+
/**
|
|
82
|
+
* Adds a new count to the counter.
|
|
83
|
+
*
|
|
84
|
+
* @param n - The number of events to add to the counter.
|
|
85
|
+
*/
|
|
86
|
+
add(n: number): void;
|
|
87
|
+
/**
|
|
88
|
+
* rate over the last `windowMs` milliseconds
|
|
89
|
+
*
|
|
90
|
+
* @param windowMs - The time window in milliseconds to calculate the rate over.
|
|
91
|
+
* @returns The average rate of events per second over the specified time window.
|
|
92
|
+
*/
|
|
93
|
+
rate(windowMs: number): number;
|
|
94
|
+
}
|
|
95
|
+
//#endregion
|
|
96
|
+
//#region src/time.d.ts
|
|
97
|
+
declare const DAY_MS: number;
|
|
98
|
+
declare const HOUR_MS: number;
|
|
99
|
+
declare const FIVE_MIN_MS: number;
|
|
100
|
+
/**
|
|
101
|
+
* Clamp 1..50 per API spec
|
|
102
|
+
*
|
|
103
|
+
* @param n - Number
|
|
104
|
+
* @returns Clamped number
|
|
105
|
+
*/
|
|
106
|
+
declare const clampPageSize: (n?: number) => number;
|
|
107
|
+
/**
|
|
108
|
+
* TRUE UTC day start (00:00:00Z)
|
|
109
|
+
*
|
|
110
|
+
* @param d - Date
|
|
111
|
+
* @returns Day start
|
|
112
|
+
*/
|
|
113
|
+
declare const startOfUtcDay: (d: Date) => Date;
|
|
114
|
+
/**
|
|
115
|
+
* TRUE UTC hour start (HH:00:00Z)
|
|
116
|
+
*
|
|
117
|
+
* @param d - Date
|
|
118
|
+
* @returns Hour start
|
|
119
|
+
*/
|
|
120
|
+
declare const startOfHour: (d: Date) => Date;
|
|
121
|
+
/**
|
|
122
|
+
* Add ms safely
|
|
123
|
+
*
|
|
124
|
+
* @param d - Date
|
|
125
|
+
* @param ms - Milliseconds to add
|
|
126
|
+
* @returns New date
|
|
127
|
+
*/
|
|
128
|
+
declare const addMs: (d: Date, ms: number) => Date;
|
|
129
|
+
/**
|
|
130
|
+
* Add whole UTC days (exclusive bound helper)
|
|
131
|
+
*
|
|
132
|
+
* @param d - Date
|
|
133
|
+
* @param n - Number of days to add
|
|
134
|
+
* @returns New date
|
|
135
|
+
*/
|
|
136
|
+
declare const addDaysUtc: (d: Date, n: number) => Date;
|
|
137
|
+
//#endregion
|
|
138
|
+
//#region src/retrySamePromise.d.ts
|
|
139
|
+
interface RetryPolicy {
|
|
140
|
+
/** Maximum retry attempts (not counting the initial try) */
|
|
141
|
+
maxAttempts: number;
|
|
142
|
+
/** Fixed delay between attempts in milliseconds */
|
|
143
|
+
delayMs: number;
|
|
144
|
+
/**
|
|
145
|
+
* Decide whether a given error should be retried.
|
|
146
|
+
*
|
|
147
|
+
* @param status - HTTP status code (if known)
|
|
148
|
+
* @param message - Extracted error message (if known)
|
|
149
|
+
*/
|
|
150
|
+
shouldRetry(status?: number, message?: string): boolean;
|
|
151
|
+
}
|
|
152
|
+
/**
|
|
153
|
+
* Retry a single async operation according to the provided policy.
|
|
154
|
+
* The operation is executed once initially, then up to `maxAttempts` retries.
|
|
155
|
+
*
|
|
156
|
+
* @param op - Operation to run
|
|
157
|
+
* @param policy - Retry policy
|
|
158
|
+
* @param onBackoff - Observer called before each retry (for logging/metrics)
|
|
159
|
+
* @returns Result of the operation if it eventually succeeds
|
|
160
|
+
* @throws The last error encountered if all retries fail
|
|
161
|
+
*/
|
|
162
|
+
declare function retrySamePromise<T>(op: () => Promise<T>, policy: RetryPolicy, onBackoff: (note: string) => void): Promise<T>;
|
|
163
|
+
//#endregion
|
|
164
|
+
//#region src/chunkOneCsvFile.d.ts
|
|
165
|
+
/**
|
|
166
|
+
* Options for chunking a single CSV file
|
|
167
|
+
*/
|
|
168
|
+
type ChunkOpts = {
|
|
169
|
+
/** Path to the CSV file to chunk */filePath: string; /** Output directory for chunk files; defaults to the same directory as the input file */
|
|
170
|
+
outputDir?: string; /** Clear output directory before starting */
|
|
171
|
+
clearOutputDir: boolean; /** Chunk size in MB */
|
|
172
|
+
chunkSizeMB: number; /** Optional report interval in milliseconds for progress updates */
|
|
173
|
+
reportEveryMs?: number; /** Callback for progress updates */
|
|
174
|
+
onProgress: (processed: number, total?: number) => void; /** Logger instance (required — no default; callers must inject) */
|
|
175
|
+
logger: Logger;
|
|
176
|
+
};
|
|
177
|
+
/**
|
|
178
|
+
* Stream a single CSV file and write chunk files of roughly chunkSizeMB.
|
|
179
|
+
* - Writes header to each chunk.
|
|
180
|
+
* - Logs periodic progress via onProgress.
|
|
181
|
+
*
|
|
182
|
+
* @param opts - Options for chunking the file
|
|
183
|
+
* @returns Promise that resolves when done
|
|
184
|
+
*/
|
|
185
|
+
declare function chunkOneCsvFile(opts: ChunkOpts): Promise<void>;
|
|
186
|
+
//#endregion
|
|
187
|
+
//#region src/bluebird.d.ts
|
|
188
|
+
/**
|
|
189
|
+
* Concurrent map with configurable concurrency limit.
|
|
190
|
+
* Re-export of Bluebird.map for use across the monorepo.
|
|
191
|
+
*/
|
|
192
|
+
declare const map: typeof Bluebird.map;
|
|
193
|
+
/**
|
|
194
|
+
* Sequential map (concurrency = 1).
|
|
195
|
+
* Re-export of Bluebird.mapSeries for use across the monorepo.
|
|
196
|
+
*/
|
|
197
|
+
declare const mapSeries: typeof Bluebird.mapSeries;
|
|
198
|
+
//#endregion
|
|
199
|
+
//#region src/index.d.ts
|
|
200
|
+
interface DeveloperToolNameParts {
|
|
201
|
+
displayName: string;
|
|
202
|
+
slug: string;
|
|
203
|
+
}
|
|
204
|
+
declare function normalizeDeveloperToolName(value: string): string;
|
|
205
|
+
declare function toPackageDisplayName(value: string): string;
|
|
206
|
+
declare function toPackageSlug(value: string): string;
|
|
207
|
+
declare function describePackageName(value: string): DeveloperToolNameParts;
|
|
208
|
+
//#endregion
|
|
209
|
+
export { ChunkOpts, DAY_MS, DeveloperToolNameParts, FIVE_MIN_MS, HOUR_MS, Logger, RateCounter, RetryPolicy, addDaysUtc, addMs, chunkOneCsvFile, clampPageSize, describePackageName, extractErrorMessage, getErrorStatus, limitRecords, map, mapSeries, normalizeDeveloperToolName, retrySamePromise, sleepPromise, splitInHalf, startOfHour, startOfUtcDay, toPackageDisplayName, toPackageSlug };
|
|
210
|
+
//# sourceMappingURL=index.d.mts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.d.mts","names":[],"sources":["../src/logger.ts","../src/splitInHalf.ts","../src/sleepPromise.ts","../src/extractErrorMessage.ts","../src/getErrorStatus.ts","../src/limitRecords.ts","../src/RateCounter.ts","../src/time.ts","../src/retrySamePromise.ts","../src/chunkOneCsvFile.ts","../src/bluebird.ts","../src/index.ts"],"mappings":";;;UAAiB,MAAA;EACf,IAAA,IAAQ,IAAA;EACR,IAAA,IAAQ,IAAA;EACR,KAAA,IAAS,IAAA;EACT,KAAA,IAAS,IAAA;AAAA;;;;;;AAJX;;;iBCMgB,WAAA,GAAA,CAAe,OAAA,EAAS,CAAA,MAAO,CAAA,IAAK,CAAA;;;;;;ADNpD;;;iBEMgB,YAAA,CAAa,SAAA,WAAoB,OAAA;;;;;;AFNjD;;;;;;;;;iBGYgB,mBAAA,CAAoB,GAAA;;;;;;AHZpC;;;iBIMgB,cAAA,CAAe,GAAA;;;;;;AJN/B;;;;;iBKQgB,YAAA,GAAA,CAAgB,GAAA,EAAK,MAAA,SAAe,CAAA,GAAI,GAAA,WAAc,MAAA,SAAe,CAAA;;;;;;ALRrF;;;;;;;;;;;cMca,WAAA;EAAA,QACH,OAAA;ENXC;;;;ACEX;EKqBE,GAAA,CAAI,CAAA;;;;;;;EAgBJ,IAAA,CAAK,QAAA;AAAA;;;cC3CM,MAAA;AAAA,cACA,OAAA;AAAA,cACA,WAAA;APFb;;;;;;AAAA,cOUa,aAAA,GAAiB,CAAA;;;;;;;cAQjB,aAAA,GAAiB,CAAA,EAAG,IAAA,KAAO,IAAA;;;;ANZxC;;;cMqBa,WAAA,GAAe,CAAA,EAAG,IAAA,KAAO,IAAA;;;;;;;;cAUzB,KAAA,GAAS,CAAA,EAAG,IAAA,EAAM,EAAA,aAAa,IAAA;;;;;;;AL/B5C;cKwCa,UAAA,GAAc,CAAA,EAAG,IAAA,EAAM,CAAA,aAAY,IAAA;;;UC1C/B,WAAA;;EAEf,WAAA;ERNF;EQQE,OAAA;;;;;;;EAOA,WAAA,CAAY,MAAA,WAAiB,OAAA;AAAA;;;;;;;;APT/B;;;iBOsBsB,gBAAA,GAAA,CACpB,EAAA,QAAU,OAAA,CAAQ,CAAA,GAClB,MAAA,EAAQ,WAAA,EACR,SAAA,GAAY,IAAA,oBACX,OAAA,CAAQ,CAAA;;;;;ARhCX;KSeY,SAAA;sCAEV,QAAA;EAEA,SAAA;EAEA,cAAA;EAEA,WAAA;EAEA,aAAA;EAEA,UAAA,GAAa,SAAA,UAAmB,KAAA;EAEhC,MAAA,EAAQ,MAAA;AAAA;ARvBV;;;;;;;;AAAA,iBQ4GsB,eAAA,CAAgB,IAAA,EAAM,SAAA,GAAY,OAAA;;;;;ATlHxD;;cUMa,GAAA,SAAY,QAAA,CAAS,GAAA;;;;;cAMrB,SAAA,SAAkB,QAAA,CAAS,SAAA;;;UCVvB,sBAAA;EACf,WAAA;EACA,IAAA;AAAA;AAAA,iBAGc,0BAAA,CAA2B,KAAA;AAAA,iBAI3B,oBAAA,CAAqB,KAAA;AAAA,iBAOrB,aAAA,CAAc,KAAA;AAAA,iBAId,mBAAA,CAAoB,KAAA,WAAgB,sBAAA"}
|
package/dist/index.mjs
ADDED
|
@@ -0,0 +1,377 @@
|
|
|
1
|
+
import { once } from "node:events";
|
|
2
|
+
import { createReadStream, createWriteStream } from "node:fs";
|
|
3
|
+
import { mkdir, readdir, stat, unlink } from "node:fs/promises";
|
|
4
|
+
import { basename, dirname, join } from "node:path";
|
|
5
|
+
import { Transform } from "node:stream";
|
|
6
|
+
import { pipeline } from "node:stream/promises";
|
|
7
|
+
import { Parser } from "csv-parse";
|
|
8
|
+
import * as fastcsv from "fast-csv";
|
|
9
|
+
import Bluebird from "bluebird";
|
|
10
|
+
//#region src/splitInHalf.ts
|
|
11
|
+
/**
 * Split an array roughly in half. Stable for even/odd lengths.
 *
 * For odd lengths the extra element ends up in the right half, since the
 * midpoint is floored.
 *
 * @param entries - Items to split
 * @returns A tuple [left, right] halves
 */
function splitInHalf(entries) {
  const half = Math.floor(entries.length / 2);
  const left = entries.slice(0, half);
  const right = entries.slice(half);
  return [left, right];
}
|
|
21
|
+
//#endregion
|
|
22
|
+
//#region src/sleepPromise.ts
|
|
23
|
+
/**
 * Sleep in a promise
 *
 * @param sleepTime - The time to sleep in milliseconds.
 * @returns Resolves promise with the sleep duration once the timer fires
 */
function sleepPromise(sleepTime) {
  return new Promise((resolve) => {
    // setTimeout forwards extra arguments to the callback, so resolve
    // receives sleepTime directly.
    setTimeout(resolve, sleepTime, sleepTime);
  });
}
|
|
34
|
+
//#endregion
|
|
35
|
+
//#region src/extractErrorMessage.ts
|
|
36
|
+
/**
 * Extract a human-readable error message from a thrown error.
 *
 * Tries to parse JSON bodies that follow common REST/GraphQL error patterns:
 *   { error: { message: string } }
 *   { errors: [{ message: string }, ...] }
 *
 * Falls back to `err.message` or 'Unknown error'.
 *
 * @param err - Unknown error thrown by network call
 * @returns A concise error string safe to log/show
 */
function extractErrorMessage(err) {
  const maybeErr = err;
  // Prefer the HTTP response body (got-style errors), then the plain message.
  let message = maybeErr?.response?.body || maybeErr?.message || "Unknown error";
  try {
    const parsed = JSON.parse(message);
    // Typical shapes: errors[], error.errors[], error.message, error.
    const found =
      parsed.errors ||
      parsed.error?.errors ||
      [parsed.error?.message || parsed.error];
    const list = Array.isArray(found) ? found : [found];
    const parts = [];
    for (const item of list) {
      if (item) parts.push(item);
    }
    message = parts.join(", ");
  } catch {
    // Body was not JSON; keep the raw message as-is.
  }
  return message;
}
|
|
58
|
+
//#endregion
|
|
59
|
+
//#region src/getErrorStatus.ts
|
|
60
|
+
/**
 * Extract an HTTP status code from a thrown error (got compatible).
 *
 * Checks `response.statusCode` (got) first, then `response.status`
 * (axios/fetch-style).
 *
 * @param err - Unknown error thrown by network call
 * @returns HTTP status code, if present
 */
function getErrorStatus(err) {
  const response = err?.response;
  if (response == null) return undefined;
  return response.statusCode ?? response.status;
}
|
|
70
|
+
//#endregion
|
|
71
|
+
//#region src/limitRecords.ts
|
|
72
|
+
/**
 * Limits the number of records in the returned object to a maximum.
 * For entries beyond the max, sets their value to `true`.
 *
 * Entries are considered in `Object.entries` insertion order.
 *
 * @param obj - Object
 * @param max - Maximum number of entries to retain original value.
 * @returns Object with keys mapped to their value or `true` if over the limit.
 */
function limitRecords(obj, max) {
  const entries = Object.entries(obj).map(([key, value], index) => [
    key,
    index < max ? value : true
  ]);
  return Object.fromEntries(entries);
}
|
|
86
|
+
//#endregion
|
|
87
|
+
//#region src/RateCounter.ts
|
|
88
|
+
/**
 * Tracks counts over time and calculates rates within a specified time window.
 *
 * This class maintains a rolling window of count "buckets" (timestamped values)
 * and provides methods to add new counts and compute the rate of events over a
 * configurable time window.
 *
 * Buckets older than the retention horizon are pruned on every `add()`, so
 * `rate(windowMs)` is only accurate for `windowMs <= retentionMs`. Previously
 * the horizon was a hard-coded 120 000 ms, which made longer windows silently
 * undercount; it is now a constructor parameter with the same default, which
 * is backward compatible.
 *
 * Example usage:
 * ```typescript
 * const counter = new RateCounter();
 * counter.add(5); // Add 5 events
 * const rate = counter.rate(60_000); // Get rate over last 60 seconds
 * ```
 */
var RateCounter = class {
  /** Timestamped count buckets, oldest first: { t: epoch ms, n: count }. */
  buckets = [];
  /** Retention horizon in milliseconds; see constructor. */
  retentionMs;
  /**
   * @param retentionMs - How long (in ms) to retain count buckets. Rates
   *   requested over windows longer than this will undercount. Defaults to
   *   120 000 ms, matching the previous hard-coded behavior.
   */
  constructor(retentionMs = 12e4) {
    this.retentionMs = retentionMs;
  }
  /**
   * Adds a new count to the counter.
   *
   * @param n - The number of events to add to the counter.
   */
  add(n) {
    const now = Date.now();
    this.buckets.push({
      t: now,
      n
    });
    // Prune buckets that have fallen outside the retention horizon.
    const cutoff = now - this.retentionMs;
    while (this.buckets.length > 0 && this.buckets[0].t < cutoff) this.buckets.shift();
  }
  /**
   * rate over the last `windowMs` milliseconds
   *
   * @param windowMs - The time window in milliseconds to calculate the rate over.
   * @returns The average rate of events per second over the specified time window.
   */
  rate(windowMs) {
    const cutoff = Date.now() - windowMs;
    let sum = 0;
    // Walk newest-to-oldest; buckets are time-ordered, so stop at the first
    // bucket older than the window.
    for (let i = this.buckets.length - 1; i >= 0; i -= 1) {
      const b = this.buckets[i];
      if (b.t < cutoff) break;
      sum += b.n;
    }
    return sum / (windowMs / 1e3);
  }
};
|
|
135
|
+
//#endregion
|
|
136
|
+
//#region src/time.ts
|
|
137
|
+
/** One hour in milliseconds. */
const HOUR_MS = 60 * 60 * 1000;
/** One day in milliseconds. */
const DAY_MS = 24 * HOUR_MS;
/** Five minutes in milliseconds. */
const FIVE_MIN_MS = 5 * 60 * 1000;
/**
 * Clamp 1..50 per API spec
 *
 * @param n - Number
 * @returns Clamped number
 */
const clampPageSize = (n) => {
  const requested = n ?? 50;
  if (requested < 1) return 1;
  if (requested > 50) return 50;
  return requested;
};
/**
 * TRUE UTC day start (00:00:00Z)
 *
 * @param d - Date
 * @returns Day start
 */
const startOfUtcDay = (d) => {
  const year = d.getUTCFullYear();
  const month = d.getUTCMonth();
  const day = d.getUTCDate();
  return new Date(Date.UTC(year, month, day));
};
/**
 * TRUE UTC hour start (HH:00:00Z)
 *
 * @param d - Date
 * @returns Hour start
 */
const startOfHour = (d) => {
  const year = d.getUTCFullYear();
  const month = d.getUTCMonth();
  const day = d.getUTCDate();
  const hour = d.getUTCHours();
  return new Date(Date.UTC(year, month, day, hour));
};
/**
 * Add ms safely
 *
 * @param d - Date
 * @param ms - Milliseconds to add
 * @returns New date
 */
const addMs = (d, ms) => new Date(d.getTime() + ms);
/**
 * Add whole UTC days (exclusive bound helper)
 *
 * @param d - Date
 * @param n - Number of days to add
 * @returns New date
 */
const addDaysUtc = (d, n) => addMs(d, n * DAY_MS);
|
|
177
|
+
//#endregion
|
|
178
|
+
//#region src/retrySamePromise.ts
|
|
179
|
+
/**
 * Retry a single async operation according to the provided policy.
 * The operation is executed once initially, then up to `maxAttempts` retries.
 *
 * @param op - Operation to run
 * @param policy - Retry policy
 * @param onBackoff - Observer called before each retry (for logging/metrics)
 * @returns Result of the operation if it eventually succeeds
 * @throws The last error encountered if all retries fail
 */
async function retrySamePromise(op, policy, onBackoff) {
  // attempt counts failures so far: 1 on the first catch, etc.
  for (let attempt = 1; ; attempt += 1) {
    try {
      return await op();
    } catch (err) {
      const status = getErrorStatus(err);
      const message = extractErrorMessage(err);
      const canRetry = attempt <= policy.maxAttempts && policy.shouldRetry(status, message);
      if (!canRetry) throw err;
      onBackoff(`Retrying after status=${status} attempt=${attempt}/${policy.maxAttempts} — ${message}`);
      await sleepPromise(policy.delayMs);
    }
  }
}
|
|
202
|
+
//#endregion
|
|
203
|
+
//#region src/chunkOneCsvFile.ts
|
|
204
|
+
/**
 * Create a CSV writer (fast-csv formatter piped to a write stream) that writes
 * a header line first, and then accepts object rows. Returns a tiny API to
 * write rows with backpressure handling and to close the file cleanly.
 *
 * @param filePath - The path to the output CSV file
 * @param headers - The headers for the CSV file
 * @returns An object with `write` and `end` methods
 */
function createCsvChunkWriter(filePath, headers) {
  const ws = createWriteStream(filePath);
  const csv = fastcsv.format({
    headers,
    writeHeaders: true,
    objectMode: true
  });
  // Formatter output flows into the file write stream.
  csv.pipe(ws);
  return {
    // Write one object row. When the formatter's internal buffer is full
    // (write() returns false), wait for 'drain' before letting the caller
    // continue — this propagates backpressure to the row producer.
    async write(row) {
      if (!csv.write(row)) await once(csv, "drain");
    },
    // Close the formatter and wait for the underlying file stream to finish.
    // The 'finish' listener is attached BEFORE csv.end() is called so the
    // event cannot be missed if the stream finishes immediately.
    async end() {
      const finished = Promise.all([once(ws, "finish")]);
      csv.end();
      await finished;
    }
  };
}
|
|
232
|
+
/**
 * Zero-pad chunk numbers to four digits (e.g., 1 -> "0001").
 *
 * Numbers with more than four digits are returned unchanged.
 *
 * @param n - The chunk number to pad
 * @returns The padded chunk number as a string
 */
function pad4(n) {
  const digits = `${n}`;
  return digits.padStart(4, "0");
}
|
|
241
|
+
/**
 * Approximate row size in bytes using comma-joined field values.
 *
 * Null/undefined fields count as empty strings; no quoting or escaping is
 * accounted for, so this is an estimate, not the exact serialized size.
 *
 * @param obj - The row object to estimate size for
 * @returns Approximate byte size of the row when serialized as CSV
 */
function approxRowBytes(obj) {
  const fields = [];
  for (const value of Object.values(obj)) {
    fields.push(value == null ? "" : String(value));
  }
  return Buffer.byteLength(fields.join(","), "utf8");
}
|
|
250
|
+
/**
 * Stream a single CSV file and write chunk files of roughly chunkSizeMB.
 * - Writes header to each chunk.
 * - Logs periodic progress via onProgress.
 *
 * Chunk files are named `<base>_chunk_NNNN.csv` in the output directory.
 * The total-row estimate passed to onProgress is derived from the average
 * byte size of rows seen so far versus the input file size.
 *
 * @param opts - Options for chunking the file
 * @returns Promise that resolves when done
 */
async function chunkOneCsvFile(opts) {
  const { filePath, outputDir, clearOutputDir, chunkSizeMB, onProgress, reportEveryMs = 500, logger } = opts;
  // Input file size drives the estimated-total-rows calculation below.
  const { size: fileBytes } = await stat(filePath);
  let lastTick = 0;
  logger.info(`Chunking ${filePath} into ~${chunkSizeMB}MB files...`);
  const chunkSizeBytes = Math.floor(chunkSizeMB * 1024 * 1024);
  const baseName = basename(filePath, ".csv");
  // Default output directory: alongside the input file.
  const outDir = outputDir || dirname(filePath);
  logger.info(`Output directory: ${outDir}`);
  await mkdir(outDir, { recursive: true });
  if (clearOutputDir) {
    // Only removes previous chunk outputs for THIS base name, not other files.
    logger.warn(`Clearing output directory: ${outDir}`);
    const files = await readdir(outDir);
    await Promise.all(files.filter((f) => f.startsWith(`${baseName}_chunk_`) && f.endsWith(".csv")).map((f) => unlink(join(outDir, f))));
  }
  let headerRow = null;
  let expectedCols = null;
  let totalLines = 0;
  let currentChunk = 1;
  let currentSize = 0;
  // columns: false -> rows arrive as string arrays; header handled manually.
  const parser = new Parser({
    columns: false,
    skip_empty_lines: true
  });
  // Running sample of row byte sizes, used to estimate total row count.
  let sampleBytes = 0;
  let sampleRows = 0;
  const emit = () => {
    const avg = sampleRows > 0 ? sampleBytes / sampleRows : 0;
    // Estimate never goes below the rows already processed.
    const estTotal = avg > 0 ? Math.max(totalLines, Math.ceil(fileBytes / avg)) : void 0;
    onProgress(totalLines, estTotal);
    lastTick = Date.now();
  };
  emit();
  let writer = null;
  const currentChunkPath = () => join(outDir, `${baseName}_chunk_${pad4(currentChunk)}.csv`);
  const t = new Transform({
    objectMode: true,
    async transform(row, _enc, cb) {
      try {
        // First row is the header: remember it and open the first chunk.
        if (!headerRow) {
          headerRow = row.slice(0);
          expectedCols = headerRow.length;
          writer = createCsvChunkWriter(currentChunkPath(), headerRow);
          cb();
          return;
        }
        // Column-count mismatches are logged but the row is still written.
        if (expectedCols !== null && row.length !== expectedCols) logger.warn(`Row has ${row.length} cols; expected ${expectedCols}`);
        totalLines += 1;
        // Coarse progress ping every 250k rows (independent of reportEveryMs).
        if (totalLines % 25e4 === 0) onProgress(totalLines);
        // Re-key the array row by header so fast-csv can serialize it.
        const obj = Object.fromEntries(headerRow.map((h, i) => [h, row[i]]));
        const rowBytes = approxRowBytes(obj);
        sampleBytes += rowBytes;
        sampleRows += 1;
        // Throttled progress report.
        if (Date.now() - lastTick >= reportEveryMs) emit();
        // Roll to a new chunk when this row would push the current chunk over
        // the size budget (never rolls an empty chunk).
        if (writer && currentSize > 0 && currentSize + rowBytes > chunkSizeBytes) {
          await writer.end();
          currentChunk += 1;
          currentSize = 0;
          logger.info(`Rolling to chunk ${currentChunk} after ${totalLines.toLocaleString()} rows.`);
          writer = createCsvChunkWriter(currentChunkPath(), headerRow);
        }
        if (!writer) writer = createCsvChunkWriter(currentChunkPath(), headerRow);
        await writer.write(obj);
        currentSize += rowBytes;
        cb();
      } catch (e) {
        cb(e);
      }
    },
    async flush(cb) {
      // Close the last open chunk and emit a final progress report.
      try {
        if (writer) {
          await writer.end();
          writer = null;
        }
        emit();
        cb();
      } catch (e) {
        cb(e);
      }
    }
  });
  await pipeline(createReadStream(filePath), parser, t);
  onProgress(totalLines);
  logger.info(`Chunked ${filePath} into ${currentChunk} file(s); processed ${totalLines.toLocaleString()} rows.`);
}
|
|
344
|
+
//#endregion
|
|
345
|
+
//#region src/bluebird.ts
|
|
346
|
+
/**
 * Concurrent map with configurable concurrency limit.
 * Re-export of Bluebird.map for use across the monorepo.
 */
// bind() keeps `this` pointing at the Bluebird constructor when the function
// is imported and called standalone.
const map = Bluebird.map.bind(Bluebird);
/**
 * Sequential map (concurrency = 1).
 * Re-export of Bluebird.mapSeries for use across the monorepo.
 */
const mapSeries = Bluebird.mapSeries.bind(Bluebird);
|
|
356
|
+
//#endregion
|
|
357
|
+
//#region src/index.ts
|
|
358
|
+
/** Matches one or more consecutive whitespace characters. */
const whitespacePattern = /\s+/g;
/**
 * Normalize a developer-tool name: trim the ends and collapse internal runs
 * of whitespace to single spaces.
 *
 * @param value - Raw name
 * @returns Normalized name
 */
function normalizeDeveloperToolName(value) {
  return value.trim().replace(whitespacePattern, " ");
}
/**
 * Title-case a normalized name: each space-separated word gets an uppercase
 * first letter and a lowercase remainder.
 *
 * @param value - Raw name
 * @returns Display name
 */
function toPackageDisplayName(value) {
  const words = normalizeDeveloperToolName(value).split(" ");
  const titled = words.map((word) => {
    const head = word.charAt(0).toUpperCase();
    const tail = word.slice(1).toLowerCase();
    return head + tail;
  });
  return titled.join(" ");
}
/**
 * Slugify a normalized name: lowercase with hyphens between words.
 *
 * @param value - Raw name
 * @returns URL-safe slug
 */
function toPackageSlug(value) {
  const normalized = normalizeDeveloperToolName(value);
  return normalized.toLowerCase().replace(whitespacePattern, "-");
}
/**
 * Compute both the display name and slug for a raw package name.
 *
 * @param value - Raw name
 * @returns Parts: { displayName, slug }
 */
function describePackageName(value) {
  const displayName = toPackageDisplayName(value);
  const slug = toPackageSlug(value);
  return {
    displayName,
    slug
  };
}
|
|
374
|
+
//#endregion
|
|
375
|
+
// Public API surface of the package; every runtime export goes through here.
export { DAY_MS, FIVE_MIN_MS, HOUR_MS, RateCounter, addDaysUtc, addMs, chunkOneCsvFile, clampPageSize, describePackageName, extractErrorMessage, getErrorStatus, limitRecords, map, mapSeries, normalizeDeveloperToolName, retrySamePromise, sleepPromise, splitInHalf, startOfHour, startOfUtcDay, toPackageDisplayName, toPackageSlug };
|
|
376
|
+
|
|
377
|
+
//# sourceMappingURL=index.mjs.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.mjs","names":[],"sources":["../src/splitInHalf.ts","../src/sleepPromise.ts","../src/extractErrorMessage.ts","../src/getErrorStatus.ts","../src/limitRecords.ts","../src/RateCounter.ts","../src/time.ts","../src/retrySamePromise.ts","../src/chunkOneCsvFile.ts","../src/bluebird.ts","../src/index.ts"],"sourcesContent":["/**\n * Split an array roughly in half. Stable for even/odd lengths.\n *\n * @param entries - Items to split\n * @returns A tuple [left, right] halves\n */\nexport function splitInHalf<T>(entries: T[]): [T[], T[]] {\n const mid = Math.floor(entries.length / 2);\n return [entries.slice(0, mid), entries.slice(mid)];\n}\n","/**\n * Sleep in a promise\n *\n * @param sleepTime - The time to sleep in milliseconds.\n * @returns Resolves promise\n */\nexport function sleepPromise(sleepTime: number): Promise<number> {\n return new Promise((resolve) => {\n setTimeout(() => resolve(sleepTime), sleepTime);\n });\n}\n","/**\n * Extract a human-readable error message from a thrown error.\n *\n * Tries to parse JSON bodies that follow common REST/GraphQL error patterns:\n * { error: { message: string } }\n * { errors: [{ message: string }, ...] }\n *\n * Falls back to `err.message` or 'Unknown error'.\n *\n * @param err - Unknown error thrown by network call\n * @returns A concise error string safe to log/show\n */\nexport function extractErrorMessage(err: unknown): string {\n // eslint-disable-next-line @typescript-eslint/no-explicit-any\n const anyErr = err as any;\n let errorMsg = anyErr?.response?.body || anyErr?.message || 'Unknown error';\n\n // Try to parse as JSON; if not parsable, leave as-is.\n try {\n const parsed = JSON.parse(errorMsg);\n // Typical shapes: errors[], error.errors[], error.message\n const candidates = parsed.errors ||\n parsed.error?.errors || [parsed.error?.message || parsed.error];\n\n const msgs = Array.isArray(candidates) ? 
candidates : [candidates];\n errorMsg = msgs.filter(Boolean).join(', ');\n } catch {\n // not JSON, ignore\n }\n return errorMsg;\n}\n","/**\n * Extract an HTTP status code from a thrown error (got compatible).\n *\n * @param err - Unknown error thrown by network call\n * @returns HTTP status code, if present\n */\nexport function getErrorStatus(err: unknown): number | undefined {\n // eslint-disable-next-line @typescript-eslint/no-explicit-any\n const anyErr = err as any;\n return anyErr?.response?.statusCode ?? anyErr?.response?.status;\n}\n","/**\n * Limits the number of records in the returned object to a maximum.\n * For entries beyond the max, sets their value to `true`.\n *\n * @param obj - Object\n * @param max - Maximum number of entries to retain original value.\n * @returns Object with keys mapped to their value or `true` if over the limit.\n */\nexport function limitRecords<T>(obj: Record<string, T>, max: number): Record<string, T | true> {\n return Object.entries(obj).reduce(\n (acc, [userId, value], i) => {\n acc[userId] = i < max ? 
value : true;\n return acc;\n },\n {} as Record<string, T | true>,\n );\n}\n","/**\n * Tracks counts over time and calculates rates within a specified time window.\n *\n * This class maintains a rolling window of count \"buckets\" (timestamped values)\n * and provides methods to add new counts and compute the rate of events over a\n * configurable time window.\n *\n * Example usage:\n * ```typescript\n * const counter = new RateCounter();\n * counter.add(5); // Add 5 events\n * const rate = counter.rate(60_000); // Get rate over last 60 seconds\n * ```\n */\nexport class RateCounter {\n private buckets: Array<{\n /** Timestamp of the bucket */\n t: number;\n /** Number of events in the bucket */\n n: number;\n }> = [];\n\n /**\n * Adds a new count to the counter.\n *\n * @param n - The number of events to add to the counter.\n */\n add(n: number): void {\n const now = Date.now();\n this.buckets.push({ t: now, n });\n // keep last 2 minutes of buckets\n const cutoff = now - 120_000;\n while (this.buckets.length > 0 && this.buckets[0]!.t < cutoff) {\n this.buckets.shift();\n }\n }\n\n /**\n * rate over the last `windowMs` milliseconds\n *\n * @param windowMs - The time window in milliseconds to calculate the rate over.\n * @returns The average rate of events per second over the specified time window.\n */\n rate(windowMs: number): number {\n const now = Date.now();\n const cutoff = now - windowMs;\n let sum = 0;\n for (let i = this.buckets.length - 1; i >= 0; i -= 1) {\n const b = this.buckets[i]!;\n if (b.t < cutoff) break;\n sum += b.n;\n }\n return sum / (windowMs / 1000);\n }\n}\n","export const DAY_MS: number = 24 * 60 * 60 * 1000;\nexport const HOUR_MS: number = 60 * 60 * 1000;\nexport const FIVE_MIN_MS: number = 5 * 60 * 1000;\n\n/**\n * Clamp 1..50 per API spec\n *\n * @param n - Number\n * @returns Clamped number\n */\nexport const clampPageSize = (n?: number): number => Math.max(1, Math.min(50, n ?? 
50));\n\n/**\n * TRUE UTC day start (00:00:00Z)\n *\n * @param d - Date\n * @returns Day start\n */\nexport const startOfUtcDay = (d: Date): Date =>\n new Date(Date.UTC(d.getUTCFullYear(), d.getUTCMonth(), d.getUTCDate()));\n\n/**\n * TRUE UTC hour start (HH:00:00Z)\n *\n * @param d - Date\n * @returns Hour start\n */\nexport const startOfHour = (d: Date): Date =>\n new Date(Date.UTC(d.getUTCFullYear(), d.getUTCMonth(), d.getUTCDate(), d.getUTCHours()));\n\n/**\n * Add ms safely\n *\n * @param d - Date\n * @param ms - Milliseconds to add\n * @returns New date\n */\nexport const addMs = (d: Date, ms: number): Date => new Date(d.getTime() + ms);\n\n/**\n * Add whole UTC days (exclusive bound helper)\n *\n * @param d - Date\n * @param n - Number of days to add\n * @returns New date\n */\nexport const addDaysUtc = (d: Date, n: number): Date => new Date(d.getTime() + n * DAY_MS);\n","import { extractErrorMessage } from './extractErrorMessage.js';\nimport { getErrorStatus } from './getErrorStatus.js';\nimport { sleepPromise } from './sleepPromise.js';\n\nexport interface RetryPolicy {\n /** Maximum retry attempts (not counting the initial try) */\n maxAttempts: number;\n /** Fixed delay between attempts in milliseconds */\n delayMs: number;\n /**\n * Decide whether a given error should be retried.\n *\n * @param status - HTTP status code (if known)\n * @param message - Extracted error message (if known)\n */\n shouldRetry(status?: number, message?: string): boolean;\n}\n\n/**\n * Retry a single async operation according to the provided policy.\n * The operation is executed once initially, then up to `maxAttempts` retries.\n *\n * @param op - Operation to run\n * @param policy - Retry policy\n * @param onBackoff - Observer called before each retry (for logging/metrics)\n * @returns Result of the operation if it eventually succeeds\n * @throws The last error encountered if all retries fail\n */\nexport async function retrySamePromise<T>(\n op: () => Promise<T>,\n policy: 
RetryPolicy,\n onBackoff: (note: string) => void,\n): Promise<T> {\n let attempt = 0;\n\n // eslint-disable-next-line no-constant-condition\n while (true) {\n try {\n // First pass and any subsequent retries run the same op.\n return await op();\n } catch (err) {\n attempt += 1;\n\n // Use shared helpers for status + human-readable message extraction\n const status = getErrorStatus(err);\n const msg = extractErrorMessage(err);\n\n const canRetry = attempt <= policy.maxAttempts && policy.shouldRetry(status, msg);\n\n if (!canRetry) {\n // Surface the final error to the caller, which may then split/fail.\n throw err;\n }\n\n onBackoff(\n `Retrying after status=${status} attempt=${attempt}/${policy.maxAttempts} — ${msg}`,\n );\n await sleepPromise(policy.delayMs);\n // Loop to retry\n }\n }\n}\n","import { once } from 'node:events';\nimport { createReadStream, createWriteStream } from 'node:fs';\nimport { mkdir, readdir, unlink, stat } from 'node:fs/promises';\nimport { basename, dirname, join } from 'node:path';\nimport { Transform } from 'node:stream';\nimport { pipeline } from 'node:stream/promises';\n\nimport { Parser } from 'csv-parse';\nimport * as fastcsv from 'fast-csv';\n\nimport type { Logger } from './logger.js';\n\n/**\n * Options for chunking a single CSV file\n */\nexport type ChunkOpts = {\n /** Path to the CSV file to chunk */\n filePath: string;\n /** Output directory for chunk files; defaults to the same directory as the input file */\n outputDir?: string;\n /** Clear output directory before starting */\n clearOutputDir: boolean;\n /** Chunk size in MB */\n chunkSizeMB: number;\n /** Optional report interval in milliseconds for progress updates */\n reportEveryMs?: number;\n /** Callback for progress updates */\n onProgress: (processed: number, total?: number) => void;\n /** Logger instance (required — no default; callers must inject) */\n logger: Logger;\n};\n\n/**\n * Create a CSV writer (fast-csv formatter piped to a write stream) that writes\n * 
a header line first, and then accepts object rows. Returns a tiny API to\n * write rows with backpressure handling and to close the file cleanly.\n *\n * @param filePath - The path to the output CSV file\n * @param headers - The headers for the CSV file\n * @returns An object with `write` and `end` methods\n */\nfunction createCsvChunkWriter(\n filePath: string,\n headers: string[],\n): {\n /** Write a row object to the CSV file */\n write: (row: Record<string, unknown>) => Promise<void>;\n /** Close the CSV file, ensuring all data is flushed */\n end: () => Promise<void>;\n} {\n const ws = createWriteStream(filePath);\n const csv = fastcsv.format({ headers, writeHeaders: true, objectMode: true });\n // Pipe csv → file stream\n csv.pipe(ws);\n\n return {\n /**\n * Write a row object to the CSV file.\n *\n * @param row - The row data as an object\n */\n async write(row) {\n // Respect backpressure from fast-csv formatter\n const ok = csv.write(row);\n if (!ok) {\n await once(csv, 'drain');\n }\n },\n /**\n * Close the CSV file, ensuring all data is flushed.\n */\n async end() {\n // End formatter; wait for underlying file stream to finish flush/close\n const finished = Promise.all([once(ws, 'finish')]);\n csv.end();\n await finished;\n },\n };\n}\n\n/**\n * Zero-pad chunk numbers to four digits (e.g., 1 -> \"0001\").\n *\n * @param n - The chunk number to pad\n * @returns The padded chunk number as a string\n */\nfunction pad4(n: number): string {\n return String(n).padStart(4, '0');\n}\n\n/**\n * Approximate row size in bytes using comma-joined field values.\n *\n * @param obj - The row object to estimate size for\n * @returns Approximate byte size of the row when serialized as CSV\n */\nfunction approxRowBytes(obj: Record<string, unknown>): number {\n // naive but fast; adequate for chunk rollover thresholding\n return Buffer.byteLength(\n Object.values(obj)\n .map((v) => (v == null ? 
'' : String(v)))\n .join(','),\n 'utf8',\n );\n}\n\n/**\n * Stream a single CSV file and write chunk files of roughly chunkSizeMB.\n * - Writes header to each chunk.\n * - Logs periodic progress via onProgress.\n *\n * @param opts - Options for chunking the file\n * @returns Promise that resolves when done\n */\nexport async function chunkOneCsvFile(opts: ChunkOpts): Promise<void> {\n const {\n filePath,\n outputDir,\n clearOutputDir,\n chunkSizeMB,\n onProgress,\n reportEveryMs = 500,\n logger,\n } = opts;\n const { size: fileBytes } = await stat(filePath); // total bytes on disk\n let lastTick = 0;\n\n logger.info(`Chunking ${filePath} into ~${chunkSizeMB}MB files...`);\n\n const chunkSizeBytes = Math.floor(chunkSizeMB * 1024 * 1024);\n const baseName = basename(filePath, '.csv');\n const outDir = outputDir || dirname(filePath);\n logger.info(`Output directory: ${outDir}`);\n await mkdir(outDir, { recursive: true });\n\n // Clear previous chunk files for this base\n if (clearOutputDir) {\n logger.warn(`Clearing output directory: ${outDir}`);\n const files = await readdir(outDir);\n await Promise.all(\n files\n .filter((f) => f.startsWith(`${baseName}_chunk_`) && f.endsWith('.csv'))\n .map((f) => unlink(join(outDir, f))),\n );\n }\n\n let headerRow: string[] | null = null;\n let expectedCols: number | null = null;\n let totalLines = 0;\n let currentChunk = 1;\n let currentSize = 0;\n\n const parser = new Parser({\n columns: false,\n skip_empty_lines: true,\n });\n\n // running sample to estimate avg row bytes\n let sampleBytes = 0;\n let sampleRows = 0;\n\n const emit = (): void => {\n const avg = sampleRows > 0 ? sampleBytes / sampleRows : 0;\n const estTotal = avg > 0 ? 
Math.max(totalLines, Math.ceil(fileBytes / avg)) : undefined;\n onProgress(totalLines, estTotal);\n lastTick = Date.now();\n };\n\n // seed an initial 0/N as soon as we start\n emit();\n\n // Current active chunk writer; created after we know headers\n let writer: {\n /** Write a row object to the current chunk file */\n write: (row: Record<string, unknown>) => Promise<void>;\n /** Close the current chunk file */\n end: () => Promise<void>;\n } | null = null;\n\n // Returns current chunk file path -- chunk number is always 4-digit padded\n const currentChunkPath = (): string =>\n join(outDir, `${baseName}_chunk_${pad4(currentChunk)}.csv`);\n\n const t = new Transform({\n objectMode: true,\n /**\n * Transform each row of the CSV file into a chunk.\n *\n * @param row - The current row being processed\n * @param _enc - Encoding (not used)\n * @param cb - Callback to signal completion or error\n */\n async transform(row: string[], _enc, cb) {\n try {\n // First row is the header\n if (!headerRow) {\n headerRow = row.slice(0);\n expectedCols = headerRow.length;\n\n // Open first chunk with header asynchronously\n writer = createCsvChunkWriter(currentChunkPath(), headerRow);\n cb();\n return;\n }\n\n // sanity check rows (non-fatal)\n if (expectedCols !== null && row.length !== expectedCols) {\n logger.warn(`Row has ${row.length} cols; expected ${expectedCols}`);\n }\n\n totalLines += 1;\n if (totalLines % 250_000 === 0) {\n onProgress(totalLines);\n }\n\n // Build row object using the original header\n const obj = Object.fromEntries(headerRow!.map((h, i) => [h, row[i]]));\n\n // Determine the row size up-front\n const rowBytes = approxRowBytes(obj);\n sampleBytes += rowBytes;\n sampleRows += 1;\n\n // time-based throttle for UI updates\n if (Date.now() - lastTick >= reportEveryMs) emit();\n\n // If adding this row would exceed the threshold, roll first,\n // so this row becomes the first row in the next chunk.\n if (writer && currentSize > 0 && currentSize + rowBytes > 
chunkSizeBytes) {\n await writer.end();\n currentChunk += 1;\n currentSize = 0;\n logger.info(\n `Rolling to chunk ${currentChunk} after ${totalLines.toLocaleString()} rows.`,\n );\n writer = createCsvChunkWriter(currentChunkPath(), headerRow!);\n }\n\n // Ensure writer exists (should after header)\n if (!writer) {\n writer = createCsvChunkWriter(currentChunkPath(), headerRow!);\n }\n\n // Write row and update approximate size\n await writer.write(obj);\n currentSize += rowBytes;\n\n cb();\n } catch (e) {\n cb(e as Error);\n }\n },\n\n // Ensure final file is closed\n /**\n * Flush is called when the readable has ended; we close any open writer.\n *\n * @param cb - Callback to signal completion or error\n */\n async flush(cb) {\n try {\n if (writer) {\n await writer.end();\n writer = null;\n }\n emit(); // Final progress tick\n cb();\n } catch (e) {\n cb(e as Error);\n }\n },\n });\n\n const rs = createReadStream(filePath);\n await pipeline(rs, parser, t);\n\n // Final progress tick\n onProgress(totalLines);\n logger.info(\n `Chunked ${filePath} into ${currentChunk} file(s); processed ${totalLines.toLocaleString()} rows.`,\n );\n}\n","import Bluebird from 'bluebird';\n\n/**\n * Concurrent map with configurable concurrency limit.\n * Re-export of Bluebird.map for use across the monorepo.\n */\nexport const map: typeof Bluebird.map = Bluebird.map.bind(Bluebird);\n\n/**\n * Sequential map (concurrency = 1).\n * Re-export of Bluebird.mapSeries for use across the monorepo.\n */\nexport const mapSeries: typeof Bluebird.mapSeries = Bluebird.mapSeries.bind(Bluebird);\n","const whitespacePattern = /\\s+/g;\n\nexport interface DeveloperToolNameParts {\n displayName: string;\n slug: string;\n}\n\nexport function normalizeDeveloperToolName(value: string): string {\n return value.trim().replace(whitespacePattern, ' ');\n}\n\nexport function toPackageDisplayName(value: string): string {\n return normalizeDeveloperToolName(value)\n .split(' ')\n .map((segment) => 
segment.charAt(0).toUpperCase() + segment.slice(1).toLowerCase())\n .join(' ');\n}\n\nexport function toPackageSlug(value: string): string {\n return normalizeDeveloperToolName(value).toLowerCase().replace(whitespacePattern, '-');\n}\n\nexport function describePackageName(value: string): DeveloperToolNameParts {\n return {\n displayName: toPackageDisplayName(value),\n slug: toPackageSlug(value),\n };\n}\n\nexport * from './logger.js';\nexport * from './splitInHalf.js';\nexport * from './sleepPromise.js';\nexport * from './extractErrorMessage.js';\nexport * from './getErrorStatus.js';\nexport * from './limitRecords.js';\nexport * from './RateCounter.js';\nexport * from './time.js';\nexport * from './retrySamePromise.js';\nexport * from './chunkOneCsvFile.js';\nexport * from './bluebird.js';\n"],"mappings":";;;;;;;;;;;;;;;;AAMA,SAAgB,YAAe,SAA0B;CACvD,MAAM,MAAM,KAAK,MAAM,QAAQ,SAAS,EAAE;AAC1C,QAAO,CAAC,QAAQ,MAAM,GAAG,IAAI,EAAE,QAAQ,MAAM,IAAI,CAAC;;;;;;;;;;ACFpD,SAAgB,aAAa,WAAoC;AAC/D,QAAO,IAAI,SAAS,YAAY;AAC9B,mBAAiB,QAAQ,UAAU,EAAE,UAAU;GAC/C;;;;;;;;;;;;;;;;ACGJ,SAAgB,oBAAoB,KAAsB;CAExD,MAAM,SAAS;CACf,IAAI,WAAW,QAAQ,UAAU,QAAQ,QAAQ,WAAW;AAG5D,KAAI;EACF,MAAM,SAAS,KAAK,MAAM,SAAS;EAEnC,MAAM,aAAa,OAAO,UACxB,OAAO,OAAO,UAAU,CAAC,OAAO,OAAO,WAAW,OAAO,MAAM;AAGjE,cADa,MAAM,QAAQ,WAAW,GAAG,aAAa,CAAC,WAAW,EAClD,OAAO,QAAQ,CAAC,KAAK,KAAK;SACpC;AAGR,QAAO;;;;;;;;;;ACvBT,SAAgB,eAAe,KAAkC;CAE/D,MAAM,SAAS;AACf,QAAO,QAAQ,UAAU,cAAc,QAAQ,UAAU;;;;;;;;;;;;ACD3D,SAAgB,aAAgB,KAAwB,KAAuC;AAC7F,QAAO,OAAO,QAAQ,IAAI,CAAC,QACxB,KAAK,CAAC,QAAQ,QAAQ,MAAM;AAC3B,MAAI,UAAU,IAAI,MAAM,QAAQ;AAChC,SAAO;IAET,EAAE,CACH;;;;;;;;;;;;;;;;;;ACDH,IAAa,cAAb,MAAyB;CACvB,UAKK,EAAE;;;;;;CAOP,IAAI,GAAiB;EACnB,MAAM,MAAM,KAAK,KAAK;AACtB,OAAK,QAAQ,KAAK;GAAE,GAAG;GAAK;GAAG,CAAC;EAEhC,MAAM,SAAS,MAAM;AACrB,SAAO,KAAK,QAAQ,SAAS,KAAK,KAAK,QAAQ,GAAI,IAAI,OACrD,MAAK,QAAQ,OAAO;;;;;;;;CAUxB,KAAK,UAA0B;EAE7B,MAAM,SADM,KAAK,KAAK,GACD;EACrB,IAAI,MAAM;AACV,OAAK,IAAI,IAAI,KAAK,QAAQ,SAAS,GAAG,KAAK,GAAG,KAAK,GAAG;GACpD,MAAM,IAAI,KAAK,QAAQ;AACvB
,OAAI,EAAE,IAAI,OAAQ;AAClB,UAAO,EAAE;;AAEX,SAAO,OAAO,WAAW;;;;;ACpD7B,MAAa,SAAiB,OAAU,KAAK;AAC7C,MAAa,UAAkB,OAAU;AACzC,MAAa,cAAsB,MAAS;;;;;;;AAQ5C,MAAa,iBAAiB,MAAuB,KAAK,IAAI,GAAG,KAAK,IAAI,IAAI,KAAK,GAAG,CAAC;;;;;;;AAQvF,MAAa,iBAAiB,MAC5B,IAAI,KAAK,KAAK,IAAI,EAAE,gBAAgB,EAAE,EAAE,aAAa,EAAE,EAAE,YAAY,CAAC,CAAC;;;;;;;AAQzE,MAAa,eAAe,MAC1B,IAAI,KAAK,KAAK,IAAI,EAAE,gBAAgB,EAAE,EAAE,aAAa,EAAE,EAAE,YAAY,EAAE,EAAE,aAAa,CAAC,CAAC;;;;;;;;AAS1F,MAAa,SAAS,GAAS,OAAqB,IAAI,KAAK,EAAE,SAAS,GAAG,GAAG;;;;;;;;AAS9E,MAAa,cAAc,GAAS,MAAoB,IAAI,KAAK,EAAE,SAAS,GAAG,IAAI,OAAO;;;;;;;;;;;;;AClB1F,eAAsB,iBACpB,IACA,QACA,WACY;CACZ,IAAI,UAAU;AAGd,QAAO,KACL,KAAI;AAEF,SAAO,MAAM,IAAI;UACV,KAAK;AACZ,aAAW;EAGX,MAAM,SAAS,eAAe,IAAI;EAClC,MAAM,MAAM,oBAAoB,IAAI;AAIpC,MAAI,EAFa,WAAW,OAAO,eAAe,OAAO,YAAY,QAAQ,IAAI,EAI/E,OAAM;AAGR,YACE,yBAAyB,OAAO,WAAW,QAAQ,GAAG,OAAO,YAAY,KAAK,MAC/E;AACD,QAAM,aAAa,OAAO,QAAQ;;;;;;;;;;;;;;AChBxC,SAAS,qBACP,UACA,SAMA;CACA,MAAM,KAAK,kBAAkB,SAAS;CACtC,MAAM,MAAM,QAAQ,OAAO;EAAE;EAAS,cAAc;EAAM,YAAY;EAAM,CAAC;AAE7E,KAAI,KAAK,GAAG;AAEZ,QAAO;EAML,MAAM,MAAM,KAAK;AAGf,OAAI,CADO,IAAI,MAAM,IAAI,CAEvB,OAAM,KAAK,KAAK,QAAQ;;EAM5B,MAAM,MAAM;GAEV,MAAM,WAAW,QAAQ,IAAI,CAAC,KAAK,IAAI,SAAS,CAAC,CAAC;AAClD,OAAI,KAAK;AACT,SAAM;;EAET;;;;;;;;AASH,SAAS,KAAK,GAAmB;AAC/B,QAAO,OAAO,EAAE,CAAC,SAAS,GAAG,IAAI;;;;;;;;AASnC,SAAS,eAAe,KAAsC;AAE5D,QAAO,OAAO,WACZ,OAAO,OAAO,IAAI,CACf,KAAK,MAAO,KAAK,OAAO,KAAK,OAAO,EAAE,CAAE,CACxC,KAAK,IAAI,EACZ,OACD;;;;;;;;;;AAWH,eAAsB,gBAAgB,MAAgC;CACpE,MAAM,EACJ,UACA,WACA,gBACA,aACA,YACA,gBAAgB,KAChB,WACE;CACJ,MAAM,EAAE,MAAM,cAAc,MAAM,KAAK,SAAS;CAChD,IAAI,WAAW;AAEf,QAAO,KAAK,YAAY,SAAS,SAAS,YAAY,aAAa;CAEnE,MAAM,iBAAiB,KAAK,MAAM,cAAc,OAAO,KAAK;CAC5D,MAAM,WAAW,SAAS,UAAU,OAAO;CAC3C,MAAM,SAAS,aAAa,QAAQ,SAAS;AAC7C,QAAO,KAAK,qBAAqB,SAAS;AAC1C,OAAM,MAAM,QAAQ,EAAE,WAAW,MAAM,CAAC;AAGxC,KAAI,gBAAgB;AAClB,SAAO,KAAK,8BAA8B,SAAS;EACnD,MAAM,QAAQ,MAAM,QAAQ,OAAO;AACnC,QAAM,QAAQ,IACZ,MACG,QAAQ,MAAM,EAAE,WAAW,GAAG,SAAS,SAAS,IAAI,EAAE,SAAS,OAAO,CAAC,CACvE,KAAK,MAAM,OAAO,KAAK,QAAQ,EAAE,CAAC,CAAC,CACv
C;;CAGH,IAAI,YAA6B;CACjC,IAAI,eAA8B;CAClC,IAAI,aAAa;CACjB,IAAI,eAAe;CACnB,IAAI,cAAc;CAElB,MAAM,SAAS,IAAI,OAAO;EACxB,SAAS;EACT,kBAAkB;EACnB,CAAC;CAGF,IAAI,cAAc;CAClB,IAAI,aAAa;CAEjB,MAAM,aAAmB;EACvB,MAAM,MAAM,aAAa,IAAI,cAAc,aAAa;EACxD,MAAM,WAAW,MAAM,IAAI,KAAK,IAAI,YAAY,KAAK,KAAK,YAAY,IAAI,CAAC,GAAG,KAAA;AAC9E,aAAW,YAAY,SAAS;AAChC,aAAW,KAAK,KAAK;;AAIvB,OAAM;CAGN,IAAI,SAKO;CAGX,MAAM,yBACJ,KAAK,QAAQ,GAAG,SAAS,SAAS,KAAK,aAAa,CAAC,MAAM;CAE7D,MAAM,IAAI,IAAI,UAAU;EACtB,YAAY;EAQZ,MAAM,UAAU,KAAe,MAAM,IAAI;AACvC,OAAI;AAEF,QAAI,CAAC,WAAW;AACd,iBAAY,IAAI,MAAM,EAAE;AACxB,oBAAe,UAAU;AAGzB,cAAS,qBAAqB,kBAAkB,EAAE,UAAU;AAC5D,SAAI;AACJ;;AAIF,QAAI,iBAAiB,QAAQ,IAAI,WAAW,aAC1C,QAAO,KAAK,WAAW,IAAI,OAAO,kBAAkB,eAAe;AAGrE,kBAAc;AACd,QAAI,aAAa,SAAY,EAC3B,YAAW,WAAW;IAIxB,MAAM,MAAM,OAAO,YAAY,UAAW,KAAK,GAAG,MAAM,CAAC,GAAG,IAAI,GAAG,CAAC,CAAC;IAGrE,MAAM,WAAW,eAAe,IAAI;AACpC,mBAAe;AACf,kBAAc;AAGd,QAAI,KAAK,KAAK,GAAG,YAAY,cAAe,OAAM;AAIlD,QAAI,UAAU,cAAc,KAAK,cAAc,WAAW,gBAAgB;AACxE,WAAM,OAAO,KAAK;AAClB,qBAAgB;AAChB,mBAAc;AACd,YAAO,KACL,oBAAoB,aAAa,SAAS,WAAW,gBAAgB,CAAC,QACvE;AACD,cAAS,qBAAqB,kBAAkB,EAAE,UAAW;;AAI/D,QAAI,CAAC,OACH,UAAS,qBAAqB,kBAAkB,EAAE,UAAW;AAI/D,UAAM,OAAO,MAAM,IAAI;AACvB,mBAAe;AAEf,QAAI;YACG,GAAG;AACV,OAAG,EAAW;;;EAUlB,MAAM,MAAM,IAAI;AACd,OAAI;AACF,QAAI,QAAQ;AACV,WAAM,OAAO,KAAK;AAClB,cAAS;;AAEX,UAAM;AACN,QAAI;YACG,GAAG;AACV,OAAG,EAAW;;;EAGnB,CAAC;AAGF,OAAM,SADK,iBAAiB,SAAS,EAClB,QAAQ,EAAE;AAG7B,YAAW,WAAW;AACtB,QAAO,KACL,WAAW,SAAS,QAAQ,aAAa,sBAAsB,WAAW,gBAAgB,CAAC,QAC5F;;;;;;;;AClRH,MAAa,MAA2B,SAAS,IAAI,KAAK,SAAS;;;;;AAMnE,MAAa,YAAuC,SAAS,UAAU,KAAK,SAAS;;;ACZrF,MAAM,oBAAoB;AAO1B,SAAgB,2BAA2B,OAAuB;AAChE,QAAO,MAAM,MAAM,CAAC,QAAQ,mBAAmB,IAAI;;AAGrD,SAAgB,qBAAqB,OAAuB;AAC1D,QAAO,2BAA2B,MAAM,CACrC,MAAM,IAAI,CACV,KAAK,YAAY,QAAQ,OAAO,EAAE,CAAC,aAAa,GAAG,QAAQ,MAAM,EAAE,CAAC,aAAa,CAAC,CAClF,KAAK,IAAI;;AAGd,SAAgB,cAAc,OAAuB;AACnD,QAAO,2BAA2B,MAAM,CAAC,aAAa,CAAC,QAAQ,mBAAmB,IAAI;;AAGxF,SAAgB,oBAAoB,OAAuC;AACzE,QAAO;EACL,aAAa,qBAAqB,MAAM;EACxC,MAAM,cAAc,MAAM;EAC3B"}
|
package/package.json
ADDED
|
@@ -0,0 +1,44 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "@transcend-io/utils",
|
|
3
|
+
"version": "0.0.0",
|
|
4
|
+
"description": "Shared utilities for the Transcend developer tools monorepo.",
|
|
5
|
+
"license": "Apache-2.0",
|
|
6
|
+
"files": [
|
|
7
|
+
"dist"
|
|
8
|
+
],
|
|
9
|
+
"type": "module",
|
|
10
|
+
"sideEffects": false,
|
|
11
|
+
"types": "./dist/index.d.mts",
|
|
12
|
+
"exports": {
|
|
13
|
+
".": {
|
|
14
|
+
"@transcend-io/source": "./src/index.ts",
|
|
15
|
+
"types": "./dist/index.d.mts",
|
|
16
|
+
"default": "./dist/index.mjs"
|
|
17
|
+
}
|
|
18
|
+
},
|
|
19
|
+
"publishConfig": {
|
|
20
|
+
"access": "public"
|
|
21
|
+
},
|
|
22
|
+
"scripts": {
|
|
23
|
+
"build": "tsdown",
|
|
24
|
+
"typecheck": "tsc -p tsconfig.json --noEmit",
|
|
25
|
+
"test": "vitest run",
|
|
26
|
+
"check-exports": "attw --pack . --ignore-rules cjs-resolves-to-esm"
|
|
27
|
+
},
|
|
28
|
+
"dependencies": {
|
|
29
|
+
"bluebird": "^3.7.2",
|
|
30
|
+
"csv-parse": "^5.6.0",
|
|
31
|
+
"fast-csv": "^4.3.6"
|
|
32
|
+
},
|
|
33
|
+
"devDependencies": {
|
|
34
|
+
"@arethetypeswrong/cli": "catalog:",
|
|
35
|
+
"@types/bluebird": "^3.5.38",
|
|
36
|
+
"@types/node": "catalog:",
|
|
37
|
+
"tsdown": "catalog:",
|
|
38
|
+
"typescript": "catalog:",
|
|
39
|
+
"vitest": "catalog:"
|
|
40
|
+
},
|
|
41
|
+
"engines": {
|
|
42
|
+
"node": ">=22.0.0"
|
|
43
|
+
}
|
|
44
|
+
}
|