s3-querier 1.0.0 → 1.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "s3-querier",
3
- "version": "1.0.0",
3
+ "version": "1.0.2",
4
4
  "description": "Query S3-compatible storage with DuckDB and SQL",
5
5
  "type": "module",
6
6
  "main": "src/s3-querier.js",
package/src/s3/s3.js CHANGED
@@ -3,7 +3,7 @@ import { dirname } from 'node:path';
3
3
  import { S3Client, ListObjectsV2Command, GetObjectCommand } from '@aws-sdk/client-s3';
4
4
 
5
5
  import { logger } from '../utils/logger.js';
6
- import { datesInRange, hoursInRange, buildPath } from '../utils/file-path-builder/file-path-builder.js';
6
+ import { datesInRange, hoursInRange, monthsInRange, buildPath } from '../utils/file-path-builder/file-path-builder.js';
7
7
  import { regexFromPattern } from '../utils/date-regex/date-regex.js';
8
8
  import { buildIbmIamClient } from './auth/ibm-iam-client.js';
9
9
 
@@ -139,12 +139,14 @@ export default class S3 {
139
139
  * @returns {(from:Date, to:Date, filePattern:string) => string[]} A function that creates a list of prefixes
140
140
  */
141
141
  prefixStrategy(from, to, filePattern) {
142
- const hasDateToken = filePattern.match(/\{(yyyy|MM|dd|hh|mm)\}/g);
143
- const hasGlob = filePattern.match(/\*/g);
142
+ const hasDayToken = /\{(dd|hh|mm)\}/.test(filePattern);
143
+ const hasMonthToken = /\{(yyyy|MM)\}/.test(filePattern);
144
+ const hasGlob = filePattern.includes('*');
144
145
  const hourDiff = (new Date(to) - new Date(from)) / 1000 / 60 / 60;
145
146
 
146
- if (hasDateToken && hourDiff < 24) return this.prefixHours;
147
- if (hasDateToken) return this.prefixDays;
147
+ if (hasDayToken && hourDiff < 24) return this.prefixHours;
148
+ if (hasDayToken) return this.prefixDays;
149
+ if (hasMonthToken) return this.prefixMonths;
148
150
  if (hasGlob) return this.prefixGlob;
149
151
  return (_from, _to, pattern) => [pattern];
150
152
  }
@@ -182,6 +184,22 @@ export default class S3 {
182
184
  });
183
185
  }
184
186
 
187
+ /**
188
+ * Returns a list of prefixes based on a range of months, one per calendar month.
189
+ * Used for Hive-style paths with {yyyy}/{MM} tokens but no {dd}.
190
+ *
191
+ * @param {Date} from From date
192
+ * @param {Date} to To date
193
+ * @param {string} filePattern The file pattern to use
194
+ * @returns {string[]} The list of prefixes for filtering
195
+ */
196
+ prefixMonths(from, to, filePattern) {
197
+ const monthRange = monthsInRange(new Date(from), new Date(to));
198
+ const splitToken = filePattern.includes('{MM}') ? '{MM}' : '{yyyy}';
199
+ const [trimmed] = filePattern.split(splitToken);
200
+ return monthRange.map((date) => buildPath(`${trimmed}${splitToken}`, date));
201
+ }
202
+
185
203
  /**
186
204
  * Returns a single entry array with a file pattern trimmed to the first glob
187
205
  *
@@ -368,17 +386,20 @@ export default class S3 {
368
386
  */
369
387
  async objectToFile(key) {
370
388
  const file = `${this.mount}/${key}`;
389
+ const tmp = `${file}.${process.pid}.${Date.now()}.tmp`;
371
390
  try {
372
391
  const response = await this.s3.send(new GetObjectCommand({ Bucket: this.bucket, Key: key }));
373
392
  const chunks = [];
374
393
  for await (const chunk of response.Body) {
375
394
  chunks.push(chunk);
376
395
  }
377
- await fsPromise.writeFile(file, Buffer.concat(chunks));
396
+ await fsPromise.writeFile(tmp, Buffer.concat(chunks));
397
+ await fsPromise.rename(tmp, file);
378
398
  await this.processFile(file);
379
399
  return file;
380
400
  } catch (error) {
381
401
  logger.error(`${error.$metadata?.httpStatusCode ?? error.statusCode} - ${file}`);
402
+ await fsPromise.unlink(tmp).catch(() => {});
382
403
  throw error;
383
404
  }
384
405
  }
@@ -419,7 +440,7 @@ export default class S3 {
419
440
  }
420
441
  }
421
442
 
422
- function buildS3Client({ apiKey, accessKeyId, secretAccessKey, endpoint, region }) {
443
+ export function buildS3Client({ apiKey, accessKeyId, secretAccessKey, endpoint, region = 'us-east-1' }) {
423
444
  const config = { ...(endpoint && { endpoint }), region, forcePathStyle: true };
424
445
 
425
446
  if (apiKey) return buildIbmIamClient(config, apiKey);
package/src/utils/file-path-builder/file-path-builder.js CHANGED
@@ -28,6 +28,22 @@ export function datesInRange(from, to) {
28
28
  });
29
29
  }
30
30
 
31
/**
 * Expands a date range into one Date per calendar month it touches, counting
 * the months of both `from` and `to` (year and month are read in UTC).
 * Every entry comes from noonUtcForMonthOffset, i.e. the 1st of the month at
 * noon UTC, so the local getMonth()/getFullYear() still report the intended
 * month in any timezone.
 *
 * @param {Date} from The from Date object
 * @param {Date} to The to Date object
 * @returns {Date[]} An array of dates, one per month within the range;
 * empty when `to` falls in a month before `from`
 */
export function monthsInRange(from, to) {
  const firstYear = from.getUTCFullYear();
  const firstMonth = from.getUTCMonth();
  const yearSpan = to.getUTCFullYear() - firstYear;
  const monthSpan = to.getUTCMonth() - firstMonth;
  // +1 because the month of `to` is part of the range as well.
  const total = yearSpan * 12 + monthSpan + 1;

  const months = [];
  for (let offset = 0; offset < total; offset += 1) {
    months.push(noonUtcForMonthOffset(firstYear, firstMonth, offset));
  }
  return months;
}
46
+
31
47
  /**
32
48
  * Given a date range returns an array of date objects
33
49
  *
@@ -53,3 +69,9 @@ export function zeroDateMins(date) {
53
69
  zeroedDate.setMinutes(0, 0);
54
70
  return zeroedDate;
55
71
  }
72
+
73
/**
 * Builds the Date for the first day of the month that lies `offset` months
 * away from a starting year/month, pinned to 12:00:00 UTC.
 *
 * @param {number} startYear Full year of the starting month
 * @param {number} startMonth Zero-based month index (0 = January) of the starting month
 * @param {number} offset Whole number of months to move; may be negative
 * @returns {Date} The 1st of the target month at 12:00:00 UTC
 */
function noonUtcForMonthOffset(startYear, startMonth, offset) {
  const totalMonths = startMonth + offset;
  const year = startYear + Math.floor(totalMonths / 12);
  // `%` keeps the sign of the dividend, so normalise the month into 0..11.
  // A negative month passed to Date.UTC would roll the year back a second
  // time on top of the Math.floor adjustment above.
  const month = ((totalMonths % 12) + 12) % 12;
  return new Date(Date.UTC(year, month, 1, 12, 0, 0));
}