s3-querier 1.0.2 → 1.0.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/plugins/query-finalizer/query-finalizer.js +49 -22
- package/src/s3/s3.js +1 -1
- package/src/s3-querier.js +41 -10
package/package.json
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import {
|
|
1
|
+
import { regexFromPattern } from '../../utils/date-regex/date-regex.js';
|
|
2
2
|
import {
|
|
3
3
|
removeFileSettingTokens,
|
|
4
4
|
removeDoubleFwdSlash,
|
|
@@ -9,33 +9,60 @@ export default class QueryFinalizerPlugin {
|
|
|
9
9
|
name = 'CorePlugin';
|
|
10
10
|
|
|
11
11
|
processQuery(context) {
|
|
12
|
-
|
|
13
|
-
const processedQuery = QueryFinalizerPlugin.prepareQuery(settings, bucketsDir, query);
|
|
14
|
-
return { ...context, query: processedQuery };
|
|
12
|
+
return context;
|
|
15
13
|
}
|
|
16
14
|
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
15
|
+
/**
|
|
16
|
+
* Replaces each SQL file reference with the exact local paths downloaded from S3.
|
|
17
|
+
* Called after all downloads complete so that DuckDB receives precise file paths
|
|
18
|
+
* rather than glob patterns that would scan the entire local cache.
|
|
19
|
+
*
|
|
20
|
+
* @param {string} rawQuery - SQL with original file references and date/location tokens
|
|
21
|
+
* @param {object[]} fileSettings - Pre-merge per-file settings from processQuery
|
|
22
|
+
* @param {string[]} downloadedPaths - Absolute local paths of all downloaded files
|
|
23
|
+
* @param {string} bucketsDir - Root directory where files are cached locally
|
|
24
|
+
* @returns {string} Finalized SQL ready for DuckDB execution
|
|
25
|
+
*/
|
|
26
|
+
finalizeQuery(rawQuery, fileSettings, downloadedPaths, bucketsDir) {
|
|
27
|
+
let prepared = fileSettings.reduce(
|
|
28
|
+
(query, setting) => applyFileSetting(query, setting, downloadedPaths, bucketsDir),
|
|
29
|
+
rawQuery,
|
|
30
|
+
);
|
|
25
31
|
prepared = removeFileSettingTokens(prepared);
|
|
26
|
-
prepared = removeFileDatePatterns(prepared);
|
|
27
32
|
prepared = removeCacheSettings(prepared);
|
|
28
33
|
prepared = removeDoubleFwdSlash(prepared);
|
|
29
|
-
|
|
30
34
|
return prepared;
|
|
31
35
|
}
|
|
36
|
+
}
|
|
32
37
|
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
38
|
+
/** Helpers */
|
|
39
|
+
|
|
40
|
+
function applyFileSetting(query, { sqlFileReference, file, bucket }, downloadedPaths, bucketsDir) {
|
|
41
|
+
const localDir = `${bucketsDir}/${bucket}/`;
|
|
42
|
+
const filePattern = regexFromPattern(file);
|
|
43
|
+
const matchingPaths = downloadedPaths.filter((localPath) => matchesPattern(localPath, localDir, filePattern));
|
|
44
|
+
const searchStr = sqlFileReference.replace(/\?cache=(true|false)/i, '');
|
|
45
|
+
|
|
46
|
+
if (matchingPaths.length === 0) throw new Error(`No files found for: ${file}`);
|
|
47
|
+
if (matchingPaths.length > 1) return replaceWithArray(query, searchStr, matchingPaths);
|
|
48
|
+
|
|
49
|
+
return query.replace(new RegExp(escapeForRegex(searchStr), 'gi'), matchingPaths[0]);
|
|
50
|
+
}
|
|
51
|
+
|
|
52
|
+
function matchesPattern(localPath, localDir, filePattern) {
|
|
53
|
+
return localPath.startsWith(localDir) && filePattern.test(localPath.slice(localDir.length));
|
|
54
|
+
}
|
|
55
|
+
|
|
56
|
+
function replaceWithArray(query, searchStr, paths) {
|
|
57
|
+
const arrayLiteral = `[${paths.map((path) => `'${path}'`).join(', ')}]`;
|
|
58
|
+
return query.replace(new RegExp(`['"]${escapeForRegex(searchStr)}['"]`, 'gi'), arrayLiteral);
|
|
59
|
+
}
|
|
60
|
+
|
|
61
|
+
function escapeForRegex(str) {
|
|
62
|
+
return str
|
|
63
|
+
.replace(/\*/g, '\\*')
|
|
64
|
+
.replace(/\./g, '\\.')
|
|
65
|
+
.replace(/\{/g, '\\{')
|
|
66
|
+
.replace(/\}/g, '\\}')
|
|
67
|
+
.replace(/\+/g, '\\+');
|
|
41
68
|
}
|
package/src/s3/s3.js
CHANGED
|
@@ -386,7 +386,7 @@ export default class S3 {
|
|
|
386
386
|
*/
|
|
387
387
|
async objectToFile(key) {
|
|
388
388
|
const file = `${this.mount}/${key}`;
|
|
389
|
-
const tmp = `${file}.${process.pid}.${Date.now()}.tmp`;
|
|
389
|
+
const tmp = `${file}.${process.pid}.${Date.now()}.${Math.random().toString(36).slice(2)}.tmp`;
|
|
390
390
|
try {
|
|
391
391
|
const response = await this.s3.send(new GetObjectCommand({ Bucket: this.bucket, Key: key }));
|
|
392
392
|
const chunks = [];
|
package/src/s3-querier.js
CHANGED
|
@@ -40,8 +40,16 @@ export default function s3Querier({
|
|
|
40
40
|
format,
|
|
41
41
|
}) {
|
|
42
42
|
const systemPlugins = [new QueryParserPlugin(), ...plugins, new QueryFinalizerPlugin()];
|
|
43
|
-
const
|
|
44
|
-
|
|
43
|
+
const {
|
|
44
|
+
query: rawQuery,
|
|
45
|
+
fileSettings,
|
|
46
|
+
settings: downloadSettings,
|
|
47
|
+
} = processQuery(systemPlugins, {
|
|
48
|
+
query,
|
|
49
|
+
endpoint: defaultEndpoint,
|
|
50
|
+
defaultBucket,
|
|
51
|
+
bucketsDir,
|
|
52
|
+
});
|
|
45
53
|
|
|
46
54
|
const downloadPromises = startDownloads({
|
|
47
55
|
apiKey,
|
|
@@ -58,7 +66,9 @@ export default function s3Querier({
|
|
|
58
66
|
results.forEach((result) => {
|
|
59
67
|
if (result.status === 'rejected') throw result.reason;
|
|
60
68
|
});
|
|
61
|
-
|
|
69
|
+
const downloadedPaths = results.flatMap((result) => result.value);
|
|
70
|
+
const finalQuery = runFinalizers({ plugins: systemPlugins, rawQuery, fileSettings, downloadedPaths, bucketsDir });
|
|
71
|
+
return execQuery(finalQuery, { format });
|
|
62
72
|
});
|
|
63
73
|
}
|
|
64
74
|
|
|
@@ -72,14 +82,35 @@ export default function s3Querier({
|
|
|
72
82
|
* @returns
|
|
73
83
|
*/
|
|
74
84
|
function processQuery(plugins = [], { query = '', endpoint, defaultBucket, bucketsDir }) {
|
|
75
|
-
const processedQuery = plugins.reduce(
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
85
|
+
const processedQuery = plugins.reduce((result, plugin) => plugin.processQuery(result), {
|
|
86
|
+
endpoint,
|
|
87
|
+
defaultBucket,
|
|
88
|
+
bucketsDir,
|
|
89
|
+
query,
|
|
90
|
+
settings: [],
|
|
91
|
+
});
|
|
92
|
+
const fileSettings = processedQuery.settings;
|
|
81
93
|
processedQuery.settings = mergeSettings(processedQuery.settings);
|
|
82
|
-
return processedQuery;
|
|
94
|
+
return { ...processedQuery, fileSettings };
|
|
95
|
+
}
|
|
96
|
+
|
|
97
|
+
/**
|
|
98
|
+
* Passes the raw query through each plugin's `finalizeQuery` lifecycle method,
|
|
99
|
+
* substituting exact downloaded paths in place of glob patterns.
|
|
100
|
+
*
|
|
101
|
+
* @param {object} params
|
|
102
|
+
* @param {object[]} params.plugins - Plugin instances to run finalizers on.
|
|
103
|
+
* @param {string} params.rawQuery - SQL with original file references and date/location tokens.
|
|
104
|
+
* @param {object[]} params.fileSettings - Pre-merge per-file settings from processQuery.
|
|
105
|
+
* @param {string[]} params.downloadedPaths - Absolute local paths of all downloaded files.
|
|
106
|
+
* @param {string} params.bucketsDir - Root directory where files are cached locally.
|
|
107
|
+
* @returns {string} Finalized SQL ready for DuckDB execution.
|
|
108
|
+
*/
|
|
109
|
+
function runFinalizers({ plugins, rawQuery, fileSettings, downloadedPaths, bucketsDir }) {
|
|
110
|
+
return plugins.reduce((query, plugin) => {
|
|
111
|
+
if (!plugin.finalizeQuery) return query;
|
|
112
|
+
return plugin.finalizeQuery(query, fileSettings, downloadedPaths, bucketsDir);
|
|
113
|
+
}, rawQuery);
|
|
83
114
|
}
|
|
84
115
|
|
|
85
116
|
/**
|