s3-querier 1.3.0 → 1.3.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +72 -6
- package/package.json +1 -1
- package/src/mcp/server.js +14 -2
- package/src/mcp/tools/query/query.js +2 -6
- package/src/s3-querier.js +1 -0
package/README.md
CHANGED
|
@@ -141,16 +141,82 @@ Downloaded files are cached to `bucketsDir` on disk. Subsequent queries that ref
|
|
|
141
141
|
|
|
142
142
|
## Plugins
|
|
143
143
|
|
|
144
|
-
The `plugins` option accepts an array of plugin objects
|
|
144
|
+
The `plugins` option accepts an array of plugin objects. Plugins can hook into every phase of query execution — from S3 listing through download to SQL execution.
|
|
145
145
|
|
|
146
|
-
|
|
147
|
-
- `processFile(filePath)` — process each downloaded file (e.g. convert Avro to JSON)
|
|
146
|
+
### Plugin interface
|
|
148
147
|
|
|
149
|
-
|
|
148
|
+
| Method | Phase | Description |
|
|
149
|
+
| --- | --- | --- |
|
|
150
|
+
| `processQuery(context)` | pre-download | Transform the query context. Return the (possibly mutated) context. |
|
|
151
|
+
| `finalizeQuery(query, fileSettings, downloadedPaths, bucketsDir)` | post-download | Rewrite the SQL string after downloads complete. Return the final SQL. |
|
|
152
|
+
| `preListFiles({ prefix, bucket })` | S3 listing | Called before listing. Return a callback or nothing. |
|
|
153
|
+
| `preDownloadFiles({ bucket, from, to })` | S3 download | Called before downloading. Return a callback or nothing. |
|
|
154
|
+
| `preQuery({ sql, downloadedPaths, bucketsDir })` | DuckDB execution | Called before the query runs. Return a callback or nothing. |
|
|
155
|
+
| `postQuery({ result, downloadedPaths, bucketsDir })` | DuckDB execution | Called after the query completes. |
|
|
156
|
+
|
|
157
|
+
The `pre*` methods use a closure pattern: return a callback to receive the after-state for that phase. This lets you capture a start timestamp and receive the result in one place without shared mutable variables:
|
|
150
158
|
|
|
151
159
|
```js
|
|
152
|
-
|
|
153
|
-
|
|
160
|
+
preQuery({ sql }) {
|
|
161
|
+
const start = Date.now();
|
|
162
|
+
return ({ result }) => {
|
|
163
|
+
console.log(`Query took ${Date.now() - start}ms — ${result.length} rows`);
|
|
164
|
+
};
|
|
165
|
+
}
|
|
166
|
+
```
|
|
167
|
+
|
|
168
|
+
Post-phase callbacks are fire-and-forget — errors are logged and swallowed, so a failing plugin never rejects the caller's query.
|
|
169
|
+
|
|
170
|
+
### FSPurgePlugin
|
|
171
|
+
|
|
172
|
+
`FSPurgePlugin` sweeps the local file cache after each query, evicting files that haven't been accessed recently. Import it alongside the default export:
|
|
173
|
+
|
|
174
|
+
```js
|
|
175
|
+
import s3Querier, { FSPurgePlugin } from 's3-querier';
|
|
176
|
+
|
|
177
|
+
const purgePlugin = new FSPurgePlugin({
|
|
178
|
+
bucketsDir: '/tmp/s3-cache',
|
|
179
|
+
lastAccessTTLMinutes: 60, // evict files not accessed in the last hour (default: 60)
|
|
180
|
+
refreshIntervalMin: 60, // minimum minutes between sweeps (default: 60)
|
|
181
|
+
});
|
|
182
|
+
|
|
183
|
+
const results = await s3Querier({
|
|
184
|
+
// ...
|
|
185
|
+
plugins: [purgePlugin],
|
|
186
|
+
});
|
|
187
|
+
```
|
|
188
|
+
|
|
189
|
+
### StatsPlugin
|
|
190
|
+
|
|
191
|
+
`StatsPlugin` fires a single `onStats` callback for listing, download, and query events. Use it for logging, metrics, or custom dashboards:
|
|
192
|
+
|
|
193
|
+
```js
|
|
194
|
+
import s3Querier, { StatsPlugin } from 's3-querier';
|
|
195
|
+
|
|
196
|
+
const statsPlugin = new StatsPlugin((event) => console.log(event));
|
|
197
|
+
|
|
198
|
+
await s3Querier({ /* ... */ plugins: [statsPlugin] });
|
|
199
|
+
// { type: 'listing', prefix, bucket, fileCount, durationMs, cacheHit }
|
|
200
|
+
// { type: 'download', bucket, from, to, cacheHits, cacheMisses, enqueuedHits, bytesDownloaded, durationMs }
|
|
201
|
+
// { type: 'query', sql, durationMs, rowCount }
|
|
202
|
+
```
|
|
203
|
+
|
|
204
|
+
Each call fires a single event with a discriminated `type`:
|
|
205
|
+
|
|
206
|
+
| `type` | Fields |
|
|
207
|
+
| --- | --- |
|
|
208
|
+
| `'listing'` | `prefix`, `bucket`, `fileCount`, `durationMs`, `cacheHit` |
|
|
209
|
+
| `'download'` | `bucket`, `from`, `to`, `cacheHits`, `cacheMisses`, `enqueuedHits`, `bytesDownloaded`, `durationMs` |
|
|
210
|
+
| `'query'` | `sql`, `durationMs`, `rowCount` |
|
|
211
|
+
|
|
212
|
+
Events fire independently — one listing event per S3 prefix, one download event per bucket per query, one query event per execution. Aggregation is left to the caller.
|
|
213
|
+
|
|
214
|
+
### AvroPlugin
|
|
215
|
+
|
|
216
|
+
The built-in Avro plugin converts Avro files to JSON before querying:
|
|
217
|
+
|
|
218
|
+
```js
|
|
219
|
+
import s3Querier, { AvroPlugin } from 's3-querier';
|
|
154
220
|
|
|
155
221
|
const results = await s3Querier({
|
|
156
222
|
// ...
|
package/package.json
CHANGED
package/src/mcp/server.js
CHANGED
|
@@ -1,5 +1,17 @@
|
|
|
1
1
|
#!/usr/bin/env node
|
|
2
|
-
|
|
3
2
|
import { S3QuerierMCP } from './s3querier-mcp.js';
|
|
3
|
+
import { FSPurgePlugin, StatsPlugin } from '../../s3-querier.js';
|
|
4
|
+
import { logger } from '../utils/logger.js';
|
|
5
|
+
|
|
6
|
+
const { S3_BUCKETS_DIR = '/tmp/s3-querier', S3_PURGE_CACHE = 'true', S3_PURGE_TTL_MINUTES = '60' } = process.env;
|
|
7
|
+
|
|
8
|
+
await new S3QuerierMCP({ plugins: buildDefaultPlugins() }).start();
|
|
4
9
|
|
|
5
|
-
|
|
10
|
+
function buildDefaultPlugins() {
|
|
11
|
+
const plugins = [];
|
|
12
|
+
if (S3_PURGE_CACHE !== 'false') {
|
|
13
|
+
plugins.push(new FSPurgePlugin({ bucketsDir: S3_BUCKETS_DIR, lastAccessTTLMinutes: Number(S3_PURGE_TTL_MINUTES) }));
|
|
14
|
+
}
|
|
15
|
+
plugins.push(new StatsPlugin((event) => logger.info(event)));
|
|
16
|
+
return plugins;
|
|
17
|
+
}
|
|
package/src/mcp/tools/query/query.js
CHANGED
|
@@ -2,8 +2,7 @@ import { readFileSync } from 'node:fs';
|
|
|
2
2
|
import { z } from 'zod';
|
|
3
3
|
|
|
4
4
|
import BaseTool from '../base-tool.js';
|
|
5
|
-
import s3Querier, { bigintReplacer, FSPurgePlugin, StatsPlugin } from '../../../s3-querier.js';
|
|
6
|
-
import { logger } from '../../../utils/logger.js';
|
|
5
|
+
import s3Querier, { bigintReplacer } from '../../../s3-querier.js';
|
|
7
6
|
|
|
8
7
|
const {
|
|
9
8
|
S3_ACCESS_KEY_ID,
|
|
@@ -14,9 +13,6 @@ const {
|
|
|
14
13
|
S3_BUCKETS_DIR = '/tmp/s3-querier',
|
|
15
14
|
} = process.env;
|
|
16
15
|
|
|
17
|
-
const purgePlugin = new FSPurgePlugin({ bucketsDir: S3_BUCKETS_DIR });
|
|
18
|
-
const statsPlugin = new StatsPlugin((event) => logger.info(event));
|
|
19
|
-
|
|
20
16
|
const sqlDescription = readFileSync(new URL('../../descriptions/sql-param.md', import.meta.url), 'utf8');
|
|
21
17
|
const toolDescription = readFileSync(new URL('../../descriptions/tool.md', import.meta.url), 'utf8');
|
|
22
18
|
|
|
@@ -63,7 +59,7 @@ export default class QueryTool extends BaseTool {
|
|
|
63
59
|
accessKeyId: S3_ACCESS_KEY_ID,
|
|
64
60
|
secretAccessKey: S3_SECRET_ACCESS_KEY,
|
|
65
61
|
format: 'jsonRecords',
|
|
66
|
-
plugins: [purgePlugin, statsPlugin],
|
|
62
|
+
plugins: this.config.plugins ?? [],
|
|
67
63
|
});
|
|
68
64
|
|
|
69
65
|
return {
|
package/src/s3-querier.js
CHANGED
|
@@ -5,6 +5,7 @@ export { bigintReplacer } from './utils/bigint-replacer.js';
|
|
|
5
5
|
import { query as execQuery } from './duck-db/index.js';
|
|
6
6
|
import QueryParserPlugin from './plugins/query-parser/query-parser.js';
|
|
7
7
|
import QueryFinalizerPlugin from './plugins/query-finalizer/query-finalizer.js';
|
|
8
|
+
export { default as AvroPlugin } from './plugins/avro/avro-plugin.js';
|
|
8
9
|
export { default as FSPurgePlugin } from './plugins/fs-purge/fs-purge-plugin.js';
|
|
9
10
|
export { default as StatsPlugin } from './plugins/stats/stats-plugin.js';
|
|
10
11
|
import { processQuery, runFinalizers, runPreQuery, runPostQuery } from './plugins/lifecycle.js';
|