@gscdump/engine 0.4.0 → 0.6.1

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -1,8 +1,12 @@
1
- ## @gscdump/engine
1
+ # @gscdump/engine
2
2
 
3
- Append-only Parquet/DuckDB storage engine for the gscdump pipeline. Owns the storage runtime, planner, schema, and adapters that were previously bundled into `gscdump`.
3
+ [![npm version](https://img.shields.io/npm/v/@gscdump/engine?color=yellow)](https://npmjs.com/package/@gscdump/engine)
4
+ [![npm downloads](https://img.shields.io/npm/dm/@gscdump/engine?color=yellow)](https://npm.chart.dev/@gscdump/engine)
5
+ [![license](https://img.shields.io/github/license/harlan-zw/gscdump?color=yellow)](https://github.com/harlan-zw/gscdump/blob/main/LICENSE)
4
6
 
5
- Edge consumers stay on [`gscdump`](../gscdump). Anything that needs to read/write Parquet, run the DuckDB executor, or attach a snapshot lives here.
7
+ > Append-only Parquet/DuckDB storage engine + planner + adapters for the gscdump pipeline. Node + edge runtimes; opt-in heavy peers.
8
+
9
+ Owns the storage runtime, planner, schema, and adapters that were previously bundled into `gscdump`. Edge consumers stay on [`gscdump`](../gscdump); anything that needs to read/write Parquet, run the DuckDB executor, or attach a snapshot lives here.
6
10
 
7
11
  ## Install
8
12
 
@@ -26,11 +30,21 @@ Optional peers (install only what your runtime needs):
26
30
  | `@gscdump/engine/snapshot` | `SnapshotIndex` contract for hot/cold snapshot files. |
27
31
  | `@gscdump/engine/ingest` | GSC row → storage row helpers (`createRowAccumulator`, `transformGscRow`). |
28
32
  | `@gscdump/engine/sql` | SQL literal binding helpers (`bindLiterals`, `formatLiteral`). |
33
+ | `@gscdump/engine/sql-fragments` | Reusable SQL fragments shared across analyzers. |
34
+ | `@gscdump/engine/rollups` | Pre-aggregated rollup contracts + helpers. |
35
+ | `@gscdump/engine/entities` | Entity helpers (sites, tenants, scope keys). |
36
+ | `@gscdump/engine/resolver` | Dialect-neutral SQL composition: `ResolverAdapter`, `pgResolverAdapter`, `compilePg`/`compileSqlite`, `resolveToSQL`. |
37
+ | `@gscdump/engine/scope` | Multi-tenant scope predicates. |
38
+ | `@gscdump/engine/arrow` | Apache Arrow utilities for engine result conversion. |
29
39
  | `@gscdump/engine/node` | Node-only DuckDB handle. |
40
+ | `@gscdump/engine/node-harness` | Node test harness for engine integration tests. |
30
41
  | `@gscdump/engine/filesystem` | Node-only `DataSource` + `ManifestStore` adapters. |
31
42
  | `@gscdump/engine/http` | Read-only HTTP `DataSource` (signed URLs, Range requests). |
32
43
  | `@gscdump/engine/hyparquet` | Pure-JS `ParquetCodec`. |
33
44
  | `@gscdump/engine/r2` | Cloudflare R2 `DataSource` (structurally typed against `R2Bucket`). |
45
+ | `@gscdump/engine/r2-manifest` | R2-backed `ManifestStore` for hosted deployments. |
46
+ | `@gscdump/engine/inspection-sqlite-node` | Node SQLite adapter for URL-inspection cache. |
47
+ | `@gscdump/engine/inspection-sqlite-browser` | Browser (wa-sqlite) adapter for URL-inspection cache. |
34
48
 
35
49
  ## Stability
36
50
 
@@ -46,6 +60,9 @@ Optional peers (install only what your runtime needs):
46
60
 
47
61
  - [`gscdump`](../gscdump) — REST client + query builder (edge-safe peer dep).
48
62
  - [`@gscdump/analysis`](../analysis) — analyzers; consumes `StorageEngine` via `createEngine` factories.
63
+ - [`@gscdump/engine-duckdb-node`](../engine-duckdb-node) — Node DuckDB analyzer adapter.
64
+ - [`@gscdump/engine-wasm`](../engine-wasm) — DuckDB-WASM browser adapter.
65
+ - [`@gscdump/engine-sqlite`](../engine-sqlite) — SQLite / D1 adapter.
49
66
  - [`@gscdump/cli`](../cli) — CLI wrapping engine + analysis.
50
67
 
51
68
  ## License
@@ -1033,8 +1033,8 @@ function createSqlFragments(config) {
1033
1033
  function metricSql(metric, tableKey) {
1034
1034
  const t = schema[tableKey];
1035
1035
  switch (metric) {
1036
- case "clicks": return sql`SUM(${t.clicks})`;
1037
- case "impressions": return sql`SUM(${t.impressions})`;
1036
+ case "clicks": return sql`CAST(SUM(${t.clicks}) AS ${sql.raw(metricCast)})`;
1037
+ case "impressions": return sql`CAST(SUM(${t.impressions}) AS ${sql.raw(metricCast)})`;
1038
1038
  case "ctr": return sql`CAST(SUM(${t.clicks}) AS ${sql.raw(metricCast)}) / NULLIF(SUM(${t.impressions}), 0)`;
1039
1039
  case "position": return sql`SUM(${t.sum_position}) / NULLIF(SUM(${t.impressions}), 0) + 1`;
1040
1040
  }
package/dist/index.mjs CHANGED
@@ -1151,8 +1151,8 @@ function createSqlFragments(config) {
1151
1151
  function metricSql(metric, tableKey) {
1152
1152
  const t = schema[tableKey];
1153
1153
  switch (metric) {
1154
- case "clicks": return sql`SUM(${t.clicks})`;
1155
- case "impressions": return sql`SUM(${t.impressions})`;
1154
+ case "clicks": return sql`CAST(SUM(${t.clicks}) AS ${sql.raw(metricCast)})`;
1155
+ case "impressions": return sql`CAST(SUM(${t.impressions}) AS ${sql.raw(metricCast)})`;
1156
1156
  case "ctr": return sql`CAST(SUM(${t.clicks}) AS ${sql.raw(metricCast)}) / NULLIF(SUM(${t.impressions}), 0)`;
1157
1157
  case "position": return sql`SUM(${t.sum_position}) / NULLIF(SUM(${t.impressions}), 0) + 1`;
1158
1158
  }
@@ -166,8 +166,8 @@ function createSqlFragments(config) {
166
166
  function metricSql(metric, tableKey) {
167
167
  const t = schema[tableKey];
168
168
  switch (metric) {
169
- case "clicks": return sql`SUM(${t.clicks})`;
170
- case "impressions": return sql`SUM(${t.impressions})`;
169
+ case "clicks": return sql`CAST(SUM(${t.clicks}) AS ${sql.raw(metricCast)})`;
170
+ case "impressions": return sql`CAST(SUM(${t.impressions}) AS ${sql.raw(metricCast)})`;
171
171
  case "ctr": return sql`CAST(SUM(${t.clicks}) AS ${sql.raw(metricCast)}) / NULLIF(SUM(${t.impressions}), 0)`;
172
172
  case "position": return sql`SUM(${t.sum_position}) / NULLIF(SUM(${t.impressions}), 0) + 1`;
173
173
  }
package/package.json CHANGED
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "name": "@gscdump/engine",
3
3
  "type": "module",
4
- "version": "0.4.0",
4
+ "version": "0.6.1",
5
5
  "description": "Append-only Parquet/DuckDB storage engine + planner + adapters for the gscdump pipeline. Node + edge runtimes; opt-in heavy peers.",
6
6
  "author": {
7
7
  "name": "Harlan Wilton",
@@ -121,7 +121,7 @@
121
121
  "peerDependencies": {
122
122
  "@duckdb/duckdb-wasm": "^1.32.0",
123
123
  "better-sqlite3": "^12.9.0",
124
- "hyparquet": "^1.25.1",
124
+ "hyparquet": "^1.25.6",
125
125
  "hyparquet-writer": "^0.13.0",
126
126
  "wa-sqlite": "^1.0.0"
127
127
  },
@@ -145,16 +145,16 @@
145
145
  "dependencies": {
146
146
  "drizzle-orm": "^0.45.2",
147
147
  "proper-lockfile": "^4.1.2",
148
- "gscdump": "0.4.0"
148
+ "gscdump": "0.6.1"
149
149
  },
150
150
  "devDependencies": {
151
151
  "@duckdb/duckdb-wasm": "^1.32.0",
152
152
  "@types/proper-lockfile": "^4.1.4",
153
153
  "aws4fetch": "^1.0.20",
154
154
  "better-sqlite3": "^12.9.0",
155
- "hyparquet": "^1.25.1",
155
+ "hyparquet": "^1.25.6",
156
156
  "hyparquet-writer": "^0.13.0",
157
- "tsx": "^4.19.2",
157
+ "tsx": "^4.21.0",
158
158
  "vitest": "^4.1.5"
159
159
  },
160
160
  "scripts": {