@gscdump/engine 0.11.2 → 0.11.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,4 +1,6 @@
1
1
  var AnalyzerCapabilityError = class extends Error {
2
+ tool;
3
+ missing;
2
4
  constructor(tool, missing) {
3
5
  super(`analyzer "${tool}" requires capabilities [${missing.join(", ")}] not provided by source`);
4
6
  this.tool = tool;
package/dist/entities.mjs CHANGED
@@ -26,6 +26,88 @@ function hashUrl(url) {
26
26
  }
27
27
  return (hi >>> 0).toString(16).padStart(8, "0") + (lo >>> 0).toString(16).padStart(8, "0");
28
28
  }
29
+ const INSPECTION_PARQUET_COLUMNS = [
30
+ {
31
+ name: "urlHash",
32
+ type: "VARCHAR",
33
+ nullable: false
34
+ },
35
+ {
36
+ name: "url",
37
+ type: "VARCHAR",
38
+ nullable: false
39
+ },
40
+ {
41
+ name: "inspectedAt",
42
+ type: "VARCHAR",
43
+ nullable: false
44
+ },
45
+ {
46
+ name: "indexStatus",
47
+ type: "VARCHAR",
48
+ nullable: true
49
+ },
50
+ {
51
+ name: "lastCrawlTime",
52
+ type: "VARCHAR",
53
+ nullable: true
54
+ },
55
+ {
56
+ name: "googleCanonical",
57
+ type: "VARCHAR",
58
+ nullable: true
59
+ },
60
+ {
61
+ name: "userCanonical",
62
+ type: "VARCHAR",
63
+ nullable: true
64
+ },
65
+ {
66
+ name: "coverageState",
67
+ type: "VARCHAR",
68
+ nullable: true
69
+ },
70
+ {
71
+ name: "robotsTxtState",
72
+ type: "VARCHAR",
73
+ nullable: true
74
+ },
75
+ {
76
+ name: "indexingState",
77
+ type: "VARCHAR",
78
+ nullable: true
79
+ },
80
+ {
81
+ name: "pageFetchState",
82
+ type: "VARCHAR",
83
+ nullable: true
84
+ },
85
+ {
86
+ name: "mobileUsabilityVerdict",
87
+ type: "VARCHAR",
88
+ nullable: true
89
+ },
90
+ {
91
+ name: "richResultsVerdict",
92
+ type: "VARCHAR",
93
+ nullable: true
94
+ },
95
+ {
96
+ name: "scheduleNextAt",
97
+ type: "BIGINT",
98
+ nullable: true
99
+ },
100
+ {
101
+ name: "scheduleConsecutiveUnchanged",
102
+ type: "INTEGER",
103
+ nullable: true
104
+ },
105
+ {
106
+ name: "schedulePolicyVersion",
107
+ type: "INTEGER",
108
+ nullable: true
109
+ }
110
+ ];
29
111
  function createInspectionStore(opts) {
30
112
  const hash = opts.hash ?? hashUrl;
31
113
  const ds = opts.dataSource;
@@ -117,88 +199,6 @@ function createInspectionStore(opts) {
117
199
  }
118
200
  };
119
201
  }
120
- const INSPECTION_PARQUET_COLUMNS = [
121
- {
122
- name: "urlHash",
123
- type: "VARCHAR",
124
- nullable: false
125
- },
126
- {
127
- name: "url",
128
- type: "VARCHAR",
129
- nullable: false
130
- },
131
- {
132
- name: "inspectedAt",
133
- type: "VARCHAR",
134
- nullable: false
135
- },
136
- {
137
- name: "indexStatus",
138
- type: "VARCHAR",
139
- nullable: true
140
- },
141
- {
142
- name: "lastCrawlTime",
143
- type: "VARCHAR",
144
- nullable: true
145
- },
146
- {
147
- name: "googleCanonical",
148
- type: "VARCHAR",
149
- nullable: true
150
- },
151
- {
152
- name: "userCanonical",
153
- type: "VARCHAR",
154
- nullable: true
155
- },
156
- {
157
- name: "coverageState",
158
- type: "VARCHAR",
159
- nullable: true
160
- },
161
- {
162
- name: "robotsTxtState",
163
- type: "VARCHAR",
164
- nullable: true
165
- },
166
- {
167
- name: "indexingState",
168
- type: "VARCHAR",
169
- nullable: true
170
- },
171
- {
172
- name: "pageFetchState",
173
- type: "VARCHAR",
174
- nullable: true
175
- },
176
- {
177
- name: "mobileUsabilityVerdict",
178
- type: "VARCHAR",
179
- nullable: true
180
- },
181
- {
182
- name: "richResultsVerdict",
183
- type: "VARCHAR",
184
- nullable: true
185
- },
186
- {
187
- name: "scheduleNextAt",
188
- type: "BIGINT",
189
- nullable: true
190
- },
191
- {
192
- name: "scheduleConsecutiveUnchanged",
193
- type: "INTEGER",
194
- nullable: true
195
- },
196
- {
197
- name: "schedulePolicyVersion",
198
- type: "INTEGER",
199
- nullable: true
200
- }
201
- ];
202
202
  function sitemapIndexKey(ctx) {
203
203
  return ctx.siteId ? `u_${ctx.userId}/${ctx.siteId}/entities/sitemaps/index.json` : `u_${ctx.userId}/entities/sitemaps/index.json`;
204
204
  }
@@ -1,4 +1,4 @@
1
- import { a as DataSource } from "./_chunks/storage.mjs";
1
+ import { a as DataSource, c as FileSetRef } from "./_chunks/storage.mjs";
2
2
  import { t as ColumnDef } from "./_chunks/schema.mjs";
3
3
  import { TenantCtx } from "gscdump/contracts";
4
4
  import * as _$_gscdump_engine_contracts0 from "@gscdump/engine/contracts";
@@ -13,10 +13,7 @@ interface RollupCtx extends TenantCtx {
13
13
  interface RollupEngine {
14
14
  runSQL: (opts: {
15
15
  ctx: TenantCtx;
16
- fileSets: Record<string, {
17
- table: _$_gscdump_engine_contracts0.TableName;
18
- partitions?: string[];
19
- }>;
16
+ fileSets: Record<string, FileSetRef>;
20
17
  table?: _$_gscdump_engine_contracts0.TableName;
21
18
  sql: string;
22
19
  params?: unknown[];
@@ -2,6 +2,7 @@ import { h as resolveToSQL, n as pgResolverAdapter, s as assertDimensionsSupport
2
2
  import { i as getFilterDimensions } from "../_chunks/resolver.mjs";
3
3
  import { n as runAnalyzerFromSource } from "../_chunks/dispatch.mjs";
4
4
  var AttachedTableMissingError = class extends Error {
5
+ missing;
5
6
  constructor(missing) {
6
7
  super(`attached-table source: required table(s) not attached: ${missing.join(", ")}`);
7
8
  this.missing = missing;
package/package.json CHANGED
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "name": "@gscdump/engine",
3
3
  "type": "module",
4
- "version": "0.11.2",
4
+ "version": "0.11.3",
5
5
  "description": "Append-only Parquet/DuckDB storage engine + planner + adapters for the gscdump pipeline. Node + edge runtimes; opt-in heavy peers.",
6
6
  "author": {
7
7
  "name": "Harlan Wilton",
@@ -152,8 +152,8 @@
152
152
  },
153
153
  "peerDependencies": {
154
154
  "@duckdb/duckdb-wasm": "^1.32.0",
155
- "hyparquet": "^1.25.6",
156
- "hyparquet-writer": "^0.14.0"
155
+ "hyparquet": "^1.25.8",
156
+ "hyparquet-writer": "^0.15.1"
157
157
  },
158
158
  "peerDependenciesMeta": {
159
159
  "@duckdb/duckdb-wasm": {
@@ -169,14 +169,14 @@
169
169
  "dependencies": {
170
170
  "drizzle-orm": "^0.45.2",
171
171
  "proper-lockfile": "^4.1.2",
172
- "gscdump": "0.11.2"
172
+ "gscdump": "0.11.3"
173
173
  },
174
174
  "devDependencies": {
175
175
  "@duckdb/duckdb-wasm": "^1.32.0",
176
176
  "@types/proper-lockfile": "^4.1.4",
177
177
  "aws4fetch": "^1.0.20",
178
- "hyparquet": "^1.25.6",
179
- "hyparquet-writer": "^0.14.0",
178
+ "hyparquet": "^1.25.8",
179
+ "hyparquet-writer": "^0.15.1",
180
180
  "tsx": "^4.21.0",
181
181
  "vitest": "^4.1.5"
182
182
  },