@gscdump/engine 0.11.2 → 0.11.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/_chunks/dispatch.mjs +2 -0
- package/dist/entities.mjs +82 -82
- package/dist/rollups.d.mts +2 -5
- package/dist/source/index.mjs +1 -0
- package/package.json +6 -6
package/dist/entities.mjs
CHANGED
|
@@ -26,6 +26,88 @@ function hashUrl(url) {
|
|
|
26
26
|
}
|
|
27
27
|
return (hi >>> 0).toString(16).padStart(8, "0") + (lo >>> 0).toString(16).padStart(8, "0");
|
|
28
28
|
}
|
|
29
|
+
const INSPECTION_PARQUET_COLUMNS = [
|
|
30
|
+
{
|
|
31
|
+
name: "urlHash",
|
|
32
|
+
type: "VARCHAR",
|
|
33
|
+
nullable: false
|
|
34
|
+
},
|
|
35
|
+
{
|
|
36
|
+
name: "url",
|
|
37
|
+
type: "VARCHAR",
|
|
38
|
+
nullable: false
|
|
39
|
+
},
|
|
40
|
+
{
|
|
41
|
+
name: "inspectedAt",
|
|
42
|
+
type: "VARCHAR",
|
|
43
|
+
nullable: false
|
|
44
|
+
},
|
|
45
|
+
{
|
|
46
|
+
name: "indexStatus",
|
|
47
|
+
type: "VARCHAR",
|
|
48
|
+
nullable: true
|
|
49
|
+
},
|
|
50
|
+
{
|
|
51
|
+
name: "lastCrawlTime",
|
|
52
|
+
type: "VARCHAR",
|
|
53
|
+
nullable: true
|
|
54
|
+
},
|
|
55
|
+
{
|
|
56
|
+
name: "googleCanonical",
|
|
57
|
+
type: "VARCHAR",
|
|
58
|
+
nullable: true
|
|
59
|
+
},
|
|
60
|
+
{
|
|
61
|
+
name: "userCanonical",
|
|
62
|
+
type: "VARCHAR",
|
|
63
|
+
nullable: true
|
|
64
|
+
},
|
|
65
|
+
{
|
|
66
|
+
name: "coverageState",
|
|
67
|
+
type: "VARCHAR",
|
|
68
|
+
nullable: true
|
|
69
|
+
},
|
|
70
|
+
{
|
|
71
|
+
name: "robotsTxtState",
|
|
72
|
+
type: "VARCHAR",
|
|
73
|
+
nullable: true
|
|
74
|
+
},
|
|
75
|
+
{
|
|
76
|
+
name: "indexingState",
|
|
77
|
+
type: "VARCHAR",
|
|
78
|
+
nullable: true
|
|
79
|
+
},
|
|
80
|
+
{
|
|
81
|
+
name: "pageFetchState",
|
|
82
|
+
type: "VARCHAR",
|
|
83
|
+
nullable: true
|
|
84
|
+
},
|
|
85
|
+
{
|
|
86
|
+
name: "mobileUsabilityVerdict",
|
|
87
|
+
type: "VARCHAR",
|
|
88
|
+
nullable: true
|
|
89
|
+
},
|
|
90
|
+
{
|
|
91
|
+
name: "richResultsVerdict",
|
|
92
|
+
type: "VARCHAR",
|
|
93
|
+
nullable: true
|
|
94
|
+
},
|
|
95
|
+
{
|
|
96
|
+
name: "scheduleNextAt",
|
|
97
|
+
type: "BIGINT",
|
|
98
|
+
nullable: true
|
|
99
|
+
},
|
|
100
|
+
{
|
|
101
|
+
name: "scheduleConsecutiveUnchanged",
|
|
102
|
+
type: "INTEGER",
|
|
103
|
+
nullable: true
|
|
104
|
+
},
|
|
105
|
+
{
|
|
106
|
+
name: "schedulePolicyVersion",
|
|
107
|
+
type: "INTEGER",
|
|
108
|
+
nullable: true
|
|
109
|
+
}
|
|
110
|
+
];
|
|
29
111
|
function createInspectionStore(opts) {
|
|
30
112
|
const hash = opts.hash ?? hashUrl;
|
|
31
113
|
const ds = opts.dataSource;
|
|
@@ -117,88 +199,6 @@ function createInspectionStore(opts) {
|
|
|
117
199
|
}
|
|
118
200
|
};
|
|
119
201
|
}
|
|
120
|
-
const INSPECTION_PARQUET_COLUMNS = [
|
|
121
|
-
{
|
|
122
|
-
name: "urlHash",
|
|
123
|
-
type: "VARCHAR",
|
|
124
|
-
nullable: false
|
|
125
|
-
},
|
|
126
|
-
{
|
|
127
|
-
name: "url",
|
|
128
|
-
type: "VARCHAR",
|
|
129
|
-
nullable: false
|
|
130
|
-
},
|
|
131
|
-
{
|
|
132
|
-
name: "inspectedAt",
|
|
133
|
-
type: "VARCHAR",
|
|
134
|
-
nullable: false
|
|
135
|
-
},
|
|
136
|
-
{
|
|
137
|
-
name: "indexStatus",
|
|
138
|
-
type: "VARCHAR",
|
|
139
|
-
nullable: true
|
|
140
|
-
},
|
|
141
|
-
{
|
|
142
|
-
name: "lastCrawlTime",
|
|
143
|
-
type: "VARCHAR",
|
|
144
|
-
nullable: true
|
|
145
|
-
},
|
|
146
|
-
{
|
|
147
|
-
name: "googleCanonical",
|
|
148
|
-
type: "VARCHAR",
|
|
149
|
-
nullable: true
|
|
150
|
-
},
|
|
151
|
-
{
|
|
152
|
-
name: "userCanonical",
|
|
153
|
-
type: "VARCHAR",
|
|
154
|
-
nullable: true
|
|
155
|
-
},
|
|
156
|
-
{
|
|
157
|
-
name: "coverageState",
|
|
158
|
-
type: "VARCHAR",
|
|
159
|
-
nullable: true
|
|
160
|
-
},
|
|
161
|
-
{
|
|
162
|
-
name: "robotsTxtState",
|
|
163
|
-
type: "VARCHAR",
|
|
164
|
-
nullable: true
|
|
165
|
-
},
|
|
166
|
-
{
|
|
167
|
-
name: "indexingState",
|
|
168
|
-
type: "VARCHAR",
|
|
169
|
-
nullable: true
|
|
170
|
-
},
|
|
171
|
-
{
|
|
172
|
-
name: "pageFetchState",
|
|
173
|
-
type: "VARCHAR",
|
|
174
|
-
nullable: true
|
|
175
|
-
},
|
|
176
|
-
{
|
|
177
|
-
name: "mobileUsabilityVerdict",
|
|
178
|
-
type: "VARCHAR",
|
|
179
|
-
nullable: true
|
|
180
|
-
},
|
|
181
|
-
{
|
|
182
|
-
name: "richResultsVerdict",
|
|
183
|
-
type: "VARCHAR",
|
|
184
|
-
nullable: true
|
|
185
|
-
},
|
|
186
|
-
{
|
|
187
|
-
name: "scheduleNextAt",
|
|
188
|
-
type: "BIGINT",
|
|
189
|
-
nullable: true
|
|
190
|
-
},
|
|
191
|
-
{
|
|
192
|
-
name: "scheduleConsecutiveUnchanged",
|
|
193
|
-
type: "INTEGER",
|
|
194
|
-
nullable: true
|
|
195
|
-
},
|
|
196
|
-
{
|
|
197
|
-
name: "schedulePolicyVersion",
|
|
198
|
-
type: "INTEGER",
|
|
199
|
-
nullable: true
|
|
200
|
-
}
|
|
201
|
-
];
|
|
202
202
|
function sitemapIndexKey(ctx) {
|
|
203
203
|
return ctx.siteId ? `u_${ctx.userId}/${ctx.siteId}/entities/sitemaps/index.json` : `u_${ctx.userId}/entities/sitemaps/index.json`;
|
|
204
204
|
}
|
package/dist/rollups.d.mts
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import { a as DataSource } from "./_chunks/storage.mjs";
|
|
1
|
+
import { a as DataSource, c as FileSetRef } from "./_chunks/storage.mjs";
|
|
2
2
|
import { t as ColumnDef } from "./_chunks/schema.mjs";
|
|
3
3
|
import { TenantCtx } from "gscdump/contracts";
|
|
4
4
|
import * as _$_gscdump_engine_contracts0 from "@gscdump/engine/contracts";
|
|
@@ -13,10 +13,7 @@ interface RollupCtx extends TenantCtx {
|
|
|
13
13
|
interface RollupEngine {
|
|
14
14
|
runSQL: (opts: {
|
|
15
15
|
ctx: TenantCtx;
|
|
16
|
-
fileSets: Record<string, {
|
|
17
|
-
table: _$_gscdump_engine_contracts0.TableName;
|
|
18
|
-
partitions?: string[];
|
|
19
|
-
}>;
|
|
16
|
+
fileSets: Record<string, FileSetRef>;
|
|
20
17
|
table?: _$_gscdump_engine_contracts0.TableName;
|
|
21
18
|
sql: string;
|
|
22
19
|
params?: unknown[];
|
package/dist/source/index.mjs
CHANGED
|
@@ -2,6 +2,7 @@ import { h as resolveToSQL, n as pgResolverAdapter, s as assertDimensionsSupport
|
|
|
2
2
|
import { i as getFilterDimensions } from "../_chunks/resolver.mjs";
|
|
3
3
|
import { n as runAnalyzerFromSource } from "../_chunks/dispatch.mjs";
|
|
4
4
|
var AttachedTableMissingError = class extends Error {
|
|
5
|
+
missing;
|
|
5
6
|
constructor(missing) {
|
|
6
7
|
super(`attached-table source: required table(s) not attached: ${missing.join(", ")}`);
|
|
7
8
|
this.missing = missing;
|
package/package.json
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@gscdump/engine",
|
|
3
3
|
"type": "module",
|
|
4
|
-
"version": "0.11.2",
|
|
4
|
+
"version": "0.11.4",
|
|
5
5
|
"description": "Append-only Parquet/DuckDB storage engine + planner + adapters for the gscdump pipeline. Node + edge runtimes; opt-in heavy peers.",
|
|
6
6
|
"author": {
|
|
7
7
|
"name": "Harlan Wilton",
|
|
@@ -152,8 +152,8 @@
|
|
|
152
152
|
},
|
|
153
153
|
"peerDependencies": {
|
|
154
154
|
"@duckdb/duckdb-wasm": "^1.32.0",
|
|
155
|
-
"hyparquet": "^1.25.
|
|
156
|
-
"hyparquet-writer": "^0.
|
|
155
|
+
"hyparquet": "^1.25.8",
|
|
156
|
+
"hyparquet-writer": "^0.15.1"
|
|
157
157
|
},
|
|
158
158
|
"peerDependenciesMeta": {
|
|
159
159
|
"@duckdb/duckdb-wasm": {
|
|
@@ -169,14 +169,14 @@
|
|
|
169
169
|
"dependencies": {
|
|
170
170
|
"drizzle-orm": "^0.45.2",
|
|
171
171
|
"proper-lockfile": "^4.1.2",
|
|
172
|
-
"gscdump": "0.11.
|
|
172
|
+
"gscdump": "0.11.4"
|
|
173
173
|
},
|
|
174
174
|
"devDependencies": {
|
|
175
175
|
"@duckdb/duckdb-wasm": "^1.32.0",
|
|
176
176
|
"@types/proper-lockfile": "^4.1.4",
|
|
177
177
|
"aws4fetch": "^1.0.20",
|
|
178
|
-
"hyparquet": "^1.25.
|
|
179
|
-
"hyparquet-writer": "^0.
|
|
178
|
+
"hyparquet": "^1.25.8",
|
|
179
|
+
"hyparquet-writer": "^0.15.1",
|
|
180
180
|
"tsx": "^4.21.0",
|
|
181
181
|
"vitest": "^4.1.5"
|
|
182
182
|
},
|