@gscdump/engine 0.11.1 → 0.11.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/_chunks/dispatch.mjs +2 -0
- package/dist/entities.mjs +82 -82
- package/dist/rollups.d.mts +2 -5
- package/dist/rollups.mjs +9 -9
- package/dist/source/index.mjs +1 -0
- package/package.json +6 -6
package/dist/entities.mjs
CHANGED
|
@@ -26,6 +26,88 @@ function hashUrl(url) {
|
|
|
26
26
|
}
|
|
27
27
|
return (hi >>> 0).toString(16).padStart(8, "0") + (lo >>> 0).toString(16).padStart(8, "0");
|
|
28
28
|
}
|
|
29
|
+
const INSPECTION_PARQUET_COLUMNS = [
|
|
30
|
+
{
|
|
31
|
+
name: "urlHash",
|
|
32
|
+
type: "VARCHAR",
|
|
33
|
+
nullable: false
|
|
34
|
+
},
|
|
35
|
+
{
|
|
36
|
+
name: "url",
|
|
37
|
+
type: "VARCHAR",
|
|
38
|
+
nullable: false
|
|
39
|
+
},
|
|
40
|
+
{
|
|
41
|
+
name: "inspectedAt",
|
|
42
|
+
type: "VARCHAR",
|
|
43
|
+
nullable: false
|
|
44
|
+
},
|
|
45
|
+
{
|
|
46
|
+
name: "indexStatus",
|
|
47
|
+
type: "VARCHAR",
|
|
48
|
+
nullable: true
|
|
49
|
+
},
|
|
50
|
+
{
|
|
51
|
+
name: "lastCrawlTime",
|
|
52
|
+
type: "VARCHAR",
|
|
53
|
+
nullable: true
|
|
54
|
+
},
|
|
55
|
+
{
|
|
56
|
+
name: "googleCanonical",
|
|
57
|
+
type: "VARCHAR",
|
|
58
|
+
nullable: true
|
|
59
|
+
},
|
|
60
|
+
{
|
|
61
|
+
name: "userCanonical",
|
|
62
|
+
type: "VARCHAR",
|
|
63
|
+
nullable: true
|
|
64
|
+
},
|
|
65
|
+
{
|
|
66
|
+
name: "coverageState",
|
|
67
|
+
type: "VARCHAR",
|
|
68
|
+
nullable: true
|
|
69
|
+
},
|
|
70
|
+
{
|
|
71
|
+
name: "robotsTxtState",
|
|
72
|
+
type: "VARCHAR",
|
|
73
|
+
nullable: true
|
|
74
|
+
},
|
|
75
|
+
{
|
|
76
|
+
name: "indexingState",
|
|
77
|
+
type: "VARCHAR",
|
|
78
|
+
nullable: true
|
|
79
|
+
},
|
|
80
|
+
{
|
|
81
|
+
name: "pageFetchState",
|
|
82
|
+
type: "VARCHAR",
|
|
83
|
+
nullable: true
|
|
84
|
+
},
|
|
85
|
+
{
|
|
86
|
+
name: "mobileUsabilityVerdict",
|
|
87
|
+
type: "VARCHAR",
|
|
88
|
+
nullable: true
|
|
89
|
+
},
|
|
90
|
+
{
|
|
91
|
+
name: "richResultsVerdict",
|
|
92
|
+
type: "VARCHAR",
|
|
93
|
+
nullable: true
|
|
94
|
+
},
|
|
95
|
+
{
|
|
96
|
+
name: "scheduleNextAt",
|
|
97
|
+
type: "BIGINT",
|
|
98
|
+
nullable: true
|
|
99
|
+
},
|
|
100
|
+
{
|
|
101
|
+
name: "scheduleConsecutiveUnchanged",
|
|
102
|
+
type: "INTEGER",
|
|
103
|
+
nullable: true
|
|
104
|
+
},
|
|
105
|
+
{
|
|
106
|
+
name: "schedulePolicyVersion",
|
|
107
|
+
type: "INTEGER",
|
|
108
|
+
nullable: true
|
|
109
|
+
}
|
|
110
|
+
];
|
|
29
111
|
function createInspectionStore(opts) {
|
|
30
112
|
const hash = opts.hash ?? hashUrl;
|
|
31
113
|
const ds = opts.dataSource;
|
|
@@ -117,88 +199,6 @@ function createInspectionStore(opts) {
|
|
|
117
199
|
}
|
|
118
200
|
};
|
|
119
201
|
}
|
|
120
|
-
const INSPECTION_PARQUET_COLUMNS = [
|
|
121
|
-
{
|
|
122
|
-
name: "urlHash",
|
|
123
|
-
type: "VARCHAR",
|
|
124
|
-
nullable: false
|
|
125
|
-
},
|
|
126
|
-
{
|
|
127
|
-
name: "url",
|
|
128
|
-
type: "VARCHAR",
|
|
129
|
-
nullable: false
|
|
130
|
-
},
|
|
131
|
-
{
|
|
132
|
-
name: "inspectedAt",
|
|
133
|
-
type: "VARCHAR",
|
|
134
|
-
nullable: false
|
|
135
|
-
},
|
|
136
|
-
{
|
|
137
|
-
name: "indexStatus",
|
|
138
|
-
type: "VARCHAR",
|
|
139
|
-
nullable: true
|
|
140
|
-
},
|
|
141
|
-
{
|
|
142
|
-
name: "lastCrawlTime",
|
|
143
|
-
type: "VARCHAR",
|
|
144
|
-
nullable: true
|
|
145
|
-
},
|
|
146
|
-
{
|
|
147
|
-
name: "googleCanonical",
|
|
148
|
-
type: "VARCHAR",
|
|
149
|
-
nullable: true
|
|
150
|
-
},
|
|
151
|
-
{
|
|
152
|
-
name: "userCanonical",
|
|
153
|
-
type: "VARCHAR",
|
|
154
|
-
nullable: true
|
|
155
|
-
},
|
|
156
|
-
{
|
|
157
|
-
name: "coverageState",
|
|
158
|
-
type: "VARCHAR",
|
|
159
|
-
nullable: true
|
|
160
|
-
},
|
|
161
|
-
{
|
|
162
|
-
name: "robotsTxtState",
|
|
163
|
-
type: "VARCHAR",
|
|
164
|
-
nullable: true
|
|
165
|
-
},
|
|
166
|
-
{
|
|
167
|
-
name: "indexingState",
|
|
168
|
-
type: "VARCHAR",
|
|
169
|
-
nullable: true
|
|
170
|
-
},
|
|
171
|
-
{
|
|
172
|
-
name: "pageFetchState",
|
|
173
|
-
type: "VARCHAR",
|
|
174
|
-
nullable: true
|
|
175
|
-
},
|
|
176
|
-
{
|
|
177
|
-
name: "mobileUsabilityVerdict",
|
|
178
|
-
type: "VARCHAR",
|
|
179
|
-
nullable: true
|
|
180
|
-
},
|
|
181
|
-
{
|
|
182
|
-
name: "richResultsVerdict",
|
|
183
|
-
type: "VARCHAR",
|
|
184
|
-
nullable: true
|
|
185
|
-
},
|
|
186
|
-
{
|
|
187
|
-
name: "scheduleNextAt",
|
|
188
|
-
type: "BIGINT",
|
|
189
|
-
nullable: true
|
|
190
|
-
},
|
|
191
|
-
{
|
|
192
|
-
name: "scheduleConsecutiveUnchanged",
|
|
193
|
-
type: "INTEGER",
|
|
194
|
-
nullable: true
|
|
195
|
-
},
|
|
196
|
-
{
|
|
197
|
-
name: "schedulePolicyVersion",
|
|
198
|
-
type: "INTEGER",
|
|
199
|
-
nullable: true
|
|
200
|
-
}
|
|
201
|
-
];
|
|
202
202
|
function sitemapIndexKey(ctx) {
|
|
203
203
|
return ctx.siteId ? `u_${ctx.userId}/${ctx.siteId}/entities/sitemaps/index.json` : `u_${ctx.userId}/entities/sitemaps/index.json`;
|
|
204
204
|
}
|
package/dist/rollups.d.mts
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import { a as DataSource } from "./_chunks/storage.mjs";
|
|
1
|
+
import { a as DataSource, c as FileSetRef } from "./_chunks/storage.mjs";
|
|
2
2
|
import { t as ColumnDef } from "./_chunks/schema.mjs";
|
|
3
3
|
import { TenantCtx } from "gscdump/contracts";
|
|
4
4
|
import * as _$_gscdump_engine_contracts0 from "@gscdump/engine/contracts";
|
|
@@ -13,10 +13,7 @@ interface RollupCtx extends TenantCtx {
|
|
|
13
13
|
interface RollupEngine {
|
|
14
14
|
runSQL: (opts: {
|
|
15
15
|
ctx: TenantCtx;
|
|
16
|
-
fileSets: Record<string, {
|
|
17
|
-
table: _$_gscdump_engine_contracts0.TableName;
|
|
18
|
-
partitions?: string[];
|
|
19
|
-
}>;
|
|
16
|
+
fileSets: Record<string, FileSetRef>;
|
|
20
17
|
table?: _$_gscdump_engine_contracts0.TableName;
|
|
21
18
|
sql: string;
|
|
22
19
|
params?: unknown[];
|
package/dist/rollups.mjs
CHANGED
|
@@ -343,17 +343,17 @@ const indexingHealthRollup = {
|
|
|
343
343
|
if (!await dataSource.head?.(key)) return { days: [] };
|
|
344
344
|
const sql = `
|
|
345
345
|
SELECT
|
|
346
|
-
substr(inspectedAt, 1, 10) AS date,
|
|
346
|
+
substr(CAST(inspectedAt AS VARCHAR), 1, 10) AS date,
|
|
347
347
|
COUNT(*)::BIGINT AS total_urls,
|
|
348
|
-
SUM(CASE WHEN indexStatus = 'PASS' THEN 1 ELSE 0 END)::BIGINT AS indexed_count,
|
|
349
|
-
SUM(CASE WHEN pageFetchState = 'SOFT_404' THEN 1 ELSE 0 END)::BIGINT AS soft_404,
|
|
350
|
-
SUM(CASE WHEN pageFetchState = 'REDIRECT_ERROR' THEN 1 ELSE 0 END)::BIGINT AS redirect,
|
|
351
|
-
SUM(CASE WHEN pageFetchState = 'NOT_FOUND' THEN 1 ELSE 0 END)::BIGINT AS not_found,
|
|
352
|
-
SUM(CASE WHEN mobileUsabilityVerdict = 'PASS' THEN 1 ELSE 0 END)::BIGINT AS mobile_passes,
|
|
353
|
-
SUM(CASE WHEN richResultsVerdict = 'PASS' THEN 1 ELSE 0 END)::BIGINT AS rich_results_passes,
|
|
354
|
-
SUM(CASE WHEN userCanonical IS NOT NULL AND googleCanonical IS NOT NULL AND userCanonical <> googleCanonical THEN 1 ELSE 0 END)::BIGINT AS canonical_mismatches
|
|
348
|
+
SUM(CASE WHEN CAST(indexStatus AS VARCHAR) = 'PASS' THEN 1 ELSE 0 END)::BIGINT AS indexed_count,
|
|
349
|
+
SUM(CASE WHEN CAST(pageFetchState AS VARCHAR) = 'SOFT_404' THEN 1 ELSE 0 END)::BIGINT AS soft_404,
|
|
350
|
+
SUM(CASE WHEN CAST(pageFetchState AS VARCHAR) = 'REDIRECT_ERROR' THEN 1 ELSE 0 END)::BIGINT AS redirect,
|
|
351
|
+
SUM(CASE WHEN CAST(pageFetchState AS VARCHAR) = 'NOT_FOUND' THEN 1 ELSE 0 END)::BIGINT AS not_found,
|
|
352
|
+
SUM(CASE WHEN CAST(mobileUsabilityVerdict AS VARCHAR) = 'PASS' THEN 1 ELSE 0 END)::BIGINT AS mobile_passes,
|
|
353
|
+
SUM(CASE WHEN CAST(richResultsVerdict AS VARCHAR) = 'PASS' THEN 1 ELSE 0 END)::BIGINT AS rich_results_passes,
|
|
354
|
+
SUM(CASE WHEN userCanonical IS NOT NULL AND googleCanonical IS NOT NULL AND CAST(userCanonical AS VARCHAR) <> CAST(googleCanonical AS VARCHAR) THEN 1 ELSE 0 END)::BIGINT AS canonical_mismatches
|
|
355
355
|
FROM read_parquet({{INSPECTIONS}}, union_by_name = true)
|
|
356
|
-
WHERE substr(inspectedAt, 1, 10) >= '${utcDateMinusDays(builtAt, 90)}'
|
|
356
|
+
WHERE substr(CAST(inspectedAt AS VARCHAR), 1, 10) >= '${utcDateMinusDays(builtAt, 90)}'
|
|
357
357
|
GROUP BY 1
|
|
358
358
|
ORDER BY 1
|
|
359
359
|
`;
|
package/dist/source/index.mjs
CHANGED
|
@@ -2,6 +2,7 @@ import { h as resolveToSQL, n as pgResolverAdapter, s as assertDimensionsSupport
|
|
|
2
2
|
import { i as getFilterDimensions } from "../_chunks/resolver.mjs";
|
|
3
3
|
import { n as runAnalyzerFromSource } from "../_chunks/dispatch.mjs";
|
|
4
4
|
var AttachedTableMissingError = class extends Error {
|
|
5
|
+
missing;
|
|
5
6
|
constructor(missing) {
|
|
6
7
|
super(`attached-table source: required table(s) not attached: ${missing.join(", ")}`);
|
|
7
8
|
this.missing = missing;
|
package/package.json
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@gscdump/engine",
|
|
3
3
|
"type": "module",
|
|
4
|
-
"version": "0.11.1",
|
|
4
|
+
"version": "0.11.3",
|
|
5
5
|
"description": "Append-only Parquet/DuckDB storage engine + planner + adapters for the gscdump pipeline. Node + edge runtimes; opt-in heavy peers.",
|
|
6
6
|
"author": {
|
|
7
7
|
"name": "Harlan Wilton",
|
|
@@ -152,8 +152,8 @@
|
|
|
152
152
|
},
|
|
153
153
|
"peerDependencies": {
|
|
154
154
|
"@duckdb/duckdb-wasm": "^1.32.0",
|
|
155
|
-
"hyparquet": "^1.25.
|
|
156
|
-
"hyparquet-writer": "^0.
|
|
155
|
+
"hyparquet": "^1.25.8",
|
|
156
|
+
"hyparquet-writer": "^0.15.1"
|
|
157
157
|
},
|
|
158
158
|
"peerDependenciesMeta": {
|
|
159
159
|
"@duckdb/duckdb-wasm": {
|
|
@@ -169,14 +169,14 @@
|
|
|
169
169
|
"dependencies": {
|
|
170
170
|
"drizzle-orm": "^0.45.2",
|
|
171
171
|
"proper-lockfile": "^4.1.2",
|
|
172
|
-
"gscdump": "0.11.1"
|
|
172
|
+
"gscdump": "0.11.3"
|
|
173
173
|
},
|
|
174
174
|
"devDependencies": {
|
|
175
175
|
"@duckdb/duckdb-wasm": "^1.32.0",
|
|
176
176
|
"@types/proper-lockfile": "^4.1.4",
|
|
177
177
|
"aws4fetch": "^1.0.20",
|
|
178
|
-
"hyparquet": "^1.25.
|
|
179
|
-
"hyparquet-writer": "^0.
|
|
178
|
+
"hyparquet": "^1.25.8",
|
|
179
|
+
"hyparquet-writer": "^0.15.1",
|
|
180
180
|
"tsx": "^4.21.0",
|
|
181
181
|
"vitest": "^4.1.5"
|
|
182
182
|
},
|