@gscdump/analysis 0.18.2 → 0.18.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/analyzer/index.mjs +17 -5
- package/dist/default-registry.mjs +17 -5
- package/dist/index.d.mts +13 -0
- package/dist/index.mjs +35 -6
- package/dist/source/index.d.mts +13 -0
- package/dist/source/index.mjs +18 -1
- package/package.json +4 -4
package/dist/analyzer/index.mjs
CHANGED
|
@@ -891,7 +891,10 @@ const changePointAnalyzer = defineAnalyzer({
|
|
|
891
891
|
SELECT
|
|
892
892
|
query,
|
|
893
893
|
url AS page,
|
|
894
|
-
date
|
|
894
|
+
-- Normalize at the source CTE: union_by_name=true can coerce date to
|
|
895
|
+
-- VARCHAR across parquets with mixed schemas, which makes downstream
|
|
896
|
+
-- strftime(date, ...) binder-error.
|
|
897
|
+
CAST(date AS DATE) AS date,
|
|
895
898
|
${METRIC_EXPR.clicks} AS clicks,
|
|
896
899
|
${METRIC_EXPR.impressions} AS impressions,
|
|
897
900
|
${valueExpr} AS value
|
|
@@ -1579,7 +1582,10 @@ const ctrAnomalyAnalyzer = defineAnalyzer({
|
|
|
1579
1582
|
SELECT
|
|
1580
1583
|
query,
|
|
1581
1584
|
url AS page,
|
|
1582
|
-
date
|
|
1585
|
+
-- Normalize at the source CTE: union_by_name=true can coerce date to
|
|
1586
|
+
-- VARCHAR across parquets with mixed schemas, which makes downstream
|
|
1587
|
+
-- strftime(date, ...) binder-error.
|
|
1588
|
+
CAST(date AS DATE) AS date,
|
|
1583
1589
|
${METRIC_EXPR.clicks} AS day_clicks,
|
|
1584
1590
|
${METRIC_EXPR.impressions} AS day_impressions,
|
|
1585
1591
|
${METRIC_EXPR.ctr} AS day_ctr,
|
|
@@ -3533,7 +3539,10 @@ const positionVolatilityAnalyzer = defineAnalyzer({
|
|
|
3533
3539
|
SELECT
|
|
3534
3540
|
url AS page,
|
|
3535
3541
|
query,
|
|
3536
|
-
date
|
|
3542
|
+
-- Normalize at the source CTE: union_by_name=true can coerce date to
|
|
3543
|
+
-- VARCHAR across parquets with mixed schemas, which makes downstream
|
|
3544
|
+
-- strftime(date, ...) binder-error.
|
|
3545
|
+
CAST(date AS DATE) AS date,
|
|
3537
3546
|
${METRIC_EXPR.impressions} AS q_impressions,
|
|
3538
3547
|
${METRIC_EXPR.position} AS q_position
|
|
3539
3548
|
FROM read_parquet({{FILES}}, union_by_name = true)
|
|
@@ -3899,7 +3908,7 @@ const seasonalityAnalyzer = defineAnalyzer({
|
|
|
3899
3908
|
sql: `
|
|
3900
3909
|
WITH monthly AS (
|
|
3901
3910
|
SELECT
|
|
3902
|
-
strftime(date, '%Y-%m') AS month,
|
|
3911
|
+
strftime(CAST(date AS DATE), '%Y-%m') AS month,
|
|
3903
3912
|
CAST(SUM(${params.metric === "impressions" ? "impressions" : "clicks"}) AS DOUBLE) AS value
|
|
3904
3913
|
FROM read_parquet({{FILES}}, union_by_name = true)
|
|
3905
3914
|
WHERE date >= ? AND date <= ?
|
|
@@ -3998,7 +4007,10 @@ const stlDecomposeAnalyzer = defineAnalyzer({
|
|
|
3998
4007
|
SELECT
|
|
3999
4008
|
query,
|
|
4000
4009
|
url AS page,
|
|
4001
|
-
date
|
|
4010
|
+
-- Normalize at the source CTE: union_by_name=true can coerce date to
|
|
4011
|
+
-- VARCHAR across parquets with mixed schemas, which makes downstream
|
|
4012
|
+
-- strftime(date, ...) binder-error.
|
|
4013
|
+
CAST(date AS DATE) AS date,
|
|
4002
4014
|
${METRIC_EXPR.clicks} AS clicks,
|
|
4003
4015
|
${METRIC_EXPR.impressions} AS impressions,
|
|
4004
4016
|
CAST(SUM(${metric}) AS DOUBLE) AS observed
|
|
package/dist/default-registry.mjs
CHANGED
|
@@ -891,7 +891,10 @@ const changePointAnalyzer = defineAnalyzer({
|
|
|
891
891
|
SELECT
|
|
892
892
|
query,
|
|
893
893
|
url AS page,
|
|
894
|
-
date
|
|
894
|
+
-- Normalize at the source CTE: union_by_name=true can coerce date to
|
|
895
|
+
-- VARCHAR across parquets with mixed schemas, which makes downstream
|
|
896
|
+
-- strftime(date, ...) binder-error.
|
|
897
|
+
CAST(date AS DATE) AS date,
|
|
895
898
|
${METRIC_EXPR.clicks} AS clicks,
|
|
896
899
|
${METRIC_EXPR.impressions} AS impressions,
|
|
897
900
|
${valueExpr} AS value
|
|
@@ -1579,7 +1582,10 @@ const ctrAnomalyAnalyzer = defineAnalyzer({
|
|
|
1579
1582
|
SELECT
|
|
1580
1583
|
query,
|
|
1581
1584
|
url AS page,
|
|
1582
|
-
date
|
|
1585
|
+
-- Normalize at the source CTE: union_by_name=true can coerce date to
|
|
1586
|
+
-- VARCHAR across parquets with mixed schemas, which makes downstream
|
|
1587
|
+
-- strftime(date, ...) binder-error.
|
|
1588
|
+
CAST(date AS DATE) AS date,
|
|
1583
1589
|
${METRIC_EXPR.clicks} AS day_clicks,
|
|
1584
1590
|
${METRIC_EXPR.impressions} AS day_impressions,
|
|
1585
1591
|
${METRIC_EXPR.ctr} AS day_ctr,
|
|
@@ -3527,7 +3533,10 @@ const positionVolatilityAnalyzer = defineAnalyzer({
|
|
|
3527
3533
|
SELECT
|
|
3528
3534
|
url AS page,
|
|
3529
3535
|
query,
|
|
3530
|
-
date
|
|
3536
|
+
-- Normalize at the source CTE: union_by_name=true can coerce date to
|
|
3537
|
+
-- VARCHAR across parquets with mixed schemas, which makes downstream
|
|
3538
|
+
-- strftime(date, ...) binder-error.
|
|
3539
|
+
CAST(date AS DATE) AS date,
|
|
3531
3540
|
${METRIC_EXPR.impressions} AS q_impressions,
|
|
3532
3541
|
${METRIC_EXPR.position} AS q_position
|
|
3533
3542
|
FROM read_parquet({{FILES}}, union_by_name = true)
|
|
@@ -3893,7 +3902,7 @@ const seasonalityAnalyzer = defineAnalyzer({
|
|
|
3893
3902
|
sql: `
|
|
3894
3903
|
WITH monthly AS (
|
|
3895
3904
|
SELECT
|
|
3896
|
-
strftime(date, '%Y-%m') AS month,
|
|
3905
|
+
strftime(CAST(date AS DATE), '%Y-%m') AS month,
|
|
3897
3906
|
CAST(SUM(${params.metric === "impressions" ? "impressions" : "clicks"}) AS DOUBLE) AS value
|
|
3898
3907
|
FROM read_parquet({{FILES}}, union_by_name = true)
|
|
3899
3908
|
WHERE date >= ? AND date <= ?
|
|
@@ -3992,7 +4001,10 @@ const stlDecomposeAnalyzer = defineAnalyzer({
|
|
|
3992
4001
|
SELECT
|
|
3993
4002
|
query,
|
|
3994
4003
|
url AS page,
|
|
3995
|
-
date
|
|
4004
|
+
-- Normalize at the source CTE: union_by_name=true can coerce date to
|
|
4005
|
+
-- VARCHAR across parquets with mixed schemas, which makes downstream
|
|
4006
|
+
-- strftime(date, ...) binder-error.
|
|
4007
|
+
CAST(date AS DATE) AS date,
|
|
3996
4008
|
${METRIC_EXPR.clicks} AS clicks,
|
|
3997
4009
|
${METRIC_EXPR.impressions} AS impressions,
|
|
3998
4010
|
CAST(SUM(${metric}) AS DOUBLE) AS observed
|
package/dist/index.d.mts
CHANGED
|
@@ -467,6 +467,19 @@ declare function diffSitemapHealth(prev: readonly SitemapHealthRow[], curr: read
|
|
|
467
467
|
interface SyncedRange {
|
|
468
468
|
oldestDateSynced: string | null;
|
|
469
469
|
newestDateSynced: string | null;
|
|
470
|
+
/**
|
|
471
|
+
* Optional sorted list of `[start, end]` daily-key spans (`YYYY-MM-DD`,
|
|
472
|
+
* both inclusive) that the engine actually has partitions for. When set,
|
|
473
|
+
* `shouldRouteToLive` returns true for any requested range that overlaps
|
|
474
|
+
* a day NOT inside one of these spans — even when the request sits inside
|
|
475
|
+
* `oldestDateSynced..newestDateSynced`. Lets the composite catch *internal*
|
|
476
|
+
* manifest gaps (e.g. a missing monthly tier) that the outer envelope
|
|
477
|
+
* doesn't reveal. Spans must be sorted by `start` and non-overlapping.
|
|
478
|
+
*/
|
|
479
|
+
coveredSpans?: ReadonlyArray<{
|
|
480
|
+
start: string;
|
|
481
|
+
end: string;
|
|
482
|
+
}>;
|
|
470
483
|
}
|
|
471
484
|
interface CompositeSourceOptions {
|
|
472
485
|
engine: AnalysisQuerySource$1;
|
package/dist/index.mjs
CHANGED
|
@@ -1107,7 +1107,10 @@ const changePointAnalyzer = defineAnalyzer$1({
|
|
|
1107
1107
|
SELECT
|
|
1108
1108
|
query,
|
|
1109
1109
|
url AS page,
|
|
1110
|
-
date
|
|
1110
|
+
-- Normalize at the source CTE: union_by_name=true can coerce date to
|
|
1111
|
+
-- VARCHAR across parquets with mixed schemas, which makes downstream
|
|
1112
|
+
-- strftime(date, ...) binder-error.
|
|
1113
|
+
CAST(date AS DATE) AS date,
|
|
1111
1114
|
${METRIC_EXPR.clicks} AS clicks,
|
|
1112
1115
|
${METRIC_EXPR.impressions} AS impressions,
|
|
1113
1116
|
${valueExpr} AS value
|
|
@@ -1795,7 +1798,10 @@ const ctrAnomalyAnalyzer = defineAnalyzer$1({
|
|
|
1795
1798
|
SELECT
|
|
1796
1799
|
query,
|
|
1797
1800
|
url AS page,
|
|
1798
|
-
date
|
|
1801
|
+
-- Normalize at the source CTE: union_by_name=true can coerce date to
|
|
1802
|
+
-- VARCHAR across parquets with mixed schemas, which makes downstream
|
|
1803
|
+
-- strftime(date, ...) binder-error.
|
|
1804
|
+
CAST(date AS DATE) AS date,
|
|
1799
1805
|
${METRIC_EXPR.clicks} AS day_clicks,
|
|
1800
1806
|
${METRIC_EXPR.impressions} AS day_impressions,
|
|
1801
1807
|
${METRIC_EXPR.ctr} AS day_ctr,
|
|
@@ -3888,7 +3894,10 @@ const positionVolatilityAnalyzer = defineAnalyzer$1({
|
|
|
3888
3894
|
SELECT
|
|
3889
3895
|
url AS page,
|
|
3890
3896
|
query,
|
|
3891
|
-
date
|
|
3897
|
+
-- Normalize at the source CTE: union_by_name=true can coerce date to
|
|
3898
|
+
-- VARCHAR across parquets with mixed schemas, which makes downstream
|
|
3899
|
+
-- strftime(date, ...) binder-error.
|
|
3900
|
+
CAST(date AS DATE) AS date,
|
|
3892
3901
|
${METRIC_EXPR.impressions} AS q_impressions,
|
|
3893
3902
|
${METRIC_EXPR.position} AS q_position
|
|
3894
3903
|
FROM read_parquet({{FILES}}, union_by_name = true)
|
|
@@ -4254,7 +4263,7 @@ const seasonalityAnalyzer = defineAnalyzer$1({
|
|
|
4254
4263
|
sql: `
|
|
4255
4264
|
WITH monthly AS (
|
|
4256
4265
|
SELECT
|
|
4257
|
-
strftime(date, '%Y-%m') AS month,
|
|
4266
|
+
strftime(CAST(date AS DATE), '%Y-%m') AS month,
|
|
4258
4267
|
CAST(SUM(${params.metric === "impressions" ? "impressions" : "clicks"}) AS DOUBLE) AS value
|
|
4259
4268
|
FROM read_parquet({{FILES}}, union_by_name = true)
|
|
4260
4269
|
WHERE date >= ? AND date <= ?
|
|
@@ -4353,7 +4362,10 @@ const stlDecomposeAnalyzer = defineAnalyzer$1({
|
|
|
4353
4362
|
SELECT
|
|
4354
4363
|
query,
|
|
4355
4364
|
url AS page,
|
|
4356
|
-
date
|
|
4365
|
+
-- Normalize at the source CTE: union_by_name=true can coerce date to
|
|
4366
|
+
-- VARCHAR across parquets with mixed schemas, which makes downstream
|
|
4367
|
+
-- strftime(date, ...) binder-error.
|
|
4368
|
+
CAST(date AS DATE) AS date,
|
|
4357
4369
|
${METRIC_EXPR.clicks} AS clicks,
|
|
4358
4370
|
${METRIC_EXPR.impressions} AS impressions,
|
|
4359
4371
|
CAST(SUM(${metric}) AS DOUBLE) AS observed
|
|
@@ -6738,12 +6750,29 @@ function diffSitemapHealth(prev, curr) {
|
|
|
6738
6750
|
totals
|
|
6739
6751
|
};
|
|
6740
6752
|
}
|
|
6753
|
+
function hasGapInCoveredSpans(start, end, coveredSpans) {
|
|
6754
|
+
let cursor = start;
|
|
6755
|
+
for (const span of coveredSpans) {
|
|
6756
|
+
if (span.end < cursor) continue;
|
|
6757
|
+
if (span.start > cursor) return true;
|
|
6758
|
+
if (span.end >= end) return false;
|
|
6759
|
+
cursor = nextDay(span.end);
|
|
6760
|
+
if (cursor > end) return false;
|
|
6761
|
+
}
|
|
6762
|
+
return cursor <= end;
|
|
6763
|
+
}
|
|
6764
|
+
function nextDay(day) {
|
|
6765
|
+
const t = Date.parse(`${day}T00:00:00Z`) + 864e5;
|
|
6766
|
+
return new Date(t).toISOString().slice(0, 10);
|
|
6767
|
+
}
|
|
6741
6768
|
function shouldRouteToLive(state, site) {
|
|
6742
6769
|
if (!canProxyToGsc(state)) return false;
|
|
6743
6770
|
const { startDate, endDate } = extractDateRange(state.filter);
|
|
6744
6771
|
if (!startDate || !endDate) return false;
|
|
6745
6772
|
if (!site.oldestDateSynced || !site.newestDateSynced) return true;
|
|
6746
|
-
|
|
6773
|
+
if (startDate < site.oldestDateSynced || endDate > site.newestDateSynced) return true;
|
|
6774
|
+
if (site.coveredSpans && site.coveredSpans.length > 0) return hasGapInCoveredSpans(startDate, endDate, site.coveredSpans);
|
|
6775
|
+
return false;
|
|
6747
6776
|
}
|
|
6748
6777
|
function createCompositeSource(opts) {
|
|
6749
6778
|
const { engine, live, site } = opts;
|
package/dist/source/index.d.mts
CHANGED
|
@@ -4,6 +4,19 @@ import { PlannerCapabilities } from "gscdump/query/plan";
|
|
|
4
4
|
interface SyncedRange {
|
|
5
5
|
oldestDateSynced: string | null;
|
|
6
6
|
newestDateSynced: string | null;
|
|
7
|
+
/**
|
|
8
|
+
* Optional sorted list of `[start, end]` daily-key spans (`YYYY-MM-DD`,
|
|
9
|
+
* both inclusive) that the engine actually has partitions for. When set,
|
|
10
|
+
* `shouldRouteToLive` returns true for any requested range that overlaps
|
|
11
|
+
* a day NOT inside one of these spans — even when the request sits inside
|
|
12
|
+
* `oldestDateSynced..newestDateSynced`. Lets the composite catch *internal*
|
|
13
|
+
* manifest gaps (e.g. a missing monthly tier) that the outer envelope
|
|
14
|
+
* doesn't reveal. Spans must be sorted by `start` and non-overlapping.
|
|
15
|
+
*/
|
|
16
|
+
coveredSpans?: ReadonlyArray<{
|
|
17
|
+
start: string;
|
|
18
|
+
end: string;
|
|
19
|
+
}>;
|
|
7
20
|
}
|
|
8
21
|
interface CompositeSourceOptions {
|
|
9
22
|
engine: AnalysisQuerySource;
|
package/dist/source/index.mjs
CHANGED
|
@@ -1,11 +1,28 @@
|
|
|
1
1
|
import { canProxyToGsc } from "@gscdump/engine-gsc-api";
|
|
2
2
|
import { extractDateRange } from "gscdump/query";
|
|
3
|
+
function hasGapInCoveredSpans(start, end, coveredSpans) {
|
|
4
|
+
let cursor = start;
|
|
5
|
+
for (const span of coveredSpans) {
|
|
6
|
+
if (span.end < cursor) continue;
|
|
7
|
+
if (span.start > cursor) return true;
|
|
8
|
+
if (span.end >= end) return false;
|
|
9
|
+
cursor = nextDay(span.end);
|
|
10
|
+
if (cursor > end) return false;
|
|
11
|
+
}
|
|
12
|
+
return cursor <= end;
|
|
13
|
+
}
|
|
14
|
+
function nextDay(day) {
|
|
15
|
+
const t = Date.parse(`${day}T00:00:00Z`) + 864e5;
|
|
16
|
+
return new Date(t).toISOString().slice(0, 10);
|
|
17
|
+
}
|
|
3
18
|
function shouldRouteToLive(state, site) {
|
|
4
19
|
if (!canProxyToGsc(state)) return false;
|
|
5
20
|
const { startDate, endDate } = extractDateRange(state.filter);
|
|
6
21
|
if (!startDate || !endDate) return false;
|
|
7
22
|
if (!site.oldestDateSynced || !site.newestDateSynced) return true;
|
|
8
|
-
|
|
23
|
+
if (startDate < site.oldestDateSynced || endDate > site.newestDateSynced) return true;
|
|
24
|
+
if (site.coveredSpans && site.coveredSpans.length > 0) return hasGapInCoveredSpans(startDate, endDate, site.coveredSpans);
|
|
25
|
+
return false;
|
|
9
26
|
}
|
|
10
27
|
function createCompositeSource(opts) {
|
|
11
28
|
const { engine, live, site } = opts;
|
package/package.json
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@gscdump/analysis",
|
|
3
3
|
"type": "module",
|
|
4
|
-
"version": "0.18.2",
|
|
4
|
+
"version": "0.18.3",
|
|
5
5
|
"description": "GSC analyzers — striking-distance, opportunity, movers, decay, brand, clustering, concentration, seasonality. Pure row-based + DuckDB-native.",
|
|
6
6
|
"author": {
|
|
7
7
|
"name": "Harlan Wilton",
|
|
@@ -75,9 +75,9 @@
|
|
|
75
75
|
},
|
|
76
76
|
"dependencies": {
|
|
77
77
|
"drizzle-orm": "^0.45.2",
|
|
78
|
-
"@gscdump/engine": "0.18.2",
|
|
79
|
-
"@gscdump/engine-gsc-api": "0.18.2",
|
|
80
|
-
"gscdump": "0.18.2"
|
|
78
|
+
"@gscdump/engine": "0.18.3",
|
|
79
|
+
"@gscdump/engine-gsc-api": "0.18.3",
|
|
80
|
+
"gscdump": "0.18.3"
|
|
81
81
|
},
|
|
82
82
|
"devDependencies": {
|
|
83
83
|
"vitest": "^4.1.6"
|