@gscdump/analysis 0.18.2 → 0.18.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -891,7 +891,10 @@ const changePointAnalyzer = defineAnalyzer({
891
891
  SELECT
892
892
  query,
893
893
  url AS page,
894
- date,
894
+ -- Normalize at the source CTE: union_by_name=true can coerce date to
895
+ -- VARCHAR across parquets with mixed schemas, which makes downstream
896
+ -- strftime(date, ...) binder-error.
897
+ CAST(date AS DATE) AS date,
895
898
  ${METRIC_EXPR.clicks} AS clicks,
896
899
  ${METRIC_EXPR.impressions} AS impressions,
897
900
  ${valueExpr} AS value
@@ -1579,7 +1582,10 @@ const ctrAnomalyAnalyzer = defineAnalyzer({
1579
1582
  SELECT
1580
1583
  query,
1581
1584
  url AS page,
1582
- date,
1585
+ -- Normalize at the source CTE: union_by_name=true can coerce date to
1586
+ -- VARCHAR across parquets with mixed schemas, which makes downstream
1587
+ -- strftime(date, ...) binder-error.
1588
+ CAST(date AS DATE) AS date,
1583
1589
  ${METRIC_EXPR.clicks} AS day_clicks,
1584
1590
  ${METRIC_EXPR.impressions} AS day_impressions,
1585
1591
  ${METRIC_EXPR.ctr} AS day_ctr,
@@ -3533,7 +3539,10 @@ const positionVolatilityAnalyzer = defineAnalyzer({
3533
3539
  SELECT
3534
3540
  url AS page,
3535
3541
  query,
3536
- date,
3542
+ -- Normalize at the source CTE: union_by_name=true can coerce date to
3543
+ -- VARCHAR across parquets with mixed schemas, which makes downstream
3544
+ -- strftime(date, ...) binder-error.
3545
+ CAST(date AS DATE) AS date,
3537
3546
  ${METRIC_EXPR.impressions} AS q_impressions,
3538
3547
  ${METRIC_EXPR.position} AS q_position
3539
3548
  FROM read_parquet({{FILES}}, union_by_name = true)
@@ -3899,7 +3908,7 @@ const seasonalityAnalyzer = defineAnalyzer({
3899
3908
  sql: `
3900
3909
  WITH monthly AS (
3901
3910
  SELECT
3902
- strftime(date, '%Y-%m') AS month,
3911
+ strftime(CAST(date AS DATE), '%Y-%m') AS month,
3903
3912
  CAST(SUM(${params.metric === "impressions" ? "impressions" : "clicks"}) AS DOUBLE) AS value
3904
3913
  FROM read_parquet({{FILES}}, union_by_name = true)
3905
3914
  WHERE date >= ? AND date <= ?
@@ -3998,7 +4007,10 @@ const stlDecomposeAnalyzer = defineAnalyzer({
3998
4007
  SELECT
3999
4008
  query,
4000
4009
  url AS page,
4001
- date,
4010
+ -- Normalize at the source CTE: union_by_name=true can coerce date to
4011
+ -- VARCHAR across parquets with mixed schemas, which makes downstream
4012
+ -- strftime(date, ...) binder-error.
4013
+ CAST(date AS DATE) AS date,
4002
4014
  ${METRIC_EXPR.clicks} AS clicks,
4003
4015
  ${METRIC_EXPR.impressions} AS impressions,
4004
4016
  CAST(SUM(${metric}) AS DOUBLE) AS observed
@@ -891,7 +891,10 @@ const changePointAnalyzer = defineAnalyzer({
891
891
  SELECT
892
892
  query,
893
893
  url AS page,
894
- date,
894
+ -- Normalize at the source CTE: union_by_name=true can coerce date to
895
+ -- VARCHAR across parquets with mixed schemas, which makes downstream
896
+ -- strftime(date, ...) binder-error.
897
+ CAST(date AS DATE) AS date,
895
898
  ${METRIC_EXPR.clicks} AS clicks,
896
899
  ${METRIC_EXPR.impressions} AS impressions,
897
900
  ${valueExpr} AS value
@@ -1579,7 +1582,10 @@ const ctrAnomalyAnalyzer = defineAnalyzer({
1579
1582
  SELECT
1580
1583
  query,
1581
1584
  url AS page,
1582
- date,
1585
+ -- Normalize at the source CTE: union_by_name=true can coerce date to
1586
+ -- VARCHAR across parquets with mixed schemas, which makes downstream
1587
+ -- strftime(date, ...) binder-error.
1588
+ CAST(date AS DATE) AS date,
1583
1589
  ${METRIC_EXPR.clicks} AS day_clicks,
1584
1590
  ${METRIC_EXPR.impressions} AS day_impressions,
1585
1591
  ${METRIC_EXPR.ctr} AS day_ctr,
@@ -3527,7 +3533,10 @@ const positionVolatilityAnalyzer = defineAnalyzer({
3527
3533
  SELECT
3528
3534
  url AS page,
3529
3535
  query,
3530
- date,
3536
+ -- Normalize at the source CTE: union_by_name=true can coerce date to
3537
+ -- VARCHAR across parquets with mixed schemas, which makes downstream
3538
+ -- strftime(date, ...) binder-error.
3539
+ CAST(date AS DATE) AS date,
3531
3540
  ${METRIC_EXPR.impressions} AS q_impressions,
3532
3541
  ${METRIC_EXPR.position} AS q_position
3533
3542
  FROM read_parquet({{FILES}}, union_by_name = true)
@@ -3893,7 +3902,7 @@ const seasonalityAnalyzer = defineAnalyzer({
3893
3902
  sql: `
3894
3903
  WITH monthly AS (
3895
3904
  SELECT
3896
- strftime(date, '%Y-%m') AS month,
3905
+ strftime(CAST(date AS DATE), '%Y-%m') AS month,
3897
3906
  CAST(SUM(${params.metric === "impressions" ? "impressions" : "clicks"}) AS DOUBLE) AS value
3898
3907
  FROM read_parquet({{FILES}}, union_by_name = true)
3899
3908
  WHERE date >= ? AND date <= ?
@@ -3992,7 +4001,10 @@ const stlDecomposeAnalyzer = defineAnalyzer({
3992
4001
  SELECT
3993
4002
  query,
3994
4003
  url AS page,
3995
- date,
4004
+ -- Normalize at the source CTE: union_by_name=true can coerce date to
4005
+ -- VARCHAR across parquets with mixed schemas, which makes downstream
4006
+ -- strftime(date, ...) binder-error.
4007
+ CAST(date AS DATE) AS date,
3996
4008
  ${METRIC_EXPR.clicks} AS clicks,
3997
4009
  ${METRIC_EXPR.impressions} AS impressions,
3998
4010
  CAST(SUM(${metric}) AS DOUBLE) AS observed
package/dist/index.d.mts CHANGED
@@ -467,6 +467,19 @@ declare function diffSitemapHealth(prev: readonly SitemapHealthRow[], curr: read
467
467
  interface SyncedRange {
468
468
  oldestDateSynced: string | null;
469
469
  newestDateSynced: string | null;
470
+ /**
471
+ * Optional sorted list of `[start, end]` daily-key spans (`YYYY-MM-DD`,
472
+ * both inclusive) that the engine actually has partitions for. When set,
473
+ * `shouldRouteToLive` returns true for any requested range that overlaps
474
+ * a day NOT inside one of these spans — even when the request sits inside
475
+ * `oldestDateSynced..newestDateSynced`. Lets the composite catch *internal*
476
+ * manifest gaps (e.g. a missing monthly tier) that the outer envelope
477
+ * doesn't reveal. Spans must be sorted by `start` and non-overlapping.
478
+ */
479
+ coveredSpans?: ReadonlyArray<{
480
+ start: string;
481
+ end: string;
482
+ }>;
470
483
  }
471
484
  interface CompositeSourceOptions {
472
485
  engine: AnalysisQuerySource$1;
package/dist/index.mjs CHANGED
@@ -1107,7 +1107,10 @@ const changePointAnalyzer = defineAnalyzer$1({
1107
1107
  SELECT
1108
1108
  query,
1109
1109
  url AS page,
1110
- date,
1110
+ -- Normalize at the source CTE: union_by_name=true can coerce date to
1111
+ -- VARCHAR across parquets with mixed schemas, which makes downstream
1112
+ -- strftime(date, ...) binder-error.
1113
+ CAST(date AS DATE) AS date,
1111
1114
  ${METRIC_EXPR.clicks} AS clicks,
1112
1115
  ${METRIC_EXPR.impressions} AS impressions,
1113
1116
  ${valueExpr} AS value
@@ -1795,7 +1798,10 @@ const ctrAnomalyAnalyzer = defineAnalyzer$1({
1795
1798
  SELECT
1796
1799
  query,
1797
1800
  url AS page,
1798
- date,
1801
+ -- Normalize at the source CTE: union_by_name=true can coerce date to
1802
+ -- VARCHAR across parquets with mixed schemas, which makes downstream
1803
+ -- strftime(date, ...) binder-error.
1804
+ CAST(date AS DATE) AS date,
1799
1805
  ${METRIC_EXPR.clicks} AS day_clicks,
1800
1806
  ${METRIC_EXPR.impressions} AS day_impressions,
1801
1807
  ${METRIC_EXPR.ctr} AS day_ctr,
@@ -3888,7 +3894,10 @@ const positionVolatilityAnalyzer = defineAnalyzer$1({
3888
3894
  SELECT
3889
3895
  url AS page,
3890
3896
  query,
3891
- date,
3897
+ -- Normalize at the source CTE: union_by_name=true can coerce date to
3898
+ -- VARCHAR across parquets with mixed schemas, which makes downstream
3899
+ -- strftime(date, ...) binder-error.
3900
+ CAST(date AS DATE) AS date,
3892
3901
  ${METRIC_EXPR.impressions} AS q_impressions,
3893
3902
  ${METRIC_EXPR.position} AS q_position
3894
3903
  FROM read_parquet({{FILES}}, union_by_name = true)
@@ -4254,7 +4263,7 @@ const seasonalityAnalyzer = defineAnalyzer$1({
4254
4263
  sql: `
4255
4264
  WITH monthly AS (
4256
4265
  SELECT
4257
- strftime(date, '%Y-%m') AS month,
4266
+ strftime(CAST(date AS DATE), '%Y-%m') AS month,
4258
4267
  CAST(SUM(${params.metric === "impressions" ? "impressions" : "clicks"}) AS DOUBLE) AS value
4259
4268
  FROM read_parquet({{FILES}}, union_by_name = true)
4260
4269
  WHERE date >= ? AND date <= ?
@@ -4353,7 +4362,10 @@ const stlDecomposeAnalyzer = defineAnalyzer$1({
4353
4362
  SELECT
4354
4363
  query,
4355
4364
  url AS page,
4356
- date,
4365
+ -- Normalize at the source CTE: union_by_name=true can coerce date to
4366
+ -- VARCHAR across parquets with mixed schemas, which makes downstream
4367
+ -- strftime(date, ...) binder-error.
4368
+ CAST(date AS DATE) AS date,
4357
4369
  ${METRIC_EXPR.clicks} AS clicks,
4358
4370
  ${METRIC_EXPR.impressions} AS impressions,
4359
4371
  CAST(SUM(${metric}) AS DOUBLE) AS observed
@@ -6738,12 +6750,29 @@ function diffSitemapHealth(prev, curr) {
6738
6750
  totals
6739
6751
  };
6740
6752
  }
6753
+ function hasGapInCoveredSpans(start, end, coveredSpans) {
6754
+ let cursor = start;
6755
+ for (const span of coveredSpans) {
6756
+ if (span.end < cursor) continue;
6757
+ if (span.start > cursor) return true;
6758
+ if (span.end >= end) return false;
6759
+ cursor = nextDay(span.end);
6760
+ if (cursor > end) return false;
6761
+ }
6762
+ return cursor <= end;
6763
+ }
6764
+ function nextDay(day) {
6765
+ const t = Date.parse(`${day}T00:00:00Z`) + 864e5;
6766
+ return new Date(t).toISOString().slice(0, 10);
6767
+ }
6741
6768
  function shouldRouteToLive(state, site) {
6742
6769
  if (!canProxyToGsc(state)) return false;
6743
6770
  const { startDate, endDate } = extractDateRange(state.filter);
6744
6771
  if (!startDate || !endDate) return false;
6745
6772
  if (!site.oldestDateSynced || !site.newestDateSynced) return true;
6746
- return startDate < site.oldestDateSynced || endDate > site.newestDateSynced;
6773
+ if (startDate < site.oldestDateSynced || endDate > site.newestDateSynced) return true;
6774
+ if (site.coveredSpans && site.coveredSpans.length > 0) return hasGapInCoveredSpans(startDate, endDate, site.coveredSpans);
6775
+ return false;
6747
6776
  }
6748
6777
  function createCompositeSource(opts) {
6749
6778
  const { engine, live, site } = opts;
@@ -4,6 +4,19 @@ import { PlannerCapabilities } from "gscdump/query/plan";
4
4
  interface SyncedRange {
5
5
  oldestDateSynced: string | null;
6
6
  newestDateSynced: string | null;
7
+ /**
8
+ * Optional sorted list of `[start, end]` daily-key spans (`YYYY-MM-DD`,
9
+ * both inclusive) that the engine actually has partitions for. When set,
10
+ * `shouldRouteToLive` returns true for any requested range that overlaps
11
+ * a day NOT inside one of these spans — even when the request sits inside
12
+ * `oldestDateSynced..newestDateSynced`. Lets the composite catch *internal*
13
+ * manifest gaps (e.g. a missing monthly tier) that the outer envelope
14
+ * doesn't reveal. Spans must be sorted by `start` and non-overlapping.
15
+ */
16
+ coveredSpans?: ReadonlyArray<{
17
+ start: string;
18
+ end: string;
19
+ }>;
7
20
  }
8
21
  interface CompositeSourceOptions {
9
22
  engine: AnalysisQuerySource;
@@ -1,11 +1,28 @@
1
1
  import { canProxyToGsc } from "@gscdump/engine-gsc-api";
2
2
  import { extractDateRange } from "gscdump/query";
3
+ function hasGapInCoveredSpans(start, end, coveredSpans) {
4
+ let cursor = start;
5
+ for (const span of coveredSpans) {
6
+ if (span.end < cursor) continue;
7
+ if (span.start > cursor) return true;
8
+ if (span.end >= end) return false;
9
+ cursor = nextDay(span.end);
10
+ if (cursor > end) return false;
11
+ }
12
+ return cursor <= end;
13
+ }
14
+ function nextDay(day) {
15
+ const t = Date.parse(`${day}T00:00:00Z`) + 864e5;
16
+ return new Date(t).toISOString().slice(0, 10);
17
+ }
3
18
  function shouldRouteToLive(state, site) {
4
19
  if (!canProxyToGsc(state)) return false;
5
20
  const { startDate, endDate } = extractDateRange(state.filter);
6
21
  if (!startDate || !endDate) return false;
7
22
  if (!site.oldestDateSynced || !site.newestDateSynced) return true;
8
- return startDate < site.oldestDateSynced || endDate > site.newestDateSynced;
23
+ if (startDate < site.oldestDateSynced || endDate > site.newestDateSynced) return true;
24
+ if (site.coveredSpans && site.coveredSpans.length > 0) return hasGapInCoveredSpans(startDate, endDate, site.coveredSpans);
25
+ return false;
9
26
  }
10
27
  function createCompositeSource(opts) {
11
28
  const { engine, live, site } = opts;
package/package.json CHANGED
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "name": "@gscdump/analysis",
3
3
  "type": "module",
4
- "version": "0.18.2",
4
+ "version": "0.18.4",
5
5
  "description": "GSC analyzers — striking-distance, opportunity, movers, decay, brand, clustering, concentration, seasonality. Pure row-based + DuckDB-native.",
6
6
  "author": {
7
7
  "name": "Harlan Wilton",
@@ -75,9 +75,9 @@
75
75
  },
76
76
  "dependencies": {
77
77
  "drizzle-orm": "^0.45.2",
78
- "@gscdump/engine": "0.18.2",
79
- "@gscdump/engine-gsc-api": "0.18.2",
80
- "gscdump": "0.18.2"
78
+ "@gscdump/engine": "0.18.4",
79
+ "@gscdump/engine-gsc-api": "0.18.4",
80
+ "gscdump": "0.18.4"
81
81
  },
82
82
  "devDependencies": {
83
83
  "vitest": "^4.1.6"