@pagebridge/core 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,307 @@
1
+ import type { SanityClient } from "@sanity/client";
2
+
3
/**
 * Outcome code attached to every match attempt. `"matched"` marks a
 * successful match; the remaining members explain why no document was found.
 */
export type UnmatchReason =
  /** Successfully matched. */
  | "matched"
  /** URL parsing failed, or the path was empty after the prefix. */
  | "no_slug_extracted"
  /** A slug was extracted but no Sanity document carries it. */
  | "no_matching_document"
  /** The URL does not contain the configured prefix. */
  | "outside_path_prefix";
8
+
9
/**
 * Extra context attached to unmatched results so a caller can see why a URL
 * did not resolve to a document.
 */
export interface MatchDiagnostics {
  /** The URL after normalization, as it was used for slug extraction. */
  normalizedUrl: string;
  /** Path remaining once the prefix was removed; `null` when unavailable. */
  pathAfterPrefix: string | null;
  /** The `pathPrefix` from the matcher config, or `null` when none is set. */
  configuredPrefix: string | null;
  /** Number of distinct slugs that were available to match against. */
  availableSlugsCount: number;
  /** Top 3 similar slugs, offered as suggestions. */
  similarSlugs: string[];
}
16
+
17
/** Result of attempting to match one GSC URL to a Sanity document. */
export interface MatchResult {
  /** The URL exactly as it was passed in. */
  gscUrl: string;
  /** `_id` of the matched document; `undefined` when nothing matched. */
  sanityId: string | undefined;
  /** How the match was obtained; `"none"` when nothing matched. */
  confidence: "exact" | "normalized" | "fuzzy" | "none";
  /** The document slug that matched, when there was a match. */
  matchedSlug?: string;
  /** `"matched"` on success, otherwise the reason for the miss. */
  unmatchReason: UnmatchReason;
  /** Slug extracted from the URL path, when extraction succeeded. */
  extractedSlug?: string;
  /** Diagnostic context, supplied for unmatched URLs. */
  diagnostics?: MatchDiagnostics;
}
26
+
27
/** Settings that control how `URLMatcher` queries Sanity and parses URLs. */
export interface URLMatcherConfig {
  /** Sanity `_type` values whose documents are fetched as match candidates. */
  contentTypes: string[];
  /** Name of the slug field; its `.current` value is read from each document. */
  slugField: string;
  /** Optional path prefix; URLs whose path is outside it are not matched. */
  pathPrefix?: string;
  /**
   * Base URL of the site.
   * NOTE(review): not read by the matcher code visible in this file — confirm
   * where it is consumed.
   */
  baseUrl: string;
}
33
+
34
/**
 * Minimal shape of a document returned by the matcher's queries. Fields
 * beyond the two named ones (such as the projected slug) are reached through
 * the index signature and must be narrowed before use.
 */
interface SanityDocument {
  /** Document id. */
  _id: string;
  /** Creation timestamp, as returned by the query. */
  _createdAt: string;
  /** Any other projected field. */
  [key: string]: unknown;
}
39
+
40
/**
 * Matches GSC URLs to Sanity documents by comparing the slug found in each
 * URL's path with the slugs of documents of the configured content types.
 *
 * Comparison is case-insensitive and ignores leading/trailing slashes, a
 * leading `www.`, the query string and the fragment.
 */
export class URLMatcher {
  constructor(
    private sanityClient: SanityClient,
    private config: URLMatcherConfig,
  ) {}

  /**
   * Match every URL in `gscUrls` against the current Sanity documents.
   *
   * @param gscUrls URLs to resolve.
   * @returns One result per input URL, in the same order.
   * @throws Rejects with whatever the Sanity client's `fetch` rejects with.
   */
  async matchUrls(gscUrls: string[]): Promise<MatchResult[]> {
    // Nothing to match: skip the round trip to Sanity.
    if (gscUrls.length === 0) return [];

    const { slugField, contentTypes } = this.config;
    const query = `*[_type in $types]{
      _id,
      _type,
      "${slugField}": ${slugField}.current,
      _createdAt
    }`;
    const documents: SanityDocument[] = await this.sanityClient.fetch(query, {
      types: contentTypes,
    });

    const slugToDoc = new Map<string, { _id: string; _createdAt: string }>();
    for (const doc of documents) {
      const slug = doc[slugField];
      // Guard instead of asserting: the projected field may be missing or
      // not a string.
      if (typeof slug === "string" && slug) {
        // When several documents share a normalized slug the last one wins.
        slugToDoc.set(this.normalizeSlug(slug), doc);
      }
    }

    // Suggestion candidates come from the map keys so each slug is listed
    // once even when several documents share it.
    const allSlugs = [...slugToDoc.keys()];

    return gscUrls.map((url) => this.matchSingleUrl(url, slugToDoc, allSlugs));
  }

  /**
   * Get all available slugs from Sanity for diagnostic purposes.
   *
   * @returns The normalized slug of every fetched document that has one.
   * @throws Rejects with whatever the Sanity client's `fetch` rejects with.
   */
  async getAvailableSlugs(): Promise<string[]> {
    const { slugField, contentTypes } = this.config;
    const query = `*[_type in $types]{
      "${slugField}": ${slugField}.current
    }`;
    const documents: SanityDocument[] = await this.sanityClient.fetch(query, {
      types: contentTypes,
    });

    return documents
      .map((doc) => doc[slugField])
      .filter((slug): slug is string => typeof slug === "string" && slug !== "")
      .map((slug) => this.normalizeSlug(slug));
  }

  /**
   * Resolve a single URL against the prepared slug index.
   *
   * @param gscUrl URL as supplied by the caller.
   * @param slugToDoc Normalized slug → document lookup.
   * @param allSlugs Normalized slugs used as suggestion candidates.
   */
  private matchSingleUrl(
    gscUrl: string,
    slugToDoc: Map<string, { _id: string; _createdAt: string }>,
    allSlugs: string[],
  ): MatchResult {
    const normalized = this.normalizeUrl(gscUrl);
    const { slug, pathAfterPrefix, outsidePrefix } =
      this.extractSlugWithDiagnostics(normalized);

    if (outsidePrefix) {
      return {
        gscUrl,
        sanityId: undefined,
        confidence: "none",
        unmatchReason: "outside_path_prefix",
        diagnostics: this.buildDiagnostics(normalized, null, slugToDoc.size),
      };
    }

    if (!slug) {
      return {
        gscUrl,
        sanityId: undefined,
        confidence: "none",
        unmatchReason: "no_slug_extracted",
        diagnostics: this.buildDiagnostics(
          normalized,
          pathAfterPrefix,
          slugToDoc.size,
        ),
      };
    }

    // Both the extracted slug and every map key already have their
    // leading/trailing slashes stripped, so one lookup covers the
    // trailing-slash variants as well.
    const match = slugToDoc.get(slug);
    if (match) {
      return {
        gscUrl,
        sanityId: match._id,
        confidence: "exact",
        matchedSlug: slug,
        unmatchReason: "matched",
        extractedSlug: slug,
      };
    }

    // No match found - find similar slugs for suggestions
    return {
      gscUrl,
      sanityId: undefined,
      confidence: "none",
      unmatchReason: "no_matching_document",
      extractedSlug: slug,
      diagnostics: this.buildDiagnostics(
        normalized,
        pathAfterPrefix,
        slugToDoc.size,
        this.findSimilarSlugs(slug, allSlugs, 3),
      ),
    };
  }

  /** Assemble the diagnostics object attached to unmatched results. */
  private buildDiagnostics(
    normalizedUrl: string,
    pathAfterPrefix: string | null,
    availableSlugsCount: number,
    similarSlugs: string[] = [],
  ): MatchDiagnostics {
    return {
      normalizedUrl,
      pathAfterPrefix,
      // Report the prefix as configured, not its normalized form.
      configuredPrefix: this.config.pathPrefix ?? null,
      availableSlugsCount,
      similarSlugs,
    };
  }

  /**
   * Lowercase the URL and drop a leading `www.`, the query string and the
   * fragment. Input that cannot be parsed as a URL is only lowercased.
   */
  private normalizeUrl(url: string): string {
    try {
      const parsed = new URL(url);
      parsed.hostname = parsed.hostname.replace(/^www\./, "");
      parsed.search = "";
      parsed.hash = "";
      return parsed.toString().toLowerCase();
    } catch {
      return url.toLowerCase();
    }
  }

  /** Slug-only convenience wrapper around `extractSlugWithDiagnostics`. */
  private extractSlug(normalizedUrl: string): string | undefined {
    return this.extractSlugWithDiagnostics(normalizedUrl).slug;
  }

  /**
   * Canonical form of the configured prefix: lowercase, exactly one leading
   * slash, no trailing slash. URL paths are lowercased before comparison, so
   * a prefix written as `/Blog`, `/blog/` or `blog` must be brought to the
   * same form or no URL would ever be inside it.
   *
   * @returns The canonical prefix, or `undefined` when no effective prefix is
   *   configured (missing, empty, or only slashes).
   */
  private normalizedPathPrefix(): string | undefined {
    const raw = this.config.pathPrefix;
    if (!raw) return undefined;
    const bare = raw.trim().replace(/^\/+|\/+$/g, "").toLowerCase();
    return bare ? `/${bare}` : undefined;
  }

  /**
   * Extract the slug from an already-normalized URL.
   *
   * @returns `slug` (undefined when the remaining path is empty or the URL
   *   cannot be parsed), `pathAfterPrefix` (null when outside the prefix or
   *   unparsable) and `outsidePrefix` (true only when a prefix is configured
   *   and the path is not under it).
   */
  private extractSlugWithDiagnostics(normalizedUrl: string): {
    slug: string | undefined;
    pathAfterPrefix: string | null;
    outsidePrefix: boolean;
  } {
    try {
      let path = new URL(normalizedUrl).pathname;

      const prefix = this.normalizedPathPrefix();
      if (prefix) {
        // The prefix must end on a segment boundary: "/blog" covers "/blog"
        // and "/blog/x" but not "/blogging".
        const insidePrefix = path === prefix || path.startsWith(`${prefix}/`);
        if (!insidePrefix) {
          return { slug: undefined, pathAfterPrefix: null, outsidePrefix: true };
        }
        path = path.slice(prefix.length);
      }

      const slug = path.replace(/^\/+|\/+$/g, "");
      return {
        slug: slug || undefined,
        pathAfterPrefix: path,
        outsidePrefix: false,
      };
    } catch {
      // Unparsable URL: reported as "no slug", not as "outside prefix".
      return { slug: undefined, pathAfterPrefix: null, outsidePrefix: false };
    }
  }

  /** Strip leading/trailing slashes and lowercase. */
  private normalizeSlug(slug: string): string {
    return slug.replace(/^\/+|\/+$/g, "").toLowerCase();
  }

  /**
   * Find similar slugs using Levenshtein distance.
   *
   * @param target Slug that failed to match.
   * @param candidates Slugs to rank.
   * @param limit Maximum number of suggestions.
   * @returns Up to `limit` candidates, closest first.
   */
  private findSimilarSlugs(
    target: string,
    candidates: string[],
    limit: number,
  ): string[] {
    // Only include reasonably similar candidates.
    const maxDistance = Math.max(target.length * 0.5, 10);
    return candidates
      .map((candidate) => ({
        slug: candidate,
        distance: this.levenshteinDistance(target, candidate),
      }))
      .filter((item) => item.distance <= maxDistance)
      .sort((x, y) => x.distance - y.distance)
      .slice(0, limit)
      .map((item) => item.slug);
  }

  /**
   * Calculate Levenshtein distance between two strings, keeping only the
   * previous and current rows of the edit table.
   */
  private levenshteinDistance(a: string, b: string): number {
    if (a.length === 0) return b.length;
    if (b.length === 0) return a.length;

    // previous[j] = distance between the processed part of b and a[0..j).
    let previous = Array.from({ length: a.length + 1 }, (_, j) => j);

    for (let i = 1; i <= b.length; i++) {
      const current: number[] = [i];
      for (let j = 1; j <= a.length; j++) {
        const cost = b.charAt(i - 1) === a.charAt(j - 1) ? 0 : 1;
        current[j] = Math.min(
          previous[j - 1]! + cost, // substitution (or match)
          current[j - 1]! + 1, // insertion
          previous[j]! + 1, // deletion
        );
      }
      previous = current;
    }

    return previous[a.length]!;
  }
}
package/tsconfig.json ADDED
@@ -0,0 +1,9 @@
1
+ {
2
+ "extends": "@pagebridge/typescript-config/library.json",
3
+ "compilerOptions": {
4
+ "outDir": "./dist",
5
+ "rootDir": "./src"
6
+ },
7
+ "include": ["src/**/*"],
8
+ "exclude": ["node_modules", "dist"]
9
+ }