folderblog 0.0.2 → 0.0.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/{chunk-24MKFHML.cjs → chunk-2TZSVPNP.cjs} +5 -0
- package/dist/{chunk-HMQIQUPB.cjs → chunk-6TFXNIO6.cjs} +108 -0
- package/dist/{chunk-ZRUBI3GH.js → chunk-B43UAOPC.js} +106 -1
- package/dist/{chunk-XP5J4LFJ.js → chunk-D26H5722.js} +5 -0
- package/dist/chunk-E7PYGJA7.cjs +39 -0
- package/dist/{chunk-QA4KPPTA.cjs → chunk-J3Y3HEBF.cjs} +84 -13
- package/dist/{chunk-PARGDJNY.js → chunk-K76XLEC7.js} +1 -1
- package/dist/{chunk-IXP35S24.js → chunk-LPPBVXJ7.js} +83 -12
- package/dist/chunk-Q6EXKX6K.js +17 -0
- package/dist/{chunk-4ZJGUMHS.cjs → chunk-Q6EYTOTM.cjs} +2 -2
- package/dist/chunk-UCXXH2MP.cjs +20 -0
- package/dist/chunk-XQD3UUL5.js +34 -0
- package/dist/cli/bin.cjs +5 -5
- package/dist/cli/bin.js +4 -4
- package/dist/cli/index.cjs +5 -5
- package/dist/cli/index.js +4 -4
- package/dist/config-ADPY6IQS.d.cts +473 -0
- package/dist/config-Dctsdeo6.d.ts +473 -0
- package/dist/index.cjs +157 -187
- package/dist/index.d.cts +4 -3
- package/dist/index.d.ts +4 -3
- package/dist/index.js +16 -69
- package/dist/local/index.cjs +785 -0
- package/dist/local/index.d.cts +268 -0
- package/dist/local/index.d.ts +268 -0
- package/dist/local/index.js +772 -0
- package/dist/output-0P0br3Jc.d.cts +452 -0
- package/dist/output-0P0br3Jc.d.ts +452 -0
- package/dist/plugins/embed-cloudflare-ai.cjs +166 -0
- package/dist/plugins/embed-cloudflare-ai.d.cts +73 -0
- package/dist/plugins/embed-cloudflare-ai.d.ts +73 -0
- package/dist/plugins/embed-cloudflare-ai.js +156 -0
- package/dist/plugins/embed-transformers.cjs +121 -0
- package/dist/plugins/embed-transformers.d.cts +55 -0
- package/dist/plugins/embed-transformers.d.ts +55 -0
- package/dist/plugins/embed-transformers.js +113 -0
- package/dist/plugins/similarity.cjs +19 -0
- package/dist/plugins/similarity.d.cts +41 -0
- package/dist/plugins/similarity.d.ts +41 -0
- package/dist/plugins/similarity.js +2 -0
- package/dist/processor/index.cjs +123 -111
- package/dist/processor/index.d.cts +6 -2
- package/dist/processor/index.d.ts +6 -2
- package/dist/processor/index.js +3 -3
- package/dist/processor/plugins.cjs +24 -12
- package/dist/processor/plugins.d.cts +4 -2
- package/dist/processor/plugins.d.ts +4 -2
- package/dist/processor/plugins.js +1 -1
- package/dist/processor/types.cjs +16 -16
- package/dist/processor/types.d.cts +3 -2
- package/dist/processor/types.d.ts +3 -2
- package/dist/processor/types.js +1 -1
- package/dist/seo/index.cjs +289 -0
- package/dist/seo/index.d.cts +95 -0
- package/dist/seo/index.d.ts +95 -0
- package/dist/seo/index.js +274 -0
- package/dist/server/index.cjs +2 -5
- package/dist/server/index.js +2 -5
- package/package.json +36 -1
- package/dist/config-DFr-htlO.d.cts +0 -887
- package/dist/config-DFr-htlO.d.ts +0 -887
|
@@ -0,0 +1,452 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Issue tracking types for @repo-md/processor-core
|
|
3
|
+
*/
|
|
4
|
+
type IssueSeverity = 'error' | 'warning' | 'info';
|
|
5
|
+
type IssueCategory = 'broken-link' | 'missing-media' | 'media-processing' | 'slug-conflict' | 'mermaid-error' | 'frontmatter-error' | 'parse-error' | 'file-access' | 'embedding-error' | 'database-error' | 'plugin-error' | 'configuration' | 'other';
|
|
6
|
+
type IssueModule = 'markdown-parser' | 'image-processor' | 'embed-mermaid' | 'embed-media' | 'link-resolver' | 'slug-generator' | 'frontmatter-parser' | 'file-system' | 'config-validator' | 'post-processor' | 'text-embeddings' | 'image-embeddings' | 'similarity' | 'database' | 'plugin-manager' | 'other';
|
|
7
|
+
/**
|
|
8
|
+
* Base interface for all processing issues
|
|
9
|
+
*/
|
|
10
|
+
interface ProcessingIssue {
|
|
11
|
+
readonly severity: IssueSeverity;
|
|
12
|
+
readonly category: IssueCategory;
|
|
13
|
+
readonly module: IssueModule;
|
|
14
|
+
readonly message: string;
|
|
15
|
+
readonly filePath?: string;
|
|
16
|
+
readonly lineNumber?: number;
|
|
17
|
+
readonly columnNumber?: number;
|
|
18
|
+
readonly context?: Readonly<Record<string, unknown>>;
|
|
19
|
+
readonly timestamp: string;
|
|
20
|
+
}
|
|
21
|
+
/**
|
|
22
|
+
* Broken link issue
|
|
23
|
+
*/
|
|
24
|
+
interface BrokenLinkIssue extends ProcessingIssue {
|
|
25
|
+
readonly category: 'broken-link';
|
|
26
|
+
readonly module: 'link-resolver';
|
|
27
|
+
readonly context: {
|
|
28
|
+
readonly linkText: string;
|
|
29
|
+
readonly linkTarget: string;
|
|
30
|
+
readonly linkType: 'wiki' | 'markdown' | 'frontmatter';
|
|
31
|
+
readonly suggestions?: readonly string[];
|
|
32
|
+
};
|
|
33
|
+
}
|
|
34
|
+
/**
|
|
35
|
+
* Missing media issue
|
|
36
|
+
*/
|
|
37
|
+
interface MissingMediaIssue extends ProcessingIssue {
|
|
38
|
+
readonly category: 'missing-media';
|
|
39
|
+
readonly module: 'embed-media' | 'image-processor';
|
|
40
|
+
readonly context: {
|
|
41
|
+
readonly mediaPath: string;
|
|
42
|
+
readonly referencedFrom: 'content' | 'frontmatter' | 'embed';
|
|
43
|
+
readonly originalReference?: string;
|
|
44
|
+
};
|
|
45
|
+
}
|
|
46
|
+
/**
|
|
47
|
+
* Media processing issue
|
|
48
|
+
*/
|
|
49
|
+
interface MediaProcessingIssue extends ProcessingIssue {
|
|
50
|
+
readonly category: 'media-processing';
|
|
51
|
+
readonly module: 'image-processor';
|
|
52
|
+
readonly context: {
|
|
53
|
+
readonly mediaPath: string;
|
|
54
|
+
readonly operation: 'read' | 'optimize' | 'resize' | 'hash' | 'copy';
|
|
55
|
+
readonly errorMessage: string;
|
|
56
|
+
readonly errorCode?: string;
|
|
57
|
+
};
|
|
58
|
+
}
|
|
59
|
+
/**
|
|
60
|
+
* Slug conflict issue
|
|
61
|
+
*/
|
|
62
|
+
interface SlugConflictIssue extends ProcessingIssue {
|
|
63
|
+
readonly category: 'slug-conflict';
|
|
64
|
+
readonly module: 'slug-generator';
|
|
65
|
+
readonly context: {
|
|
66
|
+
readonly originalSlug: string;
|
|
67
|
+
readonly finalSlug: string;
|
|
68
|
+
readonly conflictingFiles: readonly string[];
|
|
69
|
+
};
|
|
70
|
+
}
|
|
71
|
+
/**
|
|
72
|
+
* Mermaid error issue
|
|
73
|
+
*/
|
|
74
|
+
interface MermaidErrorIssue extends ProcessingIssue {
|
|
75
|
+
readonly category: 'mermaid-error';
|
|
76
|
+
readonly module: 'embed-mermaid';
|
|
77
|
+
readonly context: {
|
|
78
|
+
readonly errorType: 'plugin-load' | 'render-fail' | 'missing-deps';
|
|
79
|
+
readonly diagramContent?: string;
|
|
80
|
+
readonly fallback: string;
|
|
81
|
+
};
|
|
82
|
+
}
|
|
83
|
+
/**
|
|
84
|
+
* Embedding error issue
|
|
85
|
+
*/
|
|
86
|
+
interface EmbeddingErrorIssue extends ProcessingIssue {
|
|
87
|
+
readonly category: 'embedding-error';
|
|
88
|
+
readonly module: 'text-embeddings' | 'image-embeddings';
|
|
89
|
+
readonly context: {
|
|
90
|
+
readonly embeddingType: 'text' | 'image';
|
|
91
|
+
readonly operation: 'initialize' | 'embed' | 'batch';
|
|
92
|
+
readonly errorMessage: string;
|
|
93
|
+
};
|
|
94
|
+
}
|
|
95
|
+
/**
|
|
96
|
+
* Plugin error issue
|
|
97
|
+
*/
|
|
98
|
+
interface PluginErrorIssue extends ProcessingIssue {
|
|
99
|
+
readonly category: 'plugin-error';
|
|
100
|
+
readonly module: 'plugin-manager';
|
|
101
|
+
readonly context: {
|
|
102
|
+
readonly pluginName: string;
|
|
103
|
+
readonly operation: 'initialize' | 'process' | 'dispose';
|
|
104
|
+
readonly errorMessage: string;
|
|
105
|
+
};
|
|
106
|
+
}
|
|
107
|
+
declare const isBrokenLinkIssue: (issue: ProcessingIssue) => issue is BrokenLinkIssue;
|
|
108
|
+
declare const isMissingMediaIssue: (issue: ProcessingIssue) => issue is MissingMediaIssue;
|
|
109
|
+
declare const isMediaProcessingIssue: (issue: ProcessingIssue) => issue is MediaProcessingIssue;
|
|
110
|
+
declare const isSlugConflictIssue: (issue: ProcessingIssue) => issue is SlugConflictIssue;
|
|
111
|
+
declare const isMermaidErrorIssue: (issue: ProcessingIssue) => issue is MermaidErrorIssue;
|
|
112
|
+
declare const isEmbeddingErrorIssue: (issue: ProcessingIssue) => issue is EmbeddingErrorIssue;
|
|
113
|
+
declare const isPluginErrorIssue: (issue: ProcessingIssue) => issue is PluginErrorIssue;
|
|
114
|
+
interface IssueSummary {
|
|
115
|
+
readonly totalIssues: number;
|
|
116
|
+
readonly errorCount: number;
|
|
117
|
+
readonly warningCount: number;
|
|
118
|
+
readonly infoCount: number;
|
|
119
|
+
readonly filesAffected: number;
|
|
120
|
+
readonly categoryCounts: Readonly<Record<IssueCategory, number>>;
|
|
121
|
+
readonly moduleCounts: Readonly<Record<IssueModule, number>>;
|
|
122
|
+
}
|
|
123
|
+
interface IssueReport {
|
|
124
|
+
readonly issues: readonly ProcessingIssue[];
|
|
125
|
+
readonly summary: IssueSummary;
|
|
126
|
+
readonly metadata: {
|
|
127
|
+
readonly processStartTime: string;
|
|
128
|
+
readonly processEndTime: string;
|
|
129
|
+
readonly processorVersion?: string;
|
|
130
|
+
};
|
|
131
|
+
}
|
|
132
|
+
interface IssueFilterOptions {
|
|
133
|
+
readonly severity?: IssueSeverity | readonly IssueSeverity[];
|
|
134
|
+
readonly category?: IssueCategory | readonly IssueCategory[];
|
|
135
|
+
readonly module?: IssueModule | readonly IssueModule[];
|
|
136
|
+
readonly filePath?: string | readonly string[];
|
|
137
|
+
}
|
|
138
|
+
|
|
139
|
+
/**
|
|
140
|
+
* Cache types for @repo-md/processor-core
|
|
141
|
+
*
|
|
142
|
+
* These types support incremental builds by allowing the processor
|
|
143
|
+
* to skip re-processing files that haven't changed.
|
|
144
|
+
*/
|
|
145
|
+
|
|
146
|
+
/**
|
|
147
|
+
* Cached metadata for a previously processed media file.
|
|
148
|
+
* Contains all information needed to skip image processing
|
|
149
|
+
* while still generating correct HTML output with dimensions.
|
|
150
|
+
*/
|
|
151
|
+
interface CachedMediaMetadata {
|
|
152
|
+
/** Original image width */
|
|
153
|
+
readonly width: number;
|
|
154
|
+
/** Original image height */
|
|
155
|
+
readonly height: number;
|
|
156
|
+
/** Output format (webp, avif, jpeg, png) */
|
|
157
|
+
readonly format: string;
|
|
158
|
+
/** Output file size in bytes */
|
|
159
|
+
readonly size: number;
|
|
160
|
+
/** Original file size in bytes */
|
|
161
|
+
readonly originalSize?: number;
|
|
162
|
+
/** Output file path relative to media output dir */
|
|
163
|
+
readonly outputPath: string;
|
|
164
|
+
/** Responsive size variants */
|
|
165
|
+
readonly sizes: readonly CachedMediaSizeVariant[];
|
|
166
|
+
}
|
|
167
|
+
/**
|
|
168
|
+
* Cached size variant for responsive images
|
|
169
|
+
*/
|
|
170
|
+
interface CachedMediaSizeVariant {
|
|
171
|
+
/** Size suffix (xs, sm, md, lg, xl) */
|
|
172
|
+
readonly suffix: string;
|
|
173
|
+
/** Output file path relative to media output dir */
|
|
174
|
+
readonly outputPath: string;
|
|
175
|
+
/** Width of this variant */
|
|
176
|
+
readonly width: number;
|
|
177
|
+
/** Height of this variant */
|
|
178
|
+
readonly height: number;
|
|
179
|
+
/** File size in bytes */
|
|
180
|
+
readonly size: number;
|
|
181
|
+
}
|
|
182
|
+
/**
|
|
183
|
+
* Cache context passed to the processor for incremental builds.
|
|
184
|
+
* All caches are keyed by content hash (SHA-256).
|
|
185
|
+
*/
|
|
186
|
+
interface CacheContext {
|
|
187
|
+
/**
|
|
188
|
+
* Cached media metadata keyed by content hash.
|
|
189
|
+
* When a media file's hash matches a cached entry, processing can be skipped
|
|
190
|
+
* and the cached metadata used for HTML rendering (dimensions, paths).
|
|
191
|
+
*/
|
|
192
|
+
readonly media?: ReadonlyMap<string, CachedMediaMetadata>;
|
|
193
|
+
/**
|
|
194
|
+
* Cached text embeddings keyed by post content hash.
|
|
195
|
+
* When a post's hash matches a cached entry, embedding generation
|
|
196
|
+
* can be skipped and the cached vector used directly.
|
|
197
|
+
*/
|
|
198
|
+
readonly textEmbeddings?: ReadonlyMap<string, readonly number[]>;
|
|
199
|
+
/**
|
|
200
|
+
* Cached image embeddings keyed by media content hash.
|
|
201
|
+
* When a media file's hash matches a cached entry, CLIP embedding
|
|
202
|
+
* generation can be skipped and the cached vector used directly.
|
|
203
|
+
*/
|
|
204
|
+
readonly imageEmbeddings?: ReadonlyMap<string, readonly number[]>;
|
|
205
|
+
}
|
|
206
|
+
/**
|
|
207
|
+
* Statistics about cache usage during processing
|
|
208
|
+
*/
|
|
209
|
+
interface CacheStats {
|
|
210
|
+
/** Number of media files that used cached metadata */
|
|
211
|
+
readonly mediaCacheHits: number;
|
|
212
|
+
/** Number of media files that were processed fresh */
|
|
213
|
+
readonly mediaCacheMisses: number;
|
|
214
|
+
/** Number of posts that used cached text embeddings */
|
|
215
|
+
readonly textEmbeddingCacheHits: number;
|
|
216
|
+
/** Number of posts that required fresh text embedding generation */
|
|
217
|
+
readonly textEmbeddingCacheMisses: number;
|
|
218
|
+
/** Number of media files that used cached image embeddings */
|
|
219
|
+
readonly imageEmbeddingCacheHits: number;
|
|
220
|
+
/** Number of media files that required fresh image embedding generation */
|
|
221
|
+
readonly imageEmbeddingCacheMisses: number;
|
|
222
|
+
}
|
|
223
|
+
/**
|
|
224
|
+
* Create empty cache statistics
|
|
225
|
+
*/
|
|
226
|
+
declare const createEmptyCacheStats: () => CacheStats;
|
|
227
|
+
/**
|
|
228
|
+
* Build a media cache from a media.json manifest structure (see OUTPUT_FILES.MEDIAS)
|
|
229
|
+
*/
|
|
230
|
+
declare function buildMediaCacheFromManifest(medias: readonly {
|
|
231
|
+
metadata?: MediaMetadata;
|
|
232
|
+
outputPath: string;
|
|
233
|
+
sizes?: readonly MediaSizeVariant[];
|
|
234
|
+
}[]): Map<string, CachedMediaMetadata>;
|
|
235
|
+
/**
|
|
236
|
+
* Build an embedding cache from an embedding hash map file structure
|
|
237
|
+
*/
|
|
238
|
+
declare function buildEmbeddingCacheFromManifest(embeddingMap: Record<string, readonly number[]>): Map<string, readonly number[]>;
|
|
239
|
+
|
|
240
|
+
/**
|
|
241
|
+
* Output types for @repo-md/processor-core
|
|
242
|
+
*/
|
|
243
|
+
|
|
244
|
+
interface TocItem {
|
|
245
|
+
readonly text: string;
|
|
246
|
+
readonly depth: number;
|
|
247
|
+
readonly slug: string;
|
|
248
|
+
}
|
|
249
|
+
interface PostMetadata {
|
|
250
|
+
readonly createdAt: string;
|
|
251
|
+
readonly modifiedAt: string;
|
|
252
|
+
readonly processedAt: string;
|
|
253
|
+
readonly gitCreatedAt?: string;
|
|
254
|
+
readonly gitModifiedAt?: string;
|
|
255
|
+
}
|
|
256
|
+
interface PostCoverSize {
|
|
257
|
+
/** Size suffix (xs, sm, md, lg, xl) */
|
|
258
|
+
readonly suffix: string;
|
|
259
|
+
/** Output file path relative to output dir */
|
|
260
|
+
readonly path: string;
|
|
261
|
+
/** Full URL if domain configured */
|
|
262
|
+
readonly url?: string;
|
|
263
|
+
/** Width of this variant */
|
|
264
|
+
readonly width: number;
|
|
265
|
+
/** Height of this variant */
|
|
266
|
+
readonly height: number;
|
|
267
|
+
}
|
|
268
|
+
interface PostCover {
|
|
269
|
+
/** Original path from frontmatter */
|
|
270
|
+
readonly original: string;
|
|
271
|
+
/** Output file path relative to output dir */
|
|
272
|
+
readonly path: string;
|
|
273
|
+
/** Full URL if domain configured */
|
|
274
|
+
readonly url?: string;
|
|
275
|
+
/** Content hash */
|
|
276
|
+
readonly hash?: string;
|
|
277
|
+
/** Image width */
|
|
278
|
+
readonly width?: number;
|
|
279
|
+
/** Image height */
|
|
280
|
+
readonly height?: number;
|
|
281
|
+
/** Responsive image size variants */
|
|
282
|
+
readonly sizes?: readonly PostCoverSize[];
|
|
283
|
+
}
|
|
284
|
+
interface PostCoverError {
|
|
285
|
+
/** Original path from frontmatter */
|
|
286
|
+
readonly original: string;
|
|
287
|
+
/** Error message explaining why cover couldn't be resolved */
|
|
288
|
+
readonly error: string;
|
|
289
|
+
}
|
|
290
|
+
interface ProcessedPost {
|
|
291
|
+
/** Content hash for identification */
|
|
292
|
+
readonly hash: string;
|
|
293
|
+
/** URL-friendly slug */
|
|
294
|
+
readonly slug: string;
|
|
295
|
+
/** Post title (from frontmatter or filename) */
|
|
296
|
+
readonly title: string;
|
|
297
|
+
/** Original filename without extension */
|
|
298
|
+
readonly fileName: string;
|
|
299
|
+
/** Original file path relative to input directory */
|
|
300
|
+
readonly originalPath: string;
|
|
301
|
+
/** Rendered HTML content */
|
|
302
|
+
readonly content: string;
|
|
303
|
+
/** Original markdown content */
|
|
304
|
+
readonly markdown: string;
|
|
305
|
+
/** Plain text content (for search/embeddings) */
|
|
306
|
+
readonly plainText: string;
|
|
307
|
+
/** Excerpt/summary text */
|
|
308
|
+
readonly excerpt: string;
|
|
309
|
+
/** Word count */
|
|
310
|
+
readonly wordCount: number;
|
|
311
|
+
/** Table of contents */
|
|
312
|
+
readonly toc: readonly TocItem[];
|
|
313
|
+
/** Frontmatter data */
|
|
314
|
+
readonly frontmatter: Readonly<Record<string, unknown>>;
|
|
315
|
+
/** Timestamps and metadata */
|
|
316
|
+
readonly metadata: PostMetadata;
|
|
317
|
+
/** Cover image (resolved from frontmatter.cover) */
|
|
318
|
+
readonly cover?: PostCover | PostCoverError;
|
|
319
|
+
/** URL of first image (for cover) */
|
|
320
|
+
readonly firstImage?: string;
|
|
321
|
+
/** Hashes of posts this post links to */
|
|
322
|
+
readonly links?: readonly string[];
|
|
323
|
+
/** Text embedding vector */
|
|
324
|
+
readonly embedding?: readonly number[];
|
|
325
|
+
}
|
|
326
|
+
interface MediaMetadata {
|
|
327
|
+
readonly width?: number;
|
|
328
|
+
readonly height?: number;
|
|
329
|
+
readonly format?: string;
|
|
330
|
+
readonly size?: number;
|
|
331
|
+
readonly originalSize?: number;
|
|
332
|
+
readonly hash?: string;
|
|
333
|
+
}
|
|
334
|
+
interface MediaSizeVariant {
|
|
335
|
+
/** Size suffix (xs, sm, md, lg, xl) */
|
|
336
|
+
readonly suffix: string;
|
|
337
|
+
/** Output file path relative to output dir */
|
|
338
|
+
readonly outputPath: string;
|
|
339
|
+
/** Width of this variant */
|
|
340
|
+
readonly width: number;
|
|
341
|
+
/** Height of this variant */
|
|
342
|
+
readonly height: number;
|
|
343
|
+
/** File size in bytes */
|
|
344
|
+
readonly size: number;
|
|
345
|
+
}
|
|
346
|
+
interface ProcessedMedia {
|
|
347
|
+
/** Original file path relative to input */
|
|
348
|
+
readonly originalPath: string;
|
|
349
|
+
/** Output file path relative to output dir */
|
|
350
|
+
readonly outputPath: string;
|
|
351
|
+
/** Original filename */
|
|
352
|
+
readonly fileName: string;
|
|
353
|
+
/** Media type */
|
|
354
|
+
readonly type: 'image' | 'video' | 'audio' | 'media';
|
|
355
|
+
/** File metadata */
|
|
356
|
+
readonly metadata?: MediaMetadata;
|
|
357
|
+
/** Responsive image size variants */
|
|
358
|
+
readonly sizes?: readonly MediaSizeVariant[];
|
|
359
|
+
/** Image embedding vector */
|
|
360
|
+
readonly embedding?: readonly number[];
|
|
361
|
+
}
|
|
362
|
+
type RelationshipType = 'POST_LINKS_TO_POST' | 'POST_USE_IMAGE';
|
|
363
|
+
interface GraphNode {
|
|
364
|
+
readonly id: string;
|
|
365
|
+
readonly type: 'post' | 'media';
|
|
366
|
+
readonly label: string;
|
|
367
|
+
}
|
|
368
|
+
interface GraphEdge {
|
|
369
|
+
readonly source: string;
|
|
370
|
+
readonly target: string;
|
|
371
|
+
readonly type: RelationshipType;
|
|
372
|
+
}
|
|
373
|
+
interface GraphData {
|
|
374
|
+
readonly nodes: readonly GraphNode[];
|
|
375
|
+
readonly edges: readonly GraphEdge[];
|
|
376
|
+
}
|
|
377
|
+
/** Maps original paths to optimized public paths */
|
|
378
|
+
type MediaPathMap = Readonly<Record<string, string>>;
|
|
379
|
+
/** Maps original paths to content hashes */
|
|
380
|
+
type PathHashMap = Readonly<Record<string, string>>;
|
|
381
|
+
/** Maps slugs to post hashes */
|
|
382
|
+
type SlugHashMap = Readonly<Record<string, string>>;
|
|
383
|
+
/** Maps hashes to embedding vectors */
|
|
384
|
+
type EmbeddingMap = ReadonlyMap<string, readonly number[]>;
|
|
385
|
+
interface OutputFiles {
|
|
386
|
+
readonly posts: string;
|
|
387
|
+
readonly media: string;
|
|
388
|
+
readonly slugMap: string;
|
|
389
|
+
readonly pathMap: string;
|
|
390
|
+
readonly issues: string;
|
|
391
|
+
readonly graph?: string;
|
|
392
|
+
readonly similarity?: string;
|
|
393
|
+
readonly database?: string;
|
|
394
|
+
}
|
|
395
|
+
interface BuildReport {
|
|
396
|
+
/** Text embedding stats (if embeddings were generated) */
|
|
397
|
+
readonly postEmbeddings?: {
|
|
398
|
+
readonly filesProcessed: number;
|
|
399
|
+
readonly dimensions: number;
|
|
400
|
+
readonly model: string;
|
|
401
|
+
};
|
|
402
|
+
/** Image embedding stats (if image embeddings were generated) */
|
|
403
|
+
readonly mediaEmbeddings?: {
|
|
404
|
+
readonly filesProcessed: number;
|
|
405
|
+
readonly dimensions: number;
|
|
406
|
+
readonly model: string;
|
|
407
|
+
};
|
|
408
|
+
/** Similarity computation stats */
|
|
409
|
+
readonly similarity?: {
|
|
410
|
+
readonly pairsComputed: number;
|
|
411
|
+
readonly topN: number;
|
|
412
|
+
readonly postsWithEmbeddings: number;
|
|
413
|
+
};
|
|
414
|
+
/** Phase timing in milliseconds */
|
|
415
|
+
readonly timing?: {
|
|
416
|
+
readonly totalMs: number;
|
|
417
|
+
readonly phases: Readonly<Record<string, number>>;
|
|
418
|
+
};
|
|
419
|
+
}
|
|
420
|
+
interface ProcessResult {
|
|
421
|
+
/** Processed posts */
|
|
422
|
+
readonly posts: readonly ProcessedPost[];
|
|
423
|
+
/** Processed media files */
|
|
424
|
+
readonly media: readonly ProcessedMedia[];
|
|
425
|
+
/** Output directory path */
|
|
426
|
+
readonly outputDir: string;
|
|
427
|
+
/** Output file paths */
|
|
428
|
+
readonly outputFiles: OutputFiles;
|
|
429
|
+
/** Processing issues report */
|
|
430
|
+
readonly issues: IssueReport;
|
|
431
|
+
/** Graph data (if trackRelationships enabled) */
|
|
432
|
+
readonly graph?: GraphData;
|
|
433
|
+
/** Cache statistics (if cache was provided) */
|
|
434
|
+
readonly cacheStats?: CacheStats;
|
|
435
|
+
/** Build report with stats about embeddings, similarity, timing */
|
|
436
|
+
readonly report?: BuildReport;
|
|
437
|
+
}
|
|
438
|
+
declare const OUTPUT_FILES: {
|
|
439
|
+
readonly POSTS: "posts.json";
|
|
440
|
+
readonly POSTS_SLUG_MAP: "posts-slug-map.json";
|
|
441
|
+
readonly POSTS_PATH_MAP: "posts-path-map.json";
|
|
442
|
+
readonly MEDIAS: "media.json";
|
|
443
|
+
readonly MEDIA_PATH_MAP: "media-path-map.json";
|
|
444
|
+
readonly GRAPH: "graph.json";
|
|
445
|
+
readonly TEXT_EMBEDDINGS: "posts-embedding-hash-map.json";
|
|
446
|
+
readonly IMAGE_EMBEDDINGS: "media-embedding-hash-map.json";
|
|
447
|
+
readonly SIMILARITY: "similarity.json";
|
|
448
|
+
readonly DATABASE: "repo.db";
|
|
449
|
+
readonly ISSUES: "processor-issues.json";
|
|
450
|
+
};
|
|
451
|
+
|
|
452
|
+
export { type ProcessResult as A, type BrokenLinkIssue as B, type CacheContext as C, type SlugHashMap as D, type EmbeddingErrorIssue as E, buildEmbeddingCacheFromManifest as F, type GraphData as G, buildMediaCacheFromManifest as H, type IssueFilterOptions as I, createEmptyCacheStats as J, isBrokenLinkIssue as K, isEmbeddingErrorIssue as L, type MediaMetadata as M, isMediaProcessingIssue as N, OUTPUT_FILES as O, type ProcessingIssue as P, isMermaidErrorIssue as Q, type RelationshipType as R, type SlugConflictIssue as S, type TocItem as T, isMissingMediaIssue as U, isPluginErrorIssue as V, isSlugConflictIssue as W, type IssueSeverity as a, type IssueReport as b, type IssueSummary as c, type ProcessedPost as d, type ProcessedMedia as e, type BuildReport as f, type CacheStats as g, type CachedMediaMetadata as h, type CachedMediaSizeVariant as i, type EmbeddingMap as j, type GraphEdge as k, type GraphNode as l, type IssueCategory as m, type IssueModule as n, type MediaPathMap as o, type MediaProcessingIssue as p, type MediaSizeVariant as q, type MermaidErrorIssue as r, type MissingMediaIssue as s, type OutputFiles as t, type PathHashMap as u, type PluginErrorIssue as v, type PostCover as w, type PostCoverError as x, type PostCoverSize as y, type PostMetadata as z };
|
|
@@ -0,0 +1,166 @@
|
|
|
1
|
+
'use strict';
|
|
2
|
+
|
|
3
|
+
Object.defineProperty(exports, '__esModule', { value: true });
|
|
4
|
+
|
|
5
|
+
require('../chunk-OBGZSXTJ.cjs');
|
|
6
|
+
|
|
7
|
+
// src/plugins/embed-cloudflare-ai.ts
|
|
8
|
+
var DEFAULT_MODEL = "@cf/baai/bge-small-en-v1.5";
// Output vector size of each known model. Used to report `dimensions`
// before any request has been made.
var MODEL_DIMENSIONS = {
  "@cf/baai/bge-small-en-v1.5": 384,
  "@cf/baai/bge-base-en-v1.5": 768,
  "@cf/baai/bge-large-en-v1.5": 1024,
  "@cf/baai/bge-m3": 1024
};
/**
 * Text embedding plugin backed by Cloudflare Workers AI.
 *
 * Runs in one of two modes, chosen by the options passed to the constructor:
 * - binding mode: `options.binding.run(model, input)` is called directly;
 * - REST API mode: requests are sent with `fetch` using `accountId` + `apiToken`.
 */
var CloudflareAITextEmbedder = class {
  name = "textEmbedder";
  model;
  dimensions;
  ready = false;
  context = null;
  options;
  /**
   * @param options Embedder options; every field is optional here, but
   *   `initialize()` rejects unless a binding or accountId + apiToken is set.
   * @throws {RangeError} When `batchSize` is not a number >= 1. Such values
   *   previously made `batchEmbed` loop forever (0 / negative) or return
   *   truncated results (NaN).
   */
  constructor(options = {}) {
    const batchSize = options.batchSize ?? 100;
    if (typeof batchSize !== "number" || !(batchSize >= 1)) {
      throw new RangeError(
        `CloudflareAITextEmbedder batchSize must be a number >= 1 (received ${String(batchSize)})`
      );
    }
    this.options = {
      binding: options.binding,
      accountId: options.accountId,
      apiToken: options.apiToken,
      modelId: options.modelId ?? DEFAULT_MODEL,
      pooling: options.pooling ?? "cls",
      batchSize,
      timeout: options.timeout ?? 3e4
    };
    this.model = this.options.modelId;
    // Best guess until initialize() measures the real vector size.
    this.dimensions = MODEL_DIMENSIONS[this.model] ?? 384;
  }
  /**
   * Validates the configuration and sends one probe embedding request.
   * On success the embedder is marked ready and `dimensions` is set to the
   * length of the probe vector, so models missing from MODEL_DIMENSIONS
   * report their real size instead of the 384 fallback.
   *
   * @param context Plugin context; only `context.log(message, level)` is used.
   * @throws {Error} When neither a binding nor accountId + apiToken is
   *   configured, or when the probe request fails (the failure is logged and
   *   rethrown).
   */
  async initialize(context) {
    this.context = context;
    const mode = this.options.binding ? "binding" : "REST API";
    context.log(
      `Initializing CloudflareAITextEmbedder (${mode} mode, model: ${this.model}, pooling: ${this.options.pooling})`,
      "info"
    );
    if (!this.options.binding && (!this.options.accountId || !this.options.apiToken)) {
      throw new Error(
        "CloudflareAITextEmbedder requires either a binding (env.AI) or accountId + apiToken for REST API mode"
      );
    }
    try {
      const probe = await this.embed("test");
      if (Array.isArray(probe) && probe.length > 0) {
        // Trust the service over the lookup table / fallback value.
        this.dimensions = probe.length;
      }
      context.log(
        `CloudflareAITextEmbedder initialized (${this.dimensions} dimensions)`,
        "info"
      );
      this.ready = true;
    } catch (error) {
      const msg = error instanceof Error ? error.message : String(error);
      context.log(`Failed to initialize Cloudflare AI embedder: ${msg}`, "error");
      throw error;
    }
  }
  /** @returns true once `initialize()` has succeeded and until `dispose()` is called. */
  isReady() {
    return this.ready;
  }
  /** Marks the embedder as not ready. */
  async dispose() {
    this.ready = false;
  }
  /**
   * Embeds a single text.
   * @param text Text to embed.
   * @returns The embedding vector for `text`.
   * @throws {Error} When the service returns no usable vector.
   */
  async embed(text) {
    const results = await this.batchEmbed([text]);
    const result = results[0];
    if (!result) {
      throw new Error("Failed to generate embedding");
    }
    return result;
  }
  /**
   * Embeds many texts, sending them in chunks of `options.batchSize`.
   * Chunks are requested one after another (not concurrently) and results
   * are returned in input order.
   *
   * @param texts Texts to embed; an empty array returns `[]` without a request.
   * @returns One embedding vector per input text.
   */
  async batchEmbed(texts) {
    if (texts.length === 0) {
      return [];
    }
    const results = [];
    const batchSize = this.options.batchSize;
    for (let i = 0; i < texts.length; i += batchSize) {
      const batch = texts.slice(i, i + batchSize);
      const batchResults = await this.embedBatch(batch);
      results.push(...batchResults);
      // Progress is only reported for larger jobs and once a context is set.
      if (texts.length > 50 && this.context) {
        this.context.log(
          `Embedding progress: ${Math.min(i + batchSize, texts.length)}/${texts.length}`,
          "debug"
        );
      }
    }
    return results;
  }
  /**
   * Sends one chunk through the configured transport and checks that the
   * service returned exactly one vector per input, so embeddings can never
   * be silently paired with the wrong text.
   */
  async embedBatch(texts) {
    const vectors = this.options.binding ? await this.embedWithBinding(texts) : await this.embedWithRestApi(texts);
    const received = Array.isArray(vectors) ? vectors.length : 0;
    if (received !== texts.length) {
      throw new Error(
        `Cloudflare AI returned ${received} embeddings for ${texts.length} inputs`
      );
    }
    return vectors;
  }
  /** Runs the model through the Workers AI binding and returns `response.data`. */
  async embedWithBinding(texts) {
    const binding = this.options.binding;
    const response = await binding.run(this.model, {
      text: texts,
      pooling: this.options.pooling
    });
    if (!response?.data) {
      throw new Error("Invalid response from Cloudflare AI binding");
    }
    return response.data;
  }
  /**
   * Runs the model through the Cloudflare REST API and returns `result.data`.
   * The request (including reading the body) is aborted after
   * `options.timeout` milliseconds.
   *
   * @throws {Error} On a non-2xx status, an unsuccessful API payload, or a timeout.
   */
  async embedWithRestApi(texts) {
    const url = `https://api.cloudflare.com/client/v4/accounts/${this.options.accountId}/ai/run/${this.model}`;
    const controller = new AbortController();
    const timeoutId = setTimeout(() => controller.abort(), this.options.timeout);
    try {
      const response = await fetch(url, {
        method: "POST",
        headers: {
          "Authorization": `Bearer ${this.options.apiToken}`,
          "Content-Type": "application/json"
        },
        body: JSON.stringify({
          text: texts,
          pooling: this.options.pooling
        }),
        signal: controller.signal
      });
      if (!response.ok) {
        const errorText = await response.text();
        throw new Error(`Cloudflare AI API error (${response.status}): ${errorText}`);
      }
      const data = await response.json();
      if (!data.success || !data.result?.data) {
        const errorMsg = data.errors?.[0]?.message ?? "Unknown error";
        throw new Error(`Cloudflare AI API error: ${errorMsg}`);
      }
      return data.result.data;
    } catch (error) {
      // Only our own timer aborts this controller, so an aborted signal
      // means the timeout elapsed; report that instead of a generic abort.
      if (controller.signal.aborted) {
        throw new Error(
          `Cloudflare AI API request timed out after ${this.options.timeout}ms`
        );
      }
      throw error;
    } finally {
      clearTimeout(timeoutId);
    }
  }
};
|
|
142
|
+
/**
 * Creates an embedder that talks to Workers AI through the given binding.
 * `binding` always wins over any `binding` key present in `options`.
 */
var createCloudflareAIEmbedderWithBinding = (binding, options) => new CloudflareAITextEmbedder(Object.assign({}, options, { binding }));
|
|
145
|
+
/**
 * Creates an embedder that uses the Cloudflare REST API with explicit
 * credentials. The credentials passed here override same-named keys in
 * `options`.
 */
var createCloudflareAIEmbedderWithRestApi = (accountId, apiToken, options) => {
  const credentials = { accountId, apiToken };
  return new CloudflareAITextEmbedder(Object.assign({}, options, credentials));
};
|
|
148
|
+
/**
 * Creates a REST API embedder using credentials read from the
 * CLOUDFLARE_ACCOUNT_ID and CLOUDFLARE_API_TOKEN environment variables.
 *
 * @param options Extra embedder options; credentials from the environment
 *   override any `accountId` / `apiToken` keys in it.
 * @throws {Error} When either environment variable is missing or empty,
 *   including in runtimes that expose no `process.env` at all.
 */
var createCloudflareAIEmbedderFromEnv = (options) => {
  // `process` is not a global in every runtime; without this guard the
  // caller would get a bare ReferenceError instead of the message below.
  const env = typeof process !== "undefined" && process.env ? process.env : {};
  const accountId = env["CLOUDFLARE_ACCOUNT_ID"];
  const apiToken = env["CLOUDFLARE_API_TOKEN"];
  if (!accountId || !apiToken) {
    throw new Error(
      "CLOUDFLARE_ACCOUNT_ID and CLOUDFLARE_API_TOKEN environment variables are required"
    );
  }
  return new CloudflareAITextEmbedder({ ...options, accountId, apiToken });
};
|
|
158
|
+
var embed_cloudflare_ai_default = CloudflareAITextEmbedder;
|
|
159
|
+
|
|
160
|
+
exports.CLOUDFLARE_DEFAULT_MODEL = DEFAULT_MODEL;
|
|
161
|
+
exports.CLOUDFLARE_MODEL_DIMENSIONS = MODEL_DIMENSIONS;
|
|
162
|
+
exports.CloudflareAITextEmbedder = CloudflareAITextEmbedder;
|
|
163
|
+
exports.createCloudflareAIEmbedderFromEnv = createCloudflareAIEmbedderFromEnv;
|
|
164
|
+
exports.createCloudflareAIEmbedderWithBinding = createCloudflareAIEmbedderWithBinding;
|
|
165
|
+
exports.createCloudflareAIEmbedderWithRestApi = createCloudflareAIEmbedderWithRestApi;
|
|
166
|
+
exports.default = embed_cloudflare_ai_default;
|
|
@@ -0,0 +1,73 @@
|
|
|
1
|
+
import { T as TextEmbeddingPlugin, P as PluginContext } from '../config-ADPY6IQS.cjs';
|
|
2
|
+
import '../output-0P0br3Jc.cjs';
|
|
3
|
+
|
|
4
|
+
/**
|
|
5
|
+
* Cloudflare Workers AI Text Embedding Plugin
|
|
6
|
+
*
|
|
7
|
+
* Uses Cloudflare Workers AI for text embeddings.
|
|
8
|
+
* Supports both:
|
|
9
|
+
* - Binding mode: When running inside a Cloudflare Worker (env.AI)
|
|
10
|
+
* - REST API mode: When running outside CF (uses fetch)
|
|
11
|
+
*
|
|
12
|
+
* Available models:
|
|
13
|
+
* - @cf/baai/bge-small-en-v1.5 (384 dims, fast, English)
|
|
14
|
+
* - @cf/baai/bge-base-en-v1.5 (768 dims, balanced, English)
|
|
15
|
+
* - @cf/baai/bge-large-en-v1.5 (1024 dims, best quality, English)
|
|
16
|
+
* - @cf/baai/bge-m3 (1024 dims, multilingual)
|
|
17
|
+
*
|
|
18
|
+
* @example
|
|
19
|
+
* ```ts
|
|
20
|
+
* // In a Cloudflare Worker:
|
|
21
|
+
* import { createCloudflareAIEmbedderWithBinding } from 'folderblog/plugins/embed-cloudflare-ai';
|
|
22
|
+
* const embedder = createCloudflareAIEmbedderWithBinding(env.AI);
|
|
23
|
+
*
|
|
24
|
+
* // External via REST API:
|
|
25
|
+
* import { createCloudflareAIEmbedderFromEnv } from 'folderblog/plugins/embed-cloudflare-ai';
|
|
26
|
+
* const embedder = createCloudflareAIEmbedderFromEnv();
|
|
27
|
+
* ```
|
|
28
|
+
*/
|
|
29
|
+
|
|
30
|
+
interface CloudflareAIBinding {
|
|
31
|
+
run<T = unknown>(model: string, input: Record<string, unknown>): Promise<T>;
|
|
32
|
+
}
|
|
33
|
+
type PoolingStrategy = 'mean' | 'cls';
|
|
34
|
+
interface CloudflareAIEmbedderOptions {
|
|
35
|
+
/** Cloudflare Workers AI binding (env.AI). When provided, uses direct binding. */
|
|
36
|
+
readonly binding?: CloudflareAIBinding;
|
|
37
|
+
/** Cloudflare Account ID (required for REST API mode) */
|
|
38
|
+
readonly accountId?: string;
|
|
39
|
+
/** Cloudflare API Token with Workers AI permissions (required for REST API mode) */
|
|
40
|
+
readonly apiToken?: string;
|
|
41
|
+
/** Model ID (default: '@cf/baai/bge-small-en-v1.5') */
|
|
42
|
+
readonly modelId?: string;
|
|
43
|
+
/** Pooling strategy (default: 'cls' for better accuracy) */
|
|
44
|
+
readonly pooling?: PoolingStrategy;
|
|
45
|
+
/** Maximum number of texts sent per request; batches are sent one after another (default: 100) */
|
|
46
|
+
readonly batchSize?: number;
|
|
47
|
+
/** Request timeout in ms (default: 30000) */
|
|
48
|
+
readonly timeout?: number;
|
|
49
|
+
}
|
|
50
|
+
declare const DEFAULT_MODEL = "@cf/baai/bge-small-en-v1.5";
|
|
51
|
+
declare const MODEL_DIMENSIONS: Record<string, number>;
|
|
52
|
+
declare class CloudflareAITextEmbedder implements TextEmbeddingPlugin {
|
|
53
|
+
readonly name: "textEmbedder";
|
|
54
|
+
readonly model: string;
|
|
55
|
+
readonly dimensions: number;
|
|
56
|
+
private ready;
|
|
57
|
+
private context;
|
|
58
|
+
private options;
|
|
59
|
+
constructor(options?: CloudflareAIEmbedderOptions);
|
|
60
|
+
initialize(context: PluginContext): Promise<void>;
|
|
61
|
+
isReady(): boolean;
|
|
62
|
+
dispose(): Promise<void>;
|
|
63
|
+
embed(text: string): Promise<readonly number[]>;
|
|
64
|
+
batchEmbed(texts: readonly string[]): Promise<readonly (readonly number[])[]>;
|
|
65
|
+
private embedBatch;
|
|
66
|
+
private embedWithBinding;
|
|
67
|
+
private embedWithRestApi;
|
|
68
|
+
}
|
|
69
|
+
declare const createCloudflareAIEmbedderWithBinding: (binding: CloudflareAIBinding, options?: Omit<CloudflareAIEmbedderOptions, "binding" | "accountId" | "apiToken">) => TextEmbeddingPlugin;
|
|
70
|
+
declare const createCloudflareAIEmbedderWithRestApi: (accountId: string, apiToken: string, options?: Omit<CloudflareAIEmbedderOptions, "binding" | "accountId" | "apiToken">) => TextEmbeddingPlugin;
|
|
71
|
+
declare const createCloudflareAIEmbedderFromEnv: (options?: Omit<CloudflareAIEmbedderOptions, "binding" | "accountId" | "apiToken">) => TextEmbeddingPlugin;
|
|
72
|
+
|
|
73
|
+
export { DEFAULT_MODEL as CLOUDFLARE_DEFAULT_MODEL, MODEL_DIMENSIONS as CLOUDFLARE_MODEL_DIMENSIONS, type CloudflareAIBinding, type CloudflareAIEmbedderOptions, CloudflareAITextEmbedder, type PoolingStrategy, createCloudflareAIEmbedderFromEnv, createCloudflareAIEmbedderWithBinding, createCloudflareAIEmbedderWithRestApi, CloudflareAITextEmbedder as default };
|