folderblog 0.0.2 → 0.0.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (61)
  1. package/dist/{chunk-24MKFHML.cjs → chunk-2TZSVPNP.cjs} +5 -0
  2. package/dist/{chunk-HMQIQUPB.cjs → chunk-6TFXNIO6.cjs} +108 -0
  3. package/dist/{chunk-ZRUBI3GH.js → chunk-B43UAOPC.js} +106 -1
  4. package/dist/{chunk-XP5J4LFJ.js → chunk-D26H5722.js} +5 -0
  5. package/dist/chunk-E7PYGJA7.cjs +39 -0
  6. package/dist/{chunk-QA4KPPTA.cjs → chunk-J3Y3HEBF.cjs} +84 -13
  7. package/dist/{chunk-PARGDJNY.js → chunk-K76XLEC7.js} +1 -1
  8. package/dist/{chunk-IXP35S24.js → chunk-LPPBVXJ7.js} +83 -12
  9. package/dist/chunk-Q6EXKX6K.js +17 -0
  10. package/dist/{chunk-4ZJGUMHS.cjs → chunk-Q6EYTOTM.cjs} +2 -2
  11. package/dist/chunk-UCXXH2MP.cjs +20 -0
  12. package/dist/chunk-XQD3UUL5.js +34 -0
  13. package/dist/cli/bin.cjs +5 -5
  14. package/dist/cli/bin.js +4 -4
  15. package/dist/cli/index.cjs +5 -5
  16. package/dist/cli/index.js +4 -4
  17. package/dist/config-ADPY6IQS.d.cts +473 -0
  18. package/dist/config-Dctsdeo6.d.ts +473 -0
  19. package/dist/index.cjs +157 -187
  20. package/dist/index.d.cts +4 -3
  21. package/dist/index.d.ts +4 -3
  22. package/dist/index.js +16 -69
  23. package/dist/local/index.cjs +785 -0
  24. package/dist/local/index.d.cts +268 -0
  25. package/dist/local/index.d.ts +268 -0
  26. package/dist/local/index.js +772 -0
  27. package/dist/output-0P0br3Jc.d.cts +452 -0
  28. package/dist/output-0P0br3Jc.d.ts +452 -0
  29. package/dist/plugins/embed-cloudflare-ai.cjs +166 -0
  30. package/dist/plugins/embed-cloudflare-ai.d.cts +73 -0
  31. package/dist/plugins/embed-cloudflare-ai.d.ts +73 -0
  32. package/dist/plugins/embed-cloudflare-ai.js +156 -0
  33. package/dist/plugins/embed-transformers.cjs +121 -0
  34. package/dist/plugins/embed-transformers.d.cts +55 -0
  35. package/dist/plugins/embed-transformers.d.ts +55 -0
  36. package/dist/plugins/embed-transformers.js +113 -0
  37. package/dist/plugins/similarity.cjs +19 -0
  38. package/dist/plugins/similarity.d.cts +41 -0
  39. package/dist/plugins/similarity.d.ts +41 -0
  40. package/dist/plugins/similarity.js +2 -0
  41. package/dist/processor/index.cjs +123 -111
  42. package/dist/processor/index.d.cts +6 -2
  43. package/dist/processor/index.d.ts +6 -2
  44. package/dist/processor/index.js +3 -3
  45. package/dist/processor/plugins.cjs +24 -12
  46. package/dist/processor/plugins.d.cts +4 -2
  47. package/dist/processor/plugins.d.ts +4 -2
  48. package/dist/processor/plugins.js +1 -1
  49. package/dist/processor/types.cjs +16 -16
  50. package/dist/processor/types.d.cts +3 -2
  51. package/dist/processor/types.d.ts +3 -2
  52. package/dist/processor/types.js +1 -1
  53. package/dist/seo/index.cjs +289 -0
  54. package/dist/seo/index.d.cts +95 -0
  55. package/dist/seo/index.d.ts +95 -0
  56. package/dist/seo/index.js +274 -0
  57. package/dist/server/index.cjs +2 -5
  58. package/dist/server/index.js +2 -5
  59. package/package.json +36 -1
  60. package/dist/config-DFr-htlO.d.cts +0 -887
  61. package/dist/config-DFr-htlO.d.ts +0 -887
@@ -46,6 +46,11 @@ var withDefaults = (config) => ({
46
46
  removeDeadLinks: false,
47
47
  ...config.pipeline
48
48
  },
49
+ similarity: {
50
+ topN: 5,
51
+ threshold: 0,
52
+ ...config.similarity
53
+ },
49
54
  debug: {
50
55
  level: 0,
51
56
  timing: false,
@@ -374,14 +374,122 @@ var createAllNoOpPlugins = () => ({
374
374
  database: new NoOpDatabase()
375
375
  });
376
376
 
377
+ // ../processor/src/plugins/similarity.ts
378
/**
 * Similarity plugin that scores posts against each other using the cosine
 * similarity of their embeddings.
 *
 * Declares a dependency on the "textEmbedder" plugin via `requires`.
 */
var CosineSimilarityPlugin = class {
  name = "similarity";
  requires = ["textEmbedder"];
  ready = false;
  context = null;
  topN;
  threshold;

  /**
   * @param {{ topN?: number, threshold?: number }} [options]
   *   `topN` is the maximum number of similar posts kept per post (default 5);
   *   `threshold` is the minimum score a pair needs to be listed (default 0).
   */
  constructor(options = {}) {
    const { topN, threshold } = options;
    this.topN = topN ?? 5;
    this.threshold = threshold ?? 0;
  }

  /**
   * Stores the plugin context, marks the plugin ready and logs the settings.
   * @param {{ log: Function }} context
   */
  async initialize(context) {
    this.context = context;
    this.ready = true;
    const summary = `CosineSimilarityPlugin initialized (topN=${this.topN}, threshold=${this.threshold})`;
    context.log(summary, "info");
  }

  /** @returns {boolean} whether `initialize` has run and `dispose` has not. */
  isReady() {
    return this.ready;
  }

  /** Marks the plugin as no longer ready. */
  async dispose() {
    this.ready = false;
  }

  /**
   * Cosine similarity of two embedding vectors.
   * @param {number[]} a
   * @param {number[]} b
   * @returns {number}
   */
  computeSimilarity(a, b) {
    return cosineSimilarity(a, b);
  }

  /**
   * Builds pairwise scores and a per-post list of the most similar posts.
   *
   * Posts without a non-empty `embedding` are ignored. With fewer than two
   * usable posts the result contains empty maps and `pairCount` 0.
   *
   * @param {Array<{ hash: string, embedding?: number[] }>} posts
   * @returns {Promise<{ pairwiseScores: Map, similarPosts: Map, metadata: object }>}
   */
  async generateSimilarityMap(posts) {
    const embedded = posts.filter((p) => p.embedding && p.embedding.length > 0);
    const pairwiseScores = new Map();
    const similarPosts = new Map();

    // Both exits share the same result shape; only the pair count differs.
    const buildResult = (pairCount) => ({
      pairwiseScores,
      similarPosts,
      metadata: {
        computedAt: new Date().toISOString(),
        postCount: embedded.length,
        pairCount
      }
    });

    if (embedded.length < 2) {
      this.context?.log(
        `Skipping similarity: only ${embedded.length} posts with embeddings`,
        "debug"
      );
      return buildResult(0);
    }

    let pairCount = 0;

    // Each unordered pair is scored once; the key puts the smaller hash first.
    const scoreFor = (left, right) => {
      const key = left.hash < right.hash ? `${left.hash}-${right.hash}` : `${right.hash}-${left.hash}`;
      if (pairwiseScores.has(key)) {
        return pairwiseScores.get(key);
      }
      const fresh = cosineSimilarity(left.embedding, right.embedding);
      pairwiseScores.set(key, fresh);
      pairCount++;
      return fresh;
    };

    for (const post of embedded) {
      const ranked = embedded
        .filter((other) => other.hash !== post.hash)
        .map((other) => ({ hash: other.hash, score: scoreFor(post, other) }))
        .filter((entry) => entry.score >= this.threshold)
        .sort((x, y) => y.score - x.score);
      similarPosts.set(
        post.hash,
        ranked.slice(0, this.topN).map((entry) => entry.hash)
      );
    }

    this.context?.log(
      `Computed similarity: ${pairCount} pairs for ${embedded.length} posts`,
      "info"
    );
    return buildResult(pairCount);
  }
};
464
/**
 * Cosine similarity between two numeric vectors.
 *
 * When the vectors differ in length the shorter one is treated as
 * zero-padded, so the result is the same regardless of argument order.
 * Missing (null/undefined) components count as 0.
 *
 * @param {ArrayLike<number>} a
 * @param {ArrayLike<number>} b
 * @returns {number} dot(a, b) / (|a| * |b|), or 0 when either vector has zero magnitude.
 */
function cosineSimilarity(a, b) {
  // Walk the longer vector: stopping at a.length would drop the tail of b
  // from normB and make f(a, b) differ from f(b, a).
  const length = Math.max(a.length, b.length);
  let dot = 0;
  let normA = 0;
  let normB = 0;
  for (let i = 0; i < length; i++) {
    const aVal = a[i] ?? 0;
    const bVal = b[i] ?? 0;
    dot += aVal * bVal;
    normA += aVal * aVal;
    normB += bVal * bVal;
  }
  const magnitude = Math.sqrt(normA) * Math.sqrt(normB);
  // A zero vector has no direction; report "no similarity" instead of NaN.
  return magnitude === 0 ? 0 : dot / magnitude;
}
478
/**
 * Factory for the cosine-similarity plugin.
 * @param {{ topN?: number, threshold?: number }} [options] forwarded to the constructor.
 * @returns {CosineSimilarityPlugin}
 */
var createSimilarityPlugin = (options) => new CosineSimilarityPlugin(options);
481
+
377
482
  exports.CopyOnlyImageProcessor = CopyOnlyImageProcessor;
483
+ exports.CosineSimilarityPlugin = CosineSimilarityPlugin;
378
484
  exports.NoOpDatabase = NoOpDatabase;
379
485
  exports.NoOpImageEmbedder = NoOpImageEmbedder;
380
486
  exports.NoOpSimilarity = NoOpSimilarity;
381
487
  exports.NoOpTextEmbedder = NoOpTextEmbedder;
382
488
  exports.PassthroughMermaidRenderer = PassthroughMermaidRenderer;
383
489
  exports.PluginManager = PluginManager;
490
+ exports.cosineSimilarity = cosineSimilarity;
384
491
  exports.createAllNoOpPlugins = createAllNoOpPlugins;
385
492
  exports.createDefaultPlugins = createDefaultPlugins;
386
493
  exports.createPluginManager = createPluginManager;
494
+ exports.createSimilarityPlugin = createSimilarityPlugin;
387
495
  exports.topologicalSort = topologicalSort;
@@ -367,4 +367,109 @@ var createAllNoOpPlugins = () => ({
367
367
  database: new NoOpDatabase()
368
368
  });
369
369
 
370
- export { CopyOnlyImageProcessor, NoOpDatabase, NoOpImageEmbedder, NoOpSimilarity, NoOpTextEmbedder, PassthroughMermaidRenderer, PluginManager, createAllNoOpPlugins, createDefaultPlugins, createPluginManager, topologicalSort };
370
+ // ../processor/src/plugins/similarity.ts
371
/**
 * Similarity plugin that scores posts against each other using the cosine
 * similarity of their embeddings.
 *
 * Declares a dependency on the "textEmbedder" plugin via `requires`.
 */
var CosineSimilarityPlugin = class {
  name = "similarity";
  requires = ["textEmbedder"];
  ready = false;
  context = null;
  topN;
  threshold;

  /**
   * @param {{ topN?: number, threshold?: number }} [options]
   *   `topN` is the maximum number of similar posts kept per post (default 5);
   *   `threshold` is the minimum score a pair needs to be listed (default 0).
   */
  constructor(options = {}) {
    const { topN, threshold } = options;
    this.topN = topN ?? 5;
    this.threshold = threshold ?? 0;
  }

  /**
   * Stores the plugin context, marks the plugin ready and logs the settings.
   * @param {{ log: Function }} context
   */
  async initialize(context) {
    this.context = context;
    this.ready = true;
    const summary = `CosineSimilarityPlugin initialized (topN=${this.topN}, threshold=${this.threshold})`;
    context.log(summary, "info");
  }

  /** @returns {boolean} whether `initialize` has run and `dispose` has not. */
  isReady() {
    return this.ready;
  }

  /** Marks the plugin as no longer ready. */
  async dispose() {
    this.ready = false;
  }

  /**
   * Cosine similarity of two embedding vectors.
   * @param {number[]} a
   * @param {number[]} b
   * @returns {number}
   */
  computeSimilarity(a, b) {
    return cosineSimilarity(a, b);
  }

  /**
   * Builds pairwise scores and a per-post list of the most similar posts.
   *
   * Posts without a non-empty `embedding` are ignored. With fewer than two
   * usable posts the result contains empty maps and `pairCount` 0.
   *
   * @param {Array<{ hash: string, embedding?: number[] }>} posts
   * @returns {Promise<{ pairwiseScores: Map, similarPosts: Map, metadata: object }>}
   */
  async generateSimilarityMap(posts) {
    const embedded = posts.filter((p) => p.embedding && p.embedding.length > 0);
    const pairwiseScores = new Map();
    const similarPosts = new Map();

    // Both exits share the same result shape; only the pair count differs.
    const buildResult = (pairCount) => ({
      pairwiseScores,
      similarPosts,
      metadata: {
        computedAt: new Date().toISOString(),
        postCount: embedded.length,
        pairCount
      }
    });

    if (embedded.length < 2) {
      this.context?.log(
        `Skipping similarity: only ${embedded.length} posts with embeddings`,
        "debug"
      );
      return buildResult(0);
    }

    let pairCount = 0;

    // Each unordered pair is scored once; the key puts the smaller hash first.
    const scoreFor = (left, right) => {
      const key = left.hash < right.hash ? `${left.hash}-${right.hash}` : `${right.hash}-${left.hash}`;
      if (pairwiseScores.has(key)) {
        return pairwiseScores.get(key);
      }
      const fresh = cosineSimilarity(left.embedding, right.embedding);
      pairwiseScores.set(key, fresh);
      pairCount++;
      return fresh;
    };

    for (const post of embedded) {
      const ranked = embedded
        .filter((other) => other.hash !== post.hash)
        .map((other) => ({ hash: other.hash, score: scoreFor(post, other) }))
        .filter((entry) => entry.score >= this.threshold)
        .sort((x, y) => y.score - x.score);
      similarPosts.set(
        post.hash,
        ranked.slice(0, this.topN).map((entry) => entry.hash)
      );
    }

    this.context?.log(
      `Computed similarity: ${pairCount} pairs for ${embedded.length} posts`,
      "info"
    );
    return buildResult(pairCount);
  }
};
457
/**
 * Cosine similarity between two numeric vectors.
 *
 * When the vectors differ in length the shorter one is treated as
 * zero-padded, so the result is the same regardless of argument order.
 * Missing (null/undefined) components count as 0.
 *
 * @param {ArrayLike<number>} a
 * @param {ArrayLike<number>} b
 * @returns {number} dot(a, b) / (|a| * |b|), or 0 when either vector has zero magnitude.
 */
function cosineSimilarity(a, b) {
  // Walk the longer vector: stopping at a.length would drop the tail of b
  // from normB and make f(a, b) differ from f(b, a).
  const length = Math.max(a.length, b.length);
  let dot = 0;
  let normA = 0;
  let normB = 0;
  for (let i = 0; i < length; i++) {
    const aVal = a[i] ?? 0;
    const bVal = b[i] ?? 0;
    dot += aVal * bVal;
    normA += aVal * aVal;
    normB += bVal * bVal;
  }
  const magnitude = Math.sqrt(normA) * Math.sqrt(normB);
  // A zero vector has no direction; report "no similarity" instead of NaN.
  return magnitude === 0 ? 0 : dot / magnitude;
}
471
/**
 * Factory for the cosine-similarity plugin.
 * @param {{ topN?: number, threshold?: number }} [options] forwarded to the constructor.
 * @returns {CosineSimilarityPlugin}
 */
var createSimilarityPlugin = (options) => new CosineSimilarityPlugin(options);
474
+
475
+ export { CopyOnlyImageProcessor, CosineSimilarityPlugin, NoOpDatabase, NoOpImageEmbedder, NoOpSimilarity, NoOpTextEmbedder, PassthroughMermaidRenderer, PluginManager, cosineSimilarity, createAllNoOpPlugins, createDefaultPlugins, createPluginManager, createSimilarityPlugin, topologicalSort };
@@ -44,6 +44,11 @@ var withDefaults = (config) => ({
44
44
  removeDeadLinks: false,
45
45
  ...config.pipeline
46
46
  },
47
+ similarity: {
48
+ topN: 5,
49
+ threshold: 0,
50
+ ...config.similarity
51
+ },
47
52
  debug: {
48
53
  level: 0,
49
54
  timing: false,
@@ -0,0 +1,39 @@
1
+ 'use strict';
2
+
3
+ // src/errors.ts
4
/**
 * Base error type for this package; the other error classes extend it.
 */
var FolderBlogError = class extends Error {
  /** HTTP status code, when one applies. */
  status;
  /** URL that was requested. */
  url;

  /**
   * @param {string} message
   * @param {{ status?: number, url?: string }} [options]
   */
  constructor(message, options) {
    super(message);
    // A missing options bag leaves both fields undefined.
    const { status, url } = options ?? {};
    this.name = "FolderBlogError";
    this.status = status;
    this.url = url;
  }
};
16
/**
 * Error for a resource that could not be found; always carries status 404.
 */
var NotFoundError = class extends FolderBlogError {
  /**
   * @param {string} resource kind of thing that was looked up
   * @param {string} identifier value used for the lookup
   * @param {string} [url] URL that was requested
   */
  constructor(resource, identifier, url) {
    const message = `${resource} not found: ${identifier}`;
    super(message, { status: 404, url });
    this.name = "NotFoundError";
  }
};
22
/**
 * Error carrying a caller-supplied status and URL.
 */
var ApiError = class extends FolderBlogError {
  /**
   * @param {string} message
   * @param {number} [status] stored on the error as `status`
   * @param {string} [url] stored on the error as `url`
   */
  constructor(message, status, url) {
    const details = { status, url };
    super(message, details);
    this.name = "ApiError";
  }
};
28
/**
 * Error with no status or URL; optionally records an underlying cause.
 */
var NetworkError = class extends FolderBlogError {
  /**
   * @param {string} message
   * @param {unknown} [cause] attached as `cause` only when truthy
   */
  constructor(message, cause) {
    super(message);
    this.name = "NetworkError";
    if (!cause) {
      return;
    }
    this.cause = cause;
  }
};
35
+
36
+ exports.ApiError = ApiError;
37
+ exports.FolderBlogError = FolderBlogError;
38
+ exports.NetworkError = NetworkError;
39
+ exports.NotFoundError = NotFoundError;
@@ -1,6 +1,7 @@
1
1
  'use strict';
2
2
 
3
- var chunkHMQIQUPB_cjs = require('./chunk-HMQIQUPB.cjs');
3
+ var chunk2TZSVPNP_cjs = require('./chunk-2TZSVPNP.cjs');
4
+ var chunk6TFXNIO6_cjs = require('./chunk-6TFXNIO6.cjs');
4
5
  var chunkOBGZSXTJ_cjs = require('./chunk-OBGZSXTJ.cjs');
5
6
  var unified = require('unified');
6
7
  var remarkParse = require('remark-parse');
@@ -1019,7 +1020,7 @@ var Processor = class {
1019
1020
  this.config = options.config;
1020
1021
  this.issues = new IssueCollector();
1021
1022
  this.log = options.log ?? createDefaultLogger(this.config.debug?.level ?? 1);
1022
- this.pluginManager = new chunkHMQIQUPB_cjs.PluginManager({
1023
+ this.pluginManager = new chunk6TFXNIO6_cjs.PluginManager({
1023
1024
  config: this.config,
1024
1025
  outputDir: this.resolveOutputDir(),
1025
1026
  issues: this.issues,
@@ -1074,6 +1075,7 @@ var Processor = class {
1074
1075
  this.log(`Processing folder: ${inputDir}`, "info");
1075
1076
  this.log(`Output directory: ${outputDir}`, "info");
1076
1077
  await ensureDir(outputDir);
1078
+ const totalStart = Date.now();
1077
1079
  const state = {
1078
1080
  inputDir,
1079
1081
  outputDir,
@@ -1089,18 +1091,20 @@ var Processor = class {
1089
1091
  textEmbeddingCacheMisses: 0,
1090
1092
  imageEmbeddingCacheHits: 0,
1091
1093
  imageEmbeddingCacheMisses: 0
1092
- }
1094
+ },
1095
+ report: {},
1096
+ phaseTiming: /* @__PURE__ */ new Map()
1093
1097
  };
1094
1098
  if (!this.config.media?.skip) {
1095
- await this.processMedia(state);
1099
+ await this.timed(state, "media", () => this.processMedia(state));
1096
1100
  } else {
1097
1101
  this.log("Skipping media processing", "info");
1098
1102
  }
1099
- await this.processMarkdownFiles(state);
1100
- await this.generateEmbeddings(state);
1101
- await this.generateSimilarity(state);
1102
- await this.buildDatabase(state);
1103
- const outputFiles = await this.writeOutput(state);
1103
+ await this.timed(state, "markdown", () => this.processMarkdownFiles(state));
1104
+ await this.timed(state, "embeddings", () => this.generateEmbeddings(state));
1105
+ await this.timed(state, "similarity", () => this.generateSimilarity(state));
1106
+ await this.timed(state, "database", () => this.buildDatabase(state));
1107
+ const outputFiles = await this.timed(state, "output", () => this.writeOutput(state));
1104
1108
  const issueReport = this.issues.generateReport();
1105
1109
  this.log(this.issues.getSummaryString(), "info");
1106
1110
  const hasCacheActivity = state.cacheStats.mediaCacheHits > 0 || state.cacheStats.mediaCacheMisses > 0 || state.cacheStats.textEmbeddingCacheHits > 0 || state.cacheStats.imageEmbeddingCacheHits > 0;
@@ -1110,16 +1114,39 @@ var Processor = class {
1110
1114
  "info"
1111
1115
  );
1112
1116
  }
1117
+ const totalMs = Date.now() - totalStart;
1118
+ const phases = {};
1119
+ for (const [phase, ms] of state.phaseTiming) {
1120
+ phases[phase] = ms;
1121
+ }
1122
+ const report = {
1123
+ ...state.report,
1124
+ timing: { totalMs, phases }
1125
+ };
1113
1126
  return {
1114
1127
  posts: state.posts,
1115
1128
  media: state.media,
1116
1129
  outputDir,
1117
1130
  outputFiles,
1118
1131
  issues: issueReport,
1119
- cacheStats: this.config.cache ? state.cacheStats : void 0
1132
+ cacheStats: this.config.cache ? state.cacheStats : void 0,
1133
+ report
1120
1134
  };
1121
1135
  }
1122
1136
  // --------------------------------------------------------------------------
1137
+ // Phase Timing
1138
+ // --------------------------------------------------------------------------
1139
+ async timed(state, phase, fn) {
1140
+ const start = Date.now();
1141
+ const result = await fn();
1142
+ const elapsed = Date.now() - start;
1143
+ state.phaseTiming.set(phase, elapsed);
1144
+ if (elapsed > 0) {
1145
+ this.log(`Phase "${phase}" completed in ${elapsed}ms`, "debug");
1146
+ }
1147
+ return result;
1148
+ }
1149
+ // --------------------------------------------------------------------------
1123
1150
  // Media Processing
1124
1151
  // --------------------------------------------------------------------------
1125
1152
  async processMedia(state) {
@@ -1585,6 +1612,12 @@ var Processor = class {
1585
1612
  } else {
1586
1613
  this.log(`All ${cachedCount.hits} text embeddings loaded from cache`, "info");
1587
1614
  }
1615
+ const postsWithEmbeddings = state.posts.filter((p) => p.embedding && p.embedding.length > 0);
1616
+ state.report.postEmbeddings = {
1617
+ filesProcessed: postsWithEmbeddings.length,
1618
+ dimensions: textEmbedder.dimensions,
1619
+ model: textEmbedder.model
1620
+ };
1588
1621
  } catch (error) {
1589
1622
  const errorMessage = error instanceof Error ? error.message : String(error);
1590
1623
  this.issues.addEmbeddingError({
@@ -1627,6 +1660,12 @@ var Processor = class {
1627
1660
  }
1628
1661
  }
1629
1662
  this.log(`Image embeddings: ${generatedCount} generated, ${cachedCount} from cache`, "info");
1663
+ const mediaWithEmbeddings = state.media.filter((m) => m.embedding && m.embedding.length > 0);
1664
+ state.report.mediaEmbeddings = {
1665
+ filesProcessed: mediaWithEmbeddings.length,
1666
+ dimensions: imageEmbedder.dimensions,
1667
+ model: imageEmbedder.model
1668
+ };
1630
1669
  }
1631
1670
  }
1632
1671
  // --------------------------------------------------------------------------
@@ -1641,12 +1680,18 @@ var Processor = class {
1641
1680
  this.log("Generating similarity data...", "info");
1642
1681
  try {
1643
1682
  const result = await similarity.generateSimilarityMap(state.posts);
1644
- const similarityPath = path5__default.default.join(state.outputDir, "similarity.json");
1683
+ const similarityPath = path5__default.default.join(state.outputDir, chunk2TZSVPNP_cjs.OUTPUT_FILES.SIMILARITY);
1645
1684
  await writeJson(similarityPath, {
1646
1685
  pairwiseScores: Object.fromEntries(result.pairwiseScores),
1647
1686
  similarPosts: Object.fromEntries(result.similarPosts),
1648
1687
  metadata: result.metadata
1649
1688
  });
1689
+ const simConfig = this.config.similarity;
1690
+ state.report.similarity = {
1691
+ pairsComputed: result.metadata.pairCount,
1692
+ topN: simConfig?.topN ?? 5,
1693
+ postsWithEmbeddings: result.metadata.postCount
1694
+ };
1650
1695
  this.log(
1651
1696
  `Generated similarity data: ${result.metadata.pairCount} pairs`,
1652
1697
  "info"
@@ -1697,13 +1742,39 @@ var Processor = class {
1697
1742
  slugMap[post.slug] = post.hash;
1698
1743
  pathMap[post.originalPath] = post.hash;
1699
1744
  }
1700
- await Promise.all([
1745
+ const writePromises = [
1701
1746
  writeJson(postsPath, posts),
1702
1747
  writeJson(mediaPath, media),
1703
1748
  writeJson(slugMapPath, slugMap),
1704
1749
  writeJson(pathMapPath, pathMap),
1705
1750
  writeJson(issuesPath, this.issues.generateReport())
1706
- ]);
1751
+ ];
1752
+ const textEmbeddingMap = {};
1753
+ let hasTextEmbeddings = false;
1754
+ for (const post of posts) {
1755
+ if (post.embedding && post.embedding.length > 0) {
1756
+ textEmbeddingMap[post.hash] = post.embedding;
1757
+ hasTextEmbeddings = true;
1758
+ }
1759
+ }
1760
+ if (hasTextEmbeddings) {
1761
+ const textEmbPath = path5__default.default.join(outputDir, chunk2TZSVPNP_cjs.OUTPUT_FILES.TEXT_EMBEDDINGS);
1762
+ writePromises.push(writeJson(textEmbPath, textEmbeddingMap));
1763
+ }
1764
+ const imageEmbeddingMap = {};
1765
+ let hasImageEmbeddings = false;
1766
+ for (const m of media) {
1767
+ const mediaHash = m.metadata?.hash;
1768
+ if (mediaHash && m.embedding && m.embedding.length > 0) {
1769
+ imageEmbeddingMap[mediaHash] = m.embedding;
1770
+ hasImageEmbeddings = true;
1771
+ }
1772
+ }
1773
+ if (hasImageEmbeddings) {
1774
+ const imageEmbPath = path5__default.default.join(outputDir, chunk2TZSVPNP_cjs.OUTPUT_FILES.IMAGE_EMBEDDINGS);
1775
+ writePromises.push(writeJson(imageEmbPath, imageEmbeddingMap));
1776
+ }
1777
+ await Promise.all(writePromises);
1707
1778
  this.log(`Output written to ${outputDir}`, "info");
1708
1779
  return {
1709
1780
  posts: postsPath,
@@ -1,4 +1,4 @@
1
- import { processFolder } from './chunk-IXP35S24.js';
1
+ import { processFolder } from './chunk-LPPBVXJ7.js';
2
2
  import { resolve } from 'path';
3
3
  import { pathToFileURL } from 'url';
4
4
  import { existsSync } from 'fs';
@@ -1,4 +1,5 @@
1
- import { PluginManager } from './chunk-ZRUBI3GH.js';
1
+ import { OUTPUT_FILES } from './chunk-D26H5722.js';
2
+ import { PluginManager } from './chunk-B43UAOPC.js';
2
3
  import { __require } from './chunk-3RG5ZIWI.js';
3
4
  import { unified } from 'unified';
4
5
  import remarkParse from 'remark-parse';
@@ -1057,6 +1058,7 @@ var Processor = class {
1057
1058
  this.log(`Processing folder: ${inputDir}`, "info");
1058
1059
  this.log(`Output directory: ${outputDir}`, "info");
1059
1060
  await ensureDir(outputDir);
1061
+ const totalStart = Date.now();
1060
1062
  const state = {
1061
1063
  inputDir,
1062
1064
  outputDir,
@@ -1072,18 +1074,20 @@ var Processor = class {
1072
1074
  textEmbeddingCacheMisses: 0,
1073
1075
  imageEmbeddingCacheHits: 0,
1074
1076
  imageEmbeddingCacheMisses: 0
1075
- }
1077
+ },
1078
+ report: {},
1079
+ phaseTiming: /* @__PURE__ */ new Map()
1076
1080
  };
1077
1081
  if (!this.config.media?.skip) {
1078
- await this.processMedia(state);
1082
+ await this.timed(state, "media", () => this.processMedia(state));
1079
1083
  } else {
1080
1084
  this.log("Skipping media processing", "info");
1081
1085
  }
1082
- await this.processMarkdownFiles(state);
1083
- await this.generateEmbeddings(state);
1084
- await this.generateSimilarity(state);
1085
- await this.buildDatabase(state);
1086
- const outputFiles = await this.writeOutput(state);
1086
+ await this.timed(state, "markdown", () => this.processMarkdownFiles(state));
1087
+ await this.timed(state, "embeddings", () => this.generateEmbeddings(state));
1088
+ await this.timed(state, "similarity", () => this.generateSimilarity(state));
1089
+ await this.timed(state, "database", () => this.buildDatabase(state));
1090
+ const outputFiles = await this.timed(state, "output", () => this.writeOutput(state));
1087
1091
  const issueReport = this.issues.generateReport();
1088
1092
  this.log(this.issues.getSummaryString(), "info");
1089
1093
  const hasCacheActivity = state.cacheStats.mediaCacheHits > 0 || state.cacheStats.mediaCacheMisses > 0 || state.cacheStats.textEmbeddingCacheHits > 0 || state.cacheStats.imageEmbeddingCacheHits > 0;
@@ -1093,16 +1097,39 @@ var Processor = class {
1093
1097
  "info"
1094
1098
  );
1095
1099
  }
1100
+ const totalMs = Date.now() - totalStart;
1101
+ const phases = {};
1102
+ for (const [phase, ms] of state.phaseTiming) {
1103
+ phases[phase] = ms;
1104
+ }
1105
+ const report = {
1106
+ ...state.report,
1107
+ timing: { totalMs, phases }
1108
+ };
1096
1109
  return {
1097
1110
  posts: state.posts,
1098
1111
  media: state.media,
1099
1112
  outputDir,
1100
1113
  outputFiles,
1101
1114
  issues: issueReport,
1102
- cacheStats: this.config.cache ? state.cacheStats : void 0
1115
+ cacheStats: this.config.cache ? state.cacheStats : void 0,
1116
+ report
1103
1117
  };
1104
1118
  }
1105
1119
  // --------------------------------------------------------------------------
1120
+ // Phase Timing
1121
+ // --------------------------------------------------------------------------
1122
+ async timed(state, phase, fn) {
1123
+ const start = Date.now();
1124
+ const result = await fn();
1125
+ const elapsed = Date.now() - start;
1126
+ state.phaseTiming.set(phase, elapsed);
1127
+ if (elapsed > 0) {
1128
+ this.log(`Phase "${phase}" completed in ${elapsed}ms`, "debug");
1129
+ }
1130
+ return result;
1131
+ }
1132
+ // --------------------------------------------------------------------------
1106
1133
  // Media Processing
1107
1134
  // --------------------------------------------------------------------------
1108
1135
  async processMedia(state) {
@@ -1568,6 +1595,12 @@ var Processor = class {
1568
1595
  } else {
1569
1596
  this.log(`All ${cachedCount.hits} text embeddings loaded from cache`, "info");
1570
1597
  }
1598
+ const postsWithEmbeddings = state.posts.filter((p) => p.embedding && p.embedding.length > 0);
1599
+ state.report.postEmbeddings = {
1600
+ filesProcessed: postsWithEmbeddings.length,
1601
+ dimensions: textEmbedder.dimensions,
1602
+ model: textEmbedder.model
1603
+ };
1571
1604
  } catch (error) {
1572
1605
  const errorMessage = error instanceof Error ? error.message : String(error);
1573
1606
  this.issues.addEmbeddingError({
@@ -1610,6 +1643,12 @@ var Processor = class {
1610
1643
  }
1611
1644
  }
1612
1645
  this.log(`Image embeddings: ${generatedCount} generated, ${cachedCount} from cache`, "info");
1646
+ const mediaWithEmbeddings = state.media.filter((m) => m.embedding && m.embedding.length > 0);
1647
+ state.report.mediaEmbeddings = {
1648
+ filesProcessed: mediaWithEmbeddings.length,
1649
+ dimensions: imageEmbedder.dimensions,
1650
+ model: imageEmbedder.model
1651
+ };
1613
1652
  }
1614
1653
  }
1615
1654
  // --------------------------------------------------------------------------
@@ -1624,12 +1663,18 @@ var Processor = class {
1624
1663
  this.log("Generating similarity data...", "info");
1625
1664
  try {
1626
1665
  const result = await similarity.generateSimilarityMap(state.posts);
1627
- const similarityPath = path5.join(state.outputDir, "similarity.json");
1666
+ const similarityPath = path5.join(state.outputDir, OUTPUT_FILES.SIMILARITY);
1628
1667
  await writeJson(similarityPath, {
1629
1668
  pairwiseScores: Object.fromEntries(result.pairwiseScores),
1630
1669
  similarPosts: Object.fromEntries(result.similarPosts),
1631
1670
  metadata: result.metadata
1632
1671
  });
1672
+ const simConfig = this.config.similarity;
1673
+ state.report.similarity = {
1674
+ pairsComputed: result.metadata.pairCount,
1675
+ topN: simConfig?.topN ?? 5,
1676
+ postsWithEmbeddings: result.metadata.postCount
1677
+ };
1633
1678
  this.log(
1634
1679
  `Generated similarity data: ${result.metadata.pairCount} pairs`,
1635
1680
  "info"
@@ -1680,13 +1725,39 @@ var Processor = class {
1680
1725
  slugMap[post.slug] = post.hash;
1681
1726
  pathMap[post.originalPath] = post.hash;
1682
1727
  }
1683
- await Promise.all([
1728
+ const writePromises = [
1684
1729
  writeJson(postsPath, posts),
1685
1730
  writeJson(mediaPath, media),
1686
1731
  writeJson(slugMapPath, slugMap),
1687
1732
  writeJson(pathMapPath, pathMap),
1688
1733
  writeJson(issuesPath, this.issues.generateReport())
1689
- ]);
1734
+ ];
1735
+ const textEmbeddingMap = {};
1736
+ let hasTextEmbeddings = false;
1737
+ for (const post of posts) {
1738
+ if (post.embedding && post.embedding.length > 0) {
1739
+ textEmbeddingMap[post.hash] = post.embedding;
1740
+ hasTextEmbeddings = true;
1741
+ }
1742
+ }
1743
+ if (hasTextEmbeddings) {
1744
+ const textEmbPath = path5.join(outputDir, OUTPUT_FILES.TEXT_EMBEDDINGS);
1745
+ writePromises.push(writeJson(textEmbPath, textEmbeddingMap));
1746
+ }
1747
+ const imageEmbeddingMap = {};
1748
+ let hasImageEmbeddings = false;
1749
+ for (const m of media) {
1750
+ const mediaHash = m.metadata?.hash;
1751
+ if (mediaHash && m.embedding && m.embedding.length > 0) {
1752
+ imageEmbeddingMap[mediaHash] = m.embedding;
1753
+ hasImageEmbeddings = true;
1754
+ }
1755
+ }
1756
+ if (hasImageEmbeddings) {
1757
+ const imageEmbPath = path5.join(outputDir, OUTPUT_FILES.IMAGE_EMBEDDINGS);
1758
+ writePromises.push(writeJson(imageEmbPath, imageEmbeddingMap));
1759
+ }
1760
+ await Promise.all(writePromises);
1690
1761
  this.log(`Output written to ${outputDir}`, "info");
1691
1762
  return {
1692
1763
  posts: postsPath,
@@ -0,0 +1,17 @@
1
+ import { existsSync, readFileSync } from 'fs';
2
+
3
+ // src/utils.ts
4
/**
 * Reads and parses a JSON file, returning `fallback` when the file does not exist.
 *
 * @param {string} path location of the JSON file
 * @param {*} fallback value returned when nothing exists at `path`
 * @returns {*} the parsed JSON value, or `fallback`
 * @throws {SyntaxError} when the file exists but is not valid JSON; the message
 *   names the file and the original parse error is attached as `cause`
 * @throws {Error} any read error for a path that does exist (e.g. a directory)
 */
function loadJsonFile(path, fallback) {
  let raw;
  try {
    // Read first and check existence only on failure: checking before reading
    // would throw if the file disappeared between the two calls.
    raw = readFileSync(path, "utf-8");
  } catch (error) {
    if (!existsSync(path)) return fallback;
    throw error;
  }
  try {
    return JSON.parse(raw);
  } catch (error) {
    const reason = error instanceof Error ? error.message : String(error);
    const wrapped = new SyntaxError(`Invalid JSON in ${path}: ${reason}`);
    wrapped.cause = error;
    throw wrapped;
  }
}
8
/**
 * Turns a domain or URL into a base URL with a scheme and no trailing slash.
 *
 * Surrounding whitespace and every trailing "/" are removed. If the value does
 * not already start with "http://" or "https://" (in any letter case),
 * "https://" is prepended. The input's own casing is left untouched.
 *
 * @param {string} domain e.g. "example.com", "https://example.com/"
 * @returns {string} e.g. "https://example.com"
 */
function normalizeBaseUrl(domain) {
  let url = domain.trim();
  // Drop all trailing slashes, not just one ("example.com//").
  let end = url.length;
  while (end > 0 && url[end - 1] === "/") end--;
  url = url.slice(0, end);
  // Schemes are case-insensitive, so "HTTPS://x" must not get a second scheme.
  if (!/^https?:\/\//i.test(url)) {
    url = `https://${url}`;
  }
  return url;
}
16
+
17
+ export { loadJsonFile, normalizeBaseUrl };