@j0hanz/superfetch 1.0.6 → 1.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (150) hide show
  1. package/README.md +228 -36
  2. package/dist/config/index.d.ts +10 -5
  3. package/dist/config/index.d.ts.map +1 -1
  4. package/dist/config/index.js +73 -19
  5. package/dist/config/index.js.map +1 -1
  6. package/dist/config/types.d.ts +98 -57
  7. package/dist/config/types.d.ts.map +1 -1
  8. package/dist/errors/app-error.d.ts +4 -28
  9. package/dist/errors/app-error.d.ts.map +1 -1
  10. package/dist/errors/app-error.js +10 -51
  11. package/dist/errors/app-error.js.map +1 -1
  12. package/dist/index.js +10 -55
  13. package/dist/index.js.map +1 -1
  14. package/dist/middleware/error-handler.d.ts +2 -2
  15. package/dist/middleware/error-handler.d.ts.map +1 -1
  16. package/dist/middleware/error-handler.js +12 -14
  17. package/dist/middleware/error-handler.js.map +1 -1
  18. package/dist/middleware/rate-limiter.d.ts.map +1 -1
  19. package/dist/middleware/rate-limiter.js +0 -8
  20. package/dist/middleware/rate-limiter.js.map +1 -1
  21. package/dist/parsers/base-html-element-parser.d.ts +43 -0
  22. package/dist/parsers/base-html-element-parser.d.ts.map +1 -0
  23. package/dist/parsers/base-html-element-parser.js +59 -0
  24. package/dist/parsers/base-html-element-parser.js.map +1 -0
  25. package/dist/parsers/heading-element-parser.d.ts +14 -0
  26. package/dist/parsers/heading-element-parser.d.ts.map +1 -0
  27. package/dist/parsers/heading-element-parser.js +26 -0
  28. package/dist/parsers/heading-element-parser.js.map +1 -0
  29. package/dist/parsers/image-element-parser.d.ts +16 -0
  30. package/dist/parsers/image-element-parser.d.ts.map +1 -0
  31. package/dist/parsers/image-element-parser.js +33 -0
  32. package/dist/parsers/image-element-parser.js.map +1 -0
  33. package/dist/parsers/link-element-parser.d.ts +15 -0
  34. package/dist/parsers/link-element-parser.d.ts.map +1 -0
  35. package/dist/parsers/link-element-parser.js +28 -0
  36. package/dist/parsers/link-element-parser.js.map +1 -0
  37. package/dist/parsers/open-graph-parser.d.ts +17 -0
  38. package/dist/parsers/open-graph-parser.d.ts.map +1 -0
  39. package/dist/parsers/open-graph-parser.js +41 -0
  40. package/dist/parsers/open-graph-parser.js.map +1 -0
  41. package/dist/parsers/schema-org-parser.d.ts +17 -0
  42. package/dist/parsers/schema-org-parser.d.ts.map +1 -0
  43. package/dist/parsers/schema-org-parser.js +32 -0
  44. package/dist/parsers/schema-org-parser.js.map +1 -0
  45. package/dist/parsers/standard-meta-parser.d.ts +18 -0
  46. package/dist/parsers/standard-meta-parser.d.ts.map +1 -0
  47. package/dist/parsers/standard-meta-parser.js +32 -0
  48. package/dist/parsers/standard-meta-parser.js.map +1 -0
  49. package/dist/parsers/twitter-card-parser.d.ts +17 -0
  50. package/dist/parsers/twitter-card-parser.d.ts.map +1 -0
  51. package/dist/parsers/twitter-card-parser.js +41 -0
  52. package/dist/parsers/twitter-card-parser.js.map +1 -0
  53. package/dist/resources/cached-content.d.ts +0 -1
  54. package/dist/resources/cached-content.d.ts.map +1 -1
  55. package/dist/resources/cached-content.js +3 -9
  56. package/dist/resources/cached-content.js.map +1 -1
  57. package/dist/resources/index.d.ts.map +1 -1
  58. package/dist/resources/index.js +8 -8
  59. package/dist/resources/index.js.map +1 -1
  60. package/dist/server.d.ts.map +1 -1
  61. package/dist/server.js +10 -10
  62. package/dist/server.js.map +1 -1
  63. package/dist/services/cache.d.ts +0 -28
  64. package/dist/services/cache.d.ts.map +1 -1
  65. package/dist/services/cache.js +10 -173
  66. package/dist/services/cache.js.map +1 -1
  67. package/dist/services/extractor.d.ts +1 -11
  68. package/dist/services/extractor.d.ts.map +1 -1
  69. package/dist/services/extractor.js +86 -84
  70. package/dist/services/extractor.js.map +1 -1
  71. package/dist/services/fetcher.d.ts +2 -13
  72. package/dist/services/fetcher.d.ts.map +1 -1
  73. package/dist/services/fetcher.js +195 -211
  74. package/dist/services/fetcher.js.map +1 -1
  75. package/dist/services/logger.d.ts +5 -4
  76. package/dist/services/logger.d.ts.map +1 -1
  77. package/dist/services/logger.js +27 -42
  78. package/dist/services/logger.js.map +1 -1
  79. package/dist/services/parser.d.ts.map +1 -1
  80. package/dist/services/parser.js +35 -26
  81. package/dist/services/parser.js.map +1 -1
  82. package/dist/services/session-manager.d.ts +18 -0
  83. package/dist/services/session-manager.d.ts.map +1 -0
  84. package/dist/services/session-manager.js +73 -0
  85. package/dist/services/session-manager.js.map +1 -0
  86. package/dist/strategies/exponential-backoff-strategy.d.ts +13 -0
  87. package/dist/strategies/exponential-backoff-strategy.d.ts.map +1 -0
  88. package/dist/strategies/exponential-backoff-strategy.js +32 -0
  89. package/dist/strategies/exponential-backoff-strategy.js.map +1 -0
  90. package/dist/tools/handlers/fetch-links.tool.d.ts +2 -9
  91. package/dist/tools/handlers/fetch-links.tool.d.ts.map +1 -1
  92. package/dist/tools/handlers/fetch-links.tool.js +0 -1
  93. package/dist/tools/handlers/fetch-links.tool.js.map +1 -1
  94. package/dist/tools/handlers/fetch-markdown.tool.d.ts +5 -2
  95. package/dist/tools/handlers/fetch-markdown.tool.d.ts.map +1 -1
  96. package/dist/tools/handlers/fetch-markdown.tool.js +23 -33
  97. package/dist/tools/handlers/fetch-markdown.tool.js.map +1 -1
  98. package/dist/tools/handlers/fetch-url.tool.d.ts +2 -9
  99. package/dist/tools/handlers/fetch-url.tool.d.ts.map +1 -1
  100. package/dist/tools/handlers/fetch-url.tool.js +15 -20
  101. package/dist/tools/handlers/fetch-url.tool.js.map +1 -1
  102. package/dist/tools/handlers/fetch-urls.tool.d.ts +2 -9
  103. package/dist/tools/handlers/fetch-urls.tool.d.ts.map +1 -1
  104. package/dist/tools/handlers/fetch-urls.tool.js +124 -105
  105. package/dist/tools/handlers/fetch-urls.tool.js.map +1 -1
  106. package/dist/tools/index.d.ts.map +1 -1
  107. package/dist/tools/index.js +0 -4
  108. package/dist/tools/index.js.map +1 -1
  109. package/dist/tools/utils/common.d.ts +6 -7
  110. package/dist/tools/utils/common.d.ts.map +1 -1
  111. package/dist/tools/utils/common.js +8 -8
  112. package/dist/tools/utils/common.js.map +1 -1
  113. package/dist/tools/utils/fetch-pipeline.d.ts +8 -0
  114. package/dist/tools/utils/fetch-pipeline.d.ts.map +1 -1
  115. package/dist/tools/utils/fetch-pipeline.js +47 -79
  116. package/dist/tools/utils/fetch-pipeline.js.map +1 -1
  117. package/dist/transformers/jsonl.transformer.d.ts +1 -1
  118. package/dist/transformers/jsonl.transformer.d.ts.map +1 -1
  119. package/dist/transformers/jsonl.transformer.js +15 -10
  120. package/dist/transformers/jsonl.transformer.js.map +1 -1
  121. package/dist/transformers/markdown.transformer.d.ts.map +1 -1
  122. package/dist/transformers/markdown.transformer.js +58 -62
  123. package/dist/transformers/markdown.transformer.js.map +1 -1
  124. package/dist/utils/concurrency.d.ts +2 -5
  125. package/dist/utils/concurrency.d.ts.map +1 -1
  126. package/dist/utils/concurrency.js +19 -19
  127. package/dist/utils/concurrency.js.map +1 -1
  128. package/dist/utils/content-cleaner.d.ts +0 -25
  129. package/dist/utils/content-cleaner.d.ts.map +1 -1
  130. package/dist/utils/content-cleaner.js +12 -187
  131. package/dist/utils/content-cleaner.js.map +1 -1
  132. package/dist/utils/html-truncator.d.ts +2 -0
  133. package/dist/utils/html-truncator.d.ts.map +1 -0
  134. package/dist/utils/html-truncator.js +14 -0
  135. package/dist/utils/html-truncator.js.map +1 -0
  136. package/dist/utils/language-detector.d.ts +0 -3
  137. package/dist/utils/language-detector.d.ts.map +1 -1
  138. package/dist/utils/language-detector.js +0 -11
  139. package/dist/utils/language-detector.js.map +1 -1
  140. package/dist/utils/sanitizer.d.ts.map +1 -1
  141. package/dist/utils/sanitizer.js +7 -5
  142. package/dist/utils/sanitizer.js.map +1 -1
  143. package/dist/utils/tool-error-handler.d.ts.map +1 -1
  144. package/dist/utils/tool-error-handler.js +15 -42
  145. package/dist/utils/tool-error-handler.js.map +1 -1
  146. package/dist/utils/url-validator.d.ts +0 -6
  147. package/dist/utils/url-validator.d.ts.map +1 -1
  148. package/dist/utils/url-validator.js +12 -81
  149. package/dist/utils/url-validator.js.map +1 -1
  150. package/package.json +5 -6
@@ -2,10 +2,6 @@ import * as cache from '../../services/cache.js';
2
2
  import { fetchUrlWithRetry } from '../../services/fetcher.js';
3
3
  import { logDebug, logWarn } from '../../services/logger.js';
4
4
  import { validateAndNormalizeUrl } from '../../utils/url-validator.js';
5
- /**
6
- * Safe JSON parse with error handling for cache deserialization.
7
- * Returns undefined on parse failure, treating it as a cache miss.
8
- */
9
5
  function safeJsonParse(cached, cacheKey) {
10
6
  try {
11
7
  return JSON.parse(cached);
@@ -17,91 +13,63 @@ function safeJsonParse(cached, cacheKey) {
17
13
  return undefined;
18
14
  }
19
15
  }
20
- const pendingRequests = new Map();
21
- const DEDUPLICATION_TIMEOUT = 60000; // 1 minute TTL
22
- // Cleanup stale pending requests every 30 seconds to prevent memory leak
23
- const cleanupInterval = setInterval(() => {
24
- const now = Date.now();
25
- for (const [key, value] of pendingRequests.entries()) {
26
- if (now - value.timestamp > DEDUPLICATION_TIMEOUT) {
27
- pendingRequests.delete(key);
28
- }
16
+ function attemptCacheRetrieval(cacheKey, deserialize, cacheNamespace, normalizedUrl) {
17
+ if (!cacheKey)
18
+ return null;
19
+ const cached = cache.get(cacheKey);
20
+ if (!cached)
21
+ return null;
22
+ logDebug('Cache hit', { namespace: cacheNamespace, url: normalizedUrl });
23
+ const data = deserialize
24
+ ? deserialize(cached.content)
25
+ : safeJsonParse(cached.content, cacheKey);
26
+ if (data === undefined) {
27
+ logDebug('Cache miss due to deserialize failure', {
28
+ namespace: cacheNamespace,
29
+ url: normalizedUrl,
30
+ });
31
+ return null;
29
32
  }
30
- }, 30000);
31
- // Allow Node.js to exit if this is the only active timer
32
- cleanupInterval.unref();
33
+ return {
34
+ data,
35
+ fromCache: true,
36
+ url: normalizedUrl,
37
+ fetchedAt: cached.fetchedAt,
38
+ };
39
+ }
40
+ /**
41
+ * Unified fetch pipeline that handles caching, fetching, and transformation.
42
+ * Implements cache-first strategy with automatic serialization.
43
+ *
44
+ * @template T - Type of the transformed result
45
+ * @param options - Pipeline configuration options
46
+ * @returns Promise resolving to the pipeline result
47
+ */
33
48
  export async function executeFetchPipeline(options) {
34
49
  const { url, cacheNamespace, customHeaders, retries, signal, timeout, transform, serialize = JSON.stringify, deserialize, } = options;
35
50
  const normalizedUrl = validateAndNormalizeUrl(url);
36
51
  const cacheKey = cache.createCacheKey(cacheNamespace, normalizedUrl);
37
- // Check cache first
38
- if (cacheKey) {
39
- const cached = cache.get(cacheKey);
40
- if (cached) {
41
- logDebug('Cache hit', { namespace: cacheNamespace, url: normalizedUrl });
42
- // Use provided deserializer or safe JSON parse
43
- const data = deserialize
44
- ? deserialize(cached.content)
45
- : safeJsonParse(cached.content, cacheKey);
46
- // If deserialization failed, treat as cache miss
47
- if (data === undefined) {
48
- logDebug('Cache miss due to deserialize failure', {
49
- namespace: cacheNamespace,
50
- url: normalizedUrl,
51
- });
52
- }
53
- else {
54
- return {
55
- data,
56
- fromCache: true,
57
- url: normalizedUrl,
58
- fetchedAt: cached.fetchedAt,
59
- };
60
- }
61
- }
62
- }
63
- // Check for pending request to prevent duplicate fetches
64
- // Include custom headers hash to ensure requests with different headers aren't deduplicated
65
- const headersKey = customHeaders ? JSON.stringify(customHeaders) : '';
66
- const dedupeKey = `${cacheNamespace}:${normalizedUrl}:${headersKey}`;
67
- const pending = pendingRequests.get(dedupeKey);
68
- if (pending) {
69
- logDebug('Request deduplication hit', { url: normalizedUrl });
70
- return pending.promise;
52
+ const cachedResult = attemptCacheRetrieval(cacheKey, deserialize, cacheNamespace, normalizedUrl);
53
+ if (cachedResult) {
54
+ return cachedResult;
71
55
  }
72
- // Build fetch options
73
56
  const fetchOptions = {
74
57
  customHeaders,
75
58
  signal,
76
59
  timeout,
77
60
  };
78
- // Create new request
79
- const request = (async () => {
80
- try {
81
- logDebug('Fetching URL', { url: normalizedUrl, retries });
82
- const fetchResult = await fetchUrlWithRetry(normalizedUrl, fetchOptions, retries);
83
- const { html } = fetchResult;
84
- const data = transform(html, normalizedUrl);
85
- if (cacheKey) {
86
- const serialized = serialize(data);
87
- cache.set(cacheKey, serialized);
88
- }
89
- return {
90
- data,
91
- fromCache: false,
92
- url: normalizedUrl,
93
- fetchedAt: new Date().toISOString(),
94
- };
95
- }
96
- finally {
97
- // Clean up pending request
98
- pendingRequests.delete(dedupeKey);
99
- }
100
- })();
101
- pendingRequests.set(dedupeKey, {
102
- promise: request,
103
- timestamp: Date.now(),
104
- });
105
- return request;
61
+ logDebug('Fetching URL', { url: normalizedUrl, retries });
62
+ const html = await fetchUrlWithRetry(normalizedUrl, fetchOptions, retries);
63
+ const data = transform(html, normalizedUrl);
64
+ if (cacheKey) {
65
+ const serialized = serialize(data);
66
+ cache.set(cacheKey, serialized);
67
+ }
68
+ return {
69
+ data,
70
+ fromCache: false,
71
+ url: normalizedUrl,
72
+ fetchedAt: new Date().toISOString(),
73
+ };
106
74
  }
107
75
  //# sourceMappingURL=fetch-pipeline.js.map
@@ -1 +1 @@
1
- {"version":3,"file":"fetch-pipeline.js","sourceRoot":"","sources":["../../../src/tools/utils/fetch-pipeline.ts"],"names":[],"mappings":"AAKA,OAAO,KAAK,KAAK,MAAM,yBAAyB,CAAC;AAEjD,OAAO,EAAE,iBAAiB,EAAE,MAAM,2BAA2B,CAAC;AAC9D,OAAO,EAAE,QAAQ,EAAE,OAAO,EAAE,MAAM,0BAA0B,CAAC;AAE7D,OAAO,EAAE,uBAAuB,EAAE,MAAM,8BAA8B,CAAC;AAEvE;;;GAGG;AACH,SAAS,aAAa,CAAC,MAAc,EAAE,QAAgB;IACrD,IAAI,CAAC;QACH,OAAO,IAAI,CAAC,KAAK,CAAC,MAAM,CAAC,CAAC;IAC5B,CAAC;IAAC,MAAM,CAAC;QACP,OAAO,CAAC,4CAA4C,EAAE;YACpD,GAAG,EAAE,QAAQ,CAAC,SAAS,CAAC,CAAC,EAAE,GAAG,CAAC;SAChC,CAAC,CAAC;QACH,OAAO,SAAS,CAAC;IACnB,CAAC;AACH,CAAC;AAQD,MAAM,eAAe,GAAG,IAAI,GAAG,EAA0B,CAAC;AAC1D,MAAM,qBAAqB,GAAG,KAAK,CAAC,CAAC,eAAe;AAEpD,yEAAyE;AACzE,MAAM,eAAe,GAAG,WAAW,CAAC,GAAG,EAAE;IACvC,MAAM,GAAG,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;IACvB,KAAK,MAAM,CAAC,GAAG,EAAE,KAAK,CAAC,IAAI,eAAe,CAAC,OAAO,EAAE,EAAE,CAAC;QACrD,IAAI,GAAG,GAAG,KAAK,CAAC,SAAS,GAAG,qBAAqB,EAAE,CAAC;YAClD,eAAe,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC;QAC9B,CAAC;IACH,CAAC;AACH,CAAC,EAAE,KAAK,CAAC,CAAC;AAEV,yDAAyD;AACzD,eAAe,CAAC,KAAK,EAAE,CAAC;AAExB,MAAM,CAAC,KAAK,UAAU,oBAAoB,CACxC,OAAgC;IAEhC,MAAM,EACJ,GAAG,EACH,cAAc,EACd,aAAa,EACb,OAAO,EACP,MAAM,EACN,OAAO,EACP,SAAS,EACT,SAAS,GAAG,IAAI,CAAC,SAAS,EAC1B,WAAW,GACZ,GAAG,OAAO,CAAC;IAEZ,MAAM,aAAa,GAAG,uBAAuB,CAAC,GAAG,CAAC,CAAC;IACnD,MAAM,QAAQ,GAAG,KAAK,CAAC,cAAc,CAAC,cAAc,EAAE,aAAa,CAAC,CAAC;IAErE,oBAAoB;IACpB,IAAI,QAAQ,EAAE,CAAC;QACb,MAAM,MAAM,GAAG,KAAK,CAAC,GAAG,CAAC,QAAQ,CAAC,CAAC;QACnC,IAAI,MAAM,EAAE,CAAC;YACX,QAAQ,CAAC,WAAW,EAAE,EAAE,SAAS,EAAE,cAAc,EAAE,GAAG,EAAE,aAAa,EAAE,CAAC,CAAC;YAEzE,+CAA+C;YAC/C,MAAM,IAAI,GAAG,WAAW;gBACtB,CAAC,CAAC,WAAW,CAAC,MAAM,CAAC,OAAO,CAAC;gBAC7B,CAAC,CAAE,aAAa,CAAC,MAAM,CAAC,OAAO,EAAE,QAAQ,CAAmB,CAAC;YAE/D,iDAAiD;YACjD,IAAI,IAAI,KAAK,SAAS,EAAE,CAAC;gBACvB,QAAQ,CAAC,uCAAuC,EAAE;oBAChD,SAAS,EAAE,cAAc;oBACzB,GAAG,EAAE,aAAa;iBACnB,CAAC,CAAC;YACL,CAAC;iBAAM,CAAC;gBACN,OAAO;oBACL,IAAI;oBACJ,SAAS,EAAE,IAAI;oBACf,GAAG,EAAE,aAAa;oBAClB,SAAS,EAAE,MAAM,CAAC,SAAS;iBAC5B,CAAC;YACJ,CAAC;QACH,CAAC;IACH,CAAC;IAED,yDAAyD;IACzD,4FAA4F;IAC5F,MAAM,UAAU,GAAG,aAAa,CAAC,CAAC,CAAC,IAAI,CAAC,SAAS,CAAC,aAAa,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC;IACtE,MAAM,SAAS,GAAG,GAAG,cAAc,IAAI,aAAa,IAAI,UAAU,EAAE,CAAC;IACrE,MAAM,OAAO,GAAG,eAAe,CAAC,GAAG,CAAC,SAAS,CAAC,CAAC;IAC/C,IAAI,OAAO,EAAE,CAAC;QACZ,QAAQ,CAAC,2BAA2B,EAAE,EAAE,GAAG,EAAE,aAAa,EAAE,CAAC,CAAC;QAC9D,OAAO,OAAO,CAAC,OAAqC,CAAC;IACvD,CAAC;IAED,sBAAsB;IACtB,MAAM,YAAY,GAAiB;QACjC,aAAa;QACb,MAAM;QACN,OAAO;KACR,CAAC;IAEF,qBAAqB;IACrB,MAAM,OAAO,GAAG,CAAC,KAAK,IAAI,EAAE;QAC1B,IAAI,CAAC;YACH,QAAQ,CAAC,cAAc,EAAE,EAAE,GAAG,EAAE,aAAa,EAAE,OAAO,EAAE,CAAC,CAAC;YAC1D,MAAM,WAAW,GAAG,MAAM,iBAAiB,CACzC,aAAa,EACb,YAAY,EACZ,OAAO,CACR,CAAC;YACF,MAAM,EAAE,IAAI,EAAE,GAAG,WAAW,CAAC;YAC7B,MAAM,IAAI,GAAG,SAAS,CAAC,IAAI,EAAE,aAAa,CAAC,CAAC;YAE5C,IAAI,QAAQ,EAAE,CAAC;gBACb,MAAM,UAAU,GAAG,SAAS,CAAC,IAAI,CAAC,CAAC;gBACnC,KAAK,CAAC,GAAG,CAAC,QAAQ,EAAE,UAAU,CAAC,CAAC;YAClC,CAAC;YAED,OAAO;gBACL,IAAI;gBACJ,SAAS,EAAE,KAAK;gBAChB,GAAG,EAAE,aAAa;gBAClB,SAAS,EAAE,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE;aACpC,CAAC;QACJ,CAAC;gBAAS,CAAC;YACT,2BAA2B;YAC3B,eAAe,CAAC,MAAM,CAAC,SAAS,CAAC,CAAC;QACpC,CAAC;IACH,CAAC,CAAC,EAAE,CAAC;IAEL,eAAe,CAAC,GAAG,CAAC,SAAS,EAAE;QAC7B,OAAO,EAAE,OAA2C;QACpD,SAAS,EAAE,IAAI,CAAC,GAAG,EAAE;KACtB,CAAC,CAAC;IACH,OAAO,OAAO,CAAC;AACjB,CAAC"}
1
+ {"version":3,"file":"fetch-pipeline.js","sourceRoot":"","sources":["../../../src/tools/utils/fetch-pipeline.ts"],"names":[],"mappings":"AAMA,OAAO,KAAK,KAAK,MAAM,yBAAyB,CAAC;AACjD,OAAO,EAAE,iBAAiB,EAAE,MAAM,2BAA2B,CAAC;AAC9D,OAAO,EAAE,QAAQ,EAAE,OAAO,EAAE,MAAM,0BAA0B,CAAC;AAE7D,OAAO,EAAE,uBAAuB,EAAE,MAAM,8BAA8B,CAAC;AAEvE,SAAS,aAAa,CAAC,MAAc,EAAE,QAAgB;IACrD,IAAI,CAAC;QACH,OAAO,IAAI,CAAC,KAAK,CAAC,MAAM,CAAC,CAAC;IAC5B,CAAC;IAAC,MAAM,CAAC;QACP,OAAO,CAAC,4CAA4C,EAAE;YACpD,GAAG,EAAE,QAAQ,CAAC,SAAS,CAAC,CAAC,EAAE,GAAG,CAAC;SAChC,CAAC,CAAC;QACH,OAAO,SAAS,CAAC;IACnB,CAAC;AACH,CAAC;AAED,SAAS,qBAAqB,CAC5B,QAAuB,EACvB,WAAgD,EAChD,cAAsB,EACtB,aAAqB;IAErB,IAAI,CAAC,QAAQ;QAAE,OAAO,IAAI,CAAC;IAE3B,MAAM,MAAM,GAAG,KAAK,CAAC,GAAG,CAAC,QAAQ,CAAC,CAAC;IACnC,IAAI,CAAC,MAAM;QAAE,OAAO,IAAI,CAAC;IAEzB,QAAQ,CAAC,WAAW,EAAE,EAAE,SAAS,EAAE,cAAc,EAAE,GAAG,EAAE,aAAa,EAAE,CAAC,CAAC;IAEzE,MAAM,IAAI,GAAG,WAAW;QACtB,CAAC,CAAC,WAAW,CAAC,MAAM,CAAC,OAAO,CAAC;QAC7B,CAAC,CAAE,aAAa,CAAC,MAAM,CAAC,OAAO,EAAE,QAAQ,CAAmB,CAAC;IAE/D,IAAI,IAAI,KAAK,SAAS,EAAE,CAAC;QACvB,QAAQ,CAAC,uCAAuC,EAAE;YAChD,SAAS,EAAE,cAAc;YACzB,GAAG,EAAE,aAAa;SACnB,CAAC,CAAC;QACH,OAAO,IAAI,CAAC;IACd,CAAC;IAED,OAAO;QACL,IAAI;QACJ,SAAS,EAAE,IAAI;QACf,GAAG,EAAE,aAAa;QAClB,SAAS,EAAE,MAAM,CAAC,SAAS;KAC5B,CAAC;AACJ,CAAC;AAED;;;;;;;GAOG;AACH,MAAM,CAAC,KAAK,UAAU,oBAAoB,CACxC,OAAgC;IAEhC,MAAM,EACJ,GAAG,EACH,cAAc,EACd,aAAa,EACb,OAAO,EACP,MAAM,EACN,OAAO,EACP,SAAS,EACT,SAAS,GAAG,IAAI,CAAC,SAAS,EAC1B,WAAW,GACZ,GAAG,OAAO,CAAC;IAEZ,MAAM,aAAa,GAAG,uBAAuB,CAAC,GAAG,CAAC,CAAC;IACnD,MAAM,QAAQ,GAAG,KAAK,CAAC,cAAc,CAAC,cAAc,EAAE,aAAa,CAAC,CAAC;IAErE,MAAM,YAAY,GAAG,qBAAqB,CACxC,QAAQ,EACR,WAAW,EACX,cAAc,EACd,aAAa,CACd,CAAC;IAEF,IAAI,YAAY,EAAE,CAAC;QACjB,OAAO,YAAY,CAAC;IACtB,CAAC;IAED,MAAM,YAAY,GAAiB;QACjC,aAAa;QACb,MAAM;QACN,OAAO;KACR,CAAC;IAEF,QAAQ,CAAC,cAAc,EAAE,EAAE,GAAG,EAAE,aAAa,EAAE,OAAO,EAAE,CAAC,CAAC;IAE1D,MAAM,IAAI,GAAG,MAAM,iBAAiB,CAAC,aAAa,EAAE,YAAY,EAAE,OAAO,CAAC,CAAC;IAC3E,MAAM,IAAI,GAAG,SAAS,CAAC,IAAI,EAAE,aAAa,CAAC,CAAC;IAE5C,IAAI,QAAQ,EAAE,CAAC;QACb,MAAM,UAAU,GAAG,SAAS,CAAC,IAAI,CAAC,CAAC;QACnC,KAAK,CAAC,GAAG,CAAC,QAAQ,EAAE,UAAU,CAAC,CAAC;IAClC,CAAC;IAED,OAAO;QACL,IAAI;QACJ,SAAS,EAAE,KAAK;QAChB,GAAG,EAAE,aAAa;QAClB,SAAS,EAAE,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE;KACpC,CAAC;AACJ,CAAC"}
@@ -1,3 +1,3 @@
1
1
  import type { ContentBlockUnion, MetadataBlock } from '../config/types.js';
2
- export declare function toJsonl(blocks: ContentBlockUnion[], metadata?: MetadataBlock): string;
2
+ export declare function toJsonl(blocks: readonly ContentBlockUnion[], metadata?: MetadataBlock): string;
3
3
  //# sourceMappingURL=jsonl.transformer.d.ts.map
@@ -1 +1 @@
1
- {"version":3,"file":"jsonl.transformer.d.ts","sourceRoot":"","sources":["../../src/transformers/jsonl.transformer.ts"],"names":[],"mappings":"AACA,OAAO,KAAK,EAAE,iBAAiB,EAAE,aAAa,EAAE,MAAM,oBAAoB,CAAC;AA6B3E,wBAAgB,OAAO,CACrB,MAAM,EAAE,iBAAiB,EAAE,EAC3B,QAAQ,CAAC,EAAE,aAAa,GACvB,MAAM,CA0BR"}
1
+ {"version":3,"file":"jsonl.transformer.d.ts","sourceRoot":"","sources":["../../src/transformers/jsonl.transformer.ts"],"names":[],"mappings":"AACA,OAAO,KAAK,EAAE,iBAAiB,EAAE,aAAa,EAAE,MAAM,oBAAoB,CAAC;AAuC3E,wBAAgB,OAAO,CACrB,MAAM,EAAE,SAAS,iBAAiB,EAAE,EACpC,QAAQ,CAAC,EAAE,aAAa,GACvB,MAAM,CAwBR"}
@@ -12,35 +12,40 @@ function truncateBlock(block) {
12
12
  }
13
13
  case 'list': {
14
14
  const truncatedItems = block.items.map((item) => truncateText(item, maxLength));
15
- const hasChanges = truncatedItems.some((item, i) => item !== block.items[i]);
15
+ const hasChanges = truncatedItems.some((item, index) => item !== block.items[index]);
16
16
  return hasChanges ? { ...block, items: truncatedItems } : block;
17
17
  }
18
18
  default:
19
19
  return block;
20
20
  }
21
21
  }
22
+ function serializeBlock(block) {
23
+ try {
24
+ return JSON.stringify(truncateBlock(block));
25
+ }
26
+ catch {
27
+ return null;
28
+ }
29
+ }
22
30
  export function toJsonl(blocks, metadata) {
23
31
  const lines = [];
24
- // Minimal metadata - just title and URL for context
25
32
  if (metadata) {
26
33
  try {
27
- const minimal = {
34
+ const minimalMetadata = {
28
35
  type: metadata.type,
29
36
  title: metadata.title,
30
37
  url: metadata.url,
31
38
  };
32
- lines.push(JSON.stringify(minimal));
39
+ lines.push(JSON.stringify(minimalMetadata));
33
40
  }
34
41
  catch {
35
- // Skip invalid metadata
42
+ /* skip */
36
43
  }
37
44
  }
38
45
  for (const block of blocks) {
39
- try {
40
- lines.push(JSON.stringify(truncateBlock(block)));
41
- }
42
- catch {
43
- // Skip blocks that fail to serialize
46
+ const serialized = serializeBlock(block);
47
+ if (serialized) {
48
+ lines.push(serialized);
44
49
  }
45
50
  }
46
51
  return lines.join('\n');
@@ -1 +1 @@
1
- {"version":3,"file":"jsonl.transformer.js","sourceRoot":"","sources":["../../src/transformers/jsonl.transformer.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,MAAM,EAAE,MAAM,oBAAoB,CAAC;AAG5C,OAAO,EAAE,YAAY,EAAE,MAAM,uBAAuB,CAAC;AAErD,SAAS,aAAa,CAAC,KAAwB;IAC7C,MAAM,SAAS,GAAG,MAAM,CAAC,UAAU,CAAC,cAAc,CAAC;IAEnD,QAAQ,KAAK,CAAC,IAAI,EAAE,CAAC;QACnB,KAAK,WAAW,CAAC;QACjB,KAAK,SAAS,CAAC;QACf,KAAK,MAAM,CAAC;QACZ,KAAK,YAAY,CAAC,CAAC,CAAC;YAClB,MAAM,SAAS,GAAG,YAAY,CAAC,KAAK,CAAC,IAAI,EAAE,SAAS,CAAC,CAAC;YACtD,OAAO,SAAS,KAAK,KAAK,CAAC,IAAI,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,EAAE,GAAG,KAAK,EAAE,IAAI,EAAE,SAAS,EAAE,CAAC;QAC1E,CAAC;QACD,KAAK,MAAM,CAAC,CAAC,CAAC;YACZ,MAAM,cAAc,GAAG,KAAK,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,IAAI,EAAE,EAAE,CAC9C,YAAY,CAAC,IAAI,EAAE,SAAS,CAAC,CAC9B,CAAC;YACF,MAAM,UAAU,GAAG,cAAc,CAAC,IAAI,CACpC,CAAC,IAAI,EAAE,CAAC,EAAE,EAAE,CAAC,IAAI,KAAK,KAAK,CAAC,KAAK,CAAC,CAAC,CAAC,CACrC,CAAC;YACF,OAAO,UAAU,CAAC,CAAC,CAAC,EAAE,GAAG,KAAK,EAAE,KAAK,EAAE,cAAc,EAAE,CAAC,CAAC,CAAC,KAAK,CAAC;QAClE,CAAC;QACD;YACE,OAAO,KAAK,CAAC;IACjB,CAAC;AACH,CAAC;AAED,MAAM,UAAU,OAAO,CACrB,MAA2B,EAC3B,QAAwB;IAExB,MAAM,KAAK,GAAa,EAAE,CAAC;IAE3B,oDAAoD;IACpD,IAAI,QAAQ,EAAE,CAAC;QACb,IAAI,CAAC;YACH,MAAM,OAAO,GAAG;gBACd,IAAI,EAAE,QAAQ,CAAC,IAAI;gBACnB,KAAK,EAAE,QAAQ,CAAC,KAAK;gBACrB,GAAG,EAAE,QAAQ,CAAC,GAAG;aAClB,CAAC;YACF,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,SAAS,CAAC,OAAO,CAAC,CAAC,CAAC;QACtC,CAAC;QAAC,MAAM,CAAC;YACP,wBAAwB;QAC1B,CAAC;IACH,CAAC;IAED,KAAK,MAAM,KAAK,IAAI,MAAM,EAAE,CAAC;QAC3B,IAAI,CAAC;YACH,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,SAAS,CAAC,aAAa,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC;QACnD,CAAC;QAAC,MAAM,CAAC;YACP,qCAAqC;QACvC,CAAC;IACH,CAAC;IAED,OAAO,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;AAC1B,CAAC"}
1
+ {"version":3,"file":"jsonl.transformer.js","sourceRoot":"","sources":["../../src/transformers/jsonl.transformer.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,MAAM,EAAE,MAAM,oBAAoB,CAAC;AAG5C,OAAO,EAAE,YAAY,EAAE,MAAM,uBAAuB,CAAC;AAErD,SAAS,aAAa,CAAC,KAAwB;IAC7C,MAAM,SAAS,GAAG,MAAM,CAAC,UAAU,CAAC,cAAc,CAAC;IAEnD,QAAQ,KAAK,CAAC,IAAI,EAAE,CAAC;QACnB,KAAK,WAAW,CAAC;QACjB,KAAK,SAAS,CAAC;QACf,KAAK,MAAM,CAAC;QACZ,KAAK,YAAY,CAAC,CAAC,CAAC;YAClB,MAAM,SAAS,GAAG,YAAY,CAAC,KAAK,CAAC,IAAI,EAAE,SAAS,CAAC,CAAC;YACtD,OAAO,SAAS,KAAK,KAAK,CAAC,IAAI,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,EAAE,GAAG,KAAK,EAAE,IAAI,EAAE,SAAS,EAAE,CAAC;QAC1E,CAAC;QAED,KAAK,MAAM,CAAC,CAAC,CAAC;YACZ,MAAM,cAAc,GAAG,KAAK,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,IAAI,EAAE,EAAE,CAC9C,YAAY,CAAC,IAAI,EAAE,SAAS,CAAC,CAC9B,CAAC;YACF,MAAM,UAAU,GAAG,cAAc,CAAC,IAAI,CACpC,CAAC,IAAI,EAAE,KAAK,EAAE,EAAE,CAAC,IAAI,KAAK,KAAK,CAAC,KAAK,CAAC,KAAK,CAAC,CAC7C,CAAC;YACF,OAAO,UAAU,CAAC,CAAC,CAAC,EAAE,GAAG,KAAK,EAAE,KAAK,EAAE,cAAc,EAAE,CAAC,CAAC,CAAC,KAAK,CAAC;QAClE,CAAC;QAED;YACE,OAAO,KAAK,CAAC;IACjB,CAAC;AACH,CAAC;AAED,SAAS,cAAc,CAAC,KAAwB;IAC9C,IAAI,CAAC;QACH,OAAO,IAAI,CAAC,SAAS,CAAC,aAAa,CAAC,KAAK,CAAC,CAAC,CAAC;IAC9C,CAAC;IAAC,MAAM,CAAC;QACP,OAAO,IAAI,CAAC;IACd,CAAC;AACH,CAAC;AAED,MAAM,UAAU,OAAO,CACrB,MAAoC,EACpC,QAAwB;IAExB,MAAM,KAAK,GAAa,EAAE,CAAC;IAE3B,IAAI,QAAQ,EAAE,CAAC;QACb,IAAI,CAAC;YACH,MAAM,eAAe,GAAG;gBACtB,IAAI,EAAE,QAAQ,CAAC,IAAI;gBACnB,KAAK,EAAE,QAAQ,CAAC,KAAK;gBACrB,GAAG,EAAE,QAAQ,CAAC,GAAG;aAClB,CAAC;YACF,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,SAAS,CAAC,eAAe,CAAC,CAAC,CAAC;QAC9C,CAAC;QAAC,MAAM,CAAC;YACP,UAAU;QACZ,CAAC;IACH,CAAC;IAED,KAAK,MAAM,KAAK,IAAI,MAAM,EAAE,CAAC;QAC3B,MAAM,UAAU,GAAG,cAAc,CAAC,KAAK,CAAC,CAAC;QACzC,IAAI,UAAU,EAAE,CAAC;YACf,KAAK,CAAC,IAAI,CAAC,UAAU,CAAC,CAAC;QACzB,CAAC;IACH,CAAC;IAED,OAAO,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;AAC1B,CAAC"}
@@ -1 +1 @@
1
- {"version":3,"file":"markdown.transformer.d.ts","sourceRoot":"","sources":["../../src/transformers/markdown.transformer.ts"],"names":[],"mappings":"AAEA,OAAO,KAAK,EAAE,aAAa,EAAE,MAAM,oBAAoB,CAAC;AAiKxD,wBAAgB,cAAc,CAAC,IAAI,EAAE,MAAM,EAAE,QAAQ,CAAC,EAAE,aAAa,GAAG,MAAM,CAsB7E"}
1
+ {"version":3,"file":"markdown.transformer.d.ts","sourceRoot":"","sources":["../../src/transformers/markdown.transformer.ts"],"names":[],"mappings":"AAEA,OAAO,KAAK,EAAE,aAAa,EAAE,MAAM,oBAAoB,CAAC;AAwKxD,wBAAgB,cAAc,CAAC,IAAI,EAAE,MAAM,EAAE,QAAQ,CAAC,EAAE,aAAa,GAAG,MAAM,CAiB7E"}
@@ -1,7 +1,28 @@
1
1
  import TurndownService from 'turndown';
2
- import { detectLanguage } from '../utils/language-detector.js';
3
- // Markdown-specific noise patterns (minimal set - content-cleaner.ts handles most filtering)
4
- // Only patterns that commonly appear as standalone lines in markdown output
2
+ function detectLanguageFromCode(code) {
3
+ const patterns = [
4
+ [
5
+ /^\s*import\s+.*\s+from\s+['"]react['"]|<[A-Z][a-zA-Z]*[\s/>]|jsx\s*:|className=/m,
6
+ 'jsx',
7
+ ],
8
+ [
9
+ /:\s*(string|number|boolean|void|any|unknown|never)\b|interface\s+\w+|type\s+\w+\s*=/m,
10
+ 'typescript',
11
+ ],
12
+ [/^\s*(fn|let\s+mut|impl|struct|enum|use\s+\w+::)/m, 'rust'],
13
+ [
14
+ /^\s*(export|const|let|var|function|class|async|await)\b|^\s*import\s+.*['"]]/m,
15
+ 'javascript',
16
+ ],
17
+ [/^\s*(def|class|import|from|if __name__|print\()/m, 'python'],
18
+ [/^\s*(npm|yarn|pnpm|npx)\s+(install|add|run|build|start)/m, 'bash'],
19
+ [/^\s*[.#@]?[\w-]+\s*\{[^}]*\}|@media|@import/m, 'css'],
20
+ [/^\s*<(!DOCTYPE|html|head|body|div)\b/im, 'html'],
21
+ [/^\s*\{\s*"|^\s*\[\s*(")/m, 'json'],
22
+ [/^\s*(SELECT|INSERT|UPDATE|DELETE|CREATE)\s+/im, 'sql'],
23
+ ];
24
+ return patterns.find(([pattern]) => pattern.test(code))?.[1];
25
+ }
5
26
  const NOISE_LINE_PATTERNS = [
6
27
  // Single letters or panel labels (common in code examples)
7
28
  /^[A-Z]$/,
@@ -12,52 +33,32 @@ const NOISE_LINE_PATTERNS = [
12
33
  /^\[\d+\]$/,
13
34
  /^\(\d+\)$/,
14
35
  ];
15
- /**
16
- * Check if a line is noise that should be removed
17
- */
36
+ const MULTIPLE_NEWLINES = /\n{3,}/g;
18
37
  function isNoiseLine(line) {
19
38
  const trimmed = line.trim();
20
- // Empty lines are fine
21
39
  if (!trimmed)
22
40
  return false;
23
- // Don't filter lines inside code blocks, headings, or lists
24
- if (trimmed.startsWith('#') ||
25
- trimmed.startsWith('-') ||
26
- trimmed.startsWith('*') ||
27
- trimmed.startsWith('`') ||
28
- trimmed.startsWith('>') ||
29
- trimmed.startsWith('|')) {
41
+ const markdownPrefixes = ['#', '-', '*', '`', '>', '|'];
42
+ if (markdownPrefixes.some((prefix) => trimmed.startsWith(prefix))) {
30
43
  return false;
31
44
  }
32
- // Check against noise patterns
33
- for (const pattern of NOISE_LINE_PATTERNS) {
34
- if (pattern.test(trimmed)) {
35
- return true;
36
- }
37
- }
38
- return false;
45
+ return NOISE_LINE_PATTERNS.some((pattern) => pattern.test(trimmed));
39
46
  }
40
- /**
41
- * Post-process markdown to remove noise lines
42
- */
47
+ const CODE_FENCE = '```';
43
48
  function cleanMarkdownContent(markdown) {
44
- // Split by lines but preserve code blocks
45
49
  const lines = markdown.split('\n');
46
50
  const cleanedLines = [];
47
- let inCodeBlock = false;
51
+ let insideCodeBlock = false;
48
52
  for (const line of lines) {
49
- // Track code block boundaries
50
- if (line.trim().startsWith('```')) {
51
- inCodeBlock = !inCodeBlock;
53
+ if (line.trim().startsWith(CODE_FENCE)) {
54
+ insideCodeBlock = !insideCodeBlock;
52
55
  cleanedLines.push(line);
53
56
  continue;
54
57
  }
55
- // Don't filter inside code blocks
56
- if (inCodeBlock) {
58
+ if (insideCodeBlock) {
57
59
  cleanedLines.push(line);
58
60
  continue;
59
61
  }
60
- // Filter noise lines outside code blocks
61
62
  if (!isNoiseLine(line)) {
62
63
  cleanedLines.push(line);
63
64
  }
@@ -70,12 +71,10 @@ const turndown = new TurndownService({
70
71
  emDelimiter: '_',
71
72
  bulletListMarker: '-',
72
73
  });
73
- // Remove noise elements
74
74
  turndown.addRule('removeNoise', {
75
75
  filter: ['script', 'style', 'noscript', 'nav', 'footer', 'aside', 'iframe'],
76
76
  replacement: () => '',
77
77
  });
78
- // Enhanced code block handling with language detection
79
78
  turndown.addRule('fencedCodeBlockWithLanguage', {
80
79
  filter: (node, options) => {
81
80
  return (options.codeBlockStyle === 'fenced' &&
@@ -86,70 +85,67 @@ turndown.addRule('fencedCodeBlockWithLanguage', {
86
85
  replacement: (_content, node) => {
87
86
  const codeNode = node.firstChild;
88
87
  const code = codeNode.textContent || '';
89
- // Try to get language from class
90
88
  const className = codeNode.getAttribute('class') ?? '';
91
89
  const dataLang = codeNode.getAttribute('data-language') ?? '';
92
90
  const languageMatch = /language-(\w+)/.exec(className) ??
93
91
  /lang-(\w+)/.exec(className) ??
94
92
  /highlight-(\w+)/.exec(className) ??
95
93
  /^(\w+)$/.exec(dataLang);
96
- // Use detected language from class, or detect from content using utility
97
- const language = languageMatch?.[1] ?? detectLanguage(code) ?? '';
94
+ const language = languageMatch?.[1] ?? detectLanguageFromCode(code) ?? '';
98
95
  return `\n\n\`\`\`${language}\n${code.replace(/\n$/, '')}\n\`\`\`\n\n`;
99
96
  },
100
97
  });
101
- // Pre-compiled regex patterns
102
98
  const YAML_SPECIAL_CHARS = /[:[\]{}"\n\r\t'|>&*!?,#]/;
103
99
  const YAML_NUMERIC = /^[\d.]+$/;
104
100
  const YAML_RESERVED_WORDS = /^(true|false|null|yes|no|on|off)$/i;
105
- const ESCAPE_BACKSLASH = /\\/g;
106
- const ESCAPE_QUOTE = /"/g;
107
- const ESCAPE_NEWLINE = /\n/g;
108
- const ESCAPE_TAB = /\t/g;
109
- const MULTIPLE_NEWLINES = /\n{3,}/g;
101
+ const ESCAPE_PATTERNS = {
102
+ backslash: /\\/g,
103
+ quote: /"/g,
104
+ newline: /\n/g,
105
+ tab: /\t/g,
106
+ };
110
107
  function escapeYamlValue(value) {
111
- const needsQuoting = YAML_SPECIAL_CHARS.test(value) ||
108
+ const requiresQuoting = YAML_SPECIAL_CHARS.test(value) ||
112
109
  value.startsWith(' ') ||
113
110
  value.endsWith(' ') ||
114
111
  value === '' ||
115
112
  YAML_NUMERIC.test(value) ||
116
113
  YAML_RESERVED_WORDS.test(value);
117
- if (!needsQuoting)
114
+ if (!requiresQuoting) {
118
115
  return value;
119
- return `"${value
120
- .replace(ESCAPE_BACKSLASH, '\\\\')
121
- .replace(ESCAPE_QUOTE, '\\"')
122
- .replace(ESCAPE_NEWLINE, '\\n')
123
- .replace(ESCAPE_TAB, '\\t')}"`;
116
+ }
117
+ const escaped = value
118
+ .replace(ESCAPE_PATTERNS.backslash, '\\\\')
119
+ .replace(ESCAPE_PATTERNS.quote, '\\"')
120
+ .replace(ESCAPE_PATTERNS.newline, '\\n')
121
+ .replace(ESCAPE_PATTERNS.tab, '\\t');
122
+ return `"${escaped}"`;
124
123
  }
125
124
  function createFrontmatter(metadata) {
126
125
  const lines = ['---'];
127
- if (metadata.title)
126
+ if (metadata.title) {
128
127
  lines.push(`title: ${escapeYamlValue(metadata.title)}`);
129
- if (metadata.url)
128
+ }
129
+ if (metadata.url) {
130
130
  lines.push(`source: ${escapeYamlValue(metadata.url)}`);
131
+ }
131
132
  lines.push('---');
132
133
  return lines.join('\n');
133
134
  }
134
135
  export function htmlToMarkdown(html, metadata) {
136
+ const frontmatter = metadata ? createFrontmatter(metadata) : '';
135
137
  if (!html || typeof html !== 'string') {
136
- return metadata ? `${createFrontmatter(metadata)}\n\n` : '';
138
+ return frontmatter ? `${frontmatter}\n\n` : '';
137
139
  }
138
- let content = '';
139
140
  try {
140
- content = turndown.turndown(html);
141
+ let content = turndown.turndown(html);
141
142
  content = content.replace(MULTIPLE_NEWLINES, '\n\n').trim();
142
- // Clean up noise lines from the markdown
143
143
  content = cleanMarkdownContent(content);
144
- // Final cleanup of multiple newlines after removing noise
145
144
  content = content.replace(MULTIPLE_NEWLINES, '\n\n').trim();
145
+ return frontmatter ? `${frontmatter}\n\n${content}` : content;
146
146
  }
147
147
  catch {
148
- return metadata ? `${createFrontmatter(metadata)}\n\n` : '';
149
- }
150
- if (metadata) {
151
- return `${createFrontmatter(metadata)}\n\n${content}`;
148
+ return frontmatter ? `${frontmatter}\n\n` : '';
152
149
  }
153
- return content;
154
150
  }
155
151
  //# sourceMappingURL=markdown.transformer.js.map
@@ -1 +1 @@
1
- {"version":3,"file":"markdown.transformer.js","sourceRoot":"","sources":["../../src/transformers/markdown.transformer.ts"],"names":[],"mappings":"AAAA,OAAO,eAAe,MAAM,UAAU,CAAC;AAIvC,OAAO,EAAE,cAAc,EAAE,MAAM,+BAA+B,CAAC;AAE/D,6FAA6F;AAC7F,4EAA4E;AAC5E,MAAM,mBAAmB,GAAsB;IAC7C,2DAA2D;IAC3D,SAAS;IACT,kBAAkB;IAElB,mEAAmE;IACnE,8BAA8B;IAC9B,cAAc;IACd,WAAW;IACX,WAAW;CACH,CAAC;AAEX;;GAEG;AACH,SAAS,WAAW,CAAC,IAAY;IAC/B,MAAM,OAAO,GAAG,IAAI,CAAC,IAAI,EAAE,CAAC;IAE5B,uBAAuB;IACvB,IAAI,CAAC,OAAO;QAAE,OAAO,KAAK,CAAC;IAE3B,4DAA4D;IAC5D,IACE,OAAO,CAAC,UAAU,CAAC,GAAG,CAAC;QACvB,OAAO,CAAC,UAAU,CAAC,GAAG,CAAC;QACvB,OAAO,CAAC,UAAU,CAAC,GAAG,CAAC;QACvB,OAAO,CAAC,UAAU,CAAC,GAAG,CAAC;QACvB,OAAO,CAAC,UAAU,CAAC,GAAG,CAAC;QACvB,OAAO,CAAC,UAAU,CAAC,GAAG,CAAC,EACvB,CAAC;QACD,OAAO,KAAK,CAAC;IACf,CAAC;IAED,+BAA+B;IAC/B,KAAK,MAAM,OAAO,IAAI,mBAAmB,EAAE,CAAC;QAC1C,IAAI,OAAO,CAAC,IAAI,CAAC,OAAO,CAAC,EAAE,CAAC;YAC1B,OAAO,IAAI,CAAC;QACd,CAAC;IACH,CAAC;IAED,OAAO,KAAK,CAAC;AACf,CAAC;AAED;;GAEG;AACH,SAAS,oBAAoB,CAAC,QAAgB;IAC5C,0CAA0C;IAC1C,MAAM,KAAK,GAAG,QAAQ,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;IACnC,MAAM,YAAY,GAAa,EAAE,CAAC;IAClC,IAAI,WAAW,GAAG,KAAK,CAAC;IAExB,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;QACzB,8BAA8B;QAC9B,IAAI,IAAI,CAAC,IAAI,EAAE,CAAC,UAAU,CAAC,KAAK,CAAC,EAAE,CAAC;YAClC,WAAW,GAAG,CAAC,WAAW,CAAC;YAC3B,YAAY,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;YACxB,SAAS;QACX,CAAC;QAED,kCAAkC;QAClC,IAAI,WAAW,EAAE,CAAC;YAChB,YAAY,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;YACxB,SAAS;QACX,CAAC;QAED,yCAAyC;QACzC,IAAI,CAAC,WAAW,CAAC,IAAI,CAAC,EAAE,CAAC;YACvB,YAAY,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QAC1B,CAAC;IACH,CAAC;IAED,OAAO,YAAY,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;AACjC,CAAC;AAED,MAAM,QAAQ,GAAG,IAAI,eAAe,CAAC;IACnC,YAAY,EAAE,KAAK;IACnB,cAAc,EAAE,QAAQ;IACxB,WAAW,EAAE,GAAG;IAChB,gBAAgB,EAAE,GAAG;CACtB,CAAC,CAAC;AAEH,wBAAwB;AACxB,QAAQ,CAAC,OAAO,CAAC,aAAa,EAAE;IAC9B,MAAM,EAAE,CAAC,QAAQ,EAAE,OAAO,EAAE,UAAU,EAAE,KAAK,EAAE,QAAQ,EAAE,OAAO,EAAE,QAAQ,CAAC;IAC3E,WAAW,EAAE,GAAG,EAAE,CAAC,EAAE;CACtB,CAAC,CAAC;AAEH,uDAAuD;AACvD,QAAQ,CAAC,OAAO,CAAC,6BAA6B,EAAE;IAC9C,MAAM,EAAE,CAAC,IAAI,EAAE,OAAO,EAAE,EAAE;QACxB,OAAO,CACL,OAAO,CAAC,cAAc,KAAK,QAAQ;YACnC,IAAI,CAAC,QAAQ,KAAK,KAAK;YACvB,IAAI,CAAC,UAAU,KAAK,IAAI;YACxB,IAAI,CAAC,UAAU,CAAC,QAAQ,KAAK,MAAM,CACpC,CAAC;IACJ,CAAC;IACD,WAAW,EAAE,CAAC,QAAQ,EAAE,IAAI,EAAE,EAAE;QAC9B,MAAM,QAAQ,GAAG,IAAI,CAAC,UAAyB,CAAC;QAChD,MAAM,IAAI,GAAG,QAAQ,CAAC,WAAW,IAAI,EAAE,CAAC;QAExC,iCAAiC;QACjC,MAAM,SAAS,GAAG,QAAQ,CAAC,YAAY,CAAC,OAAO,CAAC,IAAI,EAAE,CAAC;QACvD,MAAM,QAAQ,GAAG,QAAQ,CAAC,YAAY,CAAC,eAAe,CAAC,IAAI,EAAE,CAAC;QAE9D,MAAM,aAAa,GACjB,gBAAgB,CAAC,IAAI,CAAC,SAAS,CAAC;YAChC,YAAY,CAAC,IAAI,CAAC,SAAS,CAAC;YAC5B,iBAAiB,CAAC,IAAI,CAAC,SAAS,CAAC;YACjC,SAAS,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC;QAE3B,yEAAyE;QACzE,MAAM,QAAQ,GAAG,aAAa,EAAE,CAAC,CAAC,CAAC,IAAI,cAAc,CAAC,IAAI,CAAC,IAAI,EAAE,CAAC;QAElE,OAAO,aAAa,QAAQ,KAAK,IAAI,CAAC,OAAO,CAAC,KAAK,EAAE,EAAE,CAAC,cAAc,CAAC;IACzE,CAAC;CACF,CAAC,CAAC;AAEH,8BAA8B;AAC9B,MAAM,kBAAkB,GAAG,0BAA0B,CAAC;AACtD,MAAM,YAAY,GAAG,UAAU,CAAC;AAChC,MAAM,mBAAmB,GAAG,oCAAoC,CAAC;AACjE,MAAM,gBAAgB,GAAG,KAAK,CAAC;AAC/B,MAAM,YAAY,GAAG,IAAI,CAAC;AAC1B,MAAM,cAAc,GAAG,KAAK,CAAC;AAC7B,MAAM,UAAU,GAAG,KAAK,CAAC;AACzB,MAAM,iBAAiB,GAAG,SAAS,CAAC;AAEpC,SAAS,eAAe,CAAC,KAAa;IACpC,MAAM,YAAY,GAChB,kBAAkB,CAAC,IAAI,CAAC,KAAK,CAAC;QAC9B,KAAK,CAAC,UAAU,CAAC,GAAG,CAAC;QACrB,KAAK,CAAC,QAAQ,CAAC,GAAG,CAAC;QACnB,KAAK,KAAK,EAAE;QACZ,YAAY,CAAC,IAAI,CAAC,KAAK,CAAC;QACxB,mBAAmB,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;IAElC,IAAI,CAAC,YAAY;QAAE,OAAO,KAAK,CAAC;IAEhC,OAAO,IAAI,KAAK;SACb,OAAO,CAAC,gBAAgB,EAAE,MAAM,CAAC;SACjC,OAAO,CAAC,YAAY,EAAE,KAAK,CAAC;SAC5B,OAAO,CAAC,cAAc,EAAE,KAAK,CAAC;SAC9B,OAAO,CAAC,UAAU,EAAE,KAAK,CAAC,GAAG,CAAC;AACnC,CAAC;AAED,SAAS,iBAAiB,CAAC,QAAuB;IAChD,MAAM,KAAK,GAAG,CAAC,KAAK,CAAC,CAAC;IACtB,IAAI,QAAQ,CAAC,KAAK;QAAE,KAAK,CAAC,IAAI,CAAC,UAAU,eAAe,CAAC,QAAQ,CAAC,KAAK,CAAC,EAAE,CAAC,CAAC;IAC5E,IAAI,QAAQ,CAAC,GAAG;QAAE,KAAK,CAAC,IAAI,CAAC,WAAW,eAAe,CAAC,QAAQ,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC;IACzE,KAAK,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;IAClB,OAAO,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;AAC1B,CAAC;AAED,MAAM,UAAU,cAAc,CAAC,IAAY,EAAE,QAAwB;IACnE,IAAI,CAAC,IAAI,IAAI,OAAO,IAAI,KAAK,QAAQ,EAAE,CAAC;QACtC,OAAO,QAAQ,CAAC,CAAC,CAAC,GAAG,iBAAiB,CAAC,QAAQ,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC;IAC9D,CAAC;IAED,IAAI,OAAO,GAAG,EAAE,CAAC;IACjB,IAAI,CAAC;QACH,OAAO,GAAG,QAAQ,CAAC,QAAQ,CAAC,IAAI,CAAC,CAAC;QAClC,OAAO,GAAG,OAAO,CAAC,OAAO,CAAC,iBAAiB,EAAE,MAAM,CAAC,CAAC,IAAI,EAAE,CAAC;QAC5D,yCAAyC;QACzC,OAAO,GAAG,oBAAoB,CAAC,OAAO,CAAC,CAAC;QACxC,0DAA0D;QAC1D,OAAO,GAAG,OAAO,CAAC,OAAO,CAAC,iBAAiB,EAAE,MAAM,CAAC,CAAC,IAAI,EAAE,CAAC;IAC9D,CAAC;IAAC,MAAM,CAAC;QACP,OAAO,QAAQ,CAAC,CAAC,CAAC,GAAG,iBAAiB,CAAC,QAAQ,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC;IAC9D,CAAC;IAED,IAAI,QAAQ,EAAE,CAAC;QACb,OAAO,GAAG,iBAAiB,CAAC,QAAQ,CAAC,OAAO,OAAO,EAAE,CAAC;IACxD,CAAC;IAED,OAAO,OAAO,CAAC;AACjB,CAAC"}
1
+ {"version":3,"file":"markdown.transformer.js","sourceRoot":"","sources":["../../src/transformers/markdown.transformer.ts"],"names":[],"mappings":"AAAA,OAAO,eAAe,MAAM,UAAU,CAAC;AAIvC,SAAS,sBAAsB,CAAC,IAAY;IAC1C,MAAM,QAAQ,GAAgC;QAC5C;YACE,kFAAkF;YAClF,KAAK;SACN;QACD;YACE,sFAAsF;YACtF,YAAY;SACb;QACD,CAAC,kDAAkD,EAAE,MAAM,CAAC;QAC5D;YACE,+EAA+E;YAC/E,YAAY;SACb;QACD,CAAC,kDAAkD,EAAE,QAAQ,CAAC;QAC9D,CAAC,0DAA0D,EAAE,MAAM,CAAC;QACpE,CAAC,8CAA8C,EAAE,KAAK,CAAC;QACvD,CAAC,wCAAwC,EAAE,MAAM,CAAC;QAClD,CAAC,0BAA0B,EAAE,MAAM,CAAC;QACpC,CAAC,+CAA+C,EAAE,KAAK,CAAC;KACzD,CAAC;IACF,OAAO,QAAQ,CAAC,IAAI,CAAC,CAAC,CAAC,OAAO,CAAC,EAAE,EAAE,CAAC,OAAO,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC;AAC/D,CAAC;AAED,MAAM,mBAAmB,GAAsB;IAC7C,2DAA2D;IAC3D,SAAS;IACT,kBAAkB;IAElB,mEAAmE;IACnE,8BAA8B;IAC9B,cAAc;IACd,WAAW;IACX,WAAW;CACH,CAAC;AAEX,MAAM,iBAAiB,GAAG,SAAS,CAAC;AAEpC,SAAS,WAAW,CAAC,IAAY;IAC/B,MAAM,OAAO,GAAG,IAAI,CAAC,IAAI,EAAE,CAAC;IAE5B,IAAI,CAAC,OAAO;QAAE,OAAO,KAAK,CAAC;IAE3B,MAAM,gBAAgB,GAAG,CAAC,GAAG,EAAE,GAAG,EAAE,GAAG,EAAE,GAAG,EAAE,GAAG,EAAE,GAAG,CAAC,CAAC;IACxD,IAAI,gBAAgB,CAAC,IAAI,CAAC,CAAC,MAAM,EAAE,EAAE,CAAC,OAAO,CAAC,UAAU,CAAC,MAAM,CAAC,CAAC,EAAE,CAAC;QAClE,OAAO,KAAK,CAAC;IACf,CAAC;IAED,OAAO,mBAAmB,CAAC,IAAI,CAAC,CAAC,OAAO,EAAE,EAAE,CAAC,OAAO,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC,CAAC;AACtE,CAAC;AAED,MAAM,UAAU,GAAG,KAAK,CAAC;AAEzB,SAAS,oBAAoB,CAAC,QAAgB;IAC5C,MAAM,KAAK,GAAG,QAAQ,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;IACnC,MAAM,YAAY,GAAa,EAAE,CAAC;IAClC,IAAI,eAAe,GAAG,KAAK,CAAC;IAE5B,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;QACzB,IAAI,IAAI,CAAC,IAAI,EAAE,CAAC,UAAU,CAAC,UAAU,CAAC,EAAE,CAAC;YACvC,eAAe,GAAG,CAAC,eAAe,CAAC;YACnC,YAAY,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;YACxB,SAAS;QACX,CAAC;QAED,IAAI,eAAe,EAAE,CAAC;YACpB,YAAY,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;YACxB,SAAS;QACX,CAAC;QAED,IAAI,CAAC,WAAW,CAAC,IAAI,CAAC,EAAE,CAAC;YACvB,YAAY,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QAC1B,CAAC;IACH,CAAC;IAED,OAAO,YAAY,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;AACjC,CAAC;AAED,MAAM,QAAQ,GAAG,IAAI,eAAe,CAAC;IACnC,YAAY,EAAE,KAAK;IACnB,cAAc,EAAE,QAAQ;IACxB,WAAW,EAAE,GAAG;IAChB,gBAAgB,EAAE,GAAG;CACtB,CAAC,CAAC;AAEH,QAAQ,CAAC,OAAO,CAAC,aAAa,EAAE;IAC9B,MAAM,EAAE,CAAC,QAAQ,EAAE,OAAO,EAAE,UAAU,EAAE,KAAK,EAAE,QAAQ,EAAE,OAAO,EAAE,QAAQ,CAAC;IAC3E,WAAW,EAAE,GAAG,EAAE,CAAC,EAAE;CACtB,CAAC,CAAC;AAEH,QAAQ,CAAC,OAAO,CAAC,6BAA6B,EAAE;IAC9C,MAAM,EAAE,CAAC,IAAI,EAAE,OAAO,EAAE,EAAE;QACxB,OAAO,CACL,OAAO,CAAC,cAAc,KAAK,QAAQ;YACnC,IAAI,CAAC,QAAQ,KAAK,KAAK;YACvB,IAAI,CAAC,UAAU,KAAK,IAAI;YACxB,IAAI,CAAC,UAAU,CAAC,QAAQ,KAAK,MAAM,CACpC,CAAC;IACJ,CAAC;IACD,WAAW,EAAE,CAAC,QAAQ,EAAE,IAAI,EAAE,EAAE;QAC9B,MAAM,QAAQ,GAAG,IAAI,CAAC,UAAyB,CAAC;QAChD,MAAM,IAAI,GAAG,QAAQ,CAAC,WAAW,IAAI,EAAE,CAAC;QAExC,MAAM,SAAS,GAAG,QAAQ,CAAC,YAAY,CAAC,OAAO,CAAC,IAAI,EAAE,CAAC;QACvD,MAAM,QAAQ,GAAG,QAAQ,CAAC,YAAY,CAAC,eAAe,CAAC,IAAI,EAAE,CAAC;QAE9D,MAAM,aAAa,GACjB,gBAAgB,CAAC,IAAI,CAAC,SAAS,CAAC;YAChC,YAAY,CAAC,IAAI,CAAC,SAAS,CAAC;YAC5B,iBAAiB,CAAC,IAAI,CAAC,SAAS,CAAC;YACjC,SAAS,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC;QAE3B,MAAM,QAAQ,GAAG,aAAa,EAAE,CAAC,CAAC,CAAC,IAAI,sBAAsB,CAAC,IAAI,CAAC,IAAI,EAAE,CAAC;QAE1E,OAAO,aAAa,QAAQ,KAAK,IAAI,CAAC,OAAO,CAAC,KAAK,EAAE,EAAE,CAAC,cAAc,CAAC;IACzE,CAAC;CACF,CAAC,CAAC;AAEH,MAAM,kBAAkB,GAAG,0BAA0B,CAAC;AACtD,MAAM,YAAY,GAAG,UAAU,CAAC;AAChC,MAAM,mBAAmB,GAAG,oCAAoC,CAAC;AAEjE,MAAM,eAAe,GAAG;IACtB,SAAS,EAAE,KAAK;IAChB,KAAK,EAAE,IAAI;IACX,OAAO,EAAE,KAAK;IACd,GAAG,EAAE,KAAK;CACF,CAAC;AAEX,SAAS,eAAe,CAAC,KAAa;IACpC,MAAM,eAAe,GACnB,kBAAkB,CAAC,IAAI,CAAC,KAAK,CAAC;QAC9B,KAAK,CAAC,UAAU,CAAC,GAAG,CAAC;QACrB,KAAK,CAAC,QAAQ,CAAC,GAAG,CAAC;QACnB,KAAK,KAAK,EAAE;QACZ,YAAY,CAAC,IAAI,CAAC,KAAK,CAAC;QACxB,mBAAmB,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;IAElC,IAAI,CAAC,eAAe,EAAE,CAAC;QACrB,OAAO,KAAK,CAAC;IACf,CAAC;IAED,MAAM,OAAO,GAAG,KAAK;SAClB,OAAO,CAAC,eAAe,CAAC,SAAS,EAAE,MAAM,CAAC;SAC1C,OAAO,CAAC,eAAe,CAAC,KAAK,EAAE,KAAK,CAAC;SACrC,OAAO,CAAC,eAAe,CAAC,OAAO,EAAE,KAAK,CAAC;SACvC,OAAO,CAAC,eAAe,CAAC,GAAG,EAAE,KAAK,CAAC,CAAC;IAEvC,OAAO,IAAI,OAAO,GAAG,CAAC;AACxB,CAAC;AAED,SAAS,iBAAiB,CAAC,QAAuB;IAChD,MAAM,KAAK,GAAG,CAAC,KAAK,CAAC,CAAC;IAEtB,IAAI,QAAQ,CAAC,KAAK,EAAE,CAAC;QACnB,KAAK,CAAC,IAAI,CAAC,UAAU,eAAe,CAAC,QAAQ,CAAC,KAAK,CAAC,EAAE,CAAC,CAAC;IAC1D,CAAC;IACD,IAAI,QAAQ,CAAC,GAAG,EAAE,CAAC;QACjB,KAAK,CAAC,IAAI,CAAC,WAAW,eAAe,CAAC,QAAQ,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC;IACzD,CAAC;IAED,KAAK,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;IAClB,OAAO,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;AAC1B,CAAC;AAED,MAAM,UAAU,cAAc,CAAC,IAAY,EAAE,QAAwB;IACnE,MAAM,WAAW,GAAG,QAAQ,CAAC,CAAC,CAAC,iBAAiB,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC;IAEhE,IAAI,CAAC,IAAI,IAAI,OAAO,IAAI,KAAK,QAAQ,EAAE,CAAC;QACtC,OAAO,WAAW,CAAC,CAAC,CAAC,GAAG,WAAW,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC;IACjD,CAAC;IAED,IAAI,CAAC;QACH,IAAI,OAAO,GAAG,QAAQ,CAAC,QAAQ,CAAC,IAAI,CAAC,CAAC;QACtC,OAAO,GAAG,OAAO,CAAC,OAAO,CAAC,iBAAiB,EAAE,MAAM,CAAC,CAAC,IAAI,EAAE,CAAC;QAC5D,OAAO,GAAG,oBAAoB,CAAC,OAAO,CAAC,CAAC;QACxC,OAAO,GAAG,OAAO,CAAC,OAAO,CAAC,iBAAiB,EAAE,MAAM,CAAC,CAAC,IAAI,EAAE,CAAC;QAE5D,OAAO,WAAW,CAAC,CAAC,CAAC,GAAG,WAAW,OAAO,OAAO,EAAE,CAAC,CAAC,CAAC,OAAO,CAAC;IAChE,CAAC;IAAC,MAAM,CAAC;QACP,OAAO,WAAW,CAAC,CAAC,CAAC,GAAG,WAAW,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC;IACjD,CAAC;AACH,CAAC"}
@@ -1,6 +1,3 @@
1
- interface ConcurrencyOptions {
2
- onProgress?: (completed: number, total: number) => void;
3
- }
4
- export declare function runWithConcurrency<T>(limit: number, tasks: (() => Promise<T>)[], options?: ConcurrencyOptions): Promise<PromiseSettledResult<T>[]>;
5
- export {};
1
+ import type { ConcurrencyExecutionOptions } from '../config/types.js';
2
+ export declare function runWithConcurrency<T>(limit: number, tasks: readonly (() => Promise<T>)[], options?: ConcurrencyExecutionOptions): Promise<PromiseSettledResult<T>[]>;
6
3
  //# sourceMappingURL=concurrency.d.ts.map
@@ -1 +1 @@
1
- {"version":3,"file":"concurrency.d.ts","sourceRoot":"","sources":["../../src/utils/concurrency.ts"],"names":[],"mappings":"AAEA,UAAU,kBAAkB;IAC1B,UAAU,CAAC,EAAE,CAAC,SAAS,EAAE,MAAM,EAAE,KAAK,EAAE,MAAM,KAAK,IAAI,CAAC;CACzD;AAsBD,wBAAsB,kBAAkB,CAAC,CAAC,EACxC,KAAK,EAAE,MAAM,EACb,KAAK,EAAE,CAAC,MAAM,OAAO,CAAC,CAAC,CAAC,CAAC,EAAE,EAC3B,OAAO,CAAC,EAAE,kBAAkB,GAC3B,OAAO,CAAC,oBAAoB,CAAC,CAAC,CAAC,EAAE,CAAC,CAiBpC"}
1
+ {"version":3,"file":"concurrency.d.ts","sourceRoot":"","sources":["../../src/utils/concurrency.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EACV,2BAA2B,EAE5B,MAAM,oBAAoB,CAAC;AA8B5B,wBAAsB,kBAAkB,CAAC,CAAC,EACxC,KAAK,EAAE,MAAM,EACb,KAAK,EAAE,SAAS,CAAC,MAAM,OAAO,CAAC,CAAC,CAAC,CAAC,EAAE,EACpC,OAAO,CAAC,EAAE,2BAA2B,GACpC,OAAO,CAAC,oBAAoB,CAAC,CAAC,CAAC,EAAE,CAAC,CAepC"}
@@ -1,38 +1,38 @@
1
+ const MAX_CONCURRENCY_LIMIT = 10;
2
+ const MIN_CONCURRENCY = 1;
1
3
  function createConcurrencyLimiter(limit) {
2
- const maxConcurrency = Math.min(Math.max(1, limit), 10);
3
- let active = 0;
4
- const queue = [];
5
- return async (fn) => {
6
- while (active >= maxConcurrency) {
7
- await new Promise((resolve) => queue.push(resolve));
4
+ const maxConcurrency = Math.min(Math.max(MIN_CONCURRENCY, limit), MAX_CONCURRENCY_LIMIT);
5
+ let activeCount = 0;
6
+ const waitingQueue = [];
7
+ return async (task) => {
8
+ while (activeCount >= maxConcurrency) {
9
+ await new Promise((resolve) => waitingQueue.push(resolve));
8
10
  }
9
- active++;
11
+ activeCount++;
10
12
  try {
11
- return await fn();
13
+ return await task();
12
14
  }
13
15
  finally {
14
- active--;
15
- const next = queue.shift();
16
- if (next)
17
- next();
16
+ activeCount--;
17
+ const nextWaiting = waitingQueue.shift();
18
+ if (nextWaiting)
19
+ nextWaiting();
18
20
  }
19
21
  };
20
22
  }
21
23
  export async function runWithConcurrency(limit, tasks, options) {
22
24
  const limiter = createConcurrencyLimiter(limit);
23
- const total = tasks.length;
24
- let completed = 0;
25
+ const totalTasks = tasks.length;
26
+ let completedCount = 0;
25
27
  const wrappedTasks = tasks.map((task) => async () => {
26
28
  try {
27
29
  return await limiter(task);
28
30
  }
29
31
  finally {
30
- completed++;
31
- if (options?.onProgress) {
32
- options.onProgress(completed, total);
33
- }
32
+ completedCount++;
33
+ options?.onProgress?.(completedCount, totalTasks);
34
34
  }
35
35
  });
36
- return Promise.allSettled(wrappedTasks.map(async (task) => task()));
36
+ return Promise.allSettled(wrappedTasks.map((task) => task()));
37
37
  }
38
38
  //# sourceMappingURL=concurrency.js.map
@@ -1 +1 @@
1
- {"version":3,"file":"concurrency.js","sourceRoot":"","sources":["../../src/utils/concurrency.ts"],"names":[],"mappings":"AAMA,SAAS,wBAAwB,CAAC,KAAa;IAC7C,MAAM,cAAc,GAAG,IAAI,CAAC,GAAG,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,KAAK,CAAC,EAAE,EAAE,CAAC,CAAC;IACxD,IAAI,MAAM,GAAG,CAAC,CAAC;IACf,MAAM,KAAK,GAAmB,EAAE,CAAC;IAEjC,OAAO,KAAK,EAAK,EAAoB,EAAc,EAAE;QACnD,OAAO,MAAM,IAAI,cAAc,EAAE,CAAC;YAChC,MAAM,IAAI,OAAO,CAAO,CAAC,OAAO,EAAE,EAAE,CAAC,KAAK,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC,CAAC;QAC5D,CAAC;QAED,MAAM,EAAE,CAAC;QACT,IAAI,CAAC;YACH,OAAO,MAAM,EAAE,EAAE,CAAC;QACpB,CAAC;gBAAS,CAAC;YACT,MAAM,EAAE,CAAC;YACT,MAAM,IAAI,GAAG,KAAK,CAAC,KAAK,EAAE,CAAC;YAC3B,IAAI,IAAI;gBAAE,IAAI,EAAE,CAAC;QACnB,CAAC;IACH,CAAC,CAAC;AACJ,CAAC;AACD,MAAM,CAAC,KAAK,UAAU,kBAAkB,CACtC,KAAa,EACb,KAA2B,EAC3B,OAA4B;IAE5B,MAAM,OAAO,GAAG,wBAAwB,CAAC,KAAK,CAAC,CAAC;IAChD,MAAM,KAAK,GAAG,KAAK,CAAC,MAAM,CAAC;IAC3B,IAAI,SAAS,GAAG,CAAC,CAAC;IAElB,MAAM,YAAY,GAAG,KAAK,CAAC,GAAG,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,KAAK,IAAI,EAAE;QAClD,IAAI,CAAC;YACH,OAAO,MAAM,OAAO,CAAC,IAAI,CAAC,CAAC;QAC7B,CAAC;gBAAS,CAAC;YACT,SAAS,EAAE,CAAC;YACZ,IAAI,OAAO,EAAE,UAAU,EAAE,CAAC;gBACxB,OAAO,CAAC,UAAU,CAAC,SAAS,EAAE,KAAK,CAAC,CAAC;YACvC,CAAC;QACH,CAAC;IACH,CAAC,CAAC,CAAC;IAEH,OAAO,OAAO,CAAC,UAAU,CAAC,YAAY,CAAC,GAAG,CAAC,KAAK,EAAE,IAAI,EAAE,EAAE,CAAC,IAAI,EAAE,CAAC,CAAC,CAAC;AACtE,CAAC"}
1
+ {"version":3,"file":"concurrency.js","sourceRoot":"","sources":["../../src/utils/concurrency.ts"],"names":[],"mappings":"AAKA,MAAM,qBAAqB,GAAG,EAAE,CAAC;AACjC,MAAM,eAAe,GAAG,CAAC,CAAC;AAE1B,SAAS,wBAAwB,CAAC,KAAa;IAC7C,MAAM,cAAc,GAAG,IAAI,CAAC,GAAG,CAC7B,IAAI,CAAC,GAAG,CAAC,eAAe,EAAE,KAAK,CAAC,EAChC,qBAAqB,CACtB,CAAC;IAEF,IAAI,WAAW,GAAG,CAAC,CAAC;IACpB,MAAM,YAAY,GAAmB,EAAE,CAAC;IAExC,OAAO,KAAK,EAAK,IAAsB,EAAc,EAAE;QACrD,OAAO,WAAW,IAAI,cAAc,EAAE,CAAC;YACrC,MAAM,IAAI,OAAO,CAAO,CAAC,OAAO,EAAE,EAAE,CAAC,YAAY,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC,CAAC;QACnE,CAAC;QAED,WAAW,EAAE,CAAC;QACd,IAAI,CAAC;YACH,OAAO,MAAM,IAAI,EAAE,CAAC;QACtB,CAAC;gBAAS,CAAC;YACT,WAAW,EAAE,CAAC;YACd,MAAM,WAAW,GAAG,YAAY,CAAC,KAAK,EAAE,CAAC;YACzC,IAAI,WAAW;gBAAE,WAAW,EAAE,CAAC;QACjC,CAAC;IACH,CAAC,CAAC;AACJ,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,kBAAkB,CACtC,KAAa,EACb,KAAoC,EACpC,OAAqC;IAErC,MAAM,OAAO,GAAG,wBAAwB,CAAC,KAAK,CAAC,CAAC;IAChD,MAAM,UAAU,GAAG,KAAK,CAAC,MAAM,CAAC;IAChC,IAAI,cAAc,GAAG,CAAC,CAAC;IAEvB,MAAM,YAAY,GAAG,KAAK,CAAC,GAAG,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,KAAK,IAAgB,EAAE;QAC9D,IAAI,CAAC;YACH,OAAO,MAAM,OAAO,CAAC,IAAI,CAAC,CAAC;QAC7B,CAAC;gBAAS,CAAC;YACT,cAAc,EAAE,CAAC;YACjB,OAAO,EAAE,UAAU,EAAE,CAAC,cAAc,EAAE,UAAU,CAAC,CAAC;QACpD,CAAC;IACH,CAAC,CAAC,CAAC;IAEH,OAAO,OAAO,CAAC,UAAU,CAAC,YAAY,CAAC,GAAG,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,IAAI,EAAE,CAAC,CAAC,CAAC;AAChE,CAAC"}
@@ -1,32 +1,7 @@
1
- /**
2
- * Post-processing content cleaner for removing noise artifacts
3
- * that slip through Readability extraction.
4
- */
5
- /**
6
- * Clean paragraph text by removing noise
7
- */
8
1
  export declare function cleanParagraph(text: string): string | null;
9
- /**
10
- * Clean heading text by removing noise and markdown link syntax
11
- */
12
2
  export declare function cleanHeading(text: string): string | null;
13
- /**
14
- * Clean list items by filtering out noise
15
- */
16
3
  export declare function cleanListItems(items: string[]): string[];
17
- /**
18
- * Clean code block text - minimal cleaning to preserve code integrity
19
- */
20
4
  export declare function cleanCodeBlock(code: string): string | null;
21
- /**
22
- * Strip markdown link syntax from text for cleaner slugs/display
23
- * [Text](#anchor) -> Text
24
- * [Text](url) -> Text
25
- */
26
5
  export declare function stripMarkdownLinks(text: string): string;
27
- /**
28
- * Remove common timestamp patterns from text (inline removal)
29
- * Use when you want to strip timestamps from within longer content
30
- */
31
6
  export declare function removeInlineTimestamps(text: string): string;
32
7
  //# sourceMappingURL=content-cleaner.d.ts.map
@@ -1 +1 @@
1
- {"version":3,"file":"content-cleaner.d.ts","sourceRoot":"","sources":["../../src/utils/content-cleaner.ts"],"names":[],"mappings":"AAAA;;;GAGG;AA+LH;;GAEG;AACH,wBAAgB,cAAc,CAAC,IAAI,EAAE,MAAM,GAAG,MAAM,GAAG,IAAI,CAsB1D;AAED;;GAEG;AACH,wBAAgB,YAAY,CAAC,IAAI,EAAE,MAAM,GAAG,MAAM,GAAG,IAAI,CAuBxD;AAED;;GAEG;AACH,wBAAgB,cAAc,CAAC,KAAK,EAAE,MAAM,EAAE,GAAG,MAAM,EAAE,CAQxD;AAED;;GAEG;AACH,wBAAgB,cAAc,CAAC,IAAI,EAAE,MAAM,GAAG,MAAM,GAAG,IAAI,CAc1D;AAED;;;;GAIG;AACH,wBAAgB,kBAAkB,CAAC,IAAI,EAAE,MAAM,GAAG,MAAM,CAEvD;AAED;;;GAGG;AACH,wBAAgB,sBAAsB,CAAC,IAAI,EAAE,MAAM,GAAG,MAAM,CAsB3D"}
1
+ {"version":3,"file":"content-cleaner.d.ts","sourceRoot":"","sources":["../../src/utils/content-cleaner.ts"],"names":[],"mappings":"AA2BA,wBAAgB,cAAc,CAAC,IAAI,EAAE,MAAM,GAAG,MAAM,GAAG,IAAI,CAc1D;AAED,wBAAgB,YAAY,CAAC,IAAI,EAAE,MAAM,GAAG,MAAM,GAAG,IAAI,CAgBxD;AAED,wBAAgB,cAAc,CAAC,KAAK,EAAE,MAAM,EAAE,GAAG,MAAM,EAAE,CAQxD;AAED,wBAAgB,cAAc,CAAC,IAAI,EAAE,MAAM,GAAG,MAAM,GAAG,IAAI,CAY1D;AAED,wBAAgB,kBAAkB,CAAC,IAAI,EAAE,MAAM,GAAG,MAAM,CAEvD;AAED,wBAAgB,sBAAsB,CAAC,IAAI,EAAE,MAAM,GAAG,MAAM,CAgB3D"}