@j0hanz/superfetch 1.2.0 → 1.2.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (160) hide show
  1. package/README.md +18 -204
  2. package/dist/config/constants.d.ts +0 -1
  3. package/dist/config/constants.js +0 -1
  4. package/dist/config/formatting.d.ts +0 -1
  5. package/dist/config/formatting.d.ts.map +1 -1
  6. package/dist/config/formatting.js +1 -3
  7. package/dist/config/formatting.js.map +1 -1
  8. package/dist/config/index.d.ts +1 -2
  9. package/dist/config/index.d.ts.map +1 -1
  10. package/dist/config/index.js +5 -11
  11. package/dist/config/index.js.map +1 -1
  12. package/dist/config/types/content.d.ts +0 -1
  13. package/dist/config/types/content.js +0 -1
  14. package/dist/config/types/runtime.d.ts +0 -1
  15. package/dist/config/types/runtime.js +0 -1
  16. package/dist/config/types/tools.d.ts +0 -1
  17. package/dist/config/types/tools.js +0 -1
  18. package/dist/errors/app-error.d.ts +0 -1
  19. package/dist/errors/app-error.js +0 -1
  20. package/dist/http/auth.d.ts +0 -1
  21. package/dist/http/auth.js +0 -1
  22. package/dist/http/cors.d.ts +0 -1
  23. package/dist/http/cors.js +0 -1
  24. package/dist/http/download-routes.d.ts +1 -2
  25. package/dist/http/download-routes.d.ts.map +1 -1
  26. package/dist/http/download-routes.js +2 -2
  27. package/dist/http/mcp-routes.d.ts +0 -1
  28. package/dist/http/mcp-routes.js +0 -1
  29. package/dist/http/mcp-session-helpers.d.ts +0 -1
  30. package/dist/http/mcp-session-helpers.js +0 -1
  31. package/dist/http/mcp-session.d.ts +1 -2
  32. package/dist/http/mcp-session.d.ts.map +1 -1
  33. package/dist/http/mcp-session.js +0 -1
  34. package/dist/http/mcp-validation.d.ts +1 -2
  35. package/dist/http/mcp-validation.d.ts.map +1 -1
  36. package/dist/http/mcp-validation.js +6 -27
  37. package/dist/http/mcp-validation.js.map +1 -1
  38. package/dist/http/rate-limit.d.ts +1 -2
  39. package/dist/http/rate-limit.d.ts.map +1 -1
  40. package/dist/http/rate-limit.js +0 -1
  41. package/dist/http/rate-limit.js.map +1 -1
  42. package/dist/http/server-middleware.d.ts +0 -1
  43. package/dist/http/server-middleware.d.ts.map +1 -1
  44. package/dist/http/server-middleware.js +60 -4
  45. package/dist/http/server-middleware.js.map +1 -1
  46. package/dist/http/server.d.ts +0 -1
  47. package/dist/http/server.js +0 -1
  48. package/dist/http/session-cleanup.d.ts +0 -1
  49. package/dist/http/session-cleanup.js +0 -1
  50. package/dist/http/sessions.d.ts +1 -2
  51. package/dist/http/sessions.d.ts.map +1 -1
  52. package/dist/http/sessions.js +0 -1
  53. package/dist/index.d.ts +0 -1
  54. package/dist/index.js +0 -1
  55. package/dist/middleware/error-handler.d.ts +0 -1
  56. package/dist/middleware/error-handler.js +0 -1
  57. package/dist/resources/cached-content.d.ts +0 -1
  58. package/dist/resources/cached-content.js +0 -1
  59. package/dist/resources/index.d.ts +0 -1
  60. package/dist/resources/index.js +0 -1
  61. package/dist/server.d.ts +0 -1
  62. package/dist/server.d.ts.map +1 -1
  63. package/dist/server.js +8 -3
  64. package/dist/server.js.map +1 -1
  65. package/dist/services/cache.d.ts +2 -2
  66. package/dist/services/cache.d.ts.map +1 -1
  67. package/dist/services/cache.js +4 -2
  68. package/dist/services/context.d.ts +2 -1
  69. package/dist/services/context.js +10 -1
  70. package/dist/services/extractor.d.ts +1 -2
  71. package/dist/services/extractor.d.ts.map +1 -1
  72. package/dist/services/extractor.js +0 -1
  73. package/dist/services/fetcher/agents.d.ts +0 -1
  74. package/dist/services/fetcher/agents.js +55 -2
  75. package/dist/services/fetcher/errors.d.ts +0 -1
  76. package/dist/services/fetcher/errors.js +0 -1
  77. package/dist/services/fetcher/interceptors.d.ts +0 -1
  78. package/dist/services/fetcher/interceptors.js +44 -24
  79. package/dist/services/fetcher/redirects.d.ts +0 -1
  80. package/dist/services/fetcher/redirects.js +2 -3
  81. package/dist/services/fetcher/response.d.ts +1 -2
  82. package/dist/services/fetcher/response.js +28 -16
  83. package/dist/services/fetcher/retry-policy.d.ts +0 -1
  84. package/dist/services/fetcher/retry-policy.js +0 -1
  85. package/dist/services/fetcher.d.ts +2 -2
  86. package/dist/services/fetcher.d.ts.map +1 -1
  87. package/dist/services/fetcher.js +9 -7
  88. package/dist/services/fetcher.js.map +1 -1
  89. package/dist/services/logger.d.ts +1 -2
  90. package/dist/services/logger.d.ts.map +1 -1
  91. package/dist/services/logger.js +0 -1
  92. package/dist/services/parser.d.ts +5 -2
  93. package/dist/services/parser.d.ts.map +1 -1
  94. package/dist/services/parser.js +72 -3
  95. package/dist/tools/handlers/fetch-markdown.tool.d.ts +1 -2
  96. package/dist/tools/handlers/fetch-markdown.tool.d.ts.map +1 -1
  97. package/dist/tools/handlers/fetch-markdown.tool.js +34 -3
  98. package/dist/tools/handlers/fetch-markdown.tool.js.map +1 -1
  99. package/dist/tools/handlers/fetch-single.shared.d.ts +5 -3
  100. package/dist/tools/handlers/fetch-single.shared.d.ts.map +1 -1
  101. package/dist/tools/handlers/fetch-single.shared.js +8 -4
  102. package/dist/tools/handlers/fetch-single.shared.js.map +1 -1
  103. package/dist/tools/handlers/fetch-url.tool.d.ts +1 -2
  104. package/dist/tools/handlers/fetch-url.tool.d.ts.map +1 -1
  105. package/dist/tools/handlers/fetch-url.tool.js +36 -3
  106. package/dist/tools/index.d.ts +0 -1
  107. package/dist/tools/index.js +0 -1
  108. package/dist/tools/schemas.d.ts +32 -33
  109. package/dist/tools/schemas.js +21 -5
  110. package/dist/tools/utils/cache-vary.d.ts +0 -1
  111. package/dist/tools/utils/cache-vary.d.ts.map +1 -1
  112. package/dist/tools/utils/cache-vary.js +7 -25
  113. package/dist/tools/utils/cache-vary.js.map +1 -1
  114. package/dist/tools/utils/common.d.ts +2 -3
  115. package/dist/tools/utils/common.d.ts.map +1 -1
  116. package/dist/tools/utils/common.js +6 -7
  117. package/dist/tools/utils/common.js.map +1 -1
  118. package/dist/tools/utils/content-transform.d.ts +1 -3
  119. package/dist/tools/utils/content-transform.d.ts.map +1 -1
  120. package/dist/tools/utils/content-transform.js +73 -8
  121. package/dist/tools/utils/content-transform.js.map +1 -1
  122. package/dist/tools/utils/fetch-pipeline.d.ts +1 -2
  123. package/dist/tools/utils/fetch-pipeline.d.ts.map +1 -1
  124. package/dist/tools/utils/fetch-pipeline.js +18 -22
  125. package/dist/tools/utils/inline-content.d.ts +0 -1
  126. package/dist/tools/utils/inline-content.js +0 -1
  127. package/dist/transformers/jsonl.transformer.d.ts +1 -2
  128. package/dist/transformers/jsonl.transformer.d.ts.map +1 -1
  129. package/dist/transformers/jsonl.transformer.js +0 -1
  130. package/dist/transformers/jsonl.transformer.js.map +1 -1
  131. package/dist/transformers/markdown.transformer.d.ts +1 -2
  132. package/dist/transformers/markdown.transformer.d.ts.map +1 -1
  133. package/dist/transformers/markdown.transformer.js +8 -1
  134. package/dist/utils/code-language.d.ts +0 -1
  135. package/dist/utils/code-language.js +0 -1
  136. package/dist/utils/content-cleaner.d.ts +0 -1
  137. package/dist/utils/content-cleaner.js +0 -1
  138. package/dist/utils/crypto.d.ts +0 -1
  139. package/dist/utils/crypto.js +0 -1
  140. package/dist/utils/download-url.d.ts +1 -2
  141. package/dist/utils/download-url.d.ts.map +1 -1
  142. package/dist/utils/download-url.js +0 -1
  143. package/dist/utils/error-utils.d.ts +0 -1
  144. package/dist/utils/error-utils.js +1 -3
  145. package/dist/utils/filename-generator.d.ts +0 -1
  146. package/dist/utils/filename-generator.js +0 -1
  147. package/dist/utils/header-normalizer.d.ts +0 -1
  148. package/dist/utils/header-normalizer.js +0 -1
  149. package/dist/utils/html-truncator.d.ts +0 -1
  150. package/dist/utils/html-truncator.js +0 -1
  151. package/dist/utils/sanitizer.d.ts +0 -1
  152. package/dist/utils/sanitizer.js +0 -1
  153. package/dist/utils/tool-error-handler.d.ts +1 -2
  154. package/dist/utils/tool-error-handler.d.ts.map +1 -1
  155. package/dist/utils/tool-error-handler.js +0 -1
  156. package/dist/utils/url-validator.d.ts +6 -3
  157. package/dist/utils/url-validator.d.ts.map +1 -1
  158. package/dist/utils/url-validator.js +49 -11
  159. package/dist/utils/url-validator.js.map +1 -1
  160. package/package.json +4 -4
@@ -1,6 +1,6 @@
1
1
  import { z } from 'zod';
2
2
  export declare const fetchUrlInputSchema: z.ZodObject<{
3
- customHeaders: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodString>>;
3
+ customHeaders: z.ZodOptional<z.ZodEffects<z.ZodRecord<z.ZodString, z.ZodString>, Record<string, string>, Record<string, string>>>;
4
4
  timeout: z.ZodDefault<z.ZodNumber>;
5
5
  retries: z.ZodDefault<z.ZodNumber>;
6
6
  } & {
@@ -13,25 +13,25 @@ export declare const fetchUrlInputSchema: z.ZodObject<{
13
13
  format: z.ZodDefault<z.ZodEnum<["jsonl", "markdown"]>>;
14
14
  }, "strict", z.ZodTypeAny, {
15
15
  url: string;
16
+ timeout: number;
17
+ retries: number;
16
18
  extractMainContent: boolean;
17
19
  includeMetadata: boolean;
18
20
  format: "jsonl" | "markdown";
19
- timeout: number;
20
- retries: number;
21
- maxContentLength?: number | undefined;
22
21
  customHeaders?: Record<string, string> | undefined;
22
+ maxContentLength?: number | undefined;
23
23
  }, {
24
24
  url: string;
25
+ customHeaders?: Record<string, string> | undefined;
26
+ timeout?: number | undefined;
27
+ retries?: number | undefined;
25
28
  extractMainContent?: boolean | undefined;
26
29
  includeMetadata?: boolean | undefined;
27
30
  maxContentLength?: number | undefined;
28
31
  format?: "jsonl" | "markdown" | undefined;
29
- customHeaders?: Record<string, string> | undefined;
30
- timeout?: number | undefined;
31
- retries?: number | undefined;
32
32
  }>;
33
33
  export declare const fetchMarkdownInputSchema: z.ZodObject<{
34
- customHeaders: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodString>>;
34
+ customHeaders: z.ZodOptional<z.ZodEffects<z.ZodRecord<z.ZodString, z.ZodString>, Record<string, string>, Record<string, string>>>;
35
35
  timeout: z.ZodDefault<z.ZodNumber>;
36
36
  retries: z.ZodDefault<z.ZodNumber>;
37
37
  } & {
@@ -42,20 +42,20 @@ export declare const fetchMarkdownInputSchema: z.ZodObject<{
42
42
  maxContentLength: z.ZodOptional<z.ZodNumber>;
43
43
  }, "strict", z.ZodTypeAny, {
44
44
  url: string;
45
- extractMainContent: boolean;
46
- includeMetadata: boolean;
47
45
  timeout: number;
48
46
  retries: number;
49
- maxContentLength?: number | undefined;
47
+ extractMainContent: boolean;
48
+ includeMetadata: boolean;
50
49
  customHeaders?: Record<string, string> | undefined;
50
+ maxContentLength?: number | undefined;
51
51
  }, {
52
52
  url: string;
53
- extractMainContent?: boolean | undefined;
54
- includeMetadata?: boolean | undefined;
55
- maxContentLength?: number | undefined;
56
53
  customHeaders?: Record<string, string> | undefined;
57
54
  timeout?: number | undefined;
58
55
  retries?: number | undefined;
56
+ extractMainContent?: boolean | undefined;
57
+ includeMetadata?: boolean | undefined;
58
+ maxContentLength?: number | undefined;
59
59
  }>;
60
60
  export declare const fetchUrlOutputSchema: z.ZodObject<{
61
61
  url: z.ZodString;
@@ -75,31 +75,31 @@ export declare const fetchUrlOutputSchema: z.ZodObject<{
75
75
  }, "strict", z.ZodTypeAny, {
76
76
  url: string;
77
77
  format: "jsonl" | "markdown";
78
+ contentBlocks: number;
78
79
  fetchedAt: string;
79
80
  cached: boolean;
80
- contentBlocks: number;
81
81
  error?: string | undefined;
82
- content?: string | undefined;
83
- errorCode?: string | undefined;
84
82
  title?: string | undefined;
83
+ content?: string | undefined;
85
84
  contentSize?: number | undefined;
86
- truncated?: boolean | undefined;
87
85
  resourceUri?: string | undefined;
88
86
  resourceMimeType?: string | undefined;
87
+ truncated?: boolean | undefined;
88
+ errorCode?: string | undefined;
89
89
  }, {
90
90
  url: string;
91
91
  format: "jsonl" | "markdown";
92
+ contentBlocks: number;
92
93
  fetchedAt: string;
93
94
  cached: boolean;
94
- contentBlocks: number;
95
95
  error?: string | undefined;
96
- content?: string | undefined;
97
- errorCode?: string | undefined;
98
96
  title?: string | undefined;
97
+ content?: string | undefined;
99
98
  contentSize?: number | undefined;
100
- truncated?: boolean | undefined;
101
99
  resourceUri?: string | undefined;
102
100
  resourceMimeType?: string | undefined;
101
+ truncated?: boolean | undefined;
102
+ errorCode?: string | undefined;
103
103
  }>;
104
104
  export declare const fetchMarkdownOutputSchema: z.ZodObject<{
105
105
  url: z.ZodString;
@@ -111,12 +111,12 @@ export declare const fetchMarkdownOutputSchema: z.ZodObject<{
111
111
  fileName: z.ZodString;
112
112
  expiresAt: z.ZodString;
113
113
  }, "strip", z.ZodTypeAny, {
114
- fileName: string;
115
114
  downloadUrl: string;
115
+ fileName: string;
116
116
  expiresAt: string;
117
117
  }, {
118
- fileName: string;
119
118
  downloadUrl: string;
119
+ fileName: string;
120
120
  expiresAt: string;
121
121
  }>>;
122
122
  } & {
@@ -133,33 +133,32 @@ export declare const fetchMarkdownOutputSchema: z.ZodObject<{
133
133
  cached: boolean;
134
134
  error?: string | undefined;
135
135
  markdown?: string | undefined;
136
- errorCode?: string | undefined;
137
136
  title?: string | undefined;
138
137
  contentSize?: number | undefined;
138
+ resourceUri?: string | undefined;
139
+ resourceMimeType?: string | undefined;
139
140
  truncated?: boolean | undefined;
141
+ errorCode?: string | undefined;
140
142
  file?: {
141
- fileName: string;
142
143
  downloadUrl: string;
144
+ fileName: string;
143
145
  expiresAt: string;
144
146
  } | undefined;
145
- resourceUri?: string | undefined;
146
- resourceMimeType?: string | undefined;
147
147
  }, {
148
148
  url: string;
149
149
  fetchedAt: string;
150
150
  cached: boolean;
151
151
  error?: string | undefined;
152
152
  markdown?: string | undefined;
153
- errorCode?: string | undefined;
154
153
  title?: string | undefined;
155
154
  contentSize?: number | undefined;
155
+ resourceUri?: string | undefined;
156
+ resourceMimeType?: string | undefined;
156
157
  truncated?: boolean | undefined;
158
+ errorCode?: string | undefined;
157
159
  file?: {
158
- fileName: string;
159
160
  downloadUrl: string;
161
+ fileName: string;
160
162
  expiresAt: string;
161
163
  } | undefined;
162
- resourceUri?: string | undefined;
163
- resourceMimeType?: string | undefined;
164
164
  }>;
165
- //# sourceMappingURL=schemas.d.ts.map
@@ -1,8 +1,16 @@
1
1
  import { z } from 'zod';
2
2
  import { config } from '../config/index.js';
3
+ const MAX_HEADER_NAME_LENGTH = 128;
4
+ const MAX_HEADER_VALUE_LENGTH = 2048;
5
+ const MAX_HEADER_COUNT = 50;
6
+ const MAX_CONTENT_LENGTH = config.constants.maxContentSize;
7
+ const customHeadersSchema = z
8
+ .record(z.string().max(MAX_HEADER_NAME_LENGTH), z.string().max(MAX_HEADER_VALUE_LENGTH))
9
+ .refine((headers) => Object.keys(headers).length <= MAX_HEADER_COUNT, {
10
+ message: `customHeaders must have at most ${MAX_HEADER_COUNT} entries`,
11
+ });
3
12
  const requestOptionsSchema = z.object({
4
- customHeaders: z
5
- .record(z.string())
13
+ customHeaders: customHeadersSchema
6
14
  .optional()
7
15
  .describe('Custom HTTP headers for the request'),
8
16
  timeout: z
@@ -30,6 +38,7 @@ const extractionOptionsSchema = z.object({
30
38
  maxContentLength: z
31
39
  .number()
32
40
  .positive()
41
+ .max(MAX_CONTENT_LENGTH)
33
42
  .optional()
34
43
  .describe('Maximum content length in characters'),
35
44
  });
@@ -64,14 +73,22 @@ const fileDownloadSchema = z.object({
64
73
  });
65
74
  export const fetchUrlInputSchema = requestOptionsSchema
66
75
  .extend({
67
- url: z.string().min(1).describe('The URL to fetch'),
76
+ url: z
77
+ .string()
78
+ .min(1)
79
+ .max(config.constants.maxUrlLength)
80
+ .describe('The URL to fetch'),
68
81
  })
69
82
  .merge(extractionOptionsSchema)
70
83
  .merge(formatOptionsSchema)
71
84
  .strict();
72
85
  export const fetchMarkdownInputSchema = requestOptionsSchema
73
86
  .extend({
74
- url: z.string().min(1).describe('The URL to fetch'),
87
+ url: z
88
+ .string()
89
+ .min(1)
90
+ .max(config.constants.maxUrlLength)
91
+ .describe('The URL to fetch'),
75
92
  })
76
93
  .merge(extractionOptionsSchema)
77
94
  .strict();
@@ -110,4 +127,3 @@ export const fetchMarkdownOutputSchema = z
110
127
  })
111
128
  .merge(resourceFieldsSchema)
112
129
  .strict();
113
- //# sourceMappingURL=schemas.js.map
@@ -1,2 +1 @@
1
1
  export declare function appendHeaderVary(cacheVary: Record<string, unknown> | string | undefined, customHeaders?: Record<string, string>): Record<string, unknown> | string | undefined;
2
- //# sourceMappingURL=cache-vary.d.ts.map
@@ -1 +1 @@
1
- {"version":3,"file":"cache-vary.d.ts","sourceRoot":"","sources":["../../../src/tools/utils/cache-vary.ts"],"names":[],"mappings":"AAYA,wBAAgB,gBAAgB,CAC9B,SAAS,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,GAAG,MAAM,GAAG,SAAS,EACvD,aAAa,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,GACrC,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,GAAG,MAAM,GAAG,SAAS,CAG9C"}
1
+ {"version":3,"file":"cache-vary.d.ts","sourceRoot":"","sources":["../../../src/tools/utils/cache-vary.ts"],"names":[],"mappings":"AAIA,wBAAgB,gBAAgB,CAC9B,SAAS,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,GAAG,MAAM,GAAG,SAAS,EACvD,aAAa,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,GACrC,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,GAAG,MAAM,GAAG,SAAS,CAY9C"}
@@ -1,30 +1,12 @@
1
1
  import { config } from '../../config/index.js';
2
2
  import { normalizeHeaderRecord } from '../../utils/header-normalizer.js';
3
- function normalizeHeadersForCache(headers) {
4
- return normalizeHeaderRecord(headers, config.security.blockedHeaders, {
5
- trimValues: true,
6
- });
7
- }
8
3
  export function appendHeaderVary(cacheVary, customHeaders) {
9
- const headerVary = normalizeHeadersForCache(customHeaders);
10
- return mergeCacheVary(cacheVary, headerVary);
11
- }
12
- function mergeCacheVary(cacheVary, headerVary) {
13
- if (!cacheVary && !headerVary)
14
- return undefined;
15
- if (typeof cacheVary === 'string') {
16
- return buildStringVary(cacheVary, headerVary);
17
- }
18
- return mergeObjectVary(cacheVary, headerVary);
19
- }
20
- function mergeObjectVary(cacheVary, headerVary) {
21
- if (!headerVary)
4
+ const headers = normalizeHeaderRecord(customHeaders, config.security.blockedHeaders, { trimValues: true });
5
+ if (!headers)
22
6
  return cacheVary;
23
- return { ...(cacheVary ?? {}), headers: headerVary };
24
- }
25
- function buildStringVary(key, headerVary) {
26
- if (!headerVary)
27
- return { key };
28
- return { key, headers: headerVary };
7
+ if (!cacheVary)
8
+ return { headers };
9
+ return typeof cacheVary === 'string'
10
+ ? { key: cacheVary, headers }
11
+ : { ...cacheVary, headers };
29
12
  }
30
- //# sourceMappingURL=cache-vary.js.map
@@ -1 +1 @@
1
- {"version":3,"file":"cache-vary.js","sourceRoot":"","sources":["../../../src/tools/utils/cache-vary.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,MAAM,EAAE,MAAM,uBAAuB,CAAC;AAE/C,OAAO,EAAE,qBAAqB,EAAE,MAAM,kCAAkC,CAAC;AAEzE,SAAS,wBAAwB,CAC/B,OAAgC;IAEhC,OAAO,qBAAqB,CAAC,OAAO,EAAE,MAAM,CAAC,QAAQ,CAAC,cAAc,EAAE;QACpE,UAAU,EAAE,IAAI;KACjB,CAAC,CAAC;AACL,CAAC;AAED,MAAM,UAAU,gBAAgB,CAC9B,SAAuD,EACvD,aAAsC;IAEtC,MAAM,UAAU,GAAG,wBAAwB,CAAC,aAAa,CAAC,CAAC;IAC3D,OAAO,cAAc,CAAC,SAAS,EAAE,UAAU,CAAC,CAAC;AAC/C,CAAC;AAED,SAAS,cAAc,CACrB,SAAuD,EACvD,UAA8C;IAE9C,IAAI,CAAC,SAAS,IAAI,CAAC,UAAU;QAAE,OAAO,SAAS,CAAC;IAChD,IAAI,OAAO,SAAS,KAAK,QAAQ,EAAE,CAAC;QAClC,OAAO,eAAe,CAAC,SAAS,EAAE,UAAU,CAAC,CAAC;IAChD,CAAC;IACD,OAAO,eAAe,CAAC,SAAS,EAAE,UAAU,CAAC,CAAC;AAChD,CAAC;AAED,SAAS,eAAe,CACtB,SAA8C,EAC9C,UAA8C;IAE9C,IAAI,CAAC,UAAU;QAAE,OAAO,SAAS,CAAC;IAClC,OAAO,EAAE,GAAG,CAAC,SAAS,IAAI,EAAE,CAAC,EAAE,OAAO,EAAE,UAAU,EAAE,CAAC;AACvD,CAAC;AAED,SAAS,eAAe,CACtB,GAAW,EACX,UAA8C;IAE9C,IAAI,CAAC,UAAU;QAAE,OAAO,EAAE,GAAG,EAAE,CAAC;IAChC,OAAO,EAAE,GAAG,EAAE,OAAO,EAAE,UAAU,EAAE,CAAC;AACtC,CAAC"}
1
+ {"version":3,"file":"cache-vary.js","sourceRoot":"","sources":["../../../src/tools/utils/cache-vary.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,MAAM,EAAE,MAAM,uBAAuB,CAAC;AAE/C,OAAO,EAAE,qBAAqB,EAAE,MAAM,kCAAkC,CAAC;AAEzE,MAAM,UAAU,gBAAgB,CAC9B,SAAuD,EACvD,aAAsC;IAEtC,MAAM,OAAO,GAAG,qBAAqB,CACnC,aAAa,EACb,MAAM,CAAC,QAAQ,CAAC,cAAc,EAC9B,EAAE,UAAU,EAAE,IAAI,EAAE,CACrB,CAAC;IAEF,IAAI,CAAC,OAAO;QAAE,OAAO,SAAS,CAAC;IAC/B,IAAI,CAAC,SAAS;QAAE,OAAO,EAAE,OAAO,EAAE,CAAC;IACnC,OAAO,OAAO,SAAS,KAAK,QAAQ;QAClC,CAAC,CAAC,EAAE,GAAG,EAAE,SAAS,EAAE,OAAO,EAAE;QAC7B,CAAC,CAAC,EAAE,GAAG,SAAS,EAAE,OAAO,EAAE,CAAC;AAChC,CAAC"}
@@ -1,6 +1,5 @@
1
- import type { ExtractedArticle, ExtractedMetadata, MetadataBlock, TruncationResult } from '../../config/types.js';
1
+ import type { ExtractedArticle, ExtractedMetadata, MetadataBlock } from '../../config/types/content.js';
2
+ import type { TruncationResult } from '../../config/types/runtime.js';
2
3
  export declare function determineContentExtractionSource(extractMainContent: boolean, article: ExtractedArticle | null): article is ExtractedArticle;
3
4
  export declare function createContentMetadataBlock(url: string, article: ExtractedArticle | null, extractedMeta: ExtractedMetadata, shouldExtractFromArticle: boolean, includeMetadata: boolean): MetadataBlock | undefined;
4
- export declare function enforceContentLengthLimit(content: string, maxLength?: number): TruncationResult;
5
5
  export declare function truncateContent(content: string, maxLength?: number, suffix?: string): TruncationResult;
6
- //# sourceMappingURL=common.d.ts.map
@@ -1 +1 @@
1
- {"version":3,"file":"common.d.ts","sourceRoot":"","sources":["../../../src/tools/utils/common.ts"],"names":[],"mappings":"AACA,OAAO,KAAK,EACV,gBAAgB,EAChB,iBAAiB,EACjB,aAAa,EACb,gBAAgB,EACjB,MAAM,uBAAuB,CAAC;AAE/B,wBAAgB,gCAAgC,CAC9C,kBAAkB,EAAE,OAAO,EAC3B,OAAO,EAAE,gBAAgB,GAAG,IAAI,GAC/B,OAAO,IAAI,gBAAgB,CAE7B;AAED,wBAAgB,0BAA0B,CACxC,GAAG,EAAE,MAAM,EACX,OAAO,EAAE,gBAAgB,GAAG,IAAI,EAChC,aAAa,EAAE,iBAAiB,EAChC,wBAAwB,EAAE,OAAO,EACjC,eAAe,EAAE,OAAO,GACvB,aAAa,GAAG,SAAS,CAmB3B;AAED,wBAAgB,yBAAyB,CACvC,OAAO,EAAE,MAAM,EACf,SAAS,CAAC,EAAE,MAAM,GACjB,gBAAgB,CAElB;AAED,wBAAgB,eAAe,CAC7B,OAAO,EAAE,MAAM,EACf,SAAS,CAAC,EAAE,MAAM,EAClB,MAAM,SAAoB,GACzB,gBAAgB,CAYlB"}
1
+ {"version":3,"file":"common.d.ts","sourceRoot":"","sources":["../../../src/tools/utils/common.ts"],"names":[],"mappings":"AACA,OAAO,KAAK,EACV,gBAAgB,EAChB,iBAAiB,EACjB,aAAa,EACd,MAAM,+BAA+B,CAAC;AACvC,OAAO,KAAK,EAAE,gBAAgB,EAAE,MAAM,+BAA+B,CAAC;AAEtE,wBAAgB,gCAAgC,CAC9C,kBAAkB,EAAE,OAAO,EAC3B,OAAO,EAAE,gBAAgB,GAAG,IAAI,GAC/B,OAAO,IAAI,gBAAgB,CAE7B;AAED,wBAAgB,0BAA0B,CACxC,GAAG,EAAE,MAAM,EACX,OAAO,EAAE,gBAAgB,GAAG,IAAI,EAChC,aAAa,EAAE,iBAAiB,EAChC,wBAAwB,EAAE,OAAO,EACjC,eAAe,EAAE,OAAO,GACvB,aAAa,GAAG,SAAS,CAmB3B;AAED,wBAAgB,eAAe,CAC7B,OAAO,EAAE,MAAM,EACf,SAAS,CAAC,EAAE,MAAM,EAClB,MAAM,SAAoB,GACzB,gBAAgB,CAYlB"}
@@ -23,17 +23,16 @@ export function createContentMetadataBlock(url, article, extractedMeta, shouldEx
23
23
  fetchedAt: now,
24
24
  };
25
25
  }
26
- export function enforceContentLengthLimit(content, maxLength) {
27
- return truncateContent(content, maxLength);
28
- }
29
26
  export function truncateContent(content, maxLength, suffix = TRUNCATION_MARKER) {
30
- const shouldTruncate = maxLength !== undefined && maxLength > 0 && content.length > maxLength;
31
- if (!shouldTruncate) {
27
+ if (maxLength === undefined ||
28
+ maxLength <= 0 ||
29
+ content.length <= maxLength) {
32
30
  return { content, truncated: false };
33
31
  }
32
+ const safeMax = Math.max(0, maxLength - suffix.length);
33
+ const marker = suffix.length > maxLength ? suffix.substring(0, maxLength) : suffix;
34
34
  return {
35
- content: `${content.substring(0, maxLength)}${suffix}`,
35
+ content: `${content.substring(0, safeMax)}${marker}`,
36
36
  truncated: true,
37
37
  };
38
38
  }
39
- //# sourceMappingURL=common.js.map
@@ -1 +1 @@
1
- {"version":3,"file":"common.js","sourceRoot":"","sources":["../../../src/tools/utils/common.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,iBAAiB,EAAE,MAAM,4BAA4B,CAAC;AAQ/D,MAAM,UAAU,gCAAgC,CAC9C,kBAA2B,EAC3B,OAAgC;IAEhC,OAAO,kBAAkB,IAAI,CAAC,CAAC,OAAO,CAAC;AACzC,CAAC;AAED,MAAM,UAAU,0BAA0B,CACxC,GAAW,EACX,OAAgC,EAChC,aAAgC,EAChC,wBAAiC,EACjC,eAAwB;IAExB,IAAI,CAAC,eAAe;QAAE,OAAO,SAAS,CAAC;IACvC,MAAM,GAAG,GAAG,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE,CAAC;IACrC,OAAO,wBAAwB,IAAI,OAAO;QACxC,CAAC,CAAC;YACE,IAAI,EAAE,UAAU;YAChB,KAAK,EAAE,OAAO,CAAC,KAAK;YACpB,MAAM,EAAE,OAAO,CAAC,MAAM;YACtB,GAAG;YACH,SAAS,EAAE,GAAG;SACf;QACH,CAAC,CAAC;YACE,IAAI,EAAE,UAAU;YAChB,KAAK,EAAE,aAAa,CAAC,KAAK;YAC1B,WAAW,EAAE,aAAa,CAAC,WAAW;YACtC,MAAM,EAAE,aAAa,CAAC,MAAM;YAC5B,GAAG;YACH,SAAS,EAAE,GAAG;SACf,CAAC;AACR,CAAC;AAED,MAAM,UAAU,yBAAyB,CACvC,OAAe,EACf,SAAkB;IAElB,OAAO,eAAe,CAAC,OAAO,EAAE,SAAS,CAAC,CAAC;AAC7C,CAAC;AAED,MAAM,UAAU,eAAe,CAC7B,OAAe,EACf,SAAkB,EAClB,MAAM,GAAG,iBAAiB;IAE1B,MAAM,cAAc,GAClB,SAAS,KAAK,SAAS,IAAI,SAAS,GAAG,CAAC,IAAI,OAAO,CAAC,MAAM,GAAG,SAAS,CAAC;IAEzE,IAAI,CAAC,cAAc,EAAE,CAAC;QACpB,OAAO,EAAE,OAAO,EAAE,SAAS,EAAE,KAAK,EAAE,CAAC;IACvC,CAAC;IAED,OAAO;QACL,OAAO,EAAE,GAAG,OAAO,CAAC,SAAS,CAAC,CAAC,EAAE,SAAS,CAAC,GAAG,MAAM,EAAE;QACtD,SAAS,EAAE,IAAI;KAChB,CAAC;AACJ,CAAC"}
1
+ {"version":3,"file":"common.js","sourceRoot":"","sources":["../../../src/tools/utils/common.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,iBAAiB,EAAE,MAAM,4BAA4B,CAAC;AAQ/D,MAAM,UAAU,gCAAgC,CAC9C,kBAA2B,EAC3B,OAAgC;IAEhC,OAAO,kBAAkB,IAAI,CAAC,CAAC,OAAO,CAAC;AACzC,CAAC;AAED,MAAM,UAAU,0BAA0B,CACxC,GAAW,EACX,OAAgC,EAChC,aAAgC,EAChC,wBAAiC,EACjC,eAAwB;IAExB,IAAI,CAAC,eAAe;QAAE,OAAO,SAAS,CAAC;IACvC,MAAM,GAAG,GAAG,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE,CAAC;IACrC,OAAO,wBAAwB,IAAI,OAAO;QACxC,CAAC,CAAC;YACE,IAAI,EAAE,UAAU;YAChB,KAAK,EAAE,OAAO,CAAC,KAAK;YACpB,MAAM,EAAE,OAAO,CAAC,MAAM;YACtB,GAAG;YACH,SAAS,EAAE,GAAG;SACf;QACH,CAAC,CAAC;YACE,IAAI,EAAE,UAAU;YAChB,KAAK,EAAE,aAAa,CAAC,KAAK;YAC1B,WAAW,EAAE,aAAa,CAAC,WAAW;YACtC,MAAM,EAAE,aAAa,CAAC,MAAM;YAC5B,GAAG;YACH,SAAS,EAAE,GAAG;SACf,CAAC;AACR,CAAC;AAED,MAAM,UAAU,eAAe,CAC7B,OAAe,EACf,SAAkB,EAClB,MAAM,GAAG,iBAAiB;IAE1B,MAAM,cAAc,GAClB,SAAS,KAAK,SAAS,IAAI,SAAS,GAAG,CAAC,IAAI,OAAO,CAAC,MAAM,GAAG,SAAS,CAAC;IAEzE,IAAI,CAAC,cAAc,EAAE,CAAC;QACpB,OAAO,EAAE,OAAO,EAAE,SAAS,EAAE,KAAK,EAAE,CAAC;IACvC,CAAC;IAED,OAAO;QACL,OAAO,EAAE,GAAG,OAAO,CAAC,SAAS,CAAC,CAAC,EAAE,SAAS,CAAC,GAAG,MAAM,EAAE;QACtD,SAAS,EAAE,IAAI;KAChB,CAAC;AACJ,CAAC"}
@@ -1,4 +1,4 @@
1
- import type { JsonlTransformResult, MarkdownTransformResult } from '../../config/types.js';
1
+ import type { JsonlTransformResult, MarkdownTransformResult } from '../../config/types/content.js';
2
2
  interface ExtractionOptions {
3
3
  readonly extractMainContent: boolean;
4
4
  readonly includeMetadata: boolean;
@@ -7,10 +7,8 @@ interface ContentLengthOptions {
7
7
  readonly maxContentLength?: number;
8
8
  }
9
9
  interface MarkdownOptions extends ExtractionOptions, ContentLengthOptions {
10
- readonly generateToc?: boolean;
11
10
  }
12
11
  export declare function transformHtmlToJsonl(html: string, url: string, options: ExtractionOptions & ContentLengthOptions): JsonlTransformResult;
13
12
  export declare function transformHtmlToMarkdown(html: string, url: string, options: MarkdownOptions): MarkdownTransformResult;
14
13
  export declare function transformHtmlToMarkdownWithBlocks(html: string, url: string, options: ExtractionOptions & ContentLengthOptions): JsonlTransformResult;
15
14
  export {};
16
- //# sourceMappingURL=content-transform.d.ts.map
@@ -1 +1 @@
1
- {"version":3,"file":"content-transform.d.ts","sourceRoot":"","sources":["../../../src/tools/utils/content-transform.ts"],"names":[],"mappings":"AACA,OAAO,KAAK,EACV,oBAAoB,EACpB,uBAAuB,EACxB,MAAM,uBAAuB,CAAC;AAc/B,UAAU,iBAAiB;IACzB,QAAQ,CAAC,kBAAkB,EAAE,OAAO,CAAC;IACrC,QAAQ,CAAC,eAAe,EAAE,OAAO,CAAC;CACnC;AAQD,UAAU,oBAAoB;IAC5B,QAAQ,CAAC,gBAAgB,CAAC,EAAE,MAAM,CAAC;CACpC;AAED,UAAU,eAAgB,SAAQ,iBAAiB,EAAE,oBAAoB;IACvE,QAAQ,CAAC,WAAW,CAAC,EAAE,OAAO,CAAC;CAChC;AA0ED,wBAAgB,oBAAoB,CAClC,IAAI,EAAE,MAAM,EACZ,GAAG,EAAE,MAAM,EACX,OAAO,EAAE,iBAAiB,GAAG,oBAAoB,GAChD,oBAAoB,CAatB;AAED,wBAAgB,uBAAuB,CACrC,IAAI,EAAE,MAAM,EACZ,GAAG,EAAE,MAAM,EACX,OAAO,EAAE,eAAe,GACvB,uBAAuB,CAYzB;AAED,wBAAgB,iCAAiC,CAC/C,IAAI,EAAE,MAAM,EACZ,GAAG,EAAE,MAAM,EACX,OAAO,EAAE,iBAAiB,GAAG,oBAAoB,GAChD,oBAAoB,CActB"}
1
+ {"version":3,"file":"content-transform.d.ts","sourceRoot":"","sources":["../../../src/tools/utils/content-transform.ts"],"names":[],"mappings":"AACA,OAAO,KAAK,EACV,oBAAoB,EACpB,uBAAuB,EACxB,MAAM,+BAA+B,CAAC;AAcvC,UAAU,iBAAiB;IACzB,QAAQ,CAAC,kBAAkB,EAAE,OAAO,CAAC;IACrC,QAAQ,CAAC,eAAe,EAAE,OAAO,CAAC;CACnC;AAQD,UAAU,oBAAoB;IAC5B,QAAQ,CAAC,gBAAgB,CAAC,EAAE,MAAM,CAAC;CACpC;AAED,UAAU,eAAgB,SAAQ,iBAAiB,EAAE,oBAAoB;CAAG;AA4D5E,wBAAgB,oBAAoB,CAClC,IAAI,EAAE,MAAM,EACZ,GAAG,EAAE,MAAM,EACX,OAAO,EAAE,iBAAiB,GAAG,oBAAoB,GAChD,oBAAoB,CAatB;AAED,wBAAgB,uBAAuB,CACrC,IAAI,EAAE,MAAM,EACZ,GAAG,EAAE,MAAM,EACX,OAAO,EAAE,eAAe,GACvB,uBAAuB,CAYzB;AAED,wBAAgB,iCAAiC,CAC/C,IAAI,EAAE,MAAM,EACZ,GAAG,EAAE,MAAM,EACX,OAAO,EAAE,iBAAiB,GAAG,oBAAoB,GAChD,oBAAoB,CActB"}
@@ -1,10 +1,19 @@
1
1
  import { TRUNCATION_MARKER } from '../../config/formatting.js';
2
2
  import { extractContent } from '../../services/extractor.js';
3
- import { parseHtml } from '../../services/parser.js';
3
+ import { parseHtml, parseHtmlWithMetadata } from '../../services/parser.js';
4
+ import { sanitizeText } from '../../utils/sanitizer.js';
4
5
  import { toJsonl } from '../../transformers/jsonl.transformer.js';
5
6
  import { htmlToMarkdown } from '../../transformers/markdown.transformer.js';
6
7
  import { createContentMetadataBlock, determineContentExtractionSource, truncateContent, } from './common.js';
8
+ const TITLE_PATTERN = /<title[^>]*>([\s\S]*?)<\/title>/i;
7
9
  function resolveContentSource(html, url, options) {
10
+ if (!options.extractMainContent && !options.includeMetadata) {
11
+ return {
12
+ sourceHtml: html,
13
+ title: extractTitleFromHtml(html),
14
+ metadata: undefined,
15
+ };
16
+ }
8
17
  const { article, metadata: extractedMeta } = extractContent(html, url, {
9
18
  extractArticle: options.extractMainContent,
10
19
  });
@@ -14,12 +23,43 @@ function resolveContentSource(html, url, options) {
14
23
  const title = shouldExtractFromArticle ? article.title : extractedMeta.title;
15
24
  return { sourceHtml, title, metadata };
16
25
  }
17
- function buildTransformContext(html, url, options) {
18
- return resolveContentSource(html, url, options);
26
+ function extractTitleFromHtml(html) {
27
+ const match = TITLE_PATTERN.exec(html);
28
+ if (!match?.[1])
29
+ return undefined;
30
+ const decoded = decodeHtmlEntities(match[1]);
31
+ const text = sanitizeText(decoded);
32
+ return text || undefined;
33
+ }
34
+ function decodeHtmlEntities(value) {
35
+ if (!value.includes('&'))
36
+ return value;
37
+ const basicDecoded = value
38
+ .replace(/&amp;/g, '&')
39
+ .replace(/&lt;/g, '<')
40
+ .replace(/&gt;/g, '>')
41
+ .replace(/&quot;/g, '"')
42
+ .replace(/&#39;/g, "'");
43
+ return basicDecoded
44
+ .replace(/&#(\d+);/g, (match, code) => {
45
+ const parsed = Number.parseInt(code, 10);
46
+ return Number.isFinite(parsed) && parsed >= 0 && parsed <= 0x10ffff
47
+ ? String.fromCodePoint(parsed)
48
+ : match;
49
+ })
50
+ .replace(/&#x([0-9a-fA-F]+);/g, (match, code) => {
51
+ const parsed = Number.parseInt(code, 16);
52
+ return Number.isFinite(parsed) && parsed >= 0 && parsed <= 0x10ffff
53
+ ? String.fromCodePoint(parsed)
54
+ : match;
55
+ });
19
56
  }
20
57
  function buildJsonlPayload(context, maxContentLength) {
21
58
  const contentBlocks = parseHtml(context.sourceHtml);
22
- const { content, truncated } = truncateContent(toJsonl(contentBlocks, context.metadata), maxContentLength);
59
+ return buildJsonlPayloadFromBlocks(contentBlocks, context.metadata, maxContentLength);
60
+ }
61
+ function buildJsonlPayloadFromBlocks(contentBlocks, metadata, maxContentLength) {
62
+ const { content, truncated } = truncateContent(toJsonl(contentBlocks, metadata), maxContentLength);
23
63
  return {
24
64
  content,
25
65
  contentBlocks: contentBlocks.length,
@@ -32,7 +72,18 @@ function buildMarkdownPayload(context, maxContentLength) {
32
72
  return { content, truncated };
33
73
  }
34
74
  export function transformHtmlToJsonl(html, url, options) {
35
- const context = buildTransformContext(html, url, options);
75
+ if (!options.extractMainContent && options.includeMetadata) {
76
+ const parsed = parseHtmlWithMetadata(html);
77
+ const metadataBlock = createContentMetadataBlock(url, null, parsed.metadata, false, true);
78
+ const { content, contentBlocks, truncated } = buildJsonlPayloadFromBlocks(parsed.blocks, metadataBlock, options.maxContentLength);
79
+ return {
80
+ content,
81
+ contentBlocks,
82
+ title: parsed.metadata.title,
83
+ ...(truncated && { truncated }),
84
+ };
85
+ }
86
+ const context = resolveContentSource(html, url, options);
36
87
  const { content, contentBlocks, truncated } = buildJsonlPayload(context, options.maxContentLength);
37
88
  return {
38
89
  content,
@@ -42,7 +93,7 @@ export function transformHtmlToJsonl(html, url, options) {
42
93
  };
43
94
  }
44
95
  export function transformHtmlToMarkdown(html, url, options) {
45
- const context = buildTransformContext(html, url, options);
96
+ const context = resolveContentSource(html, url, options);
46
97
  const { content, truncated } = buildMarkdownPayload(context, options.maxContentLength);
47
98
  return {
48
99
  markdown: content,
@@ -51,7 +102,22 @@ export function transformHtmlToMarkdown(html, url, options) {
51
102
  };
52
103
  }
53
104
  export function transformHtmlToMarkdownWithBlocks(html, url, options) {
54
- const context = buildTransformContext(html, url, options);
105
+ if (!options.extractMainContent && options.includeMetadata) {
106
+ const parsed = parseHtmlWithMetadata(html);
107
+ const context = {
108
+ sourceHtml: html,
109
+ title: parsed.metadata.title,
110
+ metadata: createContentMetadataBlock(url, null, parsed.metadata, false, true),
111
+ };
112
+ const { content, truncated } = buildMarkdownPayload(context, options.maxContentLength);
113
+ return {
114
+ content,
115
+ contentBlocks: parsed.blocks.length,
116
+ title: context.title,
117
+ ...(truncated && { truncated }),
118
+ };
119
+ }
120
+ const context = resolveContentSource(html, url, options);
55
121
  const contentBlocks = parseHtml(context.sourceHtml);
56
122
  const { content, truncated } = buildMarkdownPayload(context, options.maxContentLength);
57
123
  return {
@@ -61,4 +127,3 @@ export function transformHtmlToMarkdownWithBlocks(html, url, options) {
61
127
  ...(truncated && { truncated }),
62
128
  };
63
129
  }
64
- //# sourceMappingURL=content-transform.js.map
@@ -1 +1 @@
1
- {"version":3,"file":"content-transform.js","sourceRoot":"","sources":["../../../src/tools/utils/content-transform.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,iBAAiB,EAAE,MAAM,4BAA4B,CAAC;AAM/D,OAAO,EAAE,cAAc,EAAE,MAAM,6BAA6B,CAAC;AAC7D,OAAO,EAAE,SAAS,EAAE,MAAM,0BAA0B,CAAC;AAErD,OAAO,EAAE,OAAO,EAAE,MAAM,yCAAyC,CAAC;AAClE,OAAO,EAAE,cAAc,EAAE,MAAM,4CAA4C,CAAC;AAE5E,OAAO,EACL,0BAA0B,EAC1B,gCAAgC,EAChC,eAAe,GAChB,MAAM,aAAa,CAAC;AA2BrB,SAAS,oBAAoB,CAC3B,IAAY,EACZ,GAAW,EACX,OAA0B;IAE1B,MAAM,EAAE,OAAO,EAAE,QAAQ,EAAE,aAAa,EAAE,GAAG,cAAc,CAAC,IAAI,EAAE,GAAG,EAAE;QACrE,cAAc,EAAE,OAAO,CAAC,kBAAkB;KAC3C,CAAC,CAAC;IAEH,MAAM,wBAAwB,GAAG,gCAAgC,CAC/D,OAAO,CAAC,kBAAkB,EAC1B,OAAO,CACR,CAAC;IAEF,MAAM,UAAU,GAAG,wBAAwB,CAAC,CAAC,CAAC,OAAO,CAAC,OAAO,CAAC,CAAC,CAAC,IAAI,CAAC;IACrE,MAAM,QAAQ,GAAG,0BAA0B,CACzC,GAAG,EACH,OAAO,EACP,aAAa,EACb,wBAAwB,EACxB,OAAO,CAAC,eAAe,CACxB,CAAC;IACF,MAAM,KAAK,GAAG,wBAAwB,CAAC,CAAC,CAAC,OAAO,CAAC,KAAK,CAAC,CAAC,CAAC,aAAa,CAAC,KAAK,CAAC;IAE7E,OAAO,EAAE,UAAU,EAAE,KAAK,EAAE,QAAQ,EAAE,CAAC;AACzC,CAAC;AAED,SAAS,qBAAqB,CAC5B,IAAY,EACZ,GAAW,EACX,OAA0B;IAE1B,OAAO,oBAAoB,CAAC,IAAI,EAAE,GAAG,EAAE,OAAO,CAAC,CAAC;AAClD,CAAC;AAED,SAAS,iBAAiB,CACxB,OAAyB,EACzB,gBAAyB;IAEzB,MAAM,aAAa,GAAG,SAAS,CAAC,OAAO,CAAC,UAAU,CAAC,CAAC;IACpD,MAAM,EAAE,OAAO,EAAE,SAAS,EAAE,GAAG,eAAe,CAC5C,OAAO,CAAC,aAAa,EAAE,OAAO,CAAC,QAAQ,CAAC,EACxC,gBAAgB,CACjB,CAAC;IAEF,OAAO;QACL,OAAO;QACP,aAAa,EAAE,aAAa,CAAC,MAAM;QACnC,SAAS;KACV,CAAC;AACJ,CAAC;AAED,SAAS,oBAAoB,CAC3B,OAAyB,EACzB,gBAAyB;IAEzB,MAAM,QAAQ,GAAG,cAAc,CAAC,OAAO,CAAC,UAAU,EAAE,OAAO,CAAC,QAAQ,CAAC,CAAC;IACtE,MAAM,EAAE,OAAO,EAAE,SAAS,EAAE,GAAG,eAAe,CAC5C,QAAQ,EACR,gBAAgB,EAChB,iBAAiB,CAClB,CAAC;IAEF,OAAO,EAAE,OAAO,EAAE,SAAS,EAAE,CAAC;AAChC,CAAC;AAED,MAAM,UAAU,oBAAoB,CAClC,IAAY,EACZ,GAAW,EACX,OAAiD;IAEjD,MAAM,OAAO,GAAG,qBAAqB,CAAC,IAAI,EAAE,GAAG,EAAE,OAAO,CAAC,CAAC;IAC1D,MAAM,EAAE,OAAO,EAAE,aAAa,EAAE,SAAS,EAAE,GAAG,iBAAiB,CAC7D,OAAO,EACP,OAAO,CAAC,gBAAgB,CACzB,CAAC;IAEF,OAAO;QACL,OAAO;QACP,aAAa;QACb,KAAK,EAAE,OAAO,CAAC,KAAK;QACpB,GAAG,CAAC,SAAS,IAAI,EAAE,SAAS,EAAE,CAAC;KAChC,CAAC;AACJ,CAAC;AAED,MAAM,UAAU,uBAAuB,CACrC,IAAY,EACZ,GAAW,EACX,OAAwB;IAExB,MAAM,OAAO,GAAG,qBAAqB,CAAC,IAAI,EAAE,GAAG,EAAE,OAAO,CAAC,CAAC;IAC1D,MAAM,EAAE,OAAO,EAAE,SAAS,EAAE,GAAG,oBAAoB,CACjD,OAAO,EACP,OAAO,CAAC,gBAAgB,CACzB,CAAC;IAEF,OAAO;QACL,QAAQ,EAAE,OAAO;QACjB,KAAK,EAAE,OAAO,CAAC,KAAK;QACpB,SAAS;KACV,CAAC;AACJ,CAAC;AAED,MAAM,UAAU,iCAAiC,CAC/C,IAAY,EACZ,GAAW,EACX,OAAiD;IAEjD,MAAM,OAAO,GAAG,qBAAqB,CAAC,IAAI,EAAE,GAAG,EAAE,OAAO,CAAC,CAAC;IAC1D,MAAM,aAAa,GAAG,SAAS,CAAC,OAAO,CAAC,UAAU,CAAC,CAAC;IACpD,MAAM,EAAE,OAAO,EAAE,SAAS,EAAE,GAAG,oBAAoB,CACjD,OAAO,EACP,OAAO,CAAC,gBAAgB,CACzB,CAAC;IAEF,OAAO;QACL,OAAO;QACP,aAAa,EAAE,aAAa,CAAC,MAAM;QACnC,KAAK,EAAE,OAAO,CAAC,KAAK;QACpB,GAAG,CAAC,SAAS,IAAI,EAAE,SAAS,EAAE,CAAC;KAChC,CAAC;AACJ,CAAC"}
1
+ {"version":3,"file":"content-transform.js","sourceRoot":"","sources":["../../../src/tools/utils/content-transform.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,iBAAiB,EAAE,MAAM,4BAA4B,CAAC;AAM/D,OAAO,EAAE,cAAc,EAAE,MAAM,6BAA6B,CAAC;AAC7D,OAAO,EAAE,SAAS,EAAE,MAAM,0BAA0B,CAAC;AAErD,OAAO,EAAE,OAAO,EAAE,MAAM,yCAAyC,CAAC;AAClE,OAAO,EAAE,cAAc,EAAE,MAAM,4CAA4C,CAAC;AAE5E,OAAO,EACL,0BAA0B,EAC1B,gCAAgC,EAChC,eAAe,GAChB,MAAM,aAAa,CAAC;AAmBrB,SAAS,oBAAoB,CAC3B,IAAY,EACZ,GAAW,EACX,OAA0B;IAE1B,MAAM,EAAE,OAAO,EAAE,QAAQ,EAAE,aAAa,EAAE,GAAG,cAAc,CAAC,IAAI,EAAE,GAAG,EAAE;QACrE,cAAc,EAAE,OAAO,CAAC,kBAAkB;KAC3C,CAAC,CAAC;IAEH,MAAM,wBAAwB,GAAG,gCAAgC,CAC/D,OAAO,CAAC,kBAAkB,EAC1B,OAAO,CACR,CAAC;IAEF,MAAM,UAAU,GAAG,wBAAwB,CAAC,CAAC,CAAC,OAAO,CAAC,OAAO,CAAC,CAAC,CAAC,IAAI,CAAC;IACrE,MAAM,QAAQ,GAAG,0BAA0B,CACzC,GAAG,EACH,OAAO,EACP,aAAa,EACb,wBAAwB,EACxB,OAAO,CAAC,eAAe,CACxB,CAAC;IACF,MAAM,KAAK,GAAG,wBAAwB,CAAC,CAAC,CAAC,OAAO,CAAC,KAAK,CAAC,CAAC,CAAC,aAAa,CAAC,KAAK,CAAC;IAE7E,OAAO,EAAE,UAAU,EAAE,KAAK,EAAE,QAAQ,EAAE,CAAC;AACzC,CAAC;AAED,SAAS,iBAAiB,CACxB,OAAsB,EACtB,gBAAyB;IAEzB,MAAM,aAAa,GAAG,SAAS,CAAC,OAAO,CAAC,UAAU,CAAC,CAAC;IACpD,MAAM,EAAE,OAAO,EAAE,SAAS,EAAE,GAAG,eAAe,CAC5C,OAAO,CAAC,aAAa,EAAE,OAAO,CAAC,QAAQ,CAAC,EACxC,gBAAgB,CACjB,CAAC;IAEF,OAAO;QACL,OAAO;QACP,aAAa,EAAE,aAAa,CAAC,MAAM;QACnC,SAAS;KACV,CAAC;AACJ,CAAC;AAED,SAAS,oBAAoB,CAC3B,OAAsB,EACtB,gBAAyB;IAEzB,MAAM,QAAQ,GAAG,cAAc,CAAC,OAAO,CAAC,UAAU,EAAE,OAAO,CAAC,QAAQ,CAAC,CAAC;IACtE,MAAM,EAAE,OAAO,EAAE,SAAS,EAAE,GAAG,eAAe,CAC5C,QAAQ,EACR,gBAAgB,EAChB,iBAAiB,CAClB,CAAC;IAEF,OAAO,EAAE,OAAO,EAAE,SAAS,EAAE,CAAC;AAChC,CAAC;AAED,MAAM,UAAU,oBAAoB,CAClC,IAAY,EACZ,GAAW,EACX,OAAiD;IAEjD,MAAM,OAAO,GAAG,oBAAoB,CAAC,IAAI,EAAE,GAAG,EAAE,OAAO,CAAC,CAAC;IACzD,MAAM,EAAE,OAAO,EAAE,aAAa,EAAE,SAAS,EAAE,GAAG,iBAAiB,CAC7D,OAAO,EACP,OAAO,CAAC,gBAAgB,CACzB,CAAC;IAEF,OAAO;QACL,OAAO;QACP,aAAa;QACb,KAAK,EAAE,OAAO,CAAC,KAAK;QACpB,GAAG,CAAC,SAAS,IAAI,EAAE,SAAS,EAAE,CAAC;KAChC,CAAC;AACJ,CAAC;AAED,MAAM,UAAU,uBAAuB,CACrC,IAAY,EACZ,GAAW,EACX,OAAwB;IAExB,MAAM,OAAO,GAAG,oBAAoB,CAAC,IAAI,EAAE,GAAG,EAAE,OAAO,CAAC,CAAC;IACzD,MAAM,EAAE,OAAO,EAAE,SAAS,EAAE,GAAG,oBAAoB,CACjD,OAAO,EACP,OAAO,CAAC,gBAAgB,CACzB,CAAC;IAEF,OAAO;QACL,QAAQ,EAAE,OAAO;QACjB,KAAK,EAAE,OAAO,CAAC,KAAK;QACpB,SAAS;KACV,CAAC;AACJ,CAAC;AAED,MAAM,UAAU,iCAAiC,CAC/C,IAAY,EACZ,GAAW,EACX,OAAiD;IAEjD,MAAM,OAAO,GAAG,oBAAoB,CAAC,IAAI,EAAE,GAAG,EAAE,OAAO,CAAC,CAAC;IACzD,MAAM,aAAa,GAAG,SAAS,CAAC,OAAO,CAAC,UAAU,CAAC,CAAC;IACpD,MAAM,EAAE,OAAO,EAAE,SAAS,EAAE,GAAG,oBAAoB,CACjD,OAAO,EACP,OAAO,CAAC,gBAAgB,CACzB,CAAC;IAEF,OAAO;QACL,OAAO;QACP,aAAa,EAAE,aAAa,CAAC,MAAM;QACnC,KAAK,EAAE,OAAO,CAAC,KAAK;QACpB,GAAG,CAAC,SAAS,IAAI,EAAE,SAAS,EAAE,CAAC;KAChC,CAAC;AACJ,CAAC"}
@@ -1,4 +1,4 @@
1
- import type { FetchPipelineOptions, PipelineResult } from '../../config/types.js';
1
+ import type { FetchPipelineOptions, PipelineResult } from '../../config/types/runtime.js';
2
2
  /**
3
3
  * Unified fetch pipeline that handles caching, fetching, and transformation.
4
4
  * Implements cache-first strategy with automatic serialization.
@@ -8,4 +8,3 @@ import type { FetchPipelineOptions, PipelineResult } from '../../config/types.js
8
8
  * @returns Promise resolving to the pipeline result
9
9
  */
10
10
  export declare function executeFetchPipeline<T>(options: FetchPipelineOptions<T>): Promise<PipelineResult<T>>;
11
- //# sourceMappingURL=fetch-pipeline.d.ts.map
@@ -1 +1 @@
1
- {"version":3,"file":"fetch-pipeline.d.ts","sourceRoot":"","sources":["../../../src/tools/utils/fetch-pipeline.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAEV,oBAAoB,EACpB,cAAc,EACf,MAAM,uBAAuB,CAAC;AAuD/B;;;;;;;GAOG;AACH,wBAAsB,oBAAoB,CAAC,CAAC,EAC1C,OAAO,EAAE,oBAAoB,CAAC,CAAC,CAAC,GAC/B,OAAO,CAAC,cAAc,CAAC,CAAC,CAAC,CAAC,CAwB5B"}
1
+ {"version":3,"file":"fetch-pipeline.d.ts","sourceRoot":"","sources":["../../../src/tools/utils/fetch-pipeline.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAEV,oBAAoB,EACpB,cAAc,EACf,MAAM,+BAA+B,CAAC;AAuDvC;;;;;;;GAOG;AACH,wBAAsB,oBAAoB,CAAC,CAAC,EAC1C,OAAO,EAAE,oBAAoB,CAAC,CAAC,CAAC,GAC/B,OAAO,CAAC,cAAc,CAAC,CAAC,CAAC,CAAC,CAwB5B"}
@@ -1,29 +1,22 @@
1
1
  import * as cache from '../../services/cache.js';
2
- import { fetchUrlWithRetry } from '../../services/fetcher.js';
3
- import { logDebug, logWarn } from '../../services/logger.js';
4
- import { validateAndNormalizeUrl } from '../../utils/url-validator.js';
2
+ import { fetchNormalizedUrlWithRetry } from '../../services/fetcher.js';
3
+ import { logDebug } from '../../services/logger.js';
4
+ import { assertResolvedAddressesAllowed, normalizeUrl, } from '../../utils/url-validator.js';
5
5
  import { appendHeaderVary } from './cache-vary.js';
6
- function safeJsonParse(cached, cacheKey) {
7
- try {
8
- return JSON.parse(cached);
9
- }
10
- catch {
11
- logWarn('Cache deserialize failed, treating as miss', {
12
- key: cacheKey.substring(0, 100),
13
- });
14
- return undefined;
15
- }
16
- }
17
6
  function attemptCacheRetrieval(cacheKey, deserialize, cacheNamespace, normalizedUrl) {
18
7
  if (!cacheKey)
19
8
  return null;
20
9
  const cached = cache.get(cacheKey);
21
10
  if (!cached)
22
11
  return null;
23
- logDebug('Cache hit', { namespace: cacheNamespace, url: normalizedUrl });
24
- const data = deserialize
25
- ? deserialize(cached.content)
26
- : safeJsonParse(cached.content, cacheKey);
12
+ if (!deserialize) {
13
+ logDebug('Cache miss due to missing deserializer', {
14
+ namespace: cacheNamespace,
15
+ url: normalizedUrl,
16
+ });
17
+ return null;
18
+ }
19
+ const data = deserialize(cached.content);
27
20
  if (data === undefined) {
28
21
  logDebug('Cache miss due to deserialize failure', {
29
22
  namespace: cacheNamespace,
@@ -31,6 +24,7 @@ function attemptCacheRetrieval(cacheKey, deserialize, cacheNamespace, normalized
31
24
  });
32
25
  return null;
33
26
  }
27
+ logDebug('Cache hit', { namespace: cacheNamespace, url: normalizedUrl });
34
28
  return {
35
29
  data,
36
30
  fromCache: true,
@@ -48,16 +42,19 @@ function attemptCacheRetrieval(cacheKey, deserialize, cacheNamespace, normalized
48
42
  * @returns Promise resolving to the pipeline result
49
43
  */
50
44
  export async function executeFetchPipeline(options) {
51
- const normalizedUrl = validateAndNormalizeUrl(options.url);
45
+ const { normalizedUrl, hostname } = normalizeUrl(options.url);
52
46
  const cacheKey = resolveCacheKey(options, normalizedUrl);
53
47
  const cachedResult = attemptCacheRetrieval(cacheKey, options.deserialize, options.cacheNamespace, normalizedUrl);
54
48
  if (cachedResult)
55
49
  return cachedResult;
50
+ await assertResolvedAddressesAllowed(hostname);
56
51
  const fetchOptions = buildFetchOptions(options);
57
52
  logDebug('Fetching URL', { url: normalizedUrl, retries: options.retries });
58
- const html = await fetchUrlWithRetry(normalizedUrl, fetchOptions, options.retries);
53
+ const html = await fetchNormalizedUrlWithRetry(normalizedUrl, fetchOptions, options.retries);
59
54
  const data = options.transform(html, normalizedUrl);
60
- persistCache(cacheKey, data, options.serialize, normalizedUrl);
55
+ if (cache.isEnabled()) {
56
+ persistCache(cacheKey, data, options.serialize, normalizedUrl);
57
+ }
61
58
  return buildPipelineResult(normalizedUrl, data, cacheKey);
62
59
  }
63
60
  function resolveCacheKey(options, normalizedUrl) {
@@ -97,4 +94,3 @@ function buildPipelineResult(url, data, cacheKey) {
97
94
  cacheKey,
98
95
  };
99
96
  }
100
- //# sourceMappingURL=fetch-pipeline.js.map
@@ -9,4 +9,3 @@ interface InlineContentResult {
9
9
  }
10
10
  export declare function applyInlineContentLimit(content: string, cacheKey: string | null, format: InlineContentFormat): InlineContentResult;
11
11
  export {};
12
- //# sourceMappingURL=inline-content.d.ts.map
@@ -36,4 +36,3 @@ function buildTruncatedFallback(content, contentSize, inlineLimit) {
36
36
  truncated: true,
37
37
  };
38
38
  }
39
- //# sourceMappingURL=inline-content.js.map
@@ -1,3 +1,2 @@
1
- import type { ContentBlockUnion, MetadataBlock } from '../config/types.js';
1
+ import type { ContentBlockUnion, MetadataBlock } from '../config/types/content.js';
2
2
  export declare function toJsonl(blocks: readonly ContentBlockUnion[], metadata?: MetadataBlock): string;
3
- //# sourceMappingURL=jsonl.transformer.d.ts.map
@@ -1 +1 @@
1
- {"version":3,"file":"jsonl.transformer.d.ts","sourceRoot":"","sources":["../../src/transformers/jsonl.transformer.ts"],"names":[],"mappings":"AAEA,OAAO,KAAK,EAAE,iBAAiB,EAAE,aAAa,EAAE,MAAM,oBAAoB,CAAC;AAkE3E,wBAAgB,OAAO,CACrB,MAAM,EAAE,SAAS,iBAAiB,EAAE,EACpC,QAAQ,CAAC,EAAE,aAAa,GACvB,MAAM,CAGR"}
1
+ {"version":3,"file":"jsonl.transformer.d.ts","sourceRoot":"","sources":["../../src/transformers/jsonl.transformer.ts"],"names":[],"mappings":"AAEA,OAAO,KAAK,EACV,iBAAiB,EACjB,aAAa,EACd,MAAM,4BAA4B,CAAC;AAkEpC,wBAAgB,OAAO,CACrB,MAAM,EAAE,SAAS,iBAAiB,EAAE,EACpC,QAAQ,CAAC,EAAE,aAAa,GACvB,MAAM,CAGR"}
@@ -73,4 +73,3 @@ function serializeMetadata(metadata) {
73
73
  return null;
74
74
  }
75
75
  }
76
- //# sourceMappingURL=jsonl.transformer.js.map
@@ -1 +1 @@
1
- {"version":3,"file":"jsonl.transformer.js","sourceRoot":"","sources":["../../src/transformers/jsonl.transformer.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,SAAS,EAAE,MAAM,yBAAyB,CAAC;AACpD,OAAO,EAAE,MAAM,EAAE,MAAM,oBAAoB,CAAC;AAG5C,OAAO,EAAE,YAAY,EAAE,MAAM,uBAAuB,CAAC;AAErD,MAAM,gBAAgB,GAAG,IAAI,GAAG,CAAC;IAC/B,WAAW;IACX,SAAS;IACT,MAAM;IACN,YAAY;CACb,CAAC,CAAC;AAEH,SAAS,WAAW,CAClB,KAAwB;IAExB,OAAO,MAAM,IAAI,KAAK,CAAC;AACzB,CAAC;AAED,SAAS,WAAW,CAClB,KAAwB;IAExB,OAAO,KAAK,CAAC,IAAI,KAAK,MAAM,CAAC;AAC/B,CAAC;AAED,SAAS,iBAAiB,CACxB,KAAmD,EACnD,SAAiB;IAEjB,MAAM,SAAS,GAAG,YAAY,CAAC,KAAK,CAAC,IAAI,EAAE,SAAS,CAAC,CAAC;IACtD,OAAO,SAAS,KAAK,KAAK,CAAC,IAAI,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,EAAE,GAAG,KAAK,EAAE,IAAI,EAAE,SAAS,EAAE,CAAC;AAC1E,CAAC;AAED,SAAS,iBAAiB,CACxB,KAAsD,EACtD,SAAiB;IAEjB,MAAM,cAAc,GAAG,KAAK,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,IAAI,EAAE,EAAE,CAC9C,YAAY,CAAC,IAAI,EAAE,SAAS,CAAC,CAC9B,CAAC;IACF,MAAM,UAAU,GAAG,cAAc,CAAC,IAAI,CACpC,CAAC,IAAI,EAAE,KAAK,EAAE,EAAE,CAAC,IAAI,KAAK,KAAK,CAAC,KAAK,CAAC,KAAK,CAAC,CAC7C,CAAC;IACF,OAAO,UAAU,CAAC,CAAC,CAAC,EAAE,GAAG,KAAK,EAAE,KAAK,EAAE,cAAc,EAAE,CAAC,CAAC,CAAC,KAAK,CAAC;AAClE,CAAC;AAED,SAAS,aAAa,CAAC,KAAwB;IAC7C,MAAM,SAAS,GAAG,MAAM,CAAC,UAAU,CAAC,cAAc,CAAC;IAEnD,IAAI,gBAAgB,CAAC,GAAG,CAAC,KAAK,CAAC,IAAI,CAAC,IAAI,WAAW,CAAC,KAAK,CAAC,EAAE,CAAC;QAC3D,OAAO,iBAAiB,CAAC,KAAK,EAAE,SAAS,CAAC,CAAC;IAC7C,CAAC;IAED,IAAI,WAAW,CAAC,KAAK,CAAC,EAAE,CAAC;QACvB,OAAO,iBAAiB,CAAC,KAAK,EAAE,SAAS,CAAC,CAAC;IAC7C,CAAC;IAED,OAAO,KAAK,CAAC;AACf,CAAC;AAED,SAAS,cAAc,CAAC,KAAwB;IAC9C,IAAI,CAAC;QACH,OAAO,IAAI,CAAC,SAAS,CAAC,aAAa,CAAC,KAAK,CAAC,CAAC,CAAC;IAC9C,CAAC;IAAC,MAAM,CAAC;QACP,OAAO,IAAI,CAAC;IACd,CAAC;AACH,CAAC;AAED,MAAM,UAAU,OAAO,CACrB,MAAoC,EACpC,QAAwB;IAExB,MAAM,KAAK,GAAG,iBAAiB,CAAC,MAAM,EAAE,QAAQ,CAAC,CAAC;IAClD,OAAO,SAAS,CAAC,KAAK,CAAC,CAAC;AAC1B,CAAC;AAED,SAAS,iBAAiB,CACxB,MAAoC,EACpC,QAAwB;IAExB,MAAM,KAAK,GAAa,EAAE,CAAC;IAC3B,MAAM,MAAM,GAAG,iBAAiB,CAAC,QAAQ,CAAC,CAAC;IAC3C,IAAI,MAAM,EAAE,CAAC;QACX,KAAK,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;IACrB,CAAC;IAED,KAAK,MAAM,KAAK,IAAI,MAAM,EAAE,CAAC;QAC3B,MAAM,UAAU,GAAG,cAAc,CAAC,KAAK,CAAC,CAAC;QACzC,IAAI,UAAU,EAAE,CAAC;YACf,KAAK,CAAC,IAAI,CAAC,UAAU,CAAC,CAAC;QACzB,CAAC;IACH,CAAC;IAED,OAAO,KAAK,CAAC;AACf,CAAC;AAED,SAAS,iBAAiB,CAAC,QAAwB;IACjD,IAAI,CAAC,QAAQ;QAAE,OAAO,IAAI,CAAC;IAC3B,IAAI,CAAC;QACH,MAAM,eAAe,GAAG;YACtB,IAAI,EAAE,QAAQ,CAAC,IAAI;YACnB,KAAK,EAAE,QAAQ,CAAC,KAAK;YACrB,GAAG,EAAE,QAAQ,CAAC,GAAG;SAClB,CAAC;QACF,OAAO,IAAI,CAAC,SAAS,CAAC,eAAe,CAAC,CAAC;IACzC,CAAC;IAAC,MAAM,CAAC;QACP,OAAO,IAAI,CAAC;IACd,CAAC;AACH,CAAC"}
1
+ {"version":3,"file":"jsonl.transformer.js","sourceRoot":"","sources":["../../src/transformers/jsonl.transformer.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,SAAS,EAAE,MAAM,yBAAyB,CAAC;AACpD,OAAO,EAAE,MAAM,EAAE,MAAM,oBAAoB,CAAC;AAM5C,OAAO,EAAE,YAAY,EAAE,MAAM,uBAAuB,CAAC;AAErD,MAAM,gBAAgB,GAAG,IAAI,GAAG,CAAC;IAC/B,WAAW;IACX,SAAS;IACT,MAAM;IACN,YAAY;CACb,CAAC,CAAC;AAEH,SAAS,WAAW,CAClB,KAAwB;IAExB,OAAO,MAAM,IAAI,KAAK,CAAC;AACzB,CAAC;AAED,SAAS,WAAW,CAClB,KAAwB;IAExB,OAAO,KAAK,CAAC,IAAI,KAAK,MAAM,CAAC;AAC/B,CAAC;AAED,SAAS,iBAAiB,CACxB,KAAmD,EACnD,SAAiB;IAEjB,MAAM,SAAS,GAAG,YAAY,CAAC,KAAK,CAAC,IAAI,EAAE,SAAS,CAAC,CAAC;IACtD,OAAO,SAAS,KAAK,KAAK,CAAC,IAAI,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,EAAE,GAAG,KAAK,EAAE,IAAI,EAAE,SAAS,EAAE,CAAC;AAC1E,CAAC;AAED,SAAS,iBAAiB,CACxB,KAAsD,EACtD,SAAiB;IAEjB,MAAM,cAAc,GAAG,KAAK,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,IAAI,EAAE,EAAE,CAC9C,YAAY,CAAC,IAAI,EAAE,SAAS,CAAC,CAC9B,CAAC;IACF,MAAM,UAAU,GAAG,cAAc,CAAC,IAAI,CACpC,CAAC,IAAI,EAAE,KAAK,EAAE,EAAE,CAAC,IAAI,KAAK,KAAK,CAAC,KAAK,CAAC,KAAK,CAAC,CAC7C,CAAC;IACF,OAAO,UAAU,CAAC,CAAC,CAAC,EAAE,GAAG,KAAK,EAAE,KAAK,EAAE,cAAc,EAAE,CAAC,CAAC,CAAC,KAAK,CAAC;AAClE,CAAC;AAED,SAAS,aAAa,CAAC,KAAwB;IAC7C,MAAM,SAAS,GAAG,MAAM,CAAC,UAAU,CAAC,cAAc,CAAC;IAEnD,IAAI,gBAAgB,CAAC,GAAG,CAAC,KAAK,CAAC,IAAI,CAAC,IAAI,WAAW,CAAC,KAAK,CAAC,EAAE,CAAC;QAC3D,OAAO,iBAAiB,CAAC,KAAK,EAAE,SAAS,CAAC,CAAC;IAC7C,CAAC;IAED,IAAI,WAAW,CAAC,KAAK,CAAC,EAAE,CAAC;QACvB,OAAO,iBAAiB,CAAC,KAAK,EAAE,SAAS,CAAC,CAAC;IAC7C,CAAC;IAED,OAAO,KAAK,CAAC;AACf,CAAC;AAED,SAAS,cAAc,CAAC,KAAwB;IAC9C,IAAI,CAAC;QACH,OAAO,IAAI,CAAC,SAAS,CAAC,aAAa,CAAC,KAAK,CAAC,CAAC,CAAC;IAC9C,CAAC;IAAC,MAAM,CAAC;QACP,OAAO,IAAI,CAAC;IACd,CAAC;AACH,CAAC;AAED,MAAM,UAAU,OAAO,CACrB,MAAoC,EACpC,QAAwB;IAExB,MAAM,KAAK,GAAG,iBAAiB,CAAC,MAAM,EAAE,QAAQ,CAAC,CAAC;IAClD,OAAO,SAAS,CAAC,KAAK,CAAC,CAAC;AAC1B,CAAC;AAED,SAAS,iBAAiB,CACxB,MAAoC,EACpC,QAAwB;IAExB,MAAM,KAAK,GAAa,EAAE,CAAC;IAC3B,MAAM,MAAM,GAAG,iBAAiB,CAAC,QAAQ,CAAC,CAAC;IAC3C,IAAI,MAAM,EAAE,CAAC;QACX,KAAK,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;IACrB,CAAC;IAED,KAAK,MAAM,KAAK,IAAI,MAAM,EAAE,CAAC;QAC3B,MAAM,UAAU,GAAG,cAAc,CAAC,KAAK,CAAC,CAAC;QACzC,IAAI,UAAU,EAAE,CAAC;YACf,KAAK,CAAC,IAAI,CAAC,UAAU,CAAC,CAAC;QACzB,CAAC;IACH,CAAC;IAED,OAAO,KAAK,CAAC;AACf,CAAC;AAED,SAAS,iBAAiB,CAAC,QAAwB;IACjD,IAAI,CAAC,QAAQ;QAAE,OAAO,IAAI,CAAC;IAC3B,IAAI,CAAC;QACH,MAAM,eAAe,GAAG;YACtB,IAAI,EAAE,QAAQ,CAAC,IAAI;YACnB,KAAK,EAAE,QAAQ,CAAC,KAAK;YACrB,GAAG,EAAE,QAAQ,CAAC,GAAG;SAClB,CAAC;QACF,OAAO,IAAI,CAAC,SAAS,CAAC,eAAe,CAAC,CAAC;IACzC,CAAC;IAAC,MAAM,CAAC;QACP,OAAO,IAAI,CAAC;IACd,CAAC;AACH,CAAC"}
@@ -1,3 +1,2 @@
1
- import type { MetadataBlock } from '../config/types.js';
1
+ import type { MetadataBlock } from '../config/types/content.js';
2
2
  export declare function htmlToMarkdown(html: string, metadata?: MetadataBlock): string;
3
- //# sourceMappingURL=markdown.transformer.d.ts.map
@@ -1 +1 @@
1
- {"version":3,"file":"markdown.transformer.d.ts","sourceRoot":"","sources":["../../src/transformers/markdown.transformer.ts"],"names":[],"mappings":"AAOA,OAAO,KAAK,EAAE,aAAa,EAAE,MAAM,oBAAoB,CAAC;AA+HxD,wBAAgB,cAAc,CAAC,IAAI,EAAE,MAAM,EAAE,QAAQ,CAAC,EAAE,aAAa,GAAG,MAAM,CAa7E"}
1
+ {"version":3,"file":"markdown.transformer.d.ts","sourceRoot":"","sources":["../../src/transformers/markdown.transformer.ts"],"names":[],"mappings":"AAOA,OAAO,KAAK,EAAE,aAAa,EAAE,MAAM,4BAA4B,CAAC;AA+HhE,wBAAgB,cAAc,CAAC,IAAI,EAAE,MAAM,EAAE,QAAQ,CAAC,EAAE,aAAa,GAAG,MAAM,CAa7E"}