@j0hanz/superfetch 1.0.2 → 1.0.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +590 -327
- package/dist/config/index.d.ts.map +1 -1
- package/dist/config/index.js +6 -10
- package/dist/config/index.js.map +1 -1
- package/dist/config/types.d.ts +251 -0
- package/dist/config/types.d.ts.map +1 -0
- package/dist/config/types.js +2 -0
- package/dist/config/types.js.map +1 -0
- package/dist/errors/app-error.d.ts +2 -20
- package/dist/errors/app-error.d.ts.map +1 -1
- package/dist/errors/app-error.js +0 -18
- package/dist/errors/app-error.js.map +1 -1
- package/dist/index.js +13 -47
- package/dist/index.js.map +1 -1
- package/dist/middleware/error-handler.d.ts +1 -5
- package/dist/middleware/error-handler.d.ts.map +1 -1
- package/dist/middleware/error-handler.js +1 -11
- package/dist/middleware/error-handler.js.map +1 -1
- package/dist/middleware/rate-limiter.d.ts +2 -20
- package/dist/middleware/rate-limiter.d.ts.map +1 -1
- package/dist/middleware/rate-limiter.js +11 -44
- package/dist/middleware/rate-limiter.js.map +1 -1
- package/dist/prompts/index.d.ts +0 -3
- package/dist/prompts/index.d.ts.map +1 -1
- package/dist/prompts/index.js +0 -3
- package/dist/prompts/index.js.map +1 -1
- package/dist/resources/index.d.ts +0 -3
- package/dist/resources/index.d.ts.map +1 -1
- package/dist/resources/index.js +1 -4
- package/dist/resources/index.js.map +1 -1
- package/dist/server.d.ts +0 -4
- package/dist/server.d.ts.map +1 -1
- package/dist/server.js +2 -6
- package/dist/server.js.map +1 -1
- package/dist/services/cache.d.ts +9 -6
- package/dist/services/cache.d.ts.map +1 -1
- package/dist/services/cache.js +71 -20
- package/dist/services/cache.js.map +1 -1
- package/dist/services/card-extractor.d.ts +10 -0
- package/dist/services/card-extractor.d.ts.map +1 -0
- package/dist/services/card-extractor.js +187 -0
- package/dist/services/card-extractor.js.map +1 -0
- package/dist/services/extractor.d.ts +6 -19
- package/dist/services/extractor.d.ts.map +1 -1
- package/dist/services/extractor.js +53 -46
- package/dist/services/extractor.js.map +1 -1
- package/dist/services/fetcher.d.ts +4 -11
- package/dist/services/fetcher.d.ts.map +1 -1
- package/dist/services/fetcher.js +30 -36
- package/dist/services/fetcher.js.map +1 -1
- package/dist/services/logger.d.ts.map +1 -1
- package/dist/services/logger.js +4 -6
- package/dist/services/logger.js.map +1 -1
- package/dist/services/parser.d.ts +1 -6
- package/dist/services/parser.d.ts.map +1 -1
- package/dist/services/parser.js +64 -47
- package/dist/services/parser.js.map +1 -1
- package/dist/tools/handlers/fetch-links.tool.d.ts +5 -12
- package/dist/tools/handlers/fetch-links.tool.d.ts.map +1 -1
- package/dist/tools/handlers/fetch-links.tool.js +104 -79
- package/dist/tools/handlers/fetch-links.tool.js.map +1 -1
- package/dist/tools/handlers/fetch-markdown.tool.d.ts +7 -4
- package/dist/tools/handlers/fetch-markdown.tool.d.ts.map +1 -1
- package/dist/tools/handlers/fetch-markdown.tool.js +84 -84
- package/dist/tools/handlers/fetch-markdown.tool.js.map +1 -1
- package/dist/tools/handlers/fetch-url.tool.d.ts +8 -6
- package/dist/tools/handlers/fetch-url.tool.d.ts.map +1 -1
- package/dist/tools/handlers/fetch-url.tool.js +51 -93
- package/dist/tools/handlers/fetch-url.tool.js.map +1 -1
- package/dist/tools/handlers/fetch-urls.tool.d.ts +5 -0
- package/dist/tools/handlers/fetch-urls.tool.d.ts.map +1 -0
- package/dist/tools/handlers/fetch-urls.tool.js +147 -0
- package/dist/tools/handlers/fetch-urls.tool.js.map +1 -0
- package/dist/tools/index.d.ts +0 -4
- package/dist/tools/index.d.ts.map +1 -1
- package/dist/tools/index.js +145 -15
- package/dist/tools/index.js.map +1 -1
- package/dist/tools/utils/common.d.ts +8 -0
- package/dist/tools/utils/common.d.ts.map +1 -0
- package/dist/tools/utils/common.js +35 -0
- package/dist/tools/utils/common.js.map +1 -0
- package/dist/tools/utils/fetch-pipeline.d.ts +3 -0
- package/dist/tools/utils/fetch-pipeline.d.ts.map +1 -0
- package/dist/tools/utils/fetch-pipeline.js +37 -0
- package/dist/tools/utils/fetch-pipeline.js.map +1 -0
- package/dist/tools/utils/index.d.ts +4 -0
- package/dist/tools/utils/index.d.ts.map +1 -0
- package/dist/tools/utils/index.js +3 -0
- package/dist/tools/utils/index.js.map +1 -0
- package/dist/tools/utils/response-builder.d.ts +3 -0
- package/dist/tools/utils/response-builder.d.ts.map +1 -0
- package/dist/tools/utils/response-builder.js +24 -0
- package/dist/tools/utils/response-builder.js.map +1 -0
- package/dist/transformers/jsonl.transformer.d.ts +1 -1
- package/dist/transformers/jsonl.transformer.d.ts.map +1 -1
- package/dist/transformers/jsonl.transformer.js +2 -1
- package/dist/transformers/jsonl.transformer.js.map +1 -1
- package/dist/transformers/markdown.transformer.d.ts +1 -1
- package/dist/transformers/markdown.transformer.d.ts.map +1 -1
- package/dist/transformers/markdown.transformer.js +116 -2
- package/dist/transformers/markdown.transformer.js.map +1 -1
- package/dist/types/content.types.d.ts +11 -11
- package/dist/types/content.types.d.ts.map +1 -1
- package/dist/types/index.d.ts +1 -2
- package/dist/types/index.d.ts.map +1 -1
- package/dist/types/index.js +1 -2
- package/dist/types/index.js.map +1 -1
- package/dist/types/schemas.d.ts +39 -12
- package/dist/types/schemas.d.ts.map +1 -1
- package/dist/utils/concurrency.d.ts +2 -0
- package/dist/utils/concurrency.d.ts.map +1 -0
- package/dist/utils/concurrency.js +25 -0
- package/dist/utils/concurrency.js.map +1 -0
- package/dist/utils/content-cleaner.d.ts +32 -0
- package/dist/utils/content-cleaner.d.ts.map +1 -0
- package/dist/utils/content-cleaner.js +240 -0
- package/dist/utils/content-cleaner.js.map +1 -0
- package/dist/utils/language-detector.d.ts +5 -0
- package/dist/utils/language-detector.d.ts.map +1 -0
- package/dist/utils/language-detector.js +50 -0
- package/dist/utils/language-detector.js.map +1 -0
- package/dist/utils/sanitizer.d.ts +0 -10
- package/dist/utils/sanitizer.d.ts.map +1 -1
- package/dist/utils/sanitizer.js +3 -11
- package/dist/utils/sanitizer.js.map +1 -1
- package/dist/utils/tool-error-handler.d.ts +1 -15
- package/dist/utils/tool-error-handler.d.ts.map +1 -1
- package/dist/utils/tool-error-handler.js +1 -1
- package/dist/utils/tool-error-handler.js.map +1 -1
- package/dist/utils/url-validator.d.ts +0 -8
- package/dist/utils/url-validator.d.ts.map +1 -1
- package/dist/utils/url-validator.js +17 -31
- package/dist/utils/url-validator.js.map +1 -1
- package/package.json +4 -3
|
@@ -4,9 +4,9 @@ interface ContentBlock {
|
|
|
4
4
|
}
|
|
5
5
|
export interface MetadataBlock extends ContentBlock {
|
|
6
6
|
type: 'metadata';
|
|
7
|
-
title?: string;
|
|
8
|
-
description?: string;
|
|
9
|
-
author?: string;
|
|
7
|
+
title?: string | undefined;
|
|
8
|
+
description?: string | undefined;
|
|
9
|
+
author?: string | undefined;
|
|
10
10
|
url: string;
|
|
11
11
|
fetchedAt: string;
|
|
12
12
|
}
|
|
@@ -26,27 +26,27 @@ export interface ListBlock extends ContentBlock {
|
|
|
26
26
|
}
|
|
27
27
|
export interface CodeBlock extends ContentBlock {
|
|
28
28
|
type: 'code';
|
|
29
|
-
language?: string;
|
|
29
|
+
language?: string | undefined;
|
|
30
30
|
text: string;
|
|
31
31
|
}
|
|
32
32
|
export interface TableBlock extends ContentBlock {
|
|
33
33
|
type: 'table';
|
|
34
|
-
headers?: string[];
|
|
34
|
+
headers?: string[] | undefined;
|
|
35
35
|
rows: string[][];
|
|
36
36
|
}
|
|
37
37
|
export interface ImageBlock extends ContentBlock {
|
|
38
38
|
type: 'image';
|
|
39
39
|
src: string;
|
|
40
|
-
alt?: string;
|
|
40
|
+
alt?: string | undefined;
|
|
41
41
|
}
|
|
42
42
|
export type ContentBlockUnion = MetadataBlock | HeadingBlock | ParagraphBlock | ListBlock | CodeBlock | TableBlock | ImageBlock;
|
|
43
43
|
export interface ExtractedArticle {
|
|
44
|
-
title?: string;
|
|
45
|
-
byline?: string;
|
|
44
|
+
title?: string | undefined;
|
|
45
|
+
byline?: string | undefined;
|
|
46
46
|
content: string;
|
|
47
47
|
textContent: string;
|
|
48
|
-
excerpt?: string;
|
|
49
|
-
siteName?: string;
|
|
48
|
+
excerpt?: string | undefined;
|
|
49
|
+
siteName?: string | undefined;
|
|
50
50
|
}
|
|
51
51
|
export interface CacheEntry {
|
|
52
52
|
url: string;
|
|
@@ -57,7 +57,7 @@ export interface CacheEntry {
|
|
|
57
57
|
export interface ExtractedLink {
|
|
58
58
|
href: string;
|
|
59
59
|
text: string;
|
|
60
|
-
type: 'internal' | 'external';
|
|
60
|
+
type: 'internal' | 'external' | 'image';
|
|
61
61
|
}
|
|
62
62
|
export {};
|
|
63
63
|
//# sourceMappingURL=content.types.d.ts.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"content.types.d.ts","sourceRoot":"","sources":["../../src/types/content.types.ts"],"names":[],"mappings":"AACA,KAAK,gBAAgB,GACjB,UAAU,GACV,SAAS,GACT,WAAW,GACX,MAAM,GACN,MAAM,GACN,OAAO,GACP,OAAO,CAAC;AAGZ,UAAU,YAAY;IACpB,IAAI,EAAE,gBAAgB,CAAC;CACxB;AAGD,MAAM,WAAW,aAAc,SAAQ,YAAY;IACjD,IAAI,EAAE,UAAU,CAAC;IACjB,KAAK,CAAC,EAAE,MAAM,CAAC;
|
|
1
|
+
{"version":3,"file":"content.types.d.ts","sourceRoot":"","sources":["../../src/types/content.types.ts"],"names":[],"mappings":"AACA,KAAK,gBAAgB,GACjB,UAAU,GACV,SAAS,GACT,WAAW,GACX,MAAM,GACN,MAAM,GACN,OAAO,GACP,OAAO,CAAC;AAGZ,UAAU,YAAY;IACpB,IAAI,EAAE,gBAAgB,CAAC;CACxB;AAGD,MAAM,WAAW,aAAc,SAAQ,YAAY;IACjD,IAAI,EAAE,UAAU,CAAC;IACjB,KAAK,CAAC,EAAE,MAAM,GAAG,SAAS,CAAC;IAC3B,WAAW,CAAC,EAAE,MAAM,GAAG,SAAS,CAAC;IACjC,MAAM,CAAC,EAAE,MAAM,GAAG,SAAS,CAAC;IAC5B,GAAG,EAAE,MAAM,CAAC;IACZ,SAAS,EAAE,MAAM,CAAC;CACnB;AAGD,MAAM,WAAW,YAAa,SAAQ,YAAY;IAChD,IAAI,EAAE,SAAS,CAAC;IAChB,KAAK,EAAE,MAAM,CAAC;IACd,IAAI,EAAE,MAAM,CAAC;CACd;AAGD,MAAM,WAAW,cAAe,SAAQ,YAAY;IAClD,IAAI,EAAE,WAAW,CAAC;IAClB,IAAI,EAAE,MAAM,CAAC;CACd;AAGD,MAAM,WAAW,SAAU,SAAQ,YAAY;IAC7C,IAAI,EAAE,MAAM,CAAC;IACb,OAAO,EAAE,OAAO,CAAC;IACjB,KAAK,EAAE,MAAM,EAAE,CAAC;CACjB;AAGD,MAAM,WAAW,SAAU,SAAQ,YAAY;IAC7C,IAAI,EAAE,MAAM,CAAC;IACb,QAAQ,CAAC,EAAE,MAAM,GAAG,SAAS,CAAC;IAC9B,IAAI,EAAE,MAAM,CAAC;CACd;AAGD,MAAM,WAAW,UAAW,SAAQ,YAAY;IAC9C,IAAI,EAAE,OAAO,CAAC;IACd,OAAO,CAAC,EAAE,MAAM,EAAE,GAAG,SAAS,CAAC;IAC/B,IAAI,EAAE,MAAM,EAAE,EAAE,CAAC;CAClB;AAGD,MAAM,WAAW,UAAW,SAAQ,YAAY;IAC9C,IAAI,EAAE,OAAO,CAAC;IACd,GAAG,EAAE,MAAM,CAAC;IACZ,GAAG,CAAC,EAAE,MAAM,GAAG,SAAS,CAAC;CAC1B;AAGD,MAAM,MAAM,iBAAiB,GACzB,aAAa,GACb,YAAY,GACZ,cAAc,GACd,SAAS,GACT,SAAS,GACT,UAAU,GACV,UAAU,CAAC;AAGf,MAAM,WAAW,gBAAgB;IAC/B,KAAK,CAAC,EAAE,MAAM,GAAG,SAAS,CAAC;IAC3B,MAAM,CAAC,EAAE,MAAM,GAAG,SAAS,CAAC;IAC5B,OAAO,EAAE,MAAM,CAAC;IAChB,WAAW,EAAE,MAAM,CAAC;IACpB,OAAO,CAAC,EAAE,MAAM,GAAG,SAAS,CAAC;IAC7B,QAAQ,CAAC,EAAE,MAAM,GAAG,SAAS,CAAC;CAC/B;AAGD,MAAM,WAAW,UAAU;IACzB,GAAG,EAAE,MAAM,CAAC;IACZ,OAAO,EAAE,MAAM,CAAC;IAChB,SAAS,EAAE,MAAM,CAAC;IAClB,SAAS,EAAE,MAAM,CAAC;CACnB;AAGD,MAAM,WAAW,aAAa;IAC5B,IAAI,EAAE,MAAM,CAAC;IACb,IAAI,EAAE,MAAM,CAAC;IACb,IAAI,EAAE,UAAU,GAAG,UAAU,GAAG,OAAO,CAAC;CACzC"}
|
package/dist/types/index.d.ts
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/types/index.ts"],"names":[],"mappings":"AAAA,
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/types/index.ts"],"names":[],"mappings":"AAAA,mBAAmB,oBAAoB,CAAC"}
|
package/dist/types/index.js
CHANGED
package/dist/types/index.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.js","sourceRoot":"","sources":["../../src/types/index.ts"],"names":[],"mappings":"
|
|
1
|
+
{"version":3,"file":"index.js","sourceRoot":"","sources":["../../src/types/index.ts"],"names":[],"mappings":""}
|
package/dist/types/schemas.d.ts
CHANGED
|
@@ -1,22 +1,49 @@
|
|
|
1
1
|
/**
|
|
2
2
|
* Tool input types - used for type safety in tool handlers
|
|
3
3
|
*/
|
|
4
|
-
|
|
4
|
+
/** Common request options shared across tools */
|
|
5
|
+
export interface RequestOptions {
|
|
6
|
+
/** Custom HTTP headers for the request */
|
|
7
|
+
customHeaders?: Record<string, string> | undefined;
|
|
8
|
+
/** Request timeout in milliseconds (1000-60000) */
|
|
9
|
+
timeout?: number | undefined;
|
|
10
|
+
/** Number of retry attempts (1-10) */
|
|
11
|
+
retries?: number | undefined;
|
|
12
|
+
}
|
|
13
|
+
export interface FetchUrlInput extends RequestOptions {
|
|
5
14
|
url: string;
|
|
6
|
-
extractMainContent?: boolean;
|
|
7
|
-
includeMetadata?: boolean;
|
|
8
|
-
maxContentLength?: number;
|
|
9
|
-
format?: 'jsonl' | 'markdown';
|
|
10
|
-
customHeaders?: Record<string, string>;
|
|
15
|
+
extractMainContent?: boolean | undefined;
|
|
16
|
+
includeMetadata?: boolean | undefined;
|
|
17
|
+
maxContentLength?: number | undefined;
|
|
18
|
+
format?: 'jsonl' | 'markdown' | undefined;
|
|
11
19
|
}
|
|
12
|
-
export interface FetchLinksInput {
|
|
20
|
+
export interface FetchLinksInput extends RequestOptions {
|
|
13
21
|
url: string;
|
|
14
|
-
includeExternal?: boolean;
|
|
15
|
-
includeInternal?: boolean;
|
|
22
|
+
includeExternal?: boolean | undefined;
|
|
23
|
+
includeInternal?: boolean | undefined;
|
|
24
|
+
/** Maximum number of links to return */
|
|
25
|
+
maxLinks?: number | undefined;
|
|
26
|
+
/** Regex pattern to filter links (matches against href) */
|
|
27
|
+
filterPattern?: string | undefined;
|
|
28
|
+
/** Include image links (img src attributes) */
|
|
29
|
+
includeImages?: boolean | undefined;
|
|
16
30
|
}
|
|
17
|
-
export interface FetchMarkdownInput {
|
|
31
|
+
export interface FetchMarkdownInput extends RequestOptions {
|
|
18
32
|
url: string;
|
|
19
|
-
extractMainContent?: boolean;
|
|
20
|
-
includeMetadata?: boolean;
|
|
33
|
+
extractMainContent?: boolean | undefined;
|
|
34
|
+
includeMetadata?: boolean | undefined;
|
|
35
|
+
/** Maximum content length in characters */
|
|
36
|
+
maxContentLength?: number | undefined;
|
|
37
|
+
/** Generate table of contents from headings */
|
|
38
|
+
generateToc?: boolean | undefined;
|
|
39
|
+
}
|
|
40
|
+
export interface FetchUrlsInput extends RequestOptions {
|
|
41
|
+
urls: string[];
|
|
42
|
+
extractMainContent?: boolean | undefined;
|
|
43
|
+
includeMetadata?: boolean | undefined;
|
|
44
|
+
maxContentLength?: number | undefined;
|
|
45
|
+
format?: 'jsonl' | 'markdown' | undefined;
|
|
46
|
+
concurrency?: number | undefined;
|
|
47
|
+
continueOnError?: boolean | undefined;
|
|
21
48
|
}
|
|
22
49
|
//# sourceMappingURL=schemas.d.ts.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"schemas.d.ts","sourceRoot":"","sources":["../../src/types/schemas.ts"],"names":[],"mappings":"AAAA;;GAEG;AAEH,MAAM,WAAW,aAAa;
|
|
1
|
+
{"version":3,"file":"schemas.d.ts","sourceRoot":"","sources":["../../src/types/schemas.ts"],"names":[],"mappings":"AAAA;;GAEG;AAEH,iDAAiD;AACjD,MAAM,WAAW,cAAc;IAC7B,0CAA0C;IAC1C,aAAa,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,GAAG,SAAS,CAAC;IACnD,mDAAmD;IACnD,OAAO,CAAC,EAAE,MAAM,GAAG,SAAS,CAAC;IAC7B,sCAAsC;IACtC,OAAO,CAAC,EAAE,MAAM,GAAG,SAAS,CAAC;CAC9B;AAED,MAAM,WAAW,aAAc,SAAQ,cAAc;IACnD,GAAG,EAAE,MAAM,CAAC;IACZ,kBAAkB,CAAC,EAAE,OAAO,GAAG,SAAS,CAAC;IACzC,eAAe,CAAC,EAAE,OAAO,GAAG,SAAS,CAAC;IACtC,gBAAgB,CAAC,EAAE,MAAM,GAAG,SAAS,CAAC;IACtC,MAAM,CAAC,EAAE,OAAO,GAAG,UAAU,GAAG,SAAS,CAAC;CAC3C;AAED,MAAM,WAAW,eAAgB,SAAQ,cAAc;IACrD,GAAG,EAAE,MAAM,CAAC;IACZ,eAAe,CAAC,EAAE,OAAO,GAAG,SAAS,CAAC;IACtC,eAAe,CAAC,EAAE,OAAO,GAAG,SAAS,CAAC;IACtC,wCAAwC;IACxC,QAAQ,CAAC,EAAE,MAAM,GAAG,SAAS,CAAC;IAC9B,2DAA2D;IAC3D,aAAa,CAAC,EAAE,MAAM,GAAG,SAAS,CAAC;IACnC,+CAA+C;IAC/C,aAAa,CAAC,EAAE,OAAO,GAAG,SAAS,CAAC;CACrC;AAED,MAAM,WAAW,kBAAmB,SAAQ,cAAc;IACxD,GAAG,EAAE,MAAM,CAAC;IACZ,kBAAkB,CAAC,EAAE,OAAO,GAAG,SAAS,CAAC;IACzC,eAAe,CAAC,EAAE,OAAO,GAAG,SAAS,CAAC;IACtC,2CAA2C;IAC3C,gBAAgB,CAAC,EAAE,MAAM,GAAG,SAAS,CAAC;IACtC,+CAA+C;IAC/C,WAAW,CAAC,EAAE,OAAO,GAAG,SAAS,CAAC;CACnC;AAED,MAAM,WAAW,cAAe,SAAQ,cAAc;IACpD,IAAI,EAAE,MAAM,EAAE,CAAC;IACf,kBAAkB,CAAC,EAAE,OAAO,GAAG,SAAS,CAAC;IACzC,eAAe,CAAC,EAAE,OAAO,GAAG,SAAS,CAAC;IACtC,gBAAgB,CAAC,EAAE,MAAM,GAAG,SAAS,CAAC;IACtC,MAAM,CAAC,EAAE,OAAO,GAAG,UAAU,GAAG,SAAS,CAAC;IAC1C,WAAW,CAAC,EAAE,MAAM,GAAG,SAAS,CAAC;IACjC,eAAe,CAAC,EAAE,OAAO,GAAG,SAAS,CAAC;CACvC"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"concurrency.d.ts","sourceRoot":"","sources":["../../src/utils/concurrency.ts"],"names":[],"mappings":"AAsBA,wBAAsB,kBAAkB,CAAC,CAAC,EACxC,KAAK,EAAE,MAAM,EACb,KAAK,EAAE,CAAC,MAAM,OAAO,CAAC,CAAC,CAAC,CAAC,EAAE,GAC1B,OAAO,CAAC,oBAAoB,CAAC,CAAC,CAAC,EAAE,CAAC,CAGpC"}
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
/**
 * Create a limiter that caps how many async tasks run at the same time.
 *
 * The requested limit is clamped to the range 1-10. A limit that is not a
 * number (`undefined`, `NaN`) makes the clamp evaluate to `NaN`; because
 * `active >= NaN` is always false that would disable the cap entirely, so
 * such a limit falls back to 1.
 *
 * @param limit Requested maximum number of tasks in flight.
 * @returns An async function that accepts a task `fn`, waits until a slot is
 *   free, runs it, and settles with the task's own result or error.
 */
function createConcurrencyLimiter(limit) {
    const clamped = Math.min(Math.max(1, limit), 10);
    const maxConcurrency = Number.isNaN(clamped) ? 1 : clamped;
    let active = 0;
    // Resolvers of callers parked while every slot is taken (first in, first out).
    const queue = [];
    return async (fn) => {
        // Re-check after every wake-up: the freed slot may have been claimed
        // by another caller before this one resumed.
        while (active >= maxConcurrency) {
            await new Promise((resolve) => queue.push(resolve));
        }
        active++;
        try {
            return await fn();
        }
        finally {
            // Release the slot and wake the longest-waiting caller, whether
            // the task fulfilled or rejected.
            active--;
            const next = queue.shift();
            if (next)
                next();
        }
    };
}
|
|
21
|
+
/**
 * Run every task through one shared concurrency limiter and wait until all
 * of them have settled.
 *
 * @param limit Requested number of tasks allowed to run at once; clamping is
 *   done by the limiter.
 * @param tasks Array of zero-argument functions, each returning a promise.
 * @returns The `Promise.allSettled` outcome list, in the same order as `tasks`.
 */
export async function runWithConcurrency(limit, tasks) {
    const schedule = createConcurrencyLimiter(limit);
    const inFlight = tasks.map((job) => schedule(job));
    return Promise.allSettled(inFlight);
}
|
|
25
|
+
//# sourceMappingURL=concurrency.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"concurrency.js","sourceRoot":"","sources":["../../src/utils/concurrency.ts"],"names":[],"mappings":"AAEA,SAAS,wBAAwB,CAAC,KAAa;IAC7C,MAAM,cAAc,GAAG,IAAI,CAAC,GAAG,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,KAAK,CAAC,EAAE,EAAE,CAAC,CAAC;IACxD,IAAI,MAAM,GAAG,CAAC,CAAC;IACf,MAAM,KAAK,GAAmB,EAAE,CAAC;IAEjC,OAAO,KAAK,EAAK,EAAoB,EAAc,EAAE;QACnD,OAAO,MAAM,IAAI,cAAc,EAAE,CAAC;YAChC,MAAM,IAAI,OAAO,CAAO,CAAC,OAAO,EAAE,EAAE,CAAC,KAAK,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC,CAAC;QAC5D,CAAC;QAED,MAAM,EAAE,CAAC;QACT,IAAI,CAAC;YACH,OAAO,MAAM,EAAE,EAAE,CAAC;QACpB,CAAC;gBAAS,CAAC;YACT,MAAM,EAAE,CAAC;YACT,MAAM,IAAI,GAAG,KAAK,CAAC,KAAK,EAAE,CAAC;YAC3B,IAAI,IAAI;gBAAE,IAAI,EAAE,CAAC;QACnB,CAAC;IACH,CAAC,CAAC;AACJ,CAAC;AACD,MAAM,CAAC,KAAK,UAAU,kBAAkB,CACtC,KAAa,EACb,KAA2B;IAE3B,MAAM,OAAO,GAAG,wBAAwB,CAAC,KAAK,CAAC,CAAC;IAChD,OAAO,OAAO,CAAC,UAAU,CAAC,KAAK,CAAC,GAAG,CAAC,KAAK,EAAE,IAAI,EAAE,EAAE,CAAC,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;AACtE,CAAC"}
|
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Post-processing content cleaner for removing noise artifacts
|
|
3
|
+
* that slip through Readability extraction.
|
|
4
|
+
*/
|
|
5
|
+
/**
|
|
6
|
+
* Clean paragraph text by removing noise
|
|
7
|
+
*/
|
|
8
|
+
export declare function cleanParagraph(text: string): string | null;
|
|
9
|
+
/**
|
|
10
|
+
* Clean heading text by removing noise and markdown link syntax
|
|
11
|
+
*/
|
|
12
|
+
export declare function cleanHeading(text: string): string | null;
|
|
13
|
+
/**
|
|
14
|
+
* Clean list items by filtering out noise
|
|
15
|
+
*/
|
|
16
|
+
export declare function cleanListItems(items: string[]): string[];
|
|
17
|
+
/**
|
|
18
|
+
* Clean code block text - minimal cleaning to preserve code integrity
|
|
19
|
+
*/
|
|
20
|
+
export declare function cleanCodeBlock(code: string): string | null;
|
|
21
|
+
/**
|
|
22
|
+
* Strip markdown link syntax from text for cleaner slugs/display
|
|
23
|
+
* [Text](#anchor) -> Text
|
|
24
|
+
* [Text](url) -> Text
|
|
25
|
+
*/
|
|
26
|
+
export declare function stripMarkdownLinks(text: string): string;
|
|
27
|
+
/**
|
|
28
|
+
* Remove common timestamp patterns from text (inline removal)
|
|
29
|
+
* Use when you want to strip timestamps from within longer content
|
|
30
|
+
*/
|
|
31
|
+
export declare function removeInlineTimestamps(text: string): string;
|
|
32
|
+
//# sourceMappingURL=content-cleaner.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"content-cleaner.d.ts","sourceRoot":"","sources":["../../src/utils/content-cleaner.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAwKH;;GAEG;AACH,wBAAgB,cAAc,CAAC,IAAI,EAAE,MAAM,GAAG,MAAM,GAAG,IAAI,CAsB1D;AAED;;GAEG;AACH,wBAAgB,YAAY,CAAC,IAAI,EAAE,MAAM,GAAG,MAAM,GAAG,IAAI,CAuBxD;AAED;;GAEG;AACH,wBAAgB,cAAc,CAAC,KAAK,EAAE,MAAM,EAAE,GAAG,MAAM,EAAE,CAQxD;AAED;;GAEG;AACH,wBAAgB,cAAc,CAAC,IAAI,EAAE,MAAM,GAAG,MAAM,GAAG,IAAI,CAc1D;AAED;;;;GAIG;AACH,wBAAgB,kBAAkB,CAAC,IAAI,EAAE,MAAM,GAAG,MAAM,CAEvD;AAED;;;GAGG;AACH,wBAAgB,sBAAsB,CAAC,IAAI,EAAE,MAAM,GAAG,MAAM,CAsB3D"}
|
|
@@ -0,0 +1,240 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Post-processing content cleaner for removing noise artifacts
|
|
3
|
+
* that slip through Readability extraction.
|
|
4
|
+
*/
|
|
5
|
+
// Regexes that flag a whole text fragment as noise regardless of its length.
// Most are anchored at both ends (the fragment must consist of the phrase
// alone); the few anchored only at the start match on a prefix.
const NOISE_PATTERNS = [
    // --- Timestamps and dates standing on their own ---
    /^\d+\s*(seconds?|minutes?|hours?|days?|weeks?|months?|years?)\s*ago$/i,
    /^(just now|recently|today|yesterday|last week|last month)$/i,
    /^(updated|modified|edited|created|published)\s*:?\s*\d+\s*(seconds?|minutes?|hours?|days?|weeks?|months?|years?)\s*ago$/i,
    /^(jan|feb|mar|apr|may|jun|jul|aug|sep|oct|nov|dec)\s+\d{1,2},?\s+\d{4}$/i,
    /^\d{1,2}\s+(jan|feb|mar|apr|may|jun|jul|aug|sep|oct|nov|dec)\s+\d{4}$/i,
    /^\d{4}-\d{2}-\d{2}$/i, // YYYY-MM-DD
    /^last\s+updated\s*:?/i, // prefix match
    // --- Share / action button captions ---
    /^(share|copy|like|follow|subscribe|download|print|save|bookmark|tweet|pin it|email|export)$/i,
    /^(copy to clipboard|copied!?|copy code|copy link)$/i,
    /^(share on|share to|share via)\s+(twitter|facebook|linkedin|reddit|x|email)$/i,
    // --- Generic UI control captions ---
    /^(click to copy|expand|collapse|show more|show less|load more|view more|read more|see more|see all|view all)$/i,
    /^(toggle|switch|enable|disable|on|off)$/i,
    /^(edit|delete|remove|add|new|create|update|cancel|confirm|submit|reset|clear)$/i,
    /^(open in|view in|edit in)\s+\w+$/i,
    /^(try it|run|execute|play|preview|demo|live demo|playground)$/i,
    /^(source|view source|edit this page|edit on github|improve this doc)$/i,
    // --- Loading indicators and placeholders ---
    /^(loading\.{0,3}|please wait\.{0,3}|\.{2,})$/i,
    /^(n\/a|tbd|todo|coming soon|placeholder|untitled)$/i,
    // --- Navigation captions ---
    /^(next|previous|prev|back|forward|home|menu|close|open|skip to|jump to|go to)$/i,
    /^(table of contents|toc|contents|on this page|in this article|in this section)$/i,
    /^(scroll to top|back to top|top)$/i,
    // --- Cookie, consent and legal captions ---
    /^(accept|reject|accept all|reject all|cookie settings|privacy settings|manage preferences)$/i,
    /^(accept cookies|decline cookies|cookie policy|privacy policy|terms of service|terms & conditions)$/i,
    // --- Comment and reaction counters ---
    /^\d+\s*(comments?|replies?|reactions?|responses?)$/i,
    // --- Social engagement counters ---
    /^\d+\s*(likes?|shares?|views?|followers?|retweets?|stars?|forks?|claps?|upvotes?|downvotes?)$/i,
    /^(liked by|shared by|followed by)\s+\d+/i, // prefix match
    // --- Version and release-channel badges ---
    /^v?\d+\.\d+(\.\d+)?(-\w+)?$/i, // e.g. v1.2.3 or 1.2.3-beta
    /^(stable|beta|alpha|rc|preview|experimental|deprecated|legacy|new|updated)$/i,
    // --- Leftover structural labels ---
    /^(a|b|c|d|e|f|g|h|i|j|k|l|m|n|o|p|q|r|s|t|u|v|w|x|y|z)$/i, // one letter only
    /^panel\s*[a-z]?$/i, // "Panel", "Panel A", "Panel B", ...
    // --- API explorer labels ---
    /^(required|optional|default|type|example|description|parameters?|returns?|response|request)$/i,
    /^(get|post|put|patch|delete|head|options)\s*$/i, // a bare HTTP method
    // --- Interaction hints ---
    /^(drag|drop|resize|zoom|scroll|swipe|tap|click|hover|focus)(\s+to\s+\w+)?$/i,
    /^(drag the|move the|resize the|drag to|click to)\s+\w+/i, // prefix match
    // --- Breadcrumb separators ---
    /^[/\\>→»›]+$/,
    // --- Advertisement markers ---
    /^(ad|advertisement|sponsored|promoted|partner content)$/i,
];
|
|
58
|
+
// Regexes that mark a fragment as noise only when the fragment is short
// (fewer than 25 characters).
const SHORT_TEXT_NOISE_PATTERNS = [
    /^#\w+$/, // a lone hashtag
    /^@\w+$/, // a lone @mention
    /^\d+$/, // digits and nothing else
    /^[•·→←↑↓►▼▲◄▶◀■□●○★☆✓✗✔✘×]+$/, // only bullets, arrows or similar symbols
    /^[,;:\-–—]+$/, // only punctuation
    /^\[\d+\]$/, // bracketed reference marker such as [1]
    /^\(\d+\)$/, // parenthesised reference marker such as (1)
    /^fig\.?\s*\d+$/i, // figure label
    /^table\s*\d+$/i, // table label
    /^step\s*\d+$/i, // step label
    /^note:?$/i, // the word "Note" by itself
    /^tip:?$/i, // the word "Tip" by itself
    /^warning:?$/i, // the word "Warning" by itself
    /^info:?$/i, // the word "Info" by itself
    /^caution:?$/i, // the word "Caution" by itself
];
|
|
76
|
+
// Regexes for captions that typically belong to the page's UI chrome rather
// than to its main content. Each must match the whole fragment.
const UI_CHROME_PATTERNS = [
    // Account actions
    /^(sign in|sign up|log in|log out|register|create account)$/i,
    // Search boxes
    /^(search|search\.\.\.|search docs|search documentation)$/i,
    // Appearance and locale switches
    /^(dark mode|light mode|theme|language|locale)$/i,
    // Feedback links
    /^(feedback|report issue|report a bug|file an issue|suggest edit)$/i,
    // Top-level documentation navigation
    /^(documentation|docs|api|reference|guide|tutorial|examples?)$/i,
    // Release information links
    /^(version|changelog|release notes|what's new)$/i,
];
|
|
85
|
+
// Length thresholds, in characters of the trimmed text.
// Text shorter than this is additionally checked against the short-text and
// UI-chrome pattern lists.
const SHORT_TEXT_THRESHOLD = 25;
// Paragraphs shorter than this are kept only if they end in . ! or ?
const MIN_PARAGRAPH_LENGTH = 20;
// List items shorter than this are dropped.
const MIN_LIST_ITEM_LENGTH = 3;
// Headings shorter than this are dropped.
const MIN_HEADING_LENGTH = 2;
|
|
90
|
+
/**
 * Decide whether a text fragment is noise.
 *
 * A fragment is noise when it is empty after trimming, when it matches any
 * entry of NOISE_PATTERNS, or when it is shorter than SHORT_TEXT_THRESHOLD
 * and matches an entry of SHORT_TEXT_NOISE_PATTERNS or UI_CHROME_PATTERNS.
 *
 * @param text Raw text fragment.
 * @returns `true` if the fragment is considered noise, otherwise `false`.
 */
function isNoiseText(text) {
    const candidate = text.trim();
    if (candidate.length === 0) {
        return true;
    }
    const matchesAny = (patterns) => patterns.some((re) => re.test(candidate));
    if (matchesAny(NOISE_PATTERNS)) {
        return true;
    }
    // The remaining lists only apply to short fragments.
    if (candidate.length >= SHORT_TEXT_THRESHOLD) {
        return false;
    }
    return matchesAny(SHORT_TEXT_NOISE_PATTERNS) || matchesAny(UI_CHROME_PATTERNS);
}
|
|
121
|
+
/**
 * Report whether text starts like typical placeholder or demo copy
 * ("lorem ipsum", "sample text", "enter your ...", and similar).
 *
 * @param text Text to inspect; it is trimmed and lower-cased before matching.
 * @returns `true` when the text begins with one of the placeholder phrases.
 */
function isPlaceholderContent(text) {
    const probe = text.trim().toLowerCase();
    // Every pattern is anchored at the start only, so these are prefix checks.
    return [
        /^lorem ipsum/i,
        /^sample text/i,
        /^placeholder/i,
        /^example (text|content|data)/i,
        /^test (text|content|data)/i,
        /^your (text|content|name|email) here/i,
        /^enter (your|a) /i,
        /^type (your|a|something) /i,
    ].some((re) => re.test(probe));
}
|
|
144
|
+
/**
 * Trim a paragraph and reject it when it looks like noise.
 *
 * @param text Raw paragraph text.
 * @returns The trimmed paragraph, or `null` when it is shorter than
 *   MIN_PARAGRAPH_LENGTH without ending in `.`, `!` or `?`, when it is
 *   classified as noise, or when it starts like placeholder copy.
 */
export function cleanParagraph(text) {
    const body = text.trim();
    // Short paragraphs survive only if they end like a sentence.
    const endsLikeSentence = /[.!?]$/.test(body);
    if (body.length < MIN_PARAGRAPH_LENGTH && !endsLikeSentence) {
        return null;
    }
    return isNoiseText(body) || isPlaceholderContent(body) ? null : body;
}
|
|
166
|
+
/**
 * Normalise a heading and reject it when nothing meaningful remains.
 *
 * @param text Raw heading text.
 * @returns The cleaned heading, or `null` when the trimmed input is shorter
 *   than MIN_HEADING_LENGTH or the cleaned text is classified as noise.
 */
export function cleanHeading(text) {
    const raw = text.trim();
    // The length check applies to the text before any stripping.
    if (raw.length < MIN_HEADING_LENGTH) {
        return null;
    }
    const heading = raw
        // [Text](target) -> Text
        .replace(/\[([^\]]+)\]\([^)]*\)/g, '$1')
        // Drop a trailing "Link for this heading" / "Link for <words>" caption
        .replace(/\s*Link for (this heading|[\w\s]+)\s*$/i, '')
        // Drop trailing "#" anchor markers
        .replace(/\s*#+\s*$/, '');
    return isNoiseText(heading) ? null : heading.trim();
}
|
|
187
|
+
/**
 * Trim list items and keep only the meaningful ones.
 *
 * @param items Raw list item strings.
 * @returns A new array of trimmed items, without those shorter than
 *   MIN_LIST_ITEM_LENGTH or classified as noise; order is preserved.
 */
export function cleanListItems(items) {
    return items.flatMap((raw) => {
        const entry = raw.trim();
        const keep = entry.length >= MIN_LIST_ITEM_LENGTH && !isNoiseText(entry);
        return keep ? [entry] : [];
    });
}
|
|
201
|
+
/**
 * Clean code block text with minimal changes so the code stays intact.
 *
 * @param code Raw code block text.
 * @returns The trimmed code, or `null` when it is empty or is a one- or
 *   two-character fragment that does not consist solely of bracket or
 *   punctuation characters.
 */
export function cleanCodeBlock(code) {
    const trimmed = code.trim();
    // Empty code block
    if (trimmed.length === 0) {
        return null;
    }
    // Very short blocks are likely just labels, unless they are made up only
    // of bracket/punctuation characters. The `+` lets two-character snippets
    // such as `{}`, `[]` or `()` through, consistent with a lone `{` being kept.
    if (trimmed.length < 3 && !/^[{}[\]();<>]+$/.test(trimmed)) {
        return null;
    }
    return trimmed;
}
|
|
216
|
+
/**
 * Replace inline markdown links with their link text, e.g. for slugs or
 * plain display.
 *   [Text](#anchor) -> Text
 *   [Text](url)     -> Text
 *
 * @param text Text that may contain `[label](target)` links.
 * @returns The text with every such link reduced to its label.
 */
export function stripMarkdownLinks(text) {
    // Group 1 is the non-empty label; the target in parentheses may be empty.
    const inlineLink = /\[([^\]]+)\]\([^)]*\)/g;
    return text.replace(inlineLink, '$1');
}
|
|
224
|
+
/**
 * Remove common timestamp patterns from within longer text.
 *
 * Removed, in this order:
 *   1. labelled relative times such as "Updated: 3 days ago",
 *   2. bare relative times such as "5 minutes ago",
 *   3. dates written as an abbreviated month, day and year ("Jan 5, 2024").
 * Runs of whitespace left behind are collapsed and the result is trimmed.
 *
 * @param text Text that may contain inline timestamps.
 * @returns The text without the recognised timestamps.
 */
export function removeInlineTimestamps(text) {
    return (text
        // Labelled form first: if the bare "N units ago" were stripped before
        // it, this pattern could no longer match and the label ("Updated:")
        // would be left dangling in the output.
        .replace(/\b(updated|modified|edited|created|published)\s*:?\s*\d+\s*(seconds?|minutes?|hours?|days?|weeks?|months?|years?)\s*ago\b/gi, '')
        // Remove "X days/hours/etc ago" patterns
        .replace(/\b\d+\s*(seconds?|minutes?|hours?|days?|weeks?|months?|years?)\s*ago\b/gi, '')
        // Remove standalone dates
        .replace(/\b(jan|feb|mar|apr|may|jun|jul|aug|sep|oct|nov|dec)\s+\d{1,2},?\s+\d{4}\b/gi, '')
        // Clean up extra whitespace
        .replace(/\s{2,}/g, ' ')
        .trim());
}
|
|
240
|
+
//# sourceMappingURL=content-cleaner.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"content-cleaner.js","sourceRoot":"","sources":["../../src/utils/content-cleaner.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAEH,wEAAwE;AACxE,MAAM,cAAc,GAAa;IAC/B,mCAAmC;IACnC,uEAAuE;IACvE,6DAA6D;IAC7D,0HAA0H;IAC1H,0EAA0E;IAC1E,wEAAwE;IACxE,sBAAsB,EAAE,WAAW;IACnC,uBAAuB;IAEvB,0CAA0C;IAC1C,8FAA8F;IAC9F,qDAAqD;IACrD,+EAA+E;IAE/E,iCAAiC;IACjC,gHAAgH;IAChH,0CAA0C;IAC1C,iFAAiF;IACjF,oCAAoC;IACpC,gEAAgE;IAChE,wEAAwE;IAExE,+BAA+B;IAC/B,+CAA+C;IAC/C,qDAAqD;IAErD,uBAAuB;IACvB,iFAAiF;IACjF,kFAAkF;IAClF,oCAAoC;IAEpC,+BAA+B;IAC/B,8FAA8F;IAC9F,sGAAsG;IAEtG,0BAA0B;IAC1B,qDAAqD;IAErD,+BAA+B;IAC/B,gGAAgG;IAChG,0CAA0C;IAE1C,8BAA8B;IAC9B,8BAA8B,EAAE,qBAAqB;IACrD,8EAA8E;IAE9E,4BAA4B;IAC5B,0DAA0D,EAAE,iBAAiB;IAC7E,mBAAmB,EAAE,yBAAyB;IAE9C,yBAAyB;IACzB,+FAA+F;IAC/F,gDAAgD,EAAE,qBAAqB;IAEvE,6BAA6B;IAC7B,6EAA6E;IAC7E,yDAAyD;IAEzD,wBAAwB;IACxB,cAAc;IAEd,wBAAwB;IACxB,0DAA0D;CAC3D,CAAC;AAEF,oEAAoE;AACpE,MAAM,yBAAyB,GAAa;IAC1C,QAAQ,EAAE,gBAAgB;IAC1B,QAAQ,EAAE,gBAAgB;IAC1B,OAAO,EAAE,eAAe;IACxB,8BAA8B,EAAE,sCAAsC;IACtE,cAAc,EAAE,mBAAmB;IACnC,WAAW,EAAE,6BAA6B;IAC1C,WAAW,EAAE,6BAA6B;IAC1C,iBAAiB,EAAE,oBAAoB;IACvC,gBAAgB,EAAE,mBAAmB;IACrC,eAAe,EAAE,qBAAqB;IACtC,WAAW,EAAE,eAAe;IAC5B,UAAU,EAAE,cAAc;IAC1B,cAAc,EAAE,kBAAkB;IAClC,WAAW,EAAE,eAAe;IAC5B,cAAc,EAAE,kBAAkB;CACnC,CAAC;AAEF,gFAAgF;AAChF,MAAM,kBAAkB,GAAa;IACnC,6DAA6D;IAC7D,2DAA2D;IAC3D,iDAAiD;IACjD,oEAAoE;IACpE,gEAAgE;IAChE,iDAAiD;CAClD,CAAC;AAEF,8CAA8C;AAC9C,MAAM,oBAAoB,GAAG,EAAE,CAAC;AAChC,MAAM,kBAAkB,GAAG,CAAC,CAAC;AAC7B,MAAM,oBAAoB,GAAG,CAAC,CAAC;AAC/B,MAAM,oBAAoB,GAAG,EAAE,CAAC;AAEhC;;GAEG;AACH,SAAS,WAAW,CAAC,IAAY;IAC/B,MAAM,OAAO,GAAG,IAAI,CAAC,IAAI,EAAE,CAAC;IAE5B,2BAA2B;IAC3B,IAAI,CAAC,OAAO,EAAE,CAAC;QACb,OAAO,IAAI,CAAC;IACd,CAAC;IAED,mCAAmC;IACnC,KAAK,MAAM,OAAO,IAAI,cAAc,EAAE,CAAC;QACrC,IAAI,OAAO,CAAC,IAAI,CAAC,OAAO,CAAC,EAAE,CAAC;YAC1B,OAAO,IAAI,CAAC;QACd,CAAC;IACH,CAAC;IAED,8CAA8C;IAC9C,IAAI,OAAO,CAAC,MAAM,GAAG,oBAAoB,EAAE,CAAC;QAC1C,KAAK,MAAM,OAAO,IAAI,yBAAyB,EAAE,CAAC;YAChD,IAAI,OAAO,CAAC,IAAI,CAAC,OAAO,CAAC,EAAE,CA
AC;gBAC1B,OAAO,IAAI,CAAC;YACd,CAAC;QACH,CAAC;QAED,+CAA+C;QAC/C,KAAK,MAAM,OAAO,IAAI,kBAAkB,EAAE,CAAC;YACzC,IAAI,OAAO,CAAC,IAAI,CAAC,OAAO,CAAC,EAAE,CAAC;gBAC1B,OAAO,IAAI,CAAC;YACd,CAAC;QACH,CAAC;IACH,CAAC;IAED,OAAO,KAAK,CAAC;AACf,CAAC;AAED;;GAEG;AACH,SAAS,oBAAoB,CAAC,IAAY;IACxC,MAAM,OAAO,GAAG,IAAI,CAAC,IAAI,EAAE,CAAC,WAAW,EAAE,CAAC;IAE1C,0CAA0C;IAC1C,MAAM,YAAY,GAAG;QACnB,eAAe;QACf,eAAe;QACf,eAAe;QACf,+BAA+B;QAC/B,4BAA4B;QAC5B,uCAAuC;QACvC,mBAAmB;QACnB,4BAA4B;KAC7B,CAAC;IAEF,KAAK,MAAM,OAAO,IAAI,YAAY,EAAE,CAAC;QACnC,IAAI,OAAO,CAAC,IAAI,CAAC,OAAO,CAAC,EAAE,CAAC;YAC1B,OAAO,IAAI,CAAC;QACd,CAAC;IACH,CAAC;IAED,OAAO,KAAK,CAAC;AACf,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,cAAc,CAAC,IAAY;IACzC,MAAM,OAAO,GAAG,IAAI,CAAC,IAAI,EAAE,CAAC;IAE5B,6BAA6B;IAC7B,IAAI,OAAO,CAAC,MAAM,GAAG,oBAAoB,EAAE,CAAC;QAC1C,iFAAiF;QACjF,IAAI,CAAC,QAAQ,CAAC,IAAI,CAAC,OAAO,CAAC,EAAE,CAAC;YAC5B,OAAO,IAAI,CAAC;QACd,CAAC;IACH,CAAC;IAED,mBAAmB;IACnB,IAAI,WAAW,CAAC,OAAO,CAAC,EAAE,CAAC;QACzB,OAAO,IAAI,CAAC;IACd,CAAC;IAED,0DAA0D;IAC1D,IAAI,oBAAoB,CAAC,OAAO,CAAC,EAAE,CAAC;QAClC,OAAO,IAAI,CAAC;IACd,CAAC;IAED,OAAO,OAAO,CAAC;AACjB,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,YAAY,CAAC,IAAY;IACvC,IAAI,OAAO,GAAG,IAAI,CAAC,IAAI,EAAE,CAAC;IAE1B,YAAY;IACZ,IAAI,OAAO,CAAC,MAAM,GAAG,kBAAkB,EAAE,CAAC;QACxC,OAAO,IAAI,CAAC;IACd,CAAC;IAED,uDAAuD;IACvD,OAAO,GAAG,OAAO,CAAC,OAAO,CAAC,wBAAwB,EAAE,IAAI,CAAC,CAAC;IAE1D,4DAA4D;IAC5D,OAAO,GAAG,OAAO,CAAC,OAAO,CAAC,yCAAyC,EAAE,EAAE,CAAC,CAAC;IAEzE,2DAA2D;IAC3D,OAAO,GAAG,OAAO,CAAC,OAAO,CAAC,WAAW,EAAE,EAAE,CAAC,CAAC;IAE3C,mBAAmB;IACnB,IAAI,WAAW,CAAC,OAAO,CAAC,EAAE,CAAC;QACzB,OAAO,IAAI,CAAC;IACd,CAAC;IAED,OAAO,OAAO,CAAC,IAAI,EAAE,CAAC;AACxB,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,cAAc,CAAC,KAAe;IAC5C,OAAO,KAAK;SACT,GAAG,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,IAAI,CAAC,IAAI,EAAE,CAAC;SAC1B,MAAM,CAAC,CAAC,IAAI,EAAE,EAAE;QACf,IAAI,IAAI,CAAC,MAAM,GAAG,oBAAoB;YAAE,OAAO,KAAK,CAAC;QACrD,IAAI,WAAW,CAAC,IAAI,CAAC;YAAE,OAAO,KAAK,CAAC;QACpC,OAAO,IAAI,CAAC;IACd,CAAC,CAAC,CAAC;AACP,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,cAAc,CAAC,IAAY;IACzC,MAAM,OAAO,G
AAG,IAAI,CAAC,IAAI,EAAE,CAAC;IAE5B,mBAAmB;IACnB,IAAI,OAAO,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QACzB,OAAO,IAAI,CAAC;IACd,CAAC;IAED,qDAAqD;IACrD,IAAI,OAAO,CAAC,MAAM,GAAG,CAAC,IAAI,CAAC,gBAAgB,CAAC,IAAI,CAAC,OAAO,CAAC,EAAE,CAAC;QAC1D,OAAO,IAAI,CAAC;IACd,CAAC;IAED,OAAO,OAAO,CAAC;AACjB,CAAC;AAED;;;;GAIG;AACH,MAAM,UAAU,kBAAkB,CAAC,IAAY;IAC7C,OAAO,IAAI,CAAC,OAAO,CAAC,wBAAwB,EAAE,IAAI,CAAC,CAAC;AACtD,CAAC;AAED;;;GAGG;AACH,MAAM,UAAU,sBAAsB,CAAC,IAAY;IACjD,OAAO,CACL,IAAI;QACF,yCAAyC;SACxC,OAAO,CACN,0EAA0E,EAC1E,EAAE,CACH;QACD,kCAAkC;SACjC,OAAO,CACN,6HAA6H,EAC7H,EAAE,CACH;QACD,0BAA0B;SACzB,OAAO,CACN,6EAA6E,EAC7E,EAAE,CACH;QACD,4BAA4B;SAC3B,OAAO,CAAC,SAAS,EAAE,GAAG,CAAC;SACvB,IAAI,EAAE,CACV,CAAC;AACJ,CAAC"}
|
|
@@ -0,0 +1,5 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Detect programming language from code content
|
|
3
|
+
*/
|
|
4
|
+
export declare function detectLanguage(code: string): "json" | "html" | "jsx" | "typescript" | "rust" | "javascript" | "python" | "bash" | "css" | "yaml" | "sql" | "go" | undefined;
|
|
5
|
+
//# sourceMappingURL=language-detector.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"language-detector.d.ts","sourceRoot":"","sources":["../../src/utils/language-detector.ts"],"names":[],"mappings":"AA4CA;;GAEG;AACH,wBAAgB,cAAc,CAAC,IAAI,EAAE,MAAM,kIAE1C"}
|
|
@@ -0,0 +1,50 @@
|
|
|
1
|
+
/**
 * Language detection patterns for code blocks
 * Shared between parser and markdown transformer
 *
 * Each entry is a `[pattern, language]` pair. Order matters: detectLanguage
 * returns the label of the FIRST pattern that matches, so more specific
 * languages (JSX, TypeScript, Rust) are listed ahead of the generic
 * JavaScript/Python/YAML heuristics that would otherwise shadow them.
 * None of the patterns use the `g` or `y` flag, so `test()` is stateless.
 */
const LANGUAGE_PATTERNS = [
  // JSX/TSX patterns
  [
    /^\s*import\s+.*\s+from\s+['"]react['"]|<[A-Z][a-zA-Z]*[\s/>]|jsx\s*:|className=/m,
    'jsx',
  ],
  // TypeScript patterns
  [
    /:\s*(string|number|boolean|void|any|unknown|never)\b|interface\s+\w+|type\s+\w+\s*=/m,
    'typescript',
  ],
  // Rust patterns
  [/^\s*(fn|let\s+mut|impl|struct|enum|use\s+\w+::)/m, 'rust'],
  // JavaScript patterns (generic)
  [
    /^\s*(export|const|let|var|function|class|async|await)\b|^\s*import\s+.*['"]/m,
    'javascript',
  ],
  // Python patterns
  [/^\s*(def|class|import|from|if __name__|print\()/m, 'python'],
  // Bash/Shell patterns
  // The tool name must be followed by whitespace and a known sub-command.
  // `go` is listed without a trailing space: the `\s+` that follows already
  // consumes the separator, so "go build" / "go run" match with one space.
  [
    /^\s*(npm|yarn|pnpm|npx|brew|apt|pip|cargo|go)\s+(install|add|run|build|start)/m,
    'bash',
  ],
  // Shell prompts (`$ cmd`, `# cmd`), shebang lines, and common commands
  [/^\s*[$#]\s+\w+|^\s*#!|^\s*(sudo|chmod|mkdir|cd|ls|cat|echo)\s+/m, 'bash'],
  // CSS patterns
  [/^\s*[.#@]?[\w-]+\s*\{[^}]*\}|@media|@import|@keyframes/m, 'css'],
  // HTML patterns
  [/^\s*<(!DOCTYPE|html|head|body|div|span|p|a|script|style)\b/im, 'html'],
  // JSON patterns
  [/^\s*\{\s*"|^\s*\[\s*("|\d|true|false|null)/m, 'json'],
  // YAML patterns
  [/^\s*[\w-]+:\s*.+$/m, 'yaml'],
  // SQL patterns
  [/^\s*(SELECT|INSERT|UPDATE|DELETE|CREATE|ALTER|DROP)\s+/im, 'sql'],
  // Go patterns
  [/^\s*(func|package|import\s+")/m, 'go'],
];
|
|
44
|
+
/**
 * Detect programming language from code content.
 *
 * Tries each LANGUAGE_PATTERNS entry in order and returns the label paired
 * with the first pattern that matches `code`; returns `undefined` when no
 * pattern matches.
 */
export function detectLanguage(code) {
  for (const [pattern, language] of LANGUAGE_PATTERNS) {
    if (pattern.test(code)) {
      return language;
    }
  }
  return undefined;
}
|
|
50
|
+
//# sourceMappingURL=language-detector.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"language-detector.js","sourceRoot":"","sources":["../../src/utils/language-detector.ts"],"names":[],"mappings":"AAAA;;;GAGG;AACH,MAAM,iBAAiB,GAAG;IACxB,mBAAmB;IACnB;QACE,kFAAkF;QAClF,KAAK;KACN;IACD,sBAAsB;IACtB;QACE,sFAAsF;QACtF,YAAY;KACb;IACD,gBAAgB;IAChB,CAAC,kDAAkD,EAAE,MAAM,CAAC;IAC5D,gCAAgC;IAChC;QACE,8EAA8E;QAC9E,YAAY;KACb;IACD,kBAAkB;IAClB,CAAC,kDAAkD,EAAE,QAAQ,CAAC;IAC9D,sBAAsB;IACtB;QACE,iFAAiF;QACjF,MAAM;KACP;IACD,CAAC,iEAAiE,EAAE,MAAM,CAAC;IAC3E,eAAe;IACf,CAAC,yDAAyD,EAAE,KAAK,CAAC;IAClE,gBAAgB;IAChB,CAAC,8DAA8D,EAAE,MAAM,CAAC;IACxE,gBAAgB;IAChB,CAAC,6CAA6C,EAAE,MAAM,CAAC;IACvD,gBAAgB;IAChB,CAAC,oBAAoB,EAAE,MAAM,CAAC;IAC9B,eAAe;IACf,CAAC,0DAA0D,EAAE,KAAK,CAAC;IACnE,cAAc;IACd,CAAC,gCAAgC,EAAE,IAAI,CAAC;CAChC,CAAC;AAEX;;GAEG;AACH,MAAM,UAAU,cAAc,CAAC,IAAY;IACzC,OAAO,iBAAiB,CAAC,IAAI,CAAC,CAAC,CAAC,OAAO,CAAC,EAAE,EAAE,CAAC,OAAO,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC;AACxE,CAAC"}
|
|
@@ -1,13 +1,3 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* Sanitizes text content by collapsing whitespace and trimming
|
|
3
|
-
* Returns empty string for null/undefined input
|
|
4
|
-
*/
|
|
5
1
|
export declare function sanitizeText(text: string | null | undefined): string;
|
|
6
|
-
/**
|
|
7
|
-
* Truncates text to a maximum length with ellipsis
|
|
8
|
-
* @param text - Text to truncate
|
|
9
|
-
* @param maxLength - Maximum length (must be > 3 to accommodate ellipsis)
|
|
10
|
-
* @returns Truncated text with ellipsis if needed
|
|
11
|
-
*/
|
|
12
2
|
export declare function truncateText(text: string, maxLength: number): string;
|
|
13
3
|
//# sourceMappingURL=sanitizer.d.ts.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"sanitizer.d.ts","sourceRoot":"","sources":["../../src/utils/sanitizer.ts"],"names":[],"mappings":"
|
|
1
|
+
{"version":3,"file":"sanitizer.d.ts","sourceRoot":"","sources":["../../src/utils/sanitizer.ts"],"names":[],"mappings":"AAGA,wBAAgB,YAAY,CAAC,IAAI,EAAE,MAAM,GAAG,IAAI,GAAG,SAAS,GAAG,MAAM,CAIpE;AAED,wBAAgB,YAAY,CAAC,IAAI,EAAE,MAAM,EAAE,SAAS,EAAE,MAAM,GAAG,MAAM,CAQpE"}
|
package/dist/utils/sanitizer.js
CHANGED
|
@@ -1,20 +1,12 @@
|
|
|
1
|
-
|
|
2
|
-
|
|
3
|
-
* Returns empty string for null/undefined input
|
|
4
|
-
*/
|
|
1
|
+
// Pre-compiled regex patterns for hot path optimization
|
|
2
|
+
const WHITESPACE_REGEX = /\s+/g;
|
|
5
3
|
/**
 * Normalizes text by collapsing each run of whitespace into a single space
 * and trimming both ends.
 *
 * Returns an empty string for null/undefined. Any other non-string value is
 * converted with String() and returned without whitespace normalization.
 */
export function sanitizeText(text) {
  // Loose equality: true for both null and undefined.
  if (text == null) {
    return '';
  }
  if (typeof text === 'string') {
    const collapsed = text.replace(WHITESPACE_REGEX, ' ');
    return collapsed.trim();
  }
  return String(text);
}
|
|
12
|
-
/**
|
|
13
|
-
* Truncates text to a maximum length with ellipsis
|
|
14
|
-
* @param text - Text to truncate
|
|
15
|
-
* @param maxLength - Maximum length (must be > 3 to accommodate ellipsis)
|
|
16
|
-
* @returns Truncated text with ellipsis if needed
|
|
17
|
-
*/
|
|
18
10
|
export function truncateText(text, maxLength) {
|
|
19
11
|
if (maxLength < 4) {
|
|
20
12
|
return text.length > 0 ? text.charAt(0) : '';
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"sanitizer.js","sourceRoot":"","sources":["../../src/utils/sanitizer.ts"],"names":[],"mappings":"AAAA
|
|
1
|
+
{"version":3,"file":"sanitizer.js","sourceRoot":"","sources":["../../src/utils/sanitizer.ts"],"names":[],"mappings":"AAAA,wDAAwD;AACxD,MAAM,gBAAgB,GAAG,MAAM,CAAC;AAEhC,MAAM,UAAU,YAAY,CAAC,IAA+B;IAC1D,IAAI,IAAI,IAAI,IAAI;QAAE,OAAO,EAAE,CAAC;IAC5B,IAAI,OAAO,IAAI,KAAK,QAAQ;QAAE,OAAO,MAAM,CAAC,IAAI,CAAC,CAAC;IAClD,OAAO,IAAI,CAAC,OAAO,CAAC,gBAAgB,EAAE,GAAG,CAAC,CAAC,IAAI,EAAE,CAAC;AACpD,CAAC;AAED,MAAM,UAAU,YAAY,CAAC,IAAY,EAAE,SAAiB;IAC1D,IAAI,SAAS,GAAG,CAAC,EAAE,CAAC;QAClB,OAAO,IAAI,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC;IAC/C,CAAC;IACD,IAAI,IAAI,CAAC,MAAM,IAAI,SAAS,EAAE,CAAC;QAC7B,OAAO,IAAI,CAAC;IACd,CAAC;IACD,OAAO,IAAI,CAAC,SAAS,CAAC,CAAC,EAAE,SAAS,GAAG,CAAC,CAAC,GAAG,KAAK,CAAC;AAClD,CAAC"}
|
|
@@ -1,18 +1,4 @@
|
|
|
1
|
-
|
|
2
|
-
export type ToolErrorResponse = {
|
|
3
|
-
[x: string]: unknown;
|
|
4
|
-
content: {
|
|
5
|
-
type: 'text';
|
|
6
|
-
text: string;
|
|
7
|
-
}[];
|
|
8
|
-
structuredContent: {
|
|
9
|
-
[x: string]: unknown;
|
|
10
|
-
error: string;
|
|
11
|
-
url: string;
|
|
12
|
-
errorCode: string;
|
|
13
|
-
};
|
|
14
|
-
isError: true;
|
|
15
|
-
};
|
|
1
|
+
import type { ToolErrorResponse } from '../config/types.js';
|
|
16
2
|
export declare function createToolErrorResponse(message: string, url: string, code: string): ToolErrorResponse;
|
|
17
3
|
export declare function handleToolError(error: unknown, url: string, fallbackMessage?: string): ToolErrorResponse;
|
|
18
4
|
//# sourceMappingURL=tool-error-handler.d.ts.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"tool-error-handler.d.ts","sourceRoot":"","sources":["../../src/utils/tool-error-handler.ts"],"names":[],"mappings":"
|
|
1
|
+
{"version":3,"file":"tool-error-handler.d.ts","sourceRoot":"","sources":["../../src/utils/tool-error-handler.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,iBAAiB,EAAE,MAAM,oBAAoB,CAAC;AAS5D,wBAAgB,uBAAuB,CACrC,OAAO,EAAE,MAAM,EACf,GAAG,EAAE,MAAM,EACX,IAAI,EAAE,MAAM,GACX,iBAAiB,CAOnB;AAED,wBAAgB,eAAe,CAC7B,KAAK,EAAE,OAAO,EACd,GAAG,EAAE,MAAM,EACX,eAAe,SAAqB,GACnC,iBAAiB,CAyBnB"}
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import { AppError,
|
|
1
|
+
import { AppError, FetchError, TimeoutError, UrlValidationError, } from '../errors/index.js';
|
|
2
2
|
export function createToolErrorResponse(message, url, code) {
|
|
3
3
|
const structuredContent = { error: message, url, errorCode: code };
|
|
4
4
|
return {
|