@cosmocoder/mcp-web-docs 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +22 -0
- package/README.md +368 -0
- package/build/__mocks__/embeddings.d.ts +17 -0
- package/build/__mocks__/embeddings.js +66 -0
- package/build/__mocks__/embeddings.js.map +1 -0
- package/build/config.d.ts +44 -0
- package/build/config.js +158 -0
- package/build/config.js.map +1 -0
- package/build/config.test.d.ts +1 -0
- package/build/config.test.js +165 -0
- package/build/config.test.js.map +1 -0
- package/build/crawler/auth.d.ts +128 -0
- package/build/crawler/auth.js +546 -0
- package/build/crawler/auth.js.map +1 -0
- package/build/crawler/auth.test.d.ts +1 -0
- package/build/crawler/auth.test.js +174 -0
- package/build/crawler/auth.test.js.map +1 -0
- package/build/crawler/base.d.ts +24 -0
- package/build/crawler/base.js +149 -0
- package/build/crawler/base.js.map +1 -0
- package/build/crawler/base.test.d.ts +1 -0
- package/build/crawler/base.test.js +234 -0
- package/build/crawler/base.test.js.map +1 -0
- package/build/crawler/browser-config.d.ts +2 -0
- package/build/crawler/browser-config.js +29 -0
- package/build/crawler/browser-config.js.map +1 -0
- package/build/crawler/browser-config.test.d.ts +1 -0
- package/build/crawler/browser-config.test.js +56 -0
- package/build/crawler/browser-config.test.js.map +1 -0
- package/build/crawler/cheerio.d.ts +11 -0
- package/build/crawler/cheerio.js +134 -0
- package/build/crawler/cheerio.js.map +1 -0
- package/build/crawler/chromium.d.ts +21 -0
- package/build/crawler/chromium.js +596 -0
- package/build/crawler/chromium.js.map +1 -0
- package/build/crawler/content-extractor-types.d.ts +25 -0
- package/build/crawler/content-extractor-types.js +2 -0
- package/build/crawler/content-extractor-types.js.map +1 -0
- package/build/crawler/content-extractors.d.ts +9 -0
- package/build/crawler/content-extractors.js +9 -0
- package/build/crawler/content-extractors.js.map +1 -0
- package/build/crawler/content-utils.d.ts +2 -0
- package/build/crawler/content-utils.js +22 -0
- package/build/crawler/content-utils.js.map +1 -0
- package/build/crawler/content-utils.test.d.ts +1 -0
- package/build/crawler/content-utils.test.js +99 -0
- package/build/crawler/content-utils.test.js.map +1 -0
- package/build/crawler/crawlee-crawler.d.ts +63 -0
- package/build/crawler/crawlee-crawler.js +342 -0
- package/build/crawler/crawlee-crawler.js.map +1 -0
- package/build/crawler/crawlee-crawler.test.d.ts +1 -0
- package/build/crawler/crawlee-crawler.test.js +280 -0
- package/build/crawler/crawlee-crawler.test.js.map +1 -0
- package/build/crawler/default-extractor.d.ts +4 -0
- package/build/crawler/default-extractor.js +26 -0
- package/build/crawler/default-extractor.js.map +1 -0
- package/build/crawler/default-extractor.test.d.ts +1 -0
- package/build/crawler/default-extractor.test.js +200 -0
- package/build/crawler/default-extractor.test.js.map +1 -0
- package/build/crawler/default.d.ts +11 -0
- package/build/crawler/default.js +138 -0
- package/build/crawler/default.js.map +1 -0
- package/build/crawler/docs-crawler.d.ts +26 -0
- package/build/crawler/docs-crawler.js +97 -0
- package/build/crawler/docs-crawler.js.map +1 -0
- package/build/crawler/docs-crawler.test.d.ts +1 -0
- package/build/crawler/docs-crawler.test.js +185 -0
- package/build/crawler/docs-crawler.test.js.map +1 -0
- package/build/crawler/factory.d.ts +6 -0
- package/build/crawler/factory.js +83 -0
- package/build/crawler/factory.js.map +1 -0
- package/build/crawler/github-pages-extractor.d.ts +4 -0
- package/build/crawler/github-pages-extractor.js +33 -0
- package/build/crawler/github-pages-extractor.js.map +1 -0
- package/build/crawler/github-pages-extractor.test.d.ts +1 -0
- package/build/crawler/github-pages-extractor.test.js +184 -0
- package/build/crawler/github-pages-extractor.test.js.map +1 -0
- package/build/crawler/github.d.ts +20 -0
- package/build/crawler/github.js +181 -0
- package/build/crawler/github.js.map +1 -0
- package/build/crawler/github.test.d.ts +1 -0
- package/build/crawler/github.test.js +326 -0
- package/build/crawler/github.test.js.map +1 -0
- package/build/crawler/puppeteer.d.ts +16 -0
- package/build/crawler/puppeteer.js +191 -0
- package/build/crawler/puppeteer.js.map +1 -0
- package/build/crawler/queue-manager.d.ts +43 -0
- package/build/crawler/queue-manager.js +169 -0
- package/build/crawler/queue-manager.js.map +1 -0
- package/build/crawler/queue-manager.test.d.ts +1 -0
- package/build/crawler/queue-manager.test.js +509 -0
- package/build/crawler/queue-manager.test.js.map +1 -0
- package/build/crawler/site-rules.d.ts +11 -0
- package/build/crawler/site-rules.js +104 -0
- package/build/crawler/site-rules.js.map +1 -0
- package/build/crawler/site-rules.test.d.ts +1 -0
- package/build/crawler/site-rules.test.js +139 -0
- package/build/crawler/site-rules.test.js.map +1 -0
- package/build/crawler/storybook-extractor.d.ts +34 -0
- package/build/crawler/storybook-extractor.js +767 -0
- package/build/crawler/storybook-extractor.js.map +1 -0
- package/build/crawler/storybook-extractor.test.d.ts +1 -0
- package/build/crawler/storybook-extractor.test.js +491 -0
- package/build/crawler/storybook-extractor.test.js.map +1 -0
- package/build/embeddings/fastembed.d.ts +25 -0
- package/build/embeddings/fastembed.js +188 -0
- package/build/embeddings/fastembed.js.map +1 -0
- package/build/embeddings/fastembed.test.d.ts +1 -0
- package/build/embeddings/fastembed.test.js +307 -0
- package/build/embeddings/fastembed.test.js.map +1 -0
- package/build/embeddings/openai.d.ts +8 -0
- package/build/embeddings/openai.js +56 -0
- package/build/embeddings/openai.js.map +1 -0
- package/build/embeddings/types.d.ts +4 -0
- package/build/embeddings/types.js +2 -0
- package/build/embeddings/types.js.map +1 -0
- package/build/index.d.ts +2 -0
- package/build/index.js +1007 -0
- package/build/index.js.map +1 -0
- package/build/index.test.d.ts +1 -0
- package/build/index.test.js +364 -0
- package/build/index.test.js.map +1 -0
- package/build/indexing/queue-manager.d.ts +36 -0
- package/build/indexing/queue-manager.js +86 -0
- package/build/indexing/queue-manager.js.map +1 -0
- package/build/indexing/queue-manager.test.d.ts +1 -0
- package/build/indexing/queue-manager.test.js +257 -0
- package/build/indexing/queue-manager.test.js.map +1 -0
- package/build/indexing/status.d.ts +39 -0
- package/build/indexing/status.js +207 -0
- package/build/indexing/status.js.map +1 -0
- package/build/indexing/status.test.d.ts +1 -0
- package/build/indexing/status.test.js +246 -0
- package/build/indexing/status.test.js.map +1 -0
- package/build/processor/content.d.ts +16 -0
- package/build/processor/content.js +286 -0
- package/build/processor/content.js.map +1 -0
- package/build/processor/content.test.d.ts +1 -0
- package/build/processor/content.test.js +369 -0
- package/build/processor/content.test.js.map +1 -0
- package/build/processor/markdown.d.ts +11 -0
- package/build/processor/markdown.js +256 -0
- package/build/processor/markdown.js.map +1 -0
- package/build/processor/markdown.test.d.ts +1 -0
- package/build/processor/markdown.test.js +312 -0
- package/build/processor/markdown.test.js.map +1 -0
- package/build/processor/metadata-parser.d.ts +37 -0
- package/build/processor/metadata-parser.js +245 -0
- package/build/processor/metadata-parser.js.map +1 -0
- package/build/processor/metadata-parser.test.d.ts +1 -0
- package/build/processor/metadata-parser.test.js +357 -0
- package/build/processor/metadata-parser.test.js.map +1 -0
- package/build/processor/processor.d.ts +8 -0
- package/build/processor/processor.js +190 -0
- package/build/processor/processor.js.map +1 -0
- package/build/processor/processor.test.d.ts +1 -0
- package/build/processor/processor.test.js +357 -0
- package/build/processor/processor.test.js.map +1 -0
- package/build/rag/cache.d.ts +10 -0
- package/build/rag/cache.js +10 -0
- package/build/rag/cache.js.map +1 -0
- package/build/rag/code-generator.d.ts +11 -0
- package/build/rag/code-generator.js +30 -0
- package/build/rag/code-generator.js.map +1 -0
- package/build/rag/context-assembler.d.ts +23 -0
- package/build/rag/context-assembler.js +113 -0
- package/build/rag/context-assembler.js.map +1 -0
- package/build/rag/docs-search.d.ts +55 -0
- package/build/rag/docs-search.js +380 -0
- package/build/rag/docs-search.js.map +1 -0
- package/build/rag/pipeline.d.ts +26 -0
- package/build/rag/pipeline.js +91 -0
- package/build/rag/pipeline.js.map +1 -0
- package/build/rag/query-processor.d.ts +14 -0
- package/build/rag/query-processor.js +57 -0
- package/build/rag/query-processor.js.map +1 -0
- package/build/rag/reranker.d.ts +55 -0
- package/build/rag/reranker.js +210 -0
- package/build/rag/reranker.js.map +1 -0
- package/build/rag/response-generator.d.ts +20 -0
- package/build/rag/response-generator.js +101 -0
- package/build/rag/response-generator.js.map +1 -0
- package/build/rag/retriever.d.ts +19 -0
- package/build/rag/retriever.js +111 -0
- package/build/rag/retriever.js.map +1 -0
- package/build/rag/validator.d.ts +22 -0
- package/build/rag/validator.js +128 -0
- package/build/rag/validator.js.map +1 -0
- package/build/rag/version-manager.d.ts +23 -0
- package/build/rag/version-manager.js +98 -0
- package/build/rag/version-manager.js.map +1 -0
- package/build/setupTests.d.ts +4 -0
- package/build/setupTests.js +50 -0
- package/build/setupTests.js.map +1 -0
- package/build/storage/storage.d.ts +38 -0
- package/build/storage/storage.js +700 -0
- package/build/storage/storage.js.map +1 -0
- package/build/storage/storage.test.d.ts +1 -0
- package/build/storage/storage.test.js +338 -0
- package/build/storage/storage.test.js.map +1 -0
- package/build/types/rag.d.ts +27 -0
- package/build/types/rag.js +2 -0
- package/build/types/rag.js.map +1 -0
- package/build/types.d.ts +120 -0
- package/build/types.js +2 -0
- package/build/types.js.map +1 -0
- package/build/util/content-utils.d.ts +31 -0
- package/build/util/content-utils.js +120 -0
- package/build/util/content-utils.js.map +1 -0
- package/build/util/content.d.ts +1 -0
- package/build/util/content.js +16 -0
- package/build/util/content.js.map +1 -0
- package/build/util/docs.d.ts +1 -0
- package/build/util/docs.js +26 -0
- package/build/util/docs.js.map +1 -0
- package/build/util/docs.test.d.ts +1 -0
- package/build/util/docs.test.js +49 -0
- package/build/util/docs.test.js.map +1 -0
- package/build/util/favicon.d.ts +6 -0
- package/build/util/favicon.js +88 -0
- package/build/util/favicon.js.map +1 -0
- package/build/util/favicon.test.d.ts +1 -0
- package/build/util/favicon.test.js +140 -0
- package/build/util/favicon.test.js.map +1 -0
- package/build/util/logger.d.ts +17 -0
- package/build/util/logger.js +72 -0
- package/build/util/logger.js.map +1 -0
- package/build/util/logger.test.d.ts +1 -0
- package/build/util/logger.test.js +46 -0
- package/build/util/logger.test.js.map +1 -0
- package/build/util/security.d.ts +312 -0
- package/build/util/security.js +719 -0
- package/build/util/security.js.map +1 -0
- package/build/util/security.test.d.ts +1 -0
- package/build/util/security.test.js +524 -0
- package/build/util/security.test.js.map +1 -0
- package/build/util/site-detector.d.ts +22 -0
- package/build/util/site-detector.js +42 -0
- package/build/util/site-detector.js.map +1 -0
- package/package.json +112 -0
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"content-extractor-types.js","sourceRoot":"","sources":["../../src/crawler/content-extractor-types.ts"],"names":[],"mappings":""}
|
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
import { StorybookExtractor } from './storybook-extractor.js';
|
|
2
|
+
import { GitHubPagesExtractor } from './github-pages-extractor.js';
|
|
3
|
+
import { DefaultExtractor } from './default-extractor.js';
|
|
4
|
+
export { ContentExtractor, ExtractedContent } from './content-extractor-types.js';
|
|
5
|
+
/**
 * Read-only table of extractor instances, looked up by name
 * (`storybook`, `github`, `default`).
 */
export declare const contentExtractors: Readonly<{
    storybook: StorybookExtractor;
    github: GitHubPagesExtractor;
    default: DefaultExtractor;
}>;
|
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
import { StorybookExtractor } from './storybook-extractor.js';
|
|
2
|
+
import { GitHubPagesExtractor } from './github-pages-extractor.js';
|
|
3
|
+
import { DefaultExtractor } from './default-extractor.js';
|
|
4
|
+
// One instance of each extractor, created when this module is loaded.
const storybook = new StorybookExtractor();
const github = new GitHubPagesExtractor();
const fallback = new DefaultExtractor();

/** Table of extractor instances, looked up by name. */
export const contentExtractors = {
    storybook,
    github,
    default: fallback,
};
|
|
9
|
+
//# sourceMappingURL=content-extractors.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"content-extractors.js","sourceRoot":"","sources":["../../src/crawler/content-extractors.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,kBAAkB,EAAE,MAAM,0BAA0B,CAAC;AAC9D,OAAO,EAAE,oBAAoB,EAAE,MAAM,6BAA6B,CAAC;AACnE,OAAO,EAAE,gBAAgB,EAAE,MAAM,wBAAwB,CAAC;AAI1D,MAAM,CAAC,MAAM,iBAAiB,GAAG;IAC/B,SAAS,EAAE,IAAI,kBAAkB,EAAE;IACnC,MAAM,EAAE,IAAI,oBAAoB,EAAE;IAClC,OAAO,EAAE,IAAI,gBAAgB,EAAE;CACvB,CAAC"}
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
/**
 * Clean up regular (non-code) text content.
 *
 * Steps, applied in this order:
 *  1. Literal backslash-n sequences (a backslash followed by `n`) become real newlines.
 *  2. CRLF and bare CR line endings are normalized to LF.
 *  3. Every run of non-newline whitespace (spaces, tabs, ...) is collapsed to one space.
 *     This also reduces any leading indentation on a line to a single space.
 *  4. Trailing whitespace is removed from each line.
 *  5. Runs of three or more newlines are reduced to two.
 *  6. The whole result is trimmed.
 *
 * @param {string} text - The raw text to clean.
 * @returns {string} The cleaned text.
 */
export function cleanContent(text) {
    return text
        .replace(/\\n/g, '\n')
        // Handle bare CR as a line break too; otherwise the whitespace collapse
        // below would turn it into a space and glue two lines together.
        .replace(/\r\n?/g, '\n')
        // Tabs are matched by this class as well, so no separate tab pass is needed.
        .replace(/[^\S\n]+/g, ' ')
        .split('\n')
        .map((line) => line.trimEnd())
        .join('\n')
        .replace(/\n{3,}/g, '\n\n')
        .trim();
}
|
|
14
|
+
/**
 * Clean up the text of a code block.
 *
 * Steps, applied in this order:
 *  1. CRLF and bare CR line endings are normalized to LF, so that blank-line
 *     collapsing below also works on Windows-style input.
 *  2. Leading and trailing whitespace of the whole block is removed.
 *  3. Tabs are replaced with spaces.
 *  4. Runs of three or more newlines are reduced to two.
 *  5. Non-breaking spaces (U+00A0) are replaced with regular spaces.
 *
 * @param {string} code - The raw code text.
 * @returns {string} The cleaned code text.
 */
export function cleanCodeBlock(code) {
    return code
        .replace(/\r\n?/g, '\n')
        .replace(/^\s+|\s+$/g, '')
        // NOTE(review): replacement width reproduced as it appears in this listing;
        // confirm the intended number of spaces per tab.
        .replace(/\t/g, ' ')
        .replace(/\n{3,}/g, '\n\n')
        .replace(/\u00A0/g, ' ');
}
|
|
22
|
+
//# sourceMappingURL=content-utils.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"content-utils.js","sourceRoot":"","sources":["../../src/crawler/content-utils.ts"],"names":[],"mappings":"AAAA,gDAAgD;AAChD,MAAM,UAAU,YAAY,CAAC,IAAY;IACvC,OAAO,IAAI;SACR,OAAO,CAAC,MAAM,EAAE,IAAI,CAAC,CAAC,2BAA2B;SACjD,OAAO,CAAC,OAAO,EAAE,IAAI,CAAC,CAAC,yBAAyB;SAChD,OAAO,CAAC,KAAK,EAAE,IAAI,CAAC,CAAC,yBAAyB;SAC9C,OAAO,CAAC,WAAW,EAAE,GAAG,CAAC,CAAC,8DAA8D;SACxF,KAAK,CAAC,IAAI,CAAC;SACX,GAAG,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,IAAI,CAAC,OAAO,EAAE,CAAC,CAAC,sDAAsD;SACpF,IAAI,CAAC,IAAI,CAAC;SACV,OAAO,CAAC,SAAS,EAAE,MAAM,CAAC,CAAC,6BAA6B;SACxD,IAAI,EAAE,CAAC;AACZ,CAAC;AAED,yDAAyD;AACzD,MAAM,UAAU,cAAc,CAAC,IAAY;IACzC,OAAO,IAAI;SACR,OAAO,CAAC,YAAY,EAAE,EAAE,CAAC,CAAC,kBAAkB;SAC5C,OAAO,CAAC,KAAK,EAAE,IAAI,CAAC,CAAC,yBAAyB;SAC9C,OAAO,CAAC,SAAS,EAAE,MAAM,CAAC,CAAC,8BAA8B;SACzD,OAAO,CAAC,SAAS,EAAE,GAAG,CAAC,CAAC,CAAC,8BAA8B;AAC5D,CAAC"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export {};
|
|
@@ -0,0 +1,99 @@
|
|
|
1
|
+
import { cleanContent, cleanCodeBlock } from './content-utils.js';
|
|
2
|
+
describe('Content Utilities', () => {
    // Each row is [test title, input, expected output].
    const cleanContentCases = [
        ['should convert escaped newlines to actual newlines', 'Line 1\\nLine 2\\nLine 3', 'Line 1\nLine 2\nLine 3'],
        ['should normalize Windows line endings', 'Line 1\r\nLine 2\r\nLine 3', 'Line 1\nLine 2\nLine 3'],
        // Tabs are converted to 2 spaces, then multiple spaces become single space
        ['should convert tabs to spaces', 'Line 1\tindented', 'Line 1 indented'],
        ['should replace multiple spaces with single space', 'Too many spaces', 'Too many spaces'],
        ['should trim trailing whitespace from each line', 'Line 1 \nLine 2 ', 'Line 1\nLine 2'],
        ['should preserve leading indentation', ' indented line\n more indented', 'indented line\n more indented'],
        ['should reduce more than 2 consecutive newlines to 2', 'Para 1\n\n\n\n\nPara 2', 'Para 1\n\nPara 2'],
        ['should trim leading and trailing whitespace', ' \n\nContent here\n\n ', 'Content here'],
        ['should handle empty string', '', ''],
        ['should handle string with only whitespace', ' \n\n ', ''],
        // Leading spaces preserved as single space, tabs converted to spaces then collapsed
        [
            'should handle complex mixed content',
            ' Title \r\n\r\n\r\n\r\n Content with multiple spaces \n\tTabbed',
            'Title\n\n Content with multiple spaces\n Tabbed',
        ],
    ];
    const cleanCodeBlockCases = [
        ['should trim leading and trailing whitespace', ' \nconst x = 1;\n ', 'const x = 1;'],
        ['should convert tabs to spaces', 'function test() {\n\treturn true;\n}', 'function test() {\n return true;\n}'],
        ['should reduce multiple blank lines', 'line 1\n\n\n\nline 2', 'line 1\n\nline 2'],
        ['should replace non-breaking spaces with regular spaces', 'const\u00A0x\u00A0=\u00A01;', 'const x = 1;'],
        ['should handle empty string', '', ''],
        [
            'should handle code with multiple formatting issues',
            '\n\t\tconst x\u00A0= 1;\n\n\n\n\treturn x;\n',
            'const x = 1;\n\n return x;',
        ],
        ['should preserve single blank lines', 'line 1\n\nline 2', 'line 1\n\nline 2'],
        [
            'should handle code with mixed indentation',
            'function test() {\n\tif (true) {\n\t\treturn;\n\t}\n}',
            'function test() {\n if (true) {\n return;\n }\n}',
        ],
    ];
    // Each row is [suite name, function under test, its cases].
    const suites = [
        ['cleanContent', cleanContent, cleanContentCases],
        ['cleanCodeBlock', cleanCodeBlock, cleanCodeBlockCases],
    ];
    for (const [suiteName, clean, cases] of suites) {
        describe(suiteName, () => {
            for (const [title, input, expected] of cases) {
                it(title, () => {
                    expect(clean(input)).toBe(expected);
                });
            }
        });
    }
});
|
|
99
|
+
//# sourceMappingURL=content-utils.test.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"content-utils.test.js","sourceRoot":"","sources":["../../src/crawler/content-utils.test.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,YAAY,EAAE,cAAc,EAAE,MAAM,oBAAoB,CAAC;AAElE,QAAQ,CAAC,mBAAmB,EAAE,GAAG,EAAE;IACjC,QAAQ,CAAC,cAAc,EAAE,GAAG,EAAE;QAC5B,EAAE,CAAC,oDAAoD,EAAE,GAAG,EAAE;YAC5D,MAAM,KAAK,GAAG,0BAA0B,CAAC;YACzC,MAAM,MAAM,GAAG,YAAY,CAAC,KAAK,CAAC,CAAC;YACnC,MAAM,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,wBAAwB,CAAC,CAAC;QAChD,CAAC,CAAC,CAAC;QAEH,EAAE,CAAC,uCAAuC,EAAE,GAAG,EAAE;YAC/C,MAAM,KAAK,GAAG,4BAA4B,CAAC;YAC3C,MAAM,MAAM,GAAG,YAAY,CAAC,KAAK,CAAC,CAAC;YACnC,MAAM,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,wBAAwB,CAAC,CAAC;QAChD,CAAC,CAAC,CAAC;QAEH,EAAE,CAAC,+BAA+B,EAAE,GAAG,EAAE;YACvC,MAAM,KAAK,GAAG,kBAAkB,CAAC;YACjC,MAAM,MAAM,GAAG,YAAY,CAAC,KAAK,CAAC,CAAC;YACnC,2EAA2E;YAC3E,MAAM,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,iBAAiB,CAAC,CAAC;QACzC,CAAC,CAAC,CAAC;QAEH,EAAE,CAAC,kDAAkD,EAAE,GAAG,EAAE;YAC1D,MAAM,KAAK,GAAG,wBAAwB,CAAC;YACvC,MAAM,MAAM,GAAG,YAAY,CAAC,KAAK,CAAC,CAAC;YACnC,MAAM,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,iBAAiB,CAAC,CAAC;QACzC,CAAC,CAAC,CAAC;QAEH,EAAE,CAAC,gDAAgD,EAAE,GAAG,EAAE;YACxD,MAAM,KAAK,GAAG,sBAAsB,CAAC;YACrC,MAAM,MAAM,GAAG,YAAY,CAAC,KAAK,CAAC,CAAC;YACnC,MAAM,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,gBAAgB,CAAC,CAAC;QACxC,CAAC,CAAC,CAAC;QAEH,EAAE,CAAC,qCAAqC,EAAE,GAAG,EAAE;YAC7C,MAAM,KAAK,GAAG,oCAAoC,CAAC;YACnD,MAAM,MAAM,GAAG,YAAY,CAAC,KAAK,CAAC,CAAC;YACnC,MAAM,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,+BAA+B,CAAC,CAAC;QACvD,CAAC,CAAC,CAAC;QAEH,EAAE,CAAC,qDAAqD,EAAE,GAAG,EAAE;YAC7D,MAAM,KAAK,GAAG,wBAAwB,CAAC;YACvC,MAAM,MAAM,GAAG,YAAY,CAAC,KAAK,CAAC,CAAC;YACnC,MAAM,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,kBAAkB,CAAC,CAAC;QAC1C,CAAC,CAAC,CAAC;QAEH,EAAE,CAAC,6CAA6C,EAAE,GAAG,EAAE;YACrD,MAAM,KAAK,GAAG,4BAA4B,CAAC;YAC3C,MAAM,MAAM,GAAG,YAAY,CAAC,KAAK,CAAC,CAAC;YACnC,MAAM,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,cAAc,CAAC,CAAC;QACtC,CAAC,CAAC,CAAC;QAEH,EAAE,CAAC,4BAA4B,EAAE,GAAG,EAAE;YACpC,MAAM,CAAC,YAAY,CAAC,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;QACpC,CAAC,CAAC,CAAC;QAEH,EAAE,CAAC,2CAA2C,EAAE,GAAG,EAAE
;YACnD,MAAM,CAAC,YAAY,CAAC,YAAY,CAAC,CAAC,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;QAC9C,CAAC,CAAC,CAAC;QAEH,EAAE,CAAC,qCAAqC,EAAE,GAAG,EAAE;YAC7C,MAAM,KAAK,GAAG,0EAA0E,CAAC;YACzF,MAAM,MAAM,GAAG,YAAY,CAAC,KAAK,CAAC,CAAC;YACnC,oFAAoF;YACpF,MAAM,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,iDAAiD,CAAC,CAAC;QACzE,CAAC,CAAC,CAAC;IACL,CAAC,CAAC,CAAC;IAEH,QAAQ,CAAC,gBAAgB,EAAE,GAAG,EAAE;QAC9B,EAAE,CAAC,6CAA6C,EAAE,GAAG,EAAE;YACrD,MAAM,KAAK,GAAG,wBAAwB,CAAC;YACvC,MAAM,MAAM,GAAG,cAAc,CAAC,KAAK,CAAC,CAAC;YACrC,MAAM,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,cAAc,CAAC,CAAC;QACtC,CAAC,CAAC,CAAC;QAEH,EAAE,CAAC,+BAA+B,EAAE,GAAG,EAAE;YACvC,MAAM,KAAK,GAAG,sCAAsC,CAAC;YACrD,MAAM,MAAM,GAAG,cAAc,CAAC,KAAK,CAAC,CAAC;YACrC,MAAM,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,sCAAsC,CAAC,CAAC;QAC9D,CAAC,CAAC,CAAC;QAEH,EAAE,CAAC,oCAAoC,EAAE,GAAG,EAAE;YAC5C,MAAM,KAAK,GAAG,sBAAsB,CAAC;YACrC,MAAM,MAAM,GAAG,cAAc,CAAC,KAAK,CAAC,CAAC;YACrC,MAAM,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,kBAAkB,CAAC,CAAC;QAC1C,CAAC,CAAC,CAAC;QAEH,EAAE,CAAC,wDAAwD,EAAE,GAAG,EAAE;YAChE,MAAM,KAAK,GAAG,6BAA6B,CAAC;YAC5C,MAAM,MAAM,GAAG,cAAc,CAAC,KAAK,CAAC,CAAC;YACrC,MAAM,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,cAAc,CAAC,CAAC;QACtC,CAAC,CAAC,CAAC;QAEH,EAAE,CAAC,4BAA4B,EAAE,GAAG,EAAE;YACpC,MAAM,CAAC,cAAc,CAAC,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;QACtC,CAAC,CAAC,CAAC;QAEH,EAAE,CAAC,oDAAoD,EAAE,GAAG,EAAE;YAC5D,MAAM,KAAK,GAAG,8CAA8C,CAAC;YAC7D,MAAM,MAAM,GAAG,cAAc,CAAC,KAAK,CAAC,CAAC;YACrC,MAAM,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,6BAA6B,CAAC,CAAC;QACrD,CAAC,CAAC,CAAC;QAEH,EAAE,CAAC,oCAAoC,EAAE,GAAG,EAAE;YAC5C,MAAM,KAAK,GAAG,kBAAkB,CAAC;YACjC,MAAM,MAAM,GAAG,cAAc,CAAC,KAAK,CAAC,CAAC;YACrC,MAAM,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,kBAAkB,CAAC,CAAC;QAC1C,CAAC,CAAC,CAAC;QAEH,EAAE,CAAC,2CAA2C,EAAE,GAAG,EAAE;YACnD,MAAM,KAAK,GAAG,uDAAuD,CAAC;YACtE,MAAM,MAAM,GAAG,cAAc,CAAC,KAAK,CAAC,CAAC;YACrC,MAAM,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,uDAAuD,CAAC,CAAC;QAC/E,CAAC,CAAC,CAAC;IACL,CAAC,CAAC,CAAC;AACL,CAAC,CAAC,CAAC"}
|
|
@@ -0,0 +1,63 @@
|
|
|
1
|
+
import { CrawlResult } from '../types.js';
|
|
2
|
+
import { BaseCrawler } from './base.js';
|
|
3
|
+
/**
 * Storage state used for authentication: a list of cookies and,
 * optionally, localStorage entries grouped by origin.
 */
export interface StorageState {
    /** Cookie records. */
    cookies: {
        name: string;
        value: string;
        domain: string;
        path: string;
        expires?: number;
        httpOnly?: boolean;
        secure?: boolean;
        sameSite?: 'Strict' | 'Lax' | 'None';
    }[];
    /** Optional localStorage name/value pairs, one group per origin. */
    origins?: {
        origin: string;
        localStorage: {
            name: string;
            value: string;
        }[];
    }[];
}
|
|
23
|
+
/**
 * Crawler that extends BaseCrawler and yields CrawlResult values from crawl().
 * Members are grouped below as: private state, public API, private helpers.
 */
export declare class CrawleeCrawler extends BaseCrawler {
    private crawler;
    private queueManager;
    /** Authentication state supplied through setStorageState(), if any. */
    private storageState?;
    private isFirstPage;
    private sessionExpiredError;
    private expectedUrl;
    /** Hostname pages must belong to; pages outside this domain are skipped. */
    private allowedHostname;
    /** Number of pages skipped because of a domain mismatch. */
    private skippedExternalPages;
    /** Optional path prefix used to restrict crawling. */
    private pathPrefix?;
    /**
     * Provide the cookies/localStorage to authenticate with while crawling.
     */
    setStorageState(state: StorageState): void;
    /**
     * Restrict crawling to URLs under the given path prefix.
     * Call this before crawl().
     */
    setPathPrefix(prefix: string): void;
    crawl(url: string): AsyncGenerator<CrawlResult, void, unknown>;
    abort(): void;
    /**
     * Tell whether a URL belongs to the allowed domain of this crawl, so that
     * redirects or links to external domains are not followed.
     *
     * @param url - The URL to check
     * @returns true if the URL is within the allowed domain
     */
    private isWithinAllowedDomain;
    /**
     * Tell whether a page looks like a login/authentication page.
     * Used to detect expired sessions during crawling.
     */
    private checkForLoginPage;
    private findContentFrame;
    private evaluateExtractor;
    private extractContent;
}
|
|
@@ -0,0 +1,342 @@
|
|
|
1
|
+
import { PlaywrightCrawler } from 'crawlee';
|
|
2
|
+
import { BaseCrawler } from './base.js';
|
|
3
|
+
import { siteRules } from './site-rules.js';
|
|
4
|
+
import { QueueManager } from './queue-manager.js';
|
|
5
|
+
import { getBrowserConfig } from './browser-config.js';
|
|
6
|
+
import { cleanContent } from './content-utils.js';
|
|
7
|
+
import { logger } from '../util/logger.js';
|
|
8
|
+
import { detectLoginPage, isLoginPageUrl, SessionExpiredError } from '../util/security.js';
|
|
9
|
+
export class CrawleeCrawler extends BaseCrawler {
|
|
10
|
+
crawler = null;
|
|
11
|
+
queueManager = new QueueManager();
|
|
12
|
+
storageState;
|
|
13
|
+
isFirstPage = true;
|
|
14
|
+
sessionExpiredError = null;
|
|
15
|
+
expectedUrl = '';
|
|
16
|
+
/** The allowed hostname for crawling - pages outside this domain are skipped */
|
|
17
|
+
allowedHostname = '';
|
|
18
|
+
/** Track pages skipped due to domain mismatch */
|
|
19
|
+
skippedExternalPages = 0;
|
|
20
|
+
/** Optional path prefix to restrict crawling */
|
|
21
|
+
pathPrefix;
|
|
22
|
+
/**
|
|
23
|
+
* Set authentication cookies/localStorage to use when crawling
|
|
24
|
+
*/
|
|
25
|
+
setStorageState(state) {
|
|
26
|
+
this.storageState = state;
|
|
27
|
+
logger.info(`[CrawleeCrawler] Set storage state with ${state.cookies?.length || 0} cookies`);
|
|
28
|
+
}
|
|
29
|
+
/**
|
|
30
|
+
* Check if a URL is within the allowed domain for this crawl.
|
|
31
|
+
* This prevents following redirects or links to external domains.
|
|
32
|
+
*
|
|
33
|
+
* @param url - The URL to check
|
|
34
|
+
* @returns true if the URL is within the allowed domain
|
|
35
|
+
*/
|
|
36
|
+
isWithinAllowedDomain(url) {
|
|
37
|
+
if (!this.allowedHostname) {
|
|
38
|
+
return true; // No restriction if not set
|
|
39
|
+
}
|
|
40
|
+
try {
|
|
41
|
+
const urlObj = new URL(url);
|
|
42
|
+
const hostname = urlObj.hostname.toLowerCase();
|
|
43
|
+
const allowed = this.allowedHostname.toLowerCase();
|
|
44
|
+
// Exact match
|
|
45
|
+
if (hostname === allowed) {
|
|
46
|
+
return true;
|
|
47
|
+
}
|
|
48
|
+
// Allow subdomains (e.g., docs.example.com when allowed is example.com)
|
|
49
|
+
// But NOT the other way around (github.com is not allowed for *.github.io)
|
|
50
|
+
if (hostname.endsWith('.' + allowed)) {
|
|
51
|
+
return true;
|
|
52
|
+
}
|
|
53
|
+
return false;
|
|
54
|
+
}
|
|
55
|
+
catch {
|
|
56
|
+
return false;
|
|
57
|
+
}
|
|
58
|
+
}
|
|
59
|
+
/**
|
|
60
|
+
* Check if a page appears to be a login/authentication page.
|
|
61
|
+
* This is used to detect expired sessions during crawling.
|
|
62
|
+
*/
|
|
63
|
+
async checkForLoginPage(page, currentUrl) {
|
|
64
|
+
// Only check the first page with high scrutiny
|
|
65
|
+
// (subsequent pages being login pages might be intentional navigation)
|
|
66
|
+
if (!this.isFirstPage) {
|
|
67
|
+
return false;
|
|
68
|
+
}
|
|
69
|
+
// Check URL pattern first (fast)
|
|
70
|
+
if (isLoginPageUrl(currentUrl)) {
|
|
71
|
+
logger.warn(`[CrawleeCrawler] First page URL matches login pattern: ${currentUrl}`);
|
|
72
|
+
return true;
|
|
73
|
+
}
|
|
74
|
+
// Check page content
|
|
75
|
+
try {
|
|
76
|
+
const bodyText = await page.evaluate(() => document.body?.textContent || '');
|
|
77
|
+
const pageHtml = await page.content();
|
|
78
|
+
const detection = detectLoginPage(bodyText + pageHtml, currentUrl);
|
|
79
|
+
if (detection.isLoginPage && detection.confidence >= 0.5) {
|
|
80
|
+
logger.warn(`[CrawleeCrawler] First page appears to be a login page (confidence: ${detection.confidence.toFixed(2)})`);
|
|
81
|
+
logger.debug(`[CrawleeCrawler] Detection reasons: ${detection.reasons.join(', ')}`);
|
|
82
|
+
// Store the error for throwing later (can't throw from request handler)
|
|
83
|
+
this.sessionExpiredError = new SessionExpiredError(`Authentication session has expired - crawled page is a login page`, this.expectedUrl, currentUrl, detection);
|
|
84
|
+
return true;
|
|
85
|
+
}
|
|
86
|
+
}
|
|
87
|
+
catch (error) {
|
|
88
|
+
logger.debug(`[CrawleeCrawler] Error checking for login page:`, error);
|
|
89
|
+
}
|
|
90
|
+
return false;
|
|
91
|
+
}
|
|
92
|
+
async findContentFrame(page) {
|
|
93
|
+
const frames = await page.frames();
|
|
94
|
+
const contentFrames = await Promise.all(frames.map(async (frame) => {
|
|
95
|
+
try {
|
|
96
|
+
const hasContent = await frame
|
|
97
|
+
.evaluate(() => {
|
|
98
|
+
return document.querySelector('.sbdocs-content, #docs-root, .docs-story, [class*="story-"]') !== null;
|
|
99
|
+
})
|
|
100
|
+
.catch(() => false);
|
|
101
|
+
if (hasContent) {
|
|
102
|
+
await Promise.all([
|
|
103
|
+
frame.waitForLoadState('domcontentloaded'),
|
|
104
|
+
frame
|
|
105
|
+
.waitForLoadState('networkidle', { timeout: 5000 })
|
|
106
|
+
.catch(() => logger.debug('Frame network idle timeout - continuing anyway')),
|
|
107
|
+
]);
|
|
108
|
+
return frame;
|
|
109
|
+
}
|
|
110
|
+
}
|
|
111
|
+
catch (error) {
|
|
112
|
+
logger.debug('Error checking frame', { error: String(error) });
|
|
113
|
+
}
|
|
114
|
+
return null;
|
|
115
|
+
}));
|
|
116
|
+
const frame = contentFrames.find((f) => f !== null) || null;
|
|
117
|
+
if (frame) {
|
|
118
|
+
logger.debug('Found content in iframe');
|
|
119
|
+
await new Promise((resolve) => setTimeout(resolve, 2000));
|
|
120
|
+
}
|
|
121
|
+
return frame;
|
|
122
|
+
}
|
|
123
|
+
async evaluateExtractor(context, extractor) {
|
|
124
|
+
const extractorCode = extractor.constructor.toString();
|
|
125
|
+
return context.evaluate(async (code) => {
|
|
126
|
+
const ExtractorClass = new Function(`return ${code}`)();
|
|
127
|
+
const extractor = new ExtractorClass();
|
|
128
|
+
const result = await extractor.extractContent(document);
|
|
129
|
+
return result.content;
|
|
130
|
+
}, extractorCode);
|
|
131
|
+
}
|
|
132
|
+
async extractContent(page, siteType, extractor) {
|
|
133
|
+
let content = '';
|
|
134
|
+
let extractorUsed = extractor.constructor.name;
|
|
135
|
+
try {
|
|
136
|
+
if (siteType === 'storybook') {
|
|
137
|
+
// Try iframe first
|
|
138
|
+
const frame = await this.findContentFrame(page);
|
|
139
|
+
if (frame) {
|
|
140
|
+
content = await this.evaluateExtractor(frame, extractor);
|
|
141
|
+
}
|
|
142
|
+
// Fallback to main page
|
|
143
|
+
if (!content) {
|
|
144
|
+
content = await this.evaluateExtractor(page, extractor);
|
|
145
|
+
}
|
|
146
|
+
}
|
|
147
|
+
else {
|
|
148
|
+
content = await this.evaluateExtractor(page, extractor);
|
|
149
|
+
}
|
|
150
|
+
}
|
|
151
|
+
catch {
|
|
152
|
+
content = await page.evaluate(() => document.body.textContent || '');
|
|
153
|
+
extractorUsed = 'ErrorFallback';
|
|
154
|
+
}
|
|
155
|
+
return { content, extractorUsed };
|
|
156
|
+
}
|
|
157
|
+
/**
|
|
158
|
+
* Set an optional path prefix to restrict crawling to URLs under this path.
|
|
159
|
+
* Must be called before crawl().
|
|
160
|
+
*/
|
|
161
|
+
setPathPrefix(prefix) {
|
|
162
|
+
this.pathPrefix = prefix;
|
|
163
|
+
logger.info(`[CrawleeCrawler] Path prefix restriction set: ${prefix}`);
|
|
164
|
+
}
|
|
165
|
+
async *crawl(url) {
|
|
166
|
+
logger.debug(`[${this.constructor.name}] Starting crawl of: ${url}`);
|
|
167
|
+
// Reset state for this crawl
|
|
168
|
+
this.isFirstPage = true;
|
|
169
|
+
this.sessionExpiredError = null;
|
|
170
|
+
this.expectedUrl = url;
|
|
171
|
+
this.skippedExternalPages = 0;
|
|
172
|
+
// Extract and store the allowed hostname from the initial URL
|
|
173
|
+
try {
|
|
174
|
+
this.allowedHostname = new URL(url).hostname;
|
|
175
|
+
logger.info(`[CrawleeCrawler] Domain restriction: only crawling pages on ${this.allowedHostname}`);
|
|
176
|
+
}
|
|
177
|
+
catch {
|
|
178
|
+
this.allowedHostname = '';
|
|
179
|
+
}
|
|
180
|
+
await this.queueManager.initialize(url, this.pathPrefix);
|
|
181
|
+
// Build crawler options with optional authentication
|
|
182
|
+
const crawlerOptions = getBrowserConfig(this.queueManager.getRequestQueue() ?? undefined);
|
|
183
|
+
// If we have storage state (auth cookies), configure the browser to use them
|
|
184
|
+
if (this.storageState) {
|
|
185
|
+
logger.info(`[CrawleeCrawler] Using authenticated session with ${this.storageState.cookies?.length || 0} cookies`);
|
|
186
|
+
crawlerOptions.launchContext = {
|
|
187
|
+
...crawlerOptions.launchContext,
|
|
188
|
+
launchOptions: {
|
|
189
|
+
...crawlerOptions.launchContext?.launchOptions,
|
|
190
|
+
},
|
|
191
|
+
};
|
|
192
|
+
crawlerOptions.browserPoolOptions = {
|
|
193
|
+
...crawlerOptions.browserPoolOptions,
|
|
194
|
+
preLaunchHooks: [
|
|
195
|
+
async (pageId) => {
|
|
196
|
+
// Storage state will be set in preNavigationHooks instead
|
|
197
|
+
logger.debug(`[CrawleeCrawler] Browser launching for page ${pageId}`);
|
|
198
|
+
},
|
|
199
|
+
],
|
|
200
|
+
};
|
|
201
|
+
// Add cookies via preNavigationHooks
|
|
202
|
+
const existingHooks = crawlerOptions.preNavigationHooks || [];
|
|
203
|
+
crawlerOptions.preNavigationHooks = [
|
|
204
|
+
...existingHooks,
|
|
205
|
+
async ({ page }) => {
|
|
206
|
+
if (this.storageState?.cookies) {
|
|
207
|
+
logger.debug(`[CrawleeCrawler] Setting ${this.storageState.cookies.length} cookies before navigation`);
|
|
208
|
+
await page.context().addCookies(this.storageState.cookies);
|
|
209
|
+
}
|
|
210
|
+
},
|
|
211
|
+
];
|
|
212
|
+
}
|
|
213
|
+
this.crawler = new PlaywrightCrawler({
|
|
214
|
+
...crawlerOptions,
|
|
215
|
+
requestHandler: async ({ request, page, enqueueLinks, log }) => {
|
|
216
|
+
if (this.isAborting) {
|
|
217
|
+
log.debug('Crawl aborted');
|
|
218
|
+
return;
|
|
219
|
+
}
|
|
220
|
+
try {
|
|
221
|
+
// Wait for initial page load
|
|
222
|
+
await Promise.all([
|
|
223
|
+
page.waitForLoadState('domcontentloaded'),
|
|
224
|
+
page.waitForLoadState('networkidle', { timeout: 5000 }).catch(() => log.debug('Network idle timeout - continuing anyway')),
|
|
225
|
+
]);
|
|
226
|
+
// Get the actual URL after any redirects
|
|
227
|
+
const actualUrl = page.url();
|
|
228
|
+
// Check if the page redirected outside the allowed domain
|
|
229
|
+
if (!this.isWithinAllowedDomain(actualUrl)) {
|
|
230
|
+
const requestedHostname = new URL(request.url).hostname;
|
|
231
|
+
const actualHostname = new URL(actualUrl).hostname;
|
|
232
|
+
if (this.isFirstPage) {
|
|
233
|
+
// First page redirected outside domain - likely auth redirect (session expired)
|
|
234
|
+
logger.warn(`[CrawleeCrawler] First page redirected outside allowed domain: ${requestedHostname} → ${actualHostname}`);
|
|
235
|
+
if (this.storageState) {
|
|
236
|
+
// We had auth but got redirected - session expired
|
|
237
|
+
this.sessionExpiredError = new SessionExpiredError(`Authentication session has expired - page redirected to external domain (${actualHostname})`, this.expectedUrl, actualUrl, { isLoginPage: true, confidence: 1.0, reasons: [`Redirected from ${requestedHostname} to ${actualHostname}`] });
|
|
238
|
+
log.error(`Session expired - redirected to external domain: ${actualHostname}. Aborting crawl.`);
|
|
239
|
+
this.abort();
|
|
240
|
+
return;
|
|
241
|
+
}
|
|
242
|
+
else {
|
|
243
|
+
// No auth but redirected - might be site misconfiguration
|
|
244
|
+
log.error(`First page redirected to external domain: ${actualHostname}. Aborting crawl.`);
|
|
245
|
+
this.abort();
|
|
246
|
+
return;
|
|
247
|
+
}
|
|
248
|
+
}
|
|
249
|
+
else {
|
|
250
|
+
// Subsequent page redirected outside domain - skip it
|
|
251
|
+
this.skippedExternalPages++;
|
|
252
|
+
log.warning(`Skipping page that redirected outside domain: ${request.url} → ${actualUrl} (skipped ${this.skippedExternalPages} external pages)`);
|
|
253
|
+
return;
|
|
254
|
+
}
|
|
255
|
+
}
|
|
256
|
+
// Check for login page on first page (detects expired sessions)
|
|
257
|
+
if (this.isFirstPage && this.storageState) {
|
|
258
|
+
const isLoginPage = await this.checkForLoginPage(page, actualUrl);
|
|
259
|
+
if (isLoginPage) {
|
|
260
|
+
log.error('Session appears expired - first page is a login page. Aborting crawl.');
|
|
261
|
+
this.abort();
|
|
262
|
+
return;
|
|
263
|
+
}
|
|
264
|
+
this.isFirstPage = false;
|
|
265
|
+
}
|
|
266
|
+
else if (this.isFirstPage) {
|
|
267
|
+
this.isFirstPage = false;
|
|
268
|
+
}
|
|
269
|
+
// Detect site type and get extractor
|
|
270
|
+
for (const rule of siteRules) {
|
|
271
|
+
if (await rule.detect(page)) {
|
|
272
|
+
if (rule.prepare) {
|
|
273
|
+
await rule.prepare(page, log);
|
|
274
|
+
}
|
|
275
|
+
await this.queueManager.handleQueueAndLinks(enqueueLinks, log, rule);
|
|
276
|
+
const title = await page.title();
|
|
277
|
+
const { content, extractorUsed } = await this.extractContent(page, rule.type, rule.extractor);
|
|
278
|
+
const result = {
|
|
279
|
+
url: request.url,
|
|
280
|
+
path: new URL(request.url).pathname + new URL(request.url).search,
|
|
281
|
+
content: cleanContent(content),
|
|
282
|
+
title,
|
|
283
|
+
extractorUsed,
|
|
284
|
+
};
|
|
285
|
+
this.queueManager.addResult(result);
|
|
286
|
+
this.markUrlProcessed(request.url);
|
|
287
|
+
break;
|
|
288
|
+
}
|
|
289
|
+
}
|
|
290
|
+
}
|
|
291
|
+
catch (error) {
|
|
292
|
+
const errorMessage = error instanceof Error ? error.message : String(error);
|
|
293
|
+
log.error(`Error processing ${request.url}: ${errorMessage}`);
|
|
294
|
+
}
|
|
295
|
+
},
|
|
296
|
+
});
|
|
297
|
+
try {
|
|
298
|
+
const crawlerPromise = this.crawler.run();
|
|
299
|
+
while (!this.isAborting) {
|
|
300
|
+
if (this.queueManager.hasEnoughResults()) {
|
|
301
|
+
for (const result of await this.queueManager.processBatch()) {
|
|
302
|
+
yield result;
|
|
303
|
+
}
|
|
304
|
+
}
|
|
305
|
+
if (await Promise.race([crawlerPromise.then(() => true), new Promise((resolve) => setTimeout(() => resolve(false), 100))]))
|
|
306
|
+
break;
|
|
307
|
+
}
|
|
308
|
+
await crawlerPromise;
|
|
309
|
+
logger.debug('Crawler finished');
|
|
310
|
+
// Log summary of domain-restricted crawling
|
|
311
|
+
if (this.skippedExternalPages > 0) {
|
|
312
|
+
logger.warn(`[CrawleeCrawler] Skipped ${this.skippedExternalPages} pages that redirected outside the allowed domain (${this.allowedHostname})`);
|
|
313
|
+
}
|
|
314
|
+
// Check if we detected an expired session during crawling
|
|
315
|
+
if (this.sessionExpiredError) {
|
|
316
|
+
throw this.sessionExpiredError;
|
|
317
|
+
}
|
|
318
|
+
for (const result of await this.queueManager.processBatch()) {
|
|
319
|
+
yield result;
|
|
320
|
+
}
|
|
321
|
+
}
|
|
322
|
+
catch (error) {
|
|
323
|
+
// Re-throw session expired errors as-is
|
|
324
|
+
if (error instanceof SessionExpiredError) {
|
|
325
|
+
throw error;
|
|
326
|
+
}
|
|
327
|
+
logger.debug('Crawler error:', error);
|
|
328
|
+
throw error;
|
|
329
|
+
}
|
|
330
|
+
finally {
|
|
331
|
+
await this.queueManager.cleanup();
|
|
332
|
+
this.crawler = null;
|
|
333
|
+
}
|
|
334
|
+
}
|
|
335
|
+
abort() {
|
|
336
|
+
super.abort();
|
|
337
|
+
if (this.crawler) {
|
|
338
|
+
this.crawler.teardown().catch((err) => logger.error('Failed to teardown crawler:', err));
|
|
339
|
+
}
|
|
340
|
+
}
|
|
341
|
+
}
|
|
342
|
+
//# sourceMappingURL=crawlee-crawler.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"crawlee-crawler.js","sourceRoot":"","sources":["../../src/crawler/crawlee-crawler.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,iBAAiB,EAAE,MAAM,SAAS,CAAC;AAE5C,OAAO,EAAE,WAAW,EAAE,MAAM,WAAW,CAAC;AAExC,OAAO,EAAE,SAAS,EAAE,MAAM,iBAAiB,CAAC;AAE5C,OAAO,EAAE,YAAY,EAAE,MAAM,oBAAoB,CAAC;AAClD,OAAO,EAAE,gBAAgB,EAAE,MAAM,qBAAqB,CAAC;AACvD,OAAO,EAAE,YAAY,EAAE,MAAM,oBAAoB,CAAC;AAClD,OAAO,EAAE,MAAM,EAAE,MAAM,mBAAmB,CAAC;AAC3C,OAAO,EAAE,eAAe,EAAE,cAAc,EAAE,mBAAmB,EAAE,MAAM,qBAAqB,CAAC;AAoB3F,MAAM,OAAO,cAAe,SAAQ,WAAW;IACrC,OAAO,GAA6B,IAAI,CAAC;IACzC,YAAY,GAAiB,IAAI,YAAY,EAAE,CAAC;IAChD,YAAY,CAAgB;IAC5B,WAAW,GAAY,IAAI,CAAC;IAC5B,mBAAmB,GAA+B,IAAI,CAAC;IACvD,WAAW,GAAW,EAAE,CAAC;IACjC,gFAAgF;IACxE,eAAe,GAAW,EAAE,CAAC;IACrC,iDAAiD;IACzC,oBAAoB,GAAW,CAAC,CAAC;IACzC,gDAAgD;IACxC,UAAU,CAAU;IAE5B;;OAEG;IACH,eAAe,CAAC,KAAmB;QACjC,IAAI,CAAC,YAAY,GAAG,KAAK,CAAC;QAC1B,MAAM,CAAC,IAAI,CAAC,2CAA2C,KAAK,CAAC,OAAO,EAAE,MAAM,IAAI,CAAC,UAAU,CAAC,CAAC;IAC/F,CAAC;IAED;;;;;;OAMG;IACK,qBAAqB,CAAC,GAAW;QACvC,IAAI,CAAC,IAAI,CAAC,eAAe,EAAE,CAAC;YAC1B,OAAO,IAAI,CAAC,CAAC,4BAA4B;QAC3C,CAAC;QAED,IAAI,CAAC;YACH,MAAM,MAAM,GAAG,IAAI,GAAG,CAAC,GAAG,CAAC,CAAC;YAC5B,MAAM,QAAQ,GAAG,MAAM,CAAC,QAAQ,CAAC,WAAW,EAAE,CAAC;YAC/C,MAAM,OAAO,GAAG,IAAI,CAAC,eAAe,CAAC,WAAW,EAAE,CAAC;YAEnD,cAAc;YACd,IAAI,QAAQ,KAAK,OAAO,EAAE,CAAC;gBACzB,OAAO,IAAI,CAAC;YACd,CAAC;YAED,wEAAwE;YACxE,2EAA2E;YAC3E,IAAI,QAAQ,CAAC,QAAQ,CAAC,GAAG,GAAG,OAAO,CAAC,EAAE,CAAC;gBACrC,OAAO,IAAI,CAAC;YACd,CAAC;YAED,OAAO,KAAK,CAAC;QACf,CAAC;QAAC,MAAM,CAAC;YACP,OAAO,KAAK,CAAC;QACf,CAAC;IACH,CAAC;IAED;;;OAGG;IACK,KAAK,CAAC,iBAAiB,CAAC,IAAU,EAAE,UAAkB;QAC5D,+CAA+C;QAC/C,uEAAuE;QACvE,IAAI,CAAC,IAAI,CAAC,WAAW,EAAE,CAAC;YACtB,OAAO,KAAK,CAAC;QACf,CAAC;QAED,iCAAiC;QACjC,IAAI,cAAc,CAAC,UAAU,CAAC,EAAE,CAAC;YAC/B,MAAM,CAAC,IAAI,CAAC,0DAA0D,UAAU,EAAE,CAAC,CAAC;YACpF,OAAO,IAAI,CAAC;QACd,CAAC;QAED,qBAAqB;QACrB,IAAI,CAAC;YACH,MAAM,QAAQ,GAAG,MAAM,IAAI,CAAC,QAAQ,CAAC,GAAG,EAAE,CAAC,QAAQ,CAAC,IAAI,EAAE,WAAW,IAAI,EAAE,CAAC,CAAC;YAC7E,MAAM,QAAQ,GAAG,MAAM,IAAI,CAAC
,OAAO,EAAE,CAAC;YACtC,MAAM,SAAS,GAAG,eAAe,CAAC,QAAQ,GAAG,QAAQ,EAAE,UAAU,CAAC,CAAC;YAEnE,IAAI,SAAS,CAAC,WAAW,IAAI,SAAS,CAAC,UAAU,IAAI,GAAG,EAAE,CAAC;gBACzD,MAAM,CAAC,IAAI,CAAC,uEAAuE,SAAS,CAAC,UAAU,CAAC,OAAO,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC;gBACvH,MAAM,CAAC,KAAK,CAAC,uCAAuC,SAAS,CAAC,OAAO,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;gBAEpF,wEAAwE;gBACxE,IAAI,CAAC,mBAAmB,GAAG,IAAI,mBAAmB,CAChD,mEAAmE,EACnE,IAAI,CAAC,WAAW,EAChB,UAAU,EACV,SAAS,CACV,CAAC;gBACF,OAAO,IAAI,CAAC;YACd,CAAC;QACH,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACf,MAAM,CAAC,KAAK,CAAC,iDAAiD,EAAE,KAAK,CAAC,CAAC;QACzE,CAAC;QAED,OAAO,KAAK,CAAC;IACf,CAAC;IAEO,KAAK,CAAC,gBAAgB,CAAC,IAAU;QACvC,MAAM,MAAM,GAAG,MAAM,IAAI,CAAC,MAAM,EAAE,CAAC;QACnC,MAAM,aAAa,GAAG,MAAM,OAAO,CAAC,GAAG,CACrC,MAAM,CAAC,GAAG,CAAC,KAAK,EAAE,KAAK,EAAE,EAAE;YACzB,IAAI,CAAC;gBACH,MAAM,UAAU,GAAG,MAAM,KAAK;qBAC3B,QAAQ,CAAC,GAAG,EAAE;oBACb,OAAO,QAAQ,CAAC,aAAa,CAAC,6DAA6D,CAAC,KAAK,IAAI,CAAC;gBACxG,CAAC,CAAC;qBACD,KAAK,CAAC,GAAG,EAAE,CAAC,KAAK,CAAC,CAAC;gBAEtB,IAAI,UAAU,EAAE,CAAC;oBACf,MAAM,OAAO,CAAC,GAAG,CAAC;wBAChB,KAAK,CAAC,gBAAgB,CAAC,kBAAkB,CAAC;wBAC1C,KAAK;6BACF,gBAAgB,CAAC,aAAa,EAAE,EAAE,OAAO,EAAE,IAAI,EAAE,CAAC;6BAClD,KAAK,CAAC,GAAG,EAAE,CAAC,MAAM,CAAC,KAAK,CAAC,gDAAgD,CAAC,CAAC;qBAC/E,CAAC,CAAC;oBACH,OAAO,KAAK,CAAC;gBACf,CAAC;YACH,CAAC;YAAC,OAAO,KAAK,EAAE,CAAC;gBACf,MAAM,CAAC,KAAK,CAAC,sBAAsB,EAAE,EAAE,KAAK,EAAE,MAAM,CAAC,KAAK,CAAC,EAAE,CAAC,CAAC;YACjE,CAAC;YACD,OAAO,IAAI,CAAC;QACd,CAAC,CAAC,CACH,CAAC;QAEF,MAAM,KAAK,GAAG,aAAa,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,KAAK,IAAI,CAAC,IAAI,IAAI,CAAC;QAC5D,IAAI,KAAK,EAAE,CAAC;YACV,MAAM,CAAC,KAAK,CAAC,yBAAyB,CAAC,CAAC;YACxC,MAAM,IAAI,OAAO,CAAC,CAAC,OAAO,EAAE,EAAE,CAAC,UAAU,CAAC,OAAO,EAAE,IAAI,CAAC,CAAC,CAAC;QAC5D,CAAC;QACD,OAAO,KAAK,CAAC;IACf,CAAC;IAEO,KAAK,CAAC,iBAAiB,CAAC,OAAqB,EAAE,SAA2B;QAChF,MAAM,aAAa,GAAG,SAAS,CAAC,WAAW,CAAC,QAAQ,EAAE,CAAC;QACvD,OAAO,OAAO,CAAC,QAAQ,CAAC,KAAK,EAAE,IAAY,EAAE,EAAE;YAC7C,MAAM,cAAc,GAAG,IAAI,QAAQ,CAAC,UAAU,IAAI,EAAE,CAAC,EAAE,CAAC;YACxD,MAAM,SAAS,GAAG,IAAI,cAAc,EAAE,C
AAC;YACvC,MAAM,MAAM,GAAG,MAAM,SAAS,CAAC,cAAc,CAAC,QAAQ,CAAC,CAAC;YACxD,OAAO,MAAM,CAAC,OAAO,CAAC;QACxB,CAAC,EAAE,aAAa,CAAC,CAAC;IACpB,CAAC;IAEO,KAAK,CAAC,cAAc,CAC1B,IAAU,EACV,QAAgB,EAChB,SAA2B;QAE3B,IAAI,OAAO,GAAG,EAAE,CAAC;QACjB,IAAI,aAAa,GAAG,SAAS,CAAC,WAAW,CAAC,IAAI,CAAC;QAE/C,IAAI,CAAC;YACH,IAAI,QAAQ,KAAK,WAAW,EAAE,CAAC;gBAC7B,mBAAmB;gBACnB,MAAM,KAAK,GAAG,MAAM,IAAI,CAAC,gBAAgB,CAAC,IAAI,CAAC,CAAC;gBAChD,IAAI,KAAK,EAAE,CAAC;oBACV,OAAO,GAAG,MAAM,IAAI,CAAC,iBAAiB,CAAC,KAAK,EAAE,SAAS,CAAC,CAAC;gBAC3D,CAAC;gBAED,wBAAwB;gBACxB,IAAI,CAAC,OAAO,EAAE,CAAC;oBACb,OAAO,GAAG,MAAM,IAAI,CAAC,iBAAiB,CAAC,IAAI,EAAE,SAAS,CAAC,CAAC;gBAC1D,CAAC;YACH,CAAC;iBAAM,CAAC;gBACN,OAAO,GAAG,MAAM,IAAI,CAAC,iBAAiB,CAAC,IAAI,EAAE,SAAS,CAAC,CAAC;YAC1D,CAAC;QACH,CAAC;QAAC,MAAM,CAAC;YACP,OAAO,GAAG,MAAM,IAAI,CAAC,QAAQ,CAAS,GAAG,EAAE,CAAC,QAAQ,CAAC,IAAI,CAAC,WAAW,IAAI,EAAE,CAAC,CAAC;YAC7E,aAAa,GAAG,eAAe,CAAC;QAClC,CAAC;QAED,OAAO,EAAE,OAAO,EAAE,aAAa,EAAE,CAAC;IACpC,CAAC;IAED;;;OAGG;IACH,aAAa,CAAC,MAAc;QAC1B,IAAI,CAAC,UAAU,GAAG,MAAM,CAAC;QACzB,MAAM,CAAC,IAAI,CAAC,iDAAiD,MAAM,EAAE,CAAC,CAAC;IACzE,CAAC;IAED,KAAK,CAAC,CAAC,KAAK,CAAC,GAAW;QACtB,MAAM,CAAC,KAAK,CAAC,IAAI,IAAI,CAAC,WAAW,CAAC,IAAI,wBAAwB,GAAG,EAAE,CAAC,CAAC;QAErE,6BAA6B;QAC7B,IAAI,CAAC,WAAW,GAAG,IAAI,CAAC;QACxB,IAAI,CAAC,mBAAmB,GAAG,IAAI,CAAC;QAChC,IAAI,CAAC,WAAW,GAAG,GAAG,CAAC;QACvB,IAAI,CAAC,oBAAoB,GAAG,CAAC,CAAC;QAE9B,8DAA8D;QAC9D,IAAI,CAAC;YACH,IAAI,CAAC,eAAe,GAAG,IAAI,GAAG,CAAC,GAAG,CAAC,CAAC,QAAQ,CAAC;YAC7C,MAAM,CAAC,IAAI,CAAC,+DAA+D,IAAI,CAAC,eAAe,EAAE,CAAC,CAAC;QACrG,CAAC;QAAC,MAAM,CAAC;YACP,IAAI,CAAC,eAAe,GAAG,EAAE,CAAC;QAC5B,CAAC;QAED,MAAM,IAAI,CAAC,YAAY,CAAC,UAAU,CAAC,GAAG,EAAE,IAAI,CAAC,UAAU,CAAC,CAAC;QAEzD,qDAAqD;QACrD,MAAM,cAAc,GAAG,gBAAgB,CAAC,IAAI,CAAC,YAAY,CAAC,eAAe,EAAE,IAAI,SAAS,CAAC,CAAC;QAE1F,6EAA6E;QAC7E,IAAI,IAAI,CAAC,YAAY,EAAE,CAAC;YACtB,MAAM,CAAC,IAAI,CAAC,qDAAqD,IAAI,CAAC,YAAY,CAAC,OAAO,EAAE,MAAM,IAAI,CAAC,UAAU,CAAC,CAAC;YACnH,cAAc,CAAC,aAAa,GAAG;gBAC7B,GAAG,cAAc,CAAC,aAAa;gBAC/B,aAAa,EAAE;oBACb,GAAG,cAAc,CAAC,aAAa,EAA
E,aAAa;iBAC/C;aACF,CAAC;YACF,cAAc,CAAC,kBAAkB,GAAG;gBAClC,GAAG,cAAc,CAAC,kBAAkB;gBACpC,cAAc,EAAE;oBACd,KAAK,EAAE,MAAM,EAAE,EAAE;wBACf,0DAA0D;wBAC1D,MAAM,CAAC,KAAK,CAAC,+CAA+C,MAAM,EAAE,CAAC,CAAC;oBACxE,CAAC;iBACF;aACF,CAAC;YACF,qCAAqC;YACrC,MAAM,aAAa,GAAG,cAAc,CAAC,kBAAkB,IAAI,EAAE,CAAC;YAC9D,cAAc,CAAC,kBAAkB,GAAG;gBAClC,GAAG,aAAa;gBAChB,KAAK,EAAE,EAAE,IAAI,EAAE,EAAE,EAAE;oBACjB,IAAI,IAAI,CAAC,YAAY,EAAE,OAAO,EAAE,CAAC;wBAC/B,MAAM,CAAC,KAAK,CAAC,4BAA4B,IAAI,CAAC,YAAY,CAAC,OAAO,CAAC,MAAM,4BAA4B,CAAC,CAAC;wBACvG,MAAM,IAAI,CAAC,OAAO,EAAE,CAAC,UAAU,CAAC,IAAI,CAAC,YAAY,CAAC,OAAO,CAAC,CAAC;oBAC7D,CAAC;gBACH,CAAC;aACF,CAAC;QACJ,CAAC;QAED,IAAI,CAAC,OAAO,GAAG,IAAI,iBAAiB,CAAC;YACnC,GAAG,cAAc;YACjB,cAAc,EAAE,KAAK,EAAE,EAAE,OAAO,EAAE,IAAI,EAAE,YAAY,EAAE,GAAG,EAAE,EAAE,EAAE;gBAC7D,IAAI,IAAI,CAAC,UAAU,EAAE,CAAC;oBACpB,GAAG,CAAC,KAAK,CAAC,eAAe,CAAC,CAAC;oBAC3B,OAAO;gBACT,CAAC;gBAED,IAAI,CAAC;oBACH,6BAA6B;oBAC7B,MAAM,OAAO,CAAC,GAAG,CAAC;wBAChB,IAAI,CAAC,gBAAgB,CAAC,kBAAkB,CAAC;wBACzC,IAAI,CAAC,gBAAgB,CAAC,aAAa,EAAE,EAAE,OAAO,EAAE,IAAI,EAAE,CAAC,CAAC,KAAK,CAAC,GAAG,EAAE,CAAC,GAAG,CAAC,KAAK,CAAC,0CAA0C,CAAC,CAAC;qBAC3H,CAAC,CAAC;oBAEH,yCAAyC;oBACzC,MAAM,SAAS,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;oBAE7B,0DAA0D;oBAC1D,IAAI,CAAC,IAAI,CAAC,qBAAqB,CAAC,SAAS,CAAC,EAAE,CAAC;wBAC3C,MAAM,iBAAiB,GAAG,IAAI,GAAG,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC,QAAQ,CAAC;wBACxD,MAAM,cAAc,GAAG,IAAI,GAAG,CAAC,SAAS,CAAC,CAAC,QAAQ,CAAC;wBAEnD,IAAI,IAAI,CAAC,WAAW,EAAE,CAAC;4BACrB,gFAAgF;4BAChF,MAAM,CAAC,IAAI,CAAC,kEAAkE,iBAAiB,MAAM,cAAc,EAAE,CAAC,CAAC;4BAEvH,IAAI,IAAI,CAAC,YAAY,EAAE,CAAC;gCACtB,mDAAmD;gCACnD,IAAI,CAAC,mBAAmB,GAAG,IAAI,mBAAmB,CAChD,4EAA4E,cAAc,GAAG,EAC7F,IAAI,CAAC,WAAW,EAChB,SAAS,EACT,EAAE,WAAW,EAAE,IAAI,EAAE,UAAU,EAAE,GAAG,EAAE,OAAO,EAAE,CAAC,mBAAmB,iBAAiB,OAAO,cAAc,EAAE,CAAC,EAAE,CAC/G,CAAC;gCACF,GAAG,CAAC,KAAK,CAAC,oDAAoD,cAAc,mBAAmB,CAAC,CAAC;gCACjG,IAAI,CAAC,KAAK,EAAE,CAAC;gCACb,OAAO;4BACT,CAAC;iCAAM,CAAC;gCACN,0DAA0D;gCAC1D,GAAG,CAAC,KAAK,CAAC,6CAA6C,cAAc,mBAAmB,CAAC,CAAC;gCAC1F,IAAI,CAAC,KAAK,EAAE,CAAC;gC
ACb,OAAO;4BACT,CAAC;wBACH,CAAC;6BAAM,CAAC;4BACN,sDAAsD;4BACtD,IAAI,CAAC,oBAAoB,EAAE,CAAC;4BAC5B,GAAG,CAAC,OAAO,CACT,iDAAiD,OAAO,CAAC,GAAG,MAAM,SAAS,aAAa,IAAI,CAAC,oBAAoB,kBAAkB,CACpI,CAAC;4BACF,OAAO;wBACT,CAAC;oBACH,CAAC;oBAED,gEAAgE;oBAChE,IAAI,IAAI,CAAC,WAAW,IAAI,IAAI,CAAC,YAAY,EAAE,CAAC;wBAC1C,MAAM,WAAW,GAAG,MAAM,IAAI,CAAC,iBAAiB,CAAC,IAAI,EAAE,SAAS,CAAC,CAAC;wBAClE,IAAI,WAAW,EAAE,CAAC;4BAChB,GAAG,CAAC,KAAK,CAAC,uEAAuE,CAAC,CAAC;4BACnF,IAAI,CAAC,KAAK,EAAE,CAAC;4BACb,OAAO;wBACT,CAAC;wBACD,IAAI,CAAC,WAAW,GAAG,KAAK,CAAC;oBAC3B,CAAC;yBAAM,IAAI,IAAI,CAAC,WAAW,EAAE,CAAC;wBAC5B,IAAI,CAAC,WAAW,GAAG,KAAK,CAAC;oBAC3B,CAAC;oBAED,qCAAqC;oBACrC,KAAK,MAAM,IAAI,IAAI,SAAS,EAAE,CAAC;wBAC7B,IAAI,MAAM,IAAI,CAAC,MAAM,CAAC,IAAI,CAAC,EAAE,CAAC;4BAC5B,IAAI,IAAI,CAAC,OAAO,EAAE,CAAC;gCACjB,MAAM,IAAI,CAAC,OAAO,CAAC,IAAI,EAAE,GAAG,CAAC,CAAC;4BAChC,CAAC;4BAED,MAAM,IAAI,CAAC,YAAY,CAAC,mBAAmB,CAAC,YAAY,EAAE,GAAG,EAAE,IAAI,CAAC,CAAC;4BAErE,MAAM,KAAK,GAAG,MAAM,IAAI,CAAC,KAAK,EAAE,CAAC;4BACjC,MAAM,EAAE,OAAO,EAAE,aAAa,EAAE,GAAG,MAAM,IAAI,CAAC,cAAc,CAAC,IAAI,EAAE,IAAI,CAAC,IAAI,EAAE,IAAI,CAAC,SAAS,CAAC,CAAC;4BAE9F,MAAM,MAAM,GAAgB;gCAC1B,GAAG,EAAE,OAAO,CAAC,GAAG;gCAChB,IAAI,EAAE,IAAI,GAAG,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC,QAAQ,GAAG,IAAI,GAAG,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC,MAAM;gCACjE,OAAO,EAAE,YAAY,CAAC,OAAO,CAAC;gCAC9B,KAAK;gCACL,aAAa;6BACd,CAAC;4BAEF,IAAI,CAAC,YAAY,CAAC,SAAS,CAAC,MAAM,CAAC,CAAC;4BACpC,IAAI,CAAC,gBAAgB,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC;4BACnC,MAAM;wBACR,CAAC;oBACH,CAAC;gBACH,CAAC;gBAAC,OAAO,KAAK,EAAE,CAAC;oBACf,MAAM,YAAY,GAAG,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC;oBAC5E,GAAG,CAAC,KAAK,CAAC,oBAAoB,OAAO,CAAC,GAAG,KAAK,YAAY,EAAE,CAAC,CAAC;gBAChE,CAAC;YACH,CAAC;SACF,CAAC,CAAC;QAEH,IAAI,CAAC;YACH,MAAM,cAAc,GAAG,IAAI,CAAC,OAAO,CAAC,GAAG,EAAE,CAAC;YAE1C,OAAO,CAAC,IAAI,CAAC,UAAU,EAAE,CAAC;gBACxB,IAAI,IAAI,CAAC,YAAY,CAAC,gBAAgB,EAAE,EAAE,CAAC;oBACzC,KAAK,MAAM,MAAM,IAAI,MAAM,IAAI,CAAC,YAAY,CAAC,YAAY,EAAE,EAAE,CAAC;wBAC5D,MAAM,MAAM,CAAC;oBACf,CAAC;gBACH,
CAAC;gBAED,IAAI,MAAM,OAAO,CAAC,IAAI,CAAC,CAAC,cAAc,CAAC,IAAI,CAAC,GAAG,EAAE,CAAC,IAAI,CAAC,EAAE,IAAI,OAAO,CAAC,CAAC,OAAO,EAAE,EAAE,CAAC,UAAU,CAAC,GAAG,EAAE,CAAC,OAAO,CAAC,KAAK,CAAC,EAAE,GAAG,CAAC,CAAC,CAAC,CAAC;oBAAE,MAAM;YACpI,CAAC;YAED,MAAM,cAAc,CAAC;YACrB,MAAM,CAAC,KAAK,CAAC,kBAAkB,CAAC,CAAC;YAEjC,4CAA4C;YAC5C,IAAI,IAAI,CAAC,oBAAoB,GAAG,CAAC,EAAE,CAAC;gBAClC,MAAM,CAAC,IAAI,CACT,4BAA4B,IAAI,CAAC,oBAAoB,sDAAsD,IAAI,CAAC,eAAe,GAAG,CACnI,CAAC;YACJ,CAAC;YAED,0DAA0D;YAC1D,IAAI,IAAI,CAAC,mBAAmB,EAAE,CAAC;gBAC7B,MAAM,IAAI,CAAC,mBAAmB,CAAC;YACjC,CAAC;YAED,KAAK,MAAM,MAAM,IAAI,MAAM,IAAI,CAAC,YAAY,CAAC,YAAY,EAAE,EAAE,CAAC;gBAC5D,MAAM,MAAM,CAAC;YACf,CAAC;QACH,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACf,wCAAwC;YACxC,IAAI,KAAK,YAAY,mBAAmB,EAAE,CAAC;gBACzC,MAAM,KAAK,CAAC;YACd,CAAC;YACD,MAAM,CAAC,KAAK,CAAC,gBAAgB,EAAE,KAAK,CAAC,CAAC;YACtC,MAAM,KAAK,CAAC;QACd,CAAC;gBAAS,CAAC;YACT,MAAM,IAAI,CAAC,YAAY,CAAC,OAAO,EAAE,CAAC;YAClC,IAAI,CAAC,OAAO,GAAG,IAAI,CAAC;QACtB,CAAC;IACH,CAAC;IAED,KAAK;QACH,KAAK,CAAC,KAAK,EAAE,CAAC;QACd,IAAI,IAAI,CAAC,OAAO,EAAE,CAAC;YACjB,IAAI,CAAC,OAAO,CAAC,QAAQ,EAAE,CAAC,KAAK,CAAC,CAAC,GAAG,EAAE,EAAE,CAAC,MAAM,CAAC,KAAK,CAAC,6BAA6B,EAAE,GAAG,CAAC,CAAC,CAAC;QAC3F,CAAC;IACH,CAAC;CACF"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export {};
|