@staticn0va/wigolo 0.6.3 → 0.6.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +43 -19
- package/SKILL.md +30 -8
- package/assets/blocks/claude-code/CLAUDE.md.block +20 -0
- package/assets/blocks/claude-code/wigolo-command.md +40 -0
- package/assets/blocks/cursor/wigolo.mdc +46 -0
- package/assets/blocks/gemini-cli/GEMINI.md.block +18 -0
- package/assets/blocks/vscode/copilot-instructions.md.block +18 -0
- package/assets/skills/wigolo/SKILL.md +50 -0
- package/assets/skills/wigolo/rules/cache-first.md +30 -0
- package/assets/skills/wigolo/rules/synthesis.md +43 -0
- package/assets/skills/wigolo-agent/SKILL.md +73 -0
- package/assets/skills/wigolo-crawl/SKILL.md +60 -0
- package/assets/skills/wigolo-extract/SKILL.md +59 -0
- package/assets/skills/wigolo-fetch/SKILL.md +65 -0
- package/assets/skills/wigolo-find-similar/SKILL.md +72 -0
- package/assets/skills/wigolo-research/SKILL.md +77 -0
- package/assets/skills/wigolo-search/SKILL.md +78 -0
- package/dist/agent/pipeline.js +3 -3
- package/dist/agent/pipeline.js.map +1 -1
- package/dist/cache/store.d.ts.map +1 -1
- package/dist/cache/store.js +44 -33
- package/dist/cache/store.js.map +1 -1
- package/dist/cli/agents/antigravity.d.ts +20 -0
- package/dist/cli/agents/antigravity.d.ts.map +1 -0
- package/dist/cli/agents/antigravity.js +56 -0
- package/dist/cli/agents/antigravity.js.map +1 -0
- package/dist/cli/agents/claude-code.d.ts +25 -0
- package/dist/cli/agents/claude-code.d.ts.map +1 -0
- package/dist/cli/agents/claude-code.js +117 -0
- package/dist/cli/agents/claude-code.js.map +1 -0
- package/dist/cli/agents/cursor.d.ts +21 -0
- package/dist/cli/agents/cursor.d.ts.map +1 -0
- package/dist/cli/agents/cursor.js +57 -0
- package/dist/cli/agents/cursor.js.map +1 -0
- package/dist/cli/agents/gemini-cli.d.ts +21 -0
- package/dist/cli/agents/gemini-cli.d.ts.map +1 -0
- package/dist/cli/agents/gemini-cli.js +55 -0
- package/dist/cli/agents/gemini-cli.js.map +1 -0
- package/dist/cli/agents/registry.d.ts +21 -0
- package/dist/cli/agents/registry.d.ts.map +1 -0
- package/dist/cli/agents/registry.js +20 -0
- package/dist/cli/agents/registry.js.map +1 -0
- package/dist/cli/agents/utils.d.ts +26 -0
- package/dist/cli/agents/utils.d.ts.map +1 -0
- package/dist/cli/agents/utils.js +151 -0
- package/dist/cli/agents/utils.js.map +1 -0
- package/dist/cli/agents/vscode.d.ts +21 -0
- package/dist/cli/agents/vscode.d.ts.map +1 -0
- package/dist/cli/agents/vscode.js +58 -0
- package/dist/cli/agents/vscode.js.map +1 -0
- package/dist/cli/index.d.ts +1 -1
- package/dist/cli/index.d.ts.map +1 -1
- package/dist/cli/index.js +13 -1
- package/dist/cli/index.js.map +1 -1
- package/dist/cli/init.d.ts +2 -0
- package/dist/cli/init.d.ts.map +1 -0
- package/dist/cli/init.js +200 -0
- package/dist/cli/init.js.map +1 -0
- package/dist/cli/setup-mcp.d.ts +2 -0
- package/dist/cli/setup-mcp.d.ts.map +1 -0
- package/dist/cli/setup-mcp.js +116 -0
- package/dist/cli/setup-mcp.js.map +1 -0
- package/dist/cli/status.d.ts +2 -0
- package/dist/cli/status.d.ts.map +1 -0
- package/dist/cli/status.js +32 -0
- package/dist/cli/status.js.map +1 -0
- package/dist/cli/tui/agents-types.d.ts +28 -0
- package/dist/cli/tui/agents-types.d.ts.map +1 -0
- package/dist/cli/tui/agents-types.js +2 -0
- package/dist/cli/tui/agents-types.js.map +1 -0
- package/dist/cli/tui/agents.d.ts +11 -0
- package/dist/cli/tui/agents.d.ts.map +1 -0
- package/dist/cli/tui/agents.js +101 -0
- package/dist/cli/tui/agents.js.map +1 -0
- package/dist/cli/tui/banner.d.ts +3 -0
- package/dist/cli/tui/banner.d.ts.map +1 -0
- package/dist/cli/tui/banner.js +25 -0
- package/dist/cli/tui/banner.js.map +1 -0
- package/dist/cli/tui/components/AgentSelect.d.ts +13 -0
- package/dist/cli/tui/components/AgentSelect.d.ts.map +1 -0
- package/dist/cli/tui/components/AgentSelect.js +88 -0
- package/dist/cli/tui/components/AgentSelect.js.map +1 -0
- package/dist/cli/tui/components/Banner.d.ts +6 -0
- package/dist/cli/tui/components/Banner.d.ts.map +1 -0
- package/dist/cli/tui/components/Banner.js +15 -0
- package/dist/cli/tui/components/Banner.js.map +1 -0
- package/dist/cli/tui/components/BrowserSelect.d.ts +7 -0
- package/dist/cli/tui/components/BrowserSelect.d.ts.map +1 -0
- package/dist/cli/tui/components/BrowserSelect.js +12 -0
- package/dist/cli/tui/components/BrowserSelect.js.map +1 -0
- package/dist/cli/tui/components/InstallProgress.d.ts +9 -0
- package/dist/cli/tui/components/InstallProgress.d.ts.map +1 -0
- package/dist/cli/tui/components/InstallProgress.js +34 -0
- package/dist/cli/tui/components/InstallProgress.js.map +1 -0
- package/dist/cli/tui/components/SkillInstall.d.ts +14 -0
- package/dist/cli/tui/components/SkillInstall.d.ts.map +1 -0
- package/dist/cli/tui/components/SkillInstall.js +80 -0
- package/dist/cli/tui/components/SkillInstall.js.map +1 -0
- package/dist/cli/tui/components/Summary.d.ts +22 -0
- package/dist/cli/tui/components/Summary.d.ts.map +1 -0
- package/dist/cli/tui/components/Summary.js +19 -0
- package/dist/cli/tui/components/Summary.js.map +1 -0
- package/dist/cli/tui/components/SystemCheck.d.ts +8 -0
- package/dist/cli/tui/components/SystemCheck.d.ts.map +1 -0
- package/dist/cli/tui/components/SystemCheck.js +36 -0
- package/dist/cli/tui/components/SystemCheck.js.map +1 -0
- package/dist/cli/tui/components/Verification.d.ts +8 -0
- package/dist/cli/tui/components/Verification.d.ts.map +1 -0
- package/dist/cli/tui/components/Verification.js +31 -0
- package/dist/cli/tui/components/Verification.js.map +1 -0
- package/dist/cli/tui/config-writer-cli.d.ts +12 -0
- package/dist/cli/tui/config-writer-cli.d.ts.map +1 -0
- package/dist/cli/tui/config-writer-cli.js +33 -0
- package/dist/cli/tui/config-writer-cli.js.map +1 -0
- package/dist/cli/tui/config-writer-json.d.ts +16 -0
- package/dist/cli/tui/config-writer-json.d.ts.map +1 -0
- package/dist/cli/tui/config-writer-json.js +89 -0
- package/dist/cli/tui/config-writer-json.js.map +1 -0
- package/dist/cli/tui/config-writer-toml.d.ts +16 -0
- package/dist/cli/tui/config-writer-toml.d.ts.map +1 -0
- package/dist/cli/tui/config-writer-toml.js +88 -0
- package/dist/cli/tui/config-writer-toml.js.map +1 -0
- package/dist/cli/tui/config-writer.d.ts +25 -0
- package/dist/cli/tui/config-writer.d.ts.map +1 -0
- package/dist/cli/tui/config-writer.js +98 -0
- package/dist/cli/tui/config-writer.js.map +1 -0
- package/dist/cli/tui/detect-helpers.d.ts +6 -0
- package/dist/cli/tui/detect-helpers.d.ts.map +1 -0
- package/dist/cli/tui/detect-helpers.js +44 -0
- package/dist/cli/tui/detect-helpers.js.map +1 -0
- package/dist/cli/tui/flags-types.d.ts +19 -0
- package/dist/cli/tui/flags-types.d.ts.map +1 -0
- package/dist/cli/tui/flags-types.js +19 -0
- package/dist/cli/tui/flags-types.js.map +1 -0
- package/dist/cli/tui/flags.d.ts +5 -0
- package/dist/cli/tui/flags.d.ts.map +1 -0
- package/dist/cli/tui/flags.js +124 -0
- package/dist/cli/tui/flags.js.map +1 -0
- package/dist/cli/tui/format.d.ts +14 -0
- package/dist/cli/tui/format.d.ts.map +1 -0
- package/dist/cli/tui/format.js +28 -0
- package/dist/cli/tui/format.js.map +1 -0
- package/dist/cli/tui/hooks/useAgentDetect.d.ts +6 -0
- package/dist/cli/tui/hooks/useAgentDetect.d.ts.map +1 -0
- package/dist/cli/tui/hooks/useAgentDetect.js +18 -0
- package/dist/cli/tui/hooks/useAgentDetect.js.map +1 -0
- package/dist/cli/tui/hooks/useInstall.d.ts +14 -0
- package/dist/cli/tui/hooks/useInstall.d.ts.map +1 -0
- package/dist/cli/tui/hooks/useInstall.js +70 -0
- package/dist/cli/tui/hooks/useInstall.js.map +1 -0
- package/dist/cli/tui/hooks/useSystemCheck.d.ts +13 -0
- package/dist/cli/tui/hooks/useSystemCheck.d.ts.map +1 -0
- package/dist/cli/tui/hooks/useSystemCheck.js +97 -0
- package/dist/cli/tui/hooks/useSystemCheck.js.map +1 -0
- package/dist/cli/tui/hooks/useVerify.d.ts +14 -0
- package/dist/cli/tui/hooks/useVerify.d.ts.map +1 -0
- package/dist/cli/tui/hooks/useVerify.js +52 -0
- package/dist/cli/tui/hooks/useVerify.js.map +1 -0
- package/dist/cli/tui/ink-init.d.ts +2 -0
- package/dist/cli/tui/ink-init.d.ts.map +1 -0
- package/dist/cli/tui/ink-init.js +125 -0
- package/dist/cli/tui/ink-init.js.map +1 -0
- package/dist/cli/tui/reporter-auto.d.ts +7 -0
- package/dist/cli/tui/reporter-auto.d.ts.map +1 -0
- package/dist/cli/tui/reporter-auto.js +15 -0
- package/dist/cli/tui/reporter-auto.js.map +1 -0
- package/dist/cli/tui/reporter.d.ts +26 -0
- package/dist/cli/tui/reporter.d.ts.map +1 -0
- package/dist/cli/tui/reporter.js +31 -0
- package/dist/cli/tui/reporter.js.map +1 -0
- package/dist/cli/tui/run-command.d.ts +14 -0
- package/dist/cli/tui/run-command.d.ts.map +1 -0
- package/dist/cli/tui/run-command.js +73 -0
- package/dist/cli/tui/run-command.js.map +1 -0
- package/dist/cli/tui/select-agents.d.ts +6 -0
- package/dist/cli/tui/select-agents.d.ts.map +1 -0
- package/dist/cli/tui/select-agents.js +28 -0
- package/dist/cli/tui/select-agents.js.map +1 -0
- package/dist/cli/tui/status-agents.d.ts +11 -0
- package/dist/cli/tui/status-agents.d.ts.map +1 -0
- package/dist/cli/tui/status-agents.js +53 -0
- package/dist/cli/tui/status-agents.js.map +1 -0
- package/dist/cli/tui/status-cache.d.ts +6 -0
- package/dist/cli/tui/status-cache.d.ts.map +1 -0
- package/dist/cli/tui/status-cache.js +39 -0
- package/dist/cli/tui/status-cache.js.map +1 -0
- package/dist/cli/tui/status-format.d.ts +15 -0
- package/dist/cli/tui/status-format.d.ts.map +1 -0
- package/dist/cli/tui/status-format.js +45 -0
- package/dist/cli/tui/status-format.js.map +1 -0
- package/dist/cli/tui/status-python.d.ts +7 -0
- package/dist/cli/tui/status-python.d.ts.map +1 -0
- package/dist/cli/tui/status-python.js +24 -0
- package/dist/cli/tui/status-python.js.map +1 -0
- package/dist/cli/tui/system-check.d.ts +24 -0
- package/dist/cli/tui/system-check.d.ts.map +1 -0
- package/dist/cli/tui/system-check.js +101 -0
- package/dist/cli/tui/system-check.js.map +1 -0
- package/dist/cli/tui/tui-reporter.d.ts +19 -0
- package/dist/cli/tui/tui-reporter.d.ts.map +1 -0
- package/dist/cli/tui/tui-reporter.js +94 -0
- package/dist/cli/tui/tui-reporter.js.map +1 -0
- package/dist/cli/tui/utils/config-writer.d.ts +3 -0
- package/dist/cli/tui/utils/config-writer.d.ts.map +1 -0
- package/dist/cli/tui/utils/config-writer.js +20 -0
- package/dist/cli/tui/utils/config-writer.js.map +1 -0
- package/dist/cli/tui/utils/suppress-logs.d.ts +3 -0
- package/dist/cli/tui/utils/suppress-logs.d.ts.map +1 -0
- package/dist/cli/tui/utils/suppress-logs.js +7 -0
- package/dist/cli/tui/utils/suppress-logs.js.map +1 -0
- package/dist/cli/tui/verify-suggestions.d.ts +5 -0
- package/dist/cli/tui/verify-suggestions.d.ts.map +1 -0
- package/dist/cli/tui/verify-suggestions.js +22 -0
- package/dist/cli/tui/verify-suggestions.js.map +1 -0
- package/dist/cli/tui/verify.d.ts +16 -0
- package/dist/cli/tui/verify.d.ts.map +1 -0
- package/dist/cli/tui/verify.js +112 -0
- package/dist/cli/tui/verify.js.map +1 -0
- package/dist/cli/tui/version.d.ts +2 -0
- package/dist/cli/tui/version.d.ts.map +1 -0
- package/dist/cli/tui/version.js +12 -0
- package/dist/cli/tui/version.js.map +1 -0
- package/dist/cli/uninstall.d.ts +2 -0
- package/dist/cli/uninstall.d.ts.map +1 -0
- package/dist/cli/uninstall.js +50 -0
- package/dist/cli/uninstall.js.map +1 -0
- package/dist/cli/warmup.d.ts +2 -1
- package/dist/cli/warmup.d.ts.map +1 -1
- package/dist/cli/warmup.js +147 -208
- package/dist/cli/warmup.js.map +1 -1
- package/dist/daemon/http-server.js +1 -1
- package/dist/daemon/http-server.js.map +1 -1
- package/dist/embedding/embed.d.ts +2 -1
- package/dist/embedding/embed.d.ts.map +1 -1
- package/dist/embedding/embed.js +18 -3
- package/dist/embedding/embed.js.map +1 -1
- package/dist/extraction/extract.d.ts.map +1 -1
- package/dist/extraction/extract.js +6 -0
- package/dist/extraction/extract.js.map +1 -1
- package/dist/extraction/markdown.d.ts +2 -0
- package/dist/extraction/markdown.d.ts.map +1 -1
- package/dist/extraction/markdown.js +70 -0
- package/dist/extraction/markdown.js.map +1 -1
- package/dist/extraction/pipeline.d.ts.map +1 -1
- package/dist/extraction/pipeline.js +32 -7
- package/dist/extraction/pipeline.js.map +1 -1
- package/dist/extraction/readability.d.ts +1 -1
- package/dist/extraction/readability.d.ts.map +1 -1
- package/dist/extraction/readability.js +1 -1
- package/dist/extraction/readability.js.map +1 -1
- package/dist/extraction/site-extractors/github.js +1 -1
- package/dist/extraction/site-extractors/github.js.map +1 -1
- package/dist/extraction/site-extractors/mdn.js +1 -1
- package/dist/extraction/site-extractors/mdn.js.map +1 -1
- package/dist/extraction/site-extractors/stackoverflow.js +1 -1
- package/dist/extraction/site-extractors/stackoverflow.js.map +1 -1
- package/dist/extraction/structured.d.ts +4 -0
- package/dist/extraction/structured.d.ts.map +1 -0
- package/dist/extraction/structured.js +206 -0
- package/dist/extraction/structured.js.map +1 -0
- package/dist/fetch/lightpanda.js +1 -1
- package/dist/fetch/lightpanda.js.map +1 -1
- package/dist/index.js +24 -0
- package/dist/index.js.map +1 -1
- package/dist/instructions.d.ts +6 -6
- package/dist/instructions.d.ts.map +1 -1
- package/dist/instructions.js +55 -51
- package/dist/instructions.js.map +1 -1
- package/dist/logger.d.ts.map +1 -1
- package/dist/logger.js +29 -1
- package/dist/logger.js.map +1 -1
- package/dist/research/brief.d.ts +5 -0
- package/dist/research/brief.d.ts.map +1 -0
- package/dist/research/brief.js +205 -0
- package/dist/research/brief.js.map +1 -0
- package/dist/research/decompose.d.ts +7 -0
- package/dist/research/decompose.d.ts.map +1 -1
- package/dist/research/decompose.js +126 -2
- package/dist/research/decompose.js.map +1 -1
- package/dist/research/pipeline.d.ts +1 -1
- package/dist/research/pipeline.d.ts.map +1 -1
- package/dist/research/pipeline.js +19 -6
- package/dist/research/pipeline.js.map +1 -1
- package/dist/research/synthesize.js +1 -1
- package/dist/research/synthesize.js.map +1 -1
- package/dist/search/engines/bing.d.ts.map +1 -1
- package/dist/search/engines/bing.js +40 -0
- package/dist/search/engines/bing.js.map +1 -1
- package/dist/search/engines/duckduckgo.d.ts.map +1 -1
- package/dist/search/engines/duckduckgo.js +13 -1
- package/dist/search/engines/duckduckgo.js.map +1 -1
- package/dist/search/engines/startpage.d.ts.map +1 -1
- package/dist/search/engines/startpage.js +21 -1
- package/dist/search/engines/startpage.js.map +1 -1
- package/dist/search/find-similar.d.ts.map +1 -1
- package/dist/search/find-similar.js +69 -9
- package/dist/search/find-similar.js.map +1 -1
- package/dist/search/highlights.d.ts +10 -0
- package/dist/search/highlights.d.ts.map +1 -0
- package/dist/search/highlights.js +103 -0
- package/dist/search/highlights.js.map +1 -0
- package/dist/searxng/docker.d.ts.map +1 -1
- package/dist/searxng/docker.js +6 -2
- package/dist/searxng/docker.js.map +1 -1
- package/dist/server.d.ts.map +1 -1
- package/dist/server.js +8 -4
- package/dist/server.js.map +1 -1
- package/dist/tools/agent.d.ts +2 -2
- package/dist/tools/agent.d.ts.map +1 -1
- package/dist/tools/agent.js +1 -1
- package/dist/tools/agent.js.map +1 -1
- package/dist/tools/extract.d.ts.map +1 -1
- package/dist/tools/extract.js +19 -1
- package/dist/tools/extract.js.map +1 -1
- package/dist/tools/fetch.d.ts.map +1 -1
- package/dist/tools/fetch.js +6 -1
- package/dist/tools/fetch.js.map +1 -1
- package/dist/tools/research.d.ts +1 -1
- package/dist/tools/research.d.ts.map +1 -1
- package/dist/tools/research.js +1 -1
- package/dist/tools/research.js.map +1 -1
- package/dist/tools/search.d.ts.map +1 -1
- package/dist/tools/search.js +56 -28
- package/dist/tools/search.js.map +1 -1
- package/dist/types.d.ts +71 -4
- package/dist/types.d.ts.map +1 -1
- package/package.json +15 -1
package/dist/embedding/embed.js
CHANGED
|
@@ -9,7 +9,7 @@ export class EmbeddingService {
|
|
|
9
9
|
subprocess;
|
|
10
10
|
index;
|
|
11
11
|
available = false;
|
|
12
|
-
|
|
12
|
+
subprocessVerified = false;
|
|
13
13
|
constructor() {
|
|
14
14
|
this.subprocess = new EmbeddingSubprocess();
|
|
15
15
|
this.index = new VectorIndex();
|
|
@@ -28,8 +28,21 @@ export class EmbeddingService {
|
|
|
28
28
|
const loaded = this.index.loadFromBuffers(entries);
|
|
29
29
|
log.info('loaded embeddings into index', { count: loaded });
|
|
30
30
|
}
|
|
31
|
+
// Probe the subprocess to verify Python + sentence-transformers work.
|
|
32
|
+
// This forces the lazy spawn so we know right away if embedding is broken.
|
|
33
|
+
try {
|
|
34
|
+
const probeId = 'init-probe';
|
|
35
|
+
await this.subprocess.embed(probeId, 'embedding service probe');
|
|
36
|
+
this.subprocessVerified = true;
|
|
37
|
+
log.info('embedding subprocess verified');
|
|
38
|
+
}
|
|
39
|
+
catch (err) {
|
|
40
|
+
log.warn('embedding subprocess probe failed — embeddings disabled', {
|
|
41
|
+
error: err instanceof Error ? err.message : String(err),
|
|
42
|
+
});
|
|
43
|
+
this.subprocessVerified = false;
|
|
44
|
+
}
|
|
31
45
|
this.available = true;
|
|
32
|
-
this.initialized = true;
|
|
33
46
|
}
|
|
34
47
|
catch (err) {
|
|
35
48
|
log.error('EmbeddingService init failed', { error: String(err) });
|
|
@@ -42,6 +55,9 @@ export class EmbeddingService {
|
|
|
42
55
|
setAvailable(value) {
|
|
43
56
|
this.available = value;
|
|
44
57
|
}
|
|
58
|
+
isSubprocessReady() {
|
|
59
|
+
return this.subprocessVerified;
|
|
60
|
+
}
|
|
45
61
|
getIndex() {
|
|
46
62
|
return this.index;
|
|
47
63
|
}
|
|
@@ -107,7 +123,6 @@ export class EmbeddingService {
|
|
|
107
123
|
this.subprocess.shutdown();
|
|
108
124
|
this.index.clear();
|
|
109
125
|
this.available = false;
|
|
110
|
-
this.initialized = false;
|
|
111
126
|
log.info('EmbeddingService shut down');
|
|
112
127
|
}
|
|
113
128
|
catch (err) {
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"embed.js","sourceRoot":"","sources":["../../src/embedding/embed.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,UAAU,EAAE,MAAM,aAAa,CAAC;AACzC,OAAO,EAAE,mBAAmB,EAAE,MAAM,iBAAiB,CAAC;AACtD,OAAO,EAAE,WAAW,EAAsB,MAAM,mBAAmB,CAAC;AACpE,OAAO,EAAE,oBAAoB,EAAE,gBAAgB,EAAE,YAAY,EAAE,MAAM,mBAAmB,CAAC;AACzF,OAAO,EAAE,SAAS,EAAE,MAAM,cAAc,CAAC;AACzC,OAAO,EAAE,YAAY,EAAE,MAAM,cAAc,CAAC;AAE5C,MAAM,GAAG,GAAG,YAAY,CAAC,WAAW,CAAC,CAAC;AAEtC,MAAM,OAAO,gBAAgB;IACnB,UAAU,CAAsB;IAChC,KAAK,CAAc;IACnB,SAAS,GAAG,KAAK,CAAC;IAClB,
|
|
1
|
+
{"version":3,"file":"embed.js","sourceRoot":"","sources":["../../src/embedding/embed.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,UAAU,EAAE,MAAM,aAAa,CAAC;AACzC,OAAO,EAAE,mBAAmB,EAAE,MAAM,iBAAiB,CAAC;AACtD,OAAO,EAAE,WAAW,EAAsB,MAAM,mBAAmB,CAAC;AACpE,OAAO,EAAE,oBAAoB,EAAE,gBAAgB,EAAE,YAAY,EAAE,MAAM,mBAAmB,CAAC;AACzF,OAAO,EAAE,SAAS,EAAE,MAAM,cAAc,CAAC;AACzC,OAAO,EAAE,YAAY,EAAE,MAAM,cAAc,CAAC;AAE5C,MAAM,GAAG,GAAG,YAAY,CAAC,WAAW,CAAC,CAAC;AAEtC,MAAM,OAAO,gBAAgB;IACnB,UAAU,CAAsB;IAChC,KAAK,CAAc;IACnB,SAAS,GAAG,KAAK,CAAC;IAClB,kBAAkB,GAAG,KAAK,CAAC;IAEnC;QACE,IAAI,CAAC,UAAU,GAAG,IAAI,mBAAmB,EAAE,CAAC;QAC5C,IAAI,CAAC,KAAK,GAAG,IAAI,WAAW,EAAE,CAAC;IACjC,CAAC;IAED,KAAK,CAAC,IAAI;QACR,IAAI,CAAC;YACH,MAAM,MAAM,GAAG,gBAAgB,EAAE,CAAC;YAClC,IAAI,MAAM,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;gBACtB,MAAM,OAAO,GAAG,MAAM;qBACnB,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,SAAS,IAAI,CAAC,CAAC,IAAI,GAAG,CAAC,CAAC;qBACtC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC;oBACT,GAAG,EAAE,CAAC,CAAC,aAAa;oBACpB,SAAS,EAAE,CAAC,CAAC,SAAS;oBACtB,IAAI,EAAE,CAAC,CAAC,IAAI;iBACb,CAAC,CAAC,CAAC;gBACN,MAAM,MAAM,GAAG,IAAI,CAAC,KAAK,CAAC,eAAe,CAAC,OAAO,CAAC,CAAC;gBACnD,GAAG,CAAC,IAAI,CAAC,8BAA8B,EAAE,EAAE,KAAK,EAAE,MAAM,EAAE,CAAC,CAAC;YAC9D,CAAC;YAED,sEAAsE;YACtE,2EAA2E;YAC3E,IAAI,CAAC;gBACH,MAAM,OAAO,GAAG,YAAY,CAAC;gBAC7B,MAAM,IAAI,CAAC,UAAU,CAAC,KAAK,CAAC,OAAO,EAAE,yBAAyB,CAAC,CAAC;gBAChE,IAAI,CAAC,kBAAkB,GAAG,IAAI,CAAC;gBAC/B,GAAG,CAAC,IAAI,CAAC,+BAA+B,CAAC,CAAC;YAC5C,CAAC;YAAC,OAAO,GAAG,EAAE,CAAC;gBACb,GAAG,CAAC,IAAI,CAAC,yDAAyD,EAAE;oBAClE,KAAK,EAAE,GAAG,YAAY,KAAK,CAAC,CAAC,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,GAAG,CAAC;iBACxD,CAAC,CAAC;gBACH,IAAI,CAAC,kBAAkB,GAAG,KAAK,CAAC;YAClC,CAAC;YAED,IAAI,CAAC,SAAS,GAAG,IAAI,CAAC;QAExB,CAAC;QAAC,OAAO,GAAG,EAAE,CAAC;YACb,GAAG,CAAC,KAAK,CAAC,8BAA8B,EAAE,EAAE,KAAK,EAAE,MAAM,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC;YAClE,IAAI,CAAC,SAAS,GAAG,KAAK,CAAC;QACzB,CAAC;IACH,CAAC;IAED,WAAW;QACT,OAAO,IAAI,CAAC,SAAS,CAAC;IACxB,CAAC;IAED,YAAY,CAAC,KAAc;QACzB,IAAI,CAAC,SAAS,GAAG,KAAK,CAAC;IACzB,CAAC;IAED,iBA
AiB;QACf,OAAO,IAAI,CAAC,kBAAkB,CAAC;IACjC,CAAC;IAED,QAAQ;QACN,OAAO,IAAI,CAAC,KAAK,CAAC;IACpB,CAAC;IAED,KAAK,CAAC,aAAa,CAAC,GAAW,EAAE,QAAgB;QAC/C,IAAI,CAAC,IAAI,CAAC,SAAS,EAAE,CAAC;YACpB,GAAG,CAAC,KAAK,CAAC,0CAA0C,EAAE,EAAE,GAAG,EAAE,CAAC,CAAC;YAC/D,OAAO;QACT,CAAC;QAED,IAAI,CAAC;YACH,MAAM,SAAS,GAAG,UAAU,EAAE,CAAC;YAC/B,MAAM,QAAQ,GAAG,MAAM,IAAI,CAAC,UAAU,CAAC,KAAK,CAAC,SAAS,EAAE,QAAQ,CAAC,CAAC;YAElE,IAAI,CAAC,QAAQ,CAAC,MAAM,IAAI,QAAQ,CAAC,KAAK,EAAE,CAAC;gBACvC,GAAG,CAAC,IAAI,CAAC,0BAA0B,EAAE,EAAE,GAAG,EAAE,KAAK,EAAE,QAAQ,CAAC,KAAK,EAAE,CAAC,CAAC;gBACrE,OAAO;YACT,CAAC;YAED,MAAM,MAAM,GAAG,IAAI,YAAY,CAAC,QAAQ,CAAC,MAAM,CAAC,CAAC;YACjD,MAAM,MAAM,GAAG,MAAM,CAAC,IAAI,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC;YAC1C,MAAM,KAAK,GAAG,IAAI,CAAC,UAAU,CAAC,QAAQ,EAAE,IAAI,SAAS,EAAE,CAAC,cAAc,CAAC;YACvE,MAAM,IAAI,GAAG,IAAI,CAAC,UAAU,CAAC,OAAO,EAAE,IAAI,QAAQ,CAAC,MAAM,CAAC,MAAM,CAAC;YAEjE,IAAI,aAAqB,CAAC;YAC1B,IAAI,CAAC;gBACH,aAAa,GAAG,YAAY,CAAC,GAAG,CAAC,CAAC;YACpC,CAAC;YAAC,MAAM,CAAC;gBACP,aAAa,GAAG,GAAG,CAAC;YACtB,CAAC;YAED,oBAAoB,CAAC,aAAa,EAAE,MAAM,EAAE,KAAK,EAAE,IAAI,CAAC,CAAC;YACzD,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,aAAa,EAAE,MAAM,CAAC,CAAC;YAEtC,GAAG,CAAC,KAAK,CAAC,qBAAqB,EAAE,EAAE,GAAG,EAAE,aAAa,EAAE,IAAI,EAAE,CAAC,CAAC;QACjE,CAAC;QAAC,OAAO,GAAG,EAAE,CAAC;YACb,GAAG,CAAC,IAAI,CAAC,sBAAsB,EAAE,EAAE,GAAG,EAAE,KAAK,EAAE,MAAM,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC;QAChE,CAAC;IACH,CAAC;IAED,UAAU,CAAC,GAAW,EAAE,QAAgB;QACtC,IAAI,CAAC,IAAI,CAAC,SAAS;YAAE,OAAO;QAE5B,IAAI,CAAC,aAAa,CAAC,GAAG,EAAE,QAAQ,CAAC,CAAC,KAAK,CAAC,GAAG,CAAC,EAAE;YAC5C,GAAG,CAAC,IAAI,CAAC,wBAAwB,EAAE,EAAE,GAAG,EAAE,KAAK,EAAE,MAAM,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC;QAClE,CAAC,CAAC,CAAC;IACL,CAAC;IAED,KAAK,CAAC,WAAW,CACf,SAAiB,EACjB,IAAY,EACZ,WAAyB;QAEzB,IAAI,CAAC,IAAI,CAAC,SAAS,IAAI,IAAI,CAAC,KAAK,CAAC,IAAI,EAAE,KAAK,CAAC,EAAE,CAAC;YAC/C,OAAO,EAAE,CAAC;QACZ,CAAC;QAED,IAAI,CAAC;YACH,MAAM,SAAS,GAAG,UAAU,EAAE,CAAC;YAC/B,MAAM,QAAQ,GAAG,MAAM,IAAI,CAAC,UAAU,CAAC,KAAK,CAAC,SAAS,EAAE,SAAS,CAAC,CAAC;YAEnE,IAAI,CAAC,QAAQ,CAAC,MAAM,IAAI,QAAQ,CAAC,KAAK,EAAE
,CAAC;gBACvC,GAAG,CAAC,IAAI,CAAC,wBAAwB,EAAE,EAAE,KAAK,EAAE,QAAQ,CAAC,KAAK,EAAE,CAAC,CAAC;gBAC9D,OAAO,EAAE,CAAC;YACZ,CAAC;YAED,MAAM,WAAW,GAAG,IAAI,YAAY,CAAC,QAAQ,CAAC,MAAM,CAAC,CAAC;YACtD,OAAO,IAAI,CAAC,KAAK,CAAC,WAAW,CAAC,WAAW,EAAE,IAAI,EAAE,WAAW,CAAC,CAAC;QAChE,CAAC;QAAC,OAAO,GAAG,EAAE,CAAC;YACb,GAAG,CAAC,IAAI,CAAC,oBAAoB,EAAE,EAAE,KAAK,EAAE,MAAM,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC;YACvD,OAAO,EAAE,CAAC;QACZ,CAAC;IACH,CAAC;IAED,QAAQ;QACN,IAAI,CAAC;YACH,IAAI,CAAC,UAAU,CAAC,QAAQ,EAAE,CAAC;YAC3B,IAAI,CAAC,KAAK,CAAC,KAAK,EAAE,CAAC;YACnB,IAAI,CAAC,SAAS,GAAG,KAAK,CAAC;YACvB,GAAG,CAAC,IAAI,CAAC,4BAA4B,CAAC,CAAC;QACzC,CAAC;QAAC,OAAO,GAAG,EAAE,CAAC;YACb,GAAG,CAAC,KAAK,CAAC,iCAAiC,EAAE,EAAE,KAAK,EAAE,MAAM,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC;QACvE,CAAC;IACH,CAAC;CACF;AAED,IAAI,cAAc,GAA4B,IAAI,CAAC;AAEnD,MAAM,UAAU,mBAAmB;IACjC,IAAI,CAAC,cAAc,EAAE,CAAC;QACpB,cAAc,GAAG,IAAI,gBAAgB,EAAE,CAAC;IAC1C,CAAC;IACD,OAAO,cAAc,CAAC;AACxB,CAAC;AAED,MAAM,UAAU,qBAAqB;IACnC,IAAI,cAAc,EAAE,CAAC;QACnB,cAAc,CAAC,QAAQ,EAAE,CAAC;QAC1B,cAAc,GAAG,IAAI,CAAC;IACxB,CAAC;AACH,CAAC"}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"extract.d.ts","sourceRoot":"","sources":["../../src/extraction/extract.ts"],"names":[],"mappings":"AACA,OAAO,KAAK,EAAE,YAAY,EAAE,SAAS,EAAE,MAAM,aAAa,CAAC;AAS3D,wBAAgB,eAAe,CAAC,IAAI,EAAE,MAAM,GAAG,YAAY,
|
|
1
|
+
{"version":3,"file":"extract.d.ts","sourceRoot":"","sources":["../../src/extraction/extract.ts"],"names":[],"mappings":"AACA,OAAO,KAAK,EAAE,YAAY,EAAE,SAAS,EAAE,MAAM,aAAa,CAAC;AAS3D,wBAAgB,eAAe,CAAC,IAAI,EAAE,MAAM,GAAG,YAAY,CAiC1D;AAED,wBAAgB,eAAe,CAC7B,IAAI,EAAE,MAAM,EACZ,QAAQ,EAAE,MAAM,EAChB,QAAQ,EAAE,OAAO,GAChB,MAAM,GAAG,MAAM,EAAE,CAUnB;AAED,wBAAgB,aAAa,CAAC,IAAI,EAAE,MAAM,GAAG,SAAS,EAAE,CA6CvD"}
|
|
@@ -26,6 +26,12 @@ export function extractMetadata(html) {
|
|
|
26
26
|
const ogImage = getMetaContent(doc, 'og:image');
|
|
27
27
|
if (ogImage)
|
|
28
28
|
result.og_image = ogImage;
|
|
29
|
+
const ogType = getMetaContent(doc, 'og:type');
|
|
30
|
+
if (ogType)
|
|
31
|
+
result.og_type = ogType;
|
|
32
|
+
const canonical = doc.querySelector('link[rel="canonical"]')?.getAttribute('href');
|
|
33
|
+
if (canonical)
|
|
34
|
+
result.canonical_url = canonical;
|
|
29
35
|
return result;
|
|
30
36
|
}
|
|
31
37
|
export function extractSelector(html, selector, multiple) {
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"extract.js","sourceRoot":"","sources":["../../src/extraction/extract.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,SAAS,EAAE,MAAM,UAAU,CAAC;AAGrC,SAAS,cAAc,CAAC,GAAa,EAAE,cAAsB;IAC3D,MAAM,EAAE,GACN,GAAG,CAAC,aAAa,CAAC,cAAc,cAAc,IAAI,CAAC;QACnD,GAAG,CAAC,aAAa,CAAC,kBAAkB,cAAc,IAAI,CAAC,CAAC;IAC1D,OAAO,EAAE,EAAE,YAAY,CAAC,SAAS,CAAC,IAAI,SAAS,CAAC;AAClD,CAAC;AAED,MAAM,UAAU,eAAe,CAAC,IAAY;IAC1C,MAAM,EAAE,QAAQ,EAAE,GAAG,EAAE,GAAG,SAAS,CAAC,IAAI,CAAC,CAAC;IAC1C,MAAM,MAAM,GAAiB,EAAE,CAAC;IAEhC,MAAM,KAAK,GAAG,GAAG,CAAC,aAAa,CAAC,OAAO,CAAC,EAAE,WAAW,EAAE,IAAI,EAAE,CAAC;IAC9D,IAAI,KAAK;QAAE,MAAM,CAAC,KAAK,GAAG,KAAK,CAAC;IAEhC,MAAM,WAAW,GACf,cAAc,CAAC,GAAG,EAAE,aAAa,CAAC,IAAI,cAAc,CAAC,GAAG,EAAE,gBAAgB,CAAC,CAAC;IAC9E,IAAI,WAAW;QAAE,MAAM,CAAC,WAAW,GAAG,WAAW,CAAC;IAElD,MAAM,MAAM,GAAG,cAAc,CAAC,GAAG,EAAE,QAAQ,CAAC,CAAC;IAC7C,IAAI,MAAM;QAAE,MAAM,CAAC,MAAM,GAAG,MAAM,CAAC;IAEnC,MAAM,IAAI,GACR,cAAc,CAAC,GAAG,EAAE,MAAM,CAAC,IAAI,cAAc,CAAC,GAAG,EAAE,wBAAwB,CAAC,CAAC;IAC/E,IAAI,IAAI;QAAE,MAAM,CAAC,IAAI,GAAG,IAAI,CAAC;IAE7B,MAAM,QAAQ,GAAG,cAAc,CAAC,GAAG,EAAE,UAAU,CAAC,CAAC;IACjD,IAAI,QAAQ,EAAE,CAAC;QACb,MAAM,CAAC,QAAQ,GAAG,QAAQ,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC;IAC7E,CAAC;IAED,MAAM,OAAO,GAAG,cAAc,CAAC,GAAG,EAAE,UAAU,CAAC,CAAC;IAChD,IAAI,OAAO;QAAE,MAAM,CAAC,QAAQ,GAAG,OAAO,CAAC;IAEvC,OAAO,MAAM,CAAC;AAChB,CAAC;AAED,MAAM,UAAU,eAAe,CAC7B,IAAY,EACZ,QAAgB,EAChB,QAAiB;IAEjB,MAAM,EAAE,QAAQ,EAAE,GAAG,EAAE,GAAG,SAAS,CAAC,IAAI,CAAC,CAAC;IAE1C,IAAI,QAAQ,EAAE,CAAC;QACb,MAAM,QAAQ,GAAG,GAAG,CAAC,gBAAgB,CAAC,QAAQ,CAAC,CAAC;QAChD,OAAO,KAAK,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC,GAAG,CAAC,CAAC,EAAE,EAAE,EAAE,CAAC,CAAC,EAAE,CAAC,WAAW,IAAI,EAAE,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC;IACzE,CAAC;IAED,MAAM,EAAE,GAAG,GAAG,CAAC,aAAa,CAAC,QAAQ,CAAC,CAAC;IACvC,OAAO,EAAE,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC,WAAW,IAAI,EAAE,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC;AACjD,CAAC;AAED,MAAM,UAAU,aAAa,CAAC,IAAY;IACxC,MAAM,EAAE,QAAQ,EAAE,GAAG,
EAAE,GAAG,SAAS,CAAC,IAAI,CAAC,CAAC;IAC1C,MAAM,MAAM,GAAG,GAAG,CAAC,gBAAgB,CAAC,OAAO,CAAC,CAAC;IAC7C,IAAI,MAAM,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,EAAE,CAAC;IAEnC,OAAO,KAAK,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC,GAAG,CAAC,CAAC,KAAK,EAAE,EAAE;QACtC,MAAM,OAAO,GAAG,KAAK,CAAC,aAAa,CAAC,SAAS,CAAC,EAAE,WAAW,EAAE,IAAI,EAAE,IAAI,SAAS,CAAC;QAEjF,MAAM,UAAU,GAAG,KAAK,CAAC,gBAAgB,CAAC,UAAU,CAAC,CAAC;QACtD,IAAI,OAAiB,CAAC;QACtB,IAAI,QAAmB,CAAC;QAExB,IAAI,UAAU,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YAC1B,OAAO,GAAG,KAAK,CAAC,IAAI,CAAC,UAAU,CAAC,CAAC,GAAG,CAAC,CAAC,EAAE,EAAE,EAAE,CAAC,CAAC,EAAE,CAAC,WAAW,IAAI,EAAE,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC;YAC5E,QAAQ,GAAG,KAAK,CAAC,IAAI,CAAC,KAAK,CAAC,gBAAgB,CAAC,UAAU,CAAC,CAAC,CAAC;YAC1D,IAAI,QAAQ,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;gBAC1B,MAAM,OAAO,GAAG,KAAK,CAAC,IAAI,CAAC,KAAK,CAAC,gBAAgB,CAAC,IAAI,CAAC,CAAC,CAAC;gBACzD,QAAQ,GAAG,OAAO,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC;YAC9B,CAAC;QACH,CAAC;aAAM,CAAC;YACN,MAAM,OAAO,GAAG,KAAK,CAAC,IAAI,CAAC,KAAK,CAAC,gBAAgB,CAAC,IAAI,CAAC,CAAC,CAAC;YACzD,MAAM,QAAQ,GAAG,OAAO,CAAC,CAAC,CAAC,CAAC;YAC5B,MAAM,WAAW,GAAG,QAAQ,CAAC,CAAC,CAAC,KAAK,CAAC,IAAI,CAAC,QAAQ,CAAC,gBAAgB,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC;YAEhF,IAAI,WAAW,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;gBAC3B,OAAO,GAAG,WAAW,CAAC,GAAG,CAAC,CAAC,EAAE,EAAE,EAAE,CAAC,CAAC,EAAE,CAAC,WAAW,IAAI,EAAE,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC;gBACjE,QAAQ,GAAG,OAAO,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC;YAC9B,CAAC;iBAAM,CAAC;gBACN,MAAM,SAAS,GAAG,QAAQ,CAAC,CAAC,CAAC,QAAQ,CAAC,gBAAgB,CAAC,IAAI,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC;gBACxE,OAAO,GAAG,KAAK,CAAC,IAAI,CAAC,EAAE,MAAM,EAAE,SAAS,EAAE,EAAE,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,OAAO,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC;gBACtE,QAAQ,GAAG,OAAO,CAAC;YACrB,CAAC;QACH,CAAC;QAED,MAAM,IAAI,GAAG,QAAQ,CAAC,GAAG,CAAC,CAAC,GAAG,EAAE,EAAE;YAChC,MAAM,KAAK,GAAG,KAAK,CAAC,IAAI,CAAC,GAAG,CAAC,gBAAgB,CAAC,IAAI,CAAC,CAAC,CAAC;YACrD,MAAM,GAAG,GAA2B,EAAE,CAAC;YACvC,OAAO,CAAC,OAAO,CAAC,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE;gBAC5B,GAAG,CAAC,MAAM,CAAC,GAAG,CAAC,KAAK,CAAC,CAAC,CAAC,EAAE,WAAW,IAAI,EAAE,CAAC,
CAAC,IAAI,EAAE,CAAC;YACrD,CAAC,CAAC,CAAC;YACH,OAAO,GAAG,CAAC;QACb,CAAC,CAAC,CAAC;QAEH,OAAO,EAAE,OAAO,EAAE,OAAO,EAAE,IAAI,EAAE,CAAC;IACpC,CAAC,CAAC,CAAC;AACL,CAAC"}
|
|
1
|
+
{"version":3,"file":"extract.js","sourceRoot":"","sources":["../../src/extraction/extract.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,SAAS,EAAE,MAAM,UAAU,CAAC;AAGrC,SAAS,cAAc,CAAC,GAAa,EAAE,cAAsB;IAC3D,MAAM,EAAE,GACN,GAAG,CAAC,aAAa,CAAC,cAAc,cAAc,IAAI,CAAC;QACnD,GAAG,CAAC,aAAa,CAAC,kBAAkB,cAAc,IAAI,CAAC,CAAC;IAC1D,OAAO,EAAE,EAAE,YAAY,CAAC,SAAS,CAAC,IAAI,SAAS,CAAC;AAClD,CAAC;AAED,MAAM,UAAU,eAAe,CAAC,IAAY;IAC1C,MAAM,EAAE,QAAQ,EAAE,GAAG,EAAE,GAAG,SAAS,CAAC,IAAI,CAAC,CAAC;IAC1C,MAAM,MAAM,GAAiB,EAAE,CAAC;IAEhC,MAAM,KAAK,GAAG,GAAG,CAAC,aAAa,CAAC,OAAO,CAAC,EAAE,WAAW,EAAE,IAAI,EAAE,CAAC;IAC9D,IAAI,KAAK;QAAE,MAAM,CAAC,KAAK,GAAG,KAAK,CAAC;IAEhC,MAAM,WAAW,GACf,cAAc,CAAC,GAAG,EAAE,aAAa,CAAC,IAAI,cAAc,CAAC,GAAG,EAAE,gBAAgB,CAAC,CAAC;IAC9E,IAAI,WAAW;QAAE,MAAM,CAAC,WAAW,GAAG,WAAW,CAAC;IAElD,MAAM,MAAM,GAAG,cAAc,CAAC,GAAG,EAAE,QAAQ,CAAC,CAAC;IAC7C,IAAI,MAAM;QAAE,MAAM,CAAC,MAAM,GAAG,MAAM,CAAC;IAEnC,MAAM,IAAI,GACR,cAAc,CAAC,GAAG,EAAE,MAAM,CAAC,IAAI,cAAc,CAAC,GAAG,EAAE,wBAAwB,CAAC,CAAC;IAC/E,IAAI,IAAI;QAAE,MAAM,CAAC,IAAI,GAAG,IAAI,CAAC;IAE7B,MAAM,QAAQ,GAAG,cAAc,CAAC,GAAG,EAAE,UAAU,CAAC,CAAC;IACjD,IAAI,QAAQ,EAAE,CAAC;QACb,MAAM,CAAC,QAAQ,GAAG,QAAQ,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC;IAC7E,CAAC;IAED,MAAM,OAAO,GAAG,cAAc,CAAC,GAAG,EAAE,UAAU,CAAC,CAAC;IAChD,IAAI,OAAO;QAAE,MAAM,CAAC,QAAQ,GAAG,OAAO,CAAC;IAEvC,MAAM,MAAM,GAAG,cAAc,CAAC,GAAG,EAAE,SAAS,CAAC,CAAC;IAC9C,IAAI,MAAM;QAAE,MAAM,CAAC,OAAO,GAAG,MAAM,CAAC;IAEpC,MAAM,SAAS,GAAG,GAAG,CAAC,aAAa,CAAC,uBAAuB,CAAC,EAAE,YAAY,CAAC,MAAM,CAAC,CAAC;IACnF,IAAI,SAAS;QAAE,MAAM,CAAC,aAAa,GAAG,SAAS,CAAC;IAEhD,OAAO,MAAM,CAAC;AAChB,CAAC;AAED,MAAM,UAAU,eAAe,CAC7B,IAAY,EACZ,QAAgB,EAChB,QAAiB;IAEjB,MAAM,EAAE,QAAQ,EAAE,GAAG,EAAE,GAAG,SAAS,CAAC,IAAI,CAAC,CAAC;IAE1C,IAAI,QAAQ,EAAE,CAAC;QACb,MAAM,QAAQ,GAAG,GAAG,CAAC,gBAAgB,CAAC,QAAQ,CAAC,CAAC;QAChD,OAAO,KAAK,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC,GAAG,CAAC,CAAC,EAAE,EAAE,EAAE,CAAC,CAAC,EAAE,CAAC,WAAW,IAAI,EAAE,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC;IAC
zE,CAAC;IAED,MAAM,EAAE,GAAG,GAAG,CAAC,aAAa,CAAC,QAAQ,CAAC,CAAC;IACvC,OAAO,EAAE,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC,WAAW,IAAI,EAAE,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC;AACjD,CAAC;AAED,MAAM,UAAU,aAAa,CAAC,IAAY;IACxC,MAAM,EAAE,QAAQ,EAAE,GAAG,EAAE,GAAG,SAAS,CAAC,IAAI,CAAC,CAAC;IAC1C,MAAM,MAAM,GAAG,GAAG,CAAC,gBAAgB,CAAC,OAAO,CAAC,CAAC;IAC7C,IAAI,MAAM,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,EAAE,CAAC;IAEnC,OAAO,KAAK,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC,GAAG,CAAC,CAAC,KAAK,EAAE,EAAE;QACtC,MAAM,OAAO,GAAG,KAAK,CAAC,aAAa,CAAC,SAAS,CAAC,EAAE,WAAW,EAAE,IAAI,EAAE,IAAI,SAAS,CAAC;QAEjF,MAAM,UAAU,GAAG,KAAK,CAAC,gBAAgB,CAAC,UAAU,CAAC,CAAC;QACtD,IAAI,OAAiB,CAAC;QACtB,IAAI,QAAmB,CAAC;QAExB,IAAI,UAAU,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YAC1B,OAAO,GAAG,KAAK,CAAC,IAAI,CAAC,UAAU,CAAC,CAAC,GAAG,CAAC,CAAC,EAAE,EAAE,EAAE,CAAC,CAAC,EAAE,CAAC,WAAW,IAAI,EAAE,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC;YAC5E,QAAQ,GAAG,KAAK,CAAC,IAAI,CAAC,KAAK,CAAC,gBAAgB,CAAC,UAAU,CAAC,CAAC,CAAC;YAC1D,IAAI,QAAQ,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;gBAC1B,MAAM,OAAO,GAAG,KAAK,CAAC,IAAI,CAAC,KAAK,CAAC,gBAAgB,CAAC,IAAI,CAAC,CAAC,CAAC;gBACzD,QAAQ,GAAG,OAAO,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC;YAC9B,CAAC;QACH,CAAC;aAAM,CAAC;YACN,MAAM,OAAO,GAAG,KAAK,CAAC,IAAI,CAAC,KAAK,CAAC,gBAAgB,CAAC,IAAI,CAAC,CAAC,CAAC;YACzD,MAAM,QAAQ,GAAG,OAAO,CAAC,CAAC,CAAC,CAAC;YAC5B,MAAM,WAAW,GAAG,QAAQ,CAAC,CAAC,CAAC,KAAK,CAAC,IAAI,CAAC,QAAQ,CAAC,gBAAgB,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC;YAEhF,IAAI,WAAW,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;gBAC3B,OAAO,GAAG,WAAW,CAAC,GAAG,CAAC,CAAC,EAAE,EAAE,EAAE,CAAC,CAAC,EAAE,CAAC,WAAW,IAAI,EAAE,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC;gBACjE,QAAQ,GAAG,OAAO,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC;YAC9B,CAAC;iBAAM,CAAC;gBACN,MAAM,SAAS,GAAG,QAAQ,CAAC,CAAC,CAAC,QAAQ,CAAC,gBAAgB,CAAC,IAAI,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC;gBACxE,OAAO,GAAG,KAAK,CAAC,IAAI,CAAC,EAAE,MAAM,EAAE,SAAS,EAAE,EAAE,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,OAAO,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC;gBACtE,QAAQ,GAAG,OAAO,CAAC;YACrB,CAAC;QACH,CAAC;QAED,MAAM,IAAI,GAAG,QAAQ,CAAC,GAAG,CAAC,CAAC,GAAG,EAAE,EAAE;YAChC,MAAM,K
AAK,GAAG,KAAK,CAAC,IAAI,CAAC,GAAG,CAAC,gBAAgB,CAAC,IAAI,CAAC,CAAC,CAAC;YACrD,MAAM,GAAG,GAA2B,EAAE,CAAC;YACvC,OAAO,CAAC,OAAO,CAAC,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE;gBAC5B,GAAG,CAAC,MAAM,CAAC,GAAG,CAAC,KAAK,CAAC,CAAC,CAAC,EAAE,WAAW,IAAI,EAAE,CAAC,CAAC,IAAI,EAAE,CAAC;YACrD,CAAC,CAAC,CAAC;YACH,OAAO,GAAG,CAAC;QACb,CAAC,CAAC,CAAC;QAEH,OAAO,EAAE,OAAO,EAAE,OAAO,EAAE,IAAI,EAAE,CAAC;IACpC,CAAC,CAAC,CAAC;AACL,CAAC"}
|
|
@@ -7,4 +7,6 @@ export declare function extractLinksAndImages(markdown: string): {
|
|
|
7
7
|
links: string[];
|
|
8
8
|
images: string[];
|
|
9
9
|
};
|
|
10
|
+
export declare function filterDecorativeImages(markdown: string): string;
|
|
11
|
+
export declare function resolveRelativeUrls(markdown: string, baseUrl: string): string;
|
|
10
12
|
//# sourceMappingURL=markdown.d.ts.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"markdown.d.ts","sourceRoot":"","sources":["../../src/extraction/markdown.ts"],"names":[],"mappings":"AAkDA,wBAAgB,cAAc,CAAC,IAAI,EAAE,MAAM,GAAG,MAAM,CAGnD;AAmCD,wBAAgB,cAAc,CAC5B,QAAQ,EAAE,MAAM,EAChB,OAAO,EAAE,MAAM,EACf,YAAY,SAAI,GACf;IAAE,OAAO,EAAE,MAAM,CAAC;IAAC,OAAO,EAAE,OAAO,CAAA;CAAE,CA2BvC;AAED,wBAAgB,qBAAqB,CAAC,QAAQ,EAAE,MAAM,GAAG;IAAE,KAAK,EAAE,MAAM,EAAE,CAAC;IAAC,MAAM,EAAE,MAAM,EAAE,CAAA;CAAE,CAoB7F"}
|
|
1
|
+
{"version":3,"file":"markdown.d.ts","sourceRoot":"","sources":["../../src/extraction/markdown.ts"],"names":[],"mappings":"AAkDA,wBAAgB,cAAc,CAAC,IAAI,EAAE,MAAM,GAAG,MAAM,CAGnD;AAmCD,wBAAgB,cAAc,CAC5B,QAAQ,EAAE,MAAM,EAChB,OAAO,EAAE,MAAM,EACf,YAAY,SAAI,GACf;IAAE,OAAO,EAAE,MAAM,CAAC;IAAC,OAAO,EAAE,OAAO,CAAA;CAAE,CA2BvC;AAED,wBAAgB,qBAAqB,CAAC,QAAQ,EAAE,MAAM,GAAG;IAAE,KAAK,EAAE,MAAM,EAAE,CAAC;IAAC,MAAM,EAAE,MAAM,EAAE,CAAA;CAAE,CAoB7F;AAkBD,wBAAgB,sBAAsB,CAAC,QAAQ,EAAE,MAAM,GAAG,MAAM,CAsB/D;AAID,wBAAgB,mBAAmB,CAAC,QAAQ,EAAE,MAAM,EAAE,OAAO,EAAE,MAAM,GAAG,MAAM,CAkC7E"}
|
|
@@ -104,4 +104,74 @@ export function extractLinksAndImages(markdown) {
|
|
|
104
104
|
}
|
|
105
105
|
return { links: Array.from(links), images: Array.from(images) };
|
|
106
106
|
}
|
|
107
|
+
// Substrings that, when present in an image URL, mark the image as decorative.
const DECORATIVE_URL_MARKERS = [
    'avatar',
    'icon',
    'logo',
    'badge',
    'shield',
    'tracking',
    'pixel',
    'sprite',
    'emoji',
    'favicon',
];
// Splits the parenthesised part of an image token into its URL and an
// optional quoted title: `url`, `url "title"` or `url 'title'`.
const IMAGE_TARGET_PATTERN = /^\s*(\S+)(?:\s+(?:"([^"]*)"|'([^']*)'))?\s*$/;
/**
 * Drop `![alt](src)` tokens that look decorative. Heuristic only -- images
 * with alt text are kept unless the URL itself marks them as decorative.
 *
 * Removal rules, applied in order:
 *   1. base64 GIF data URIs (tracking pixels / 1x1 beacons);
 *   2. inline SVG data URIs shorter than 200 characters (tiny glyphs);
 *   3. URLs containing any of DECORATIVE_URL_MARKERS (case-insensitive);
 *   4. images that have neither alt text nor a title.
 *
 * The optional title is separated from the URL before the checks run, so a
 * caption such as "Company logo usage" cannot cause a content image to be
 * dropped, and an image with an empty alt but a real title is preserved.
 *
 * @param {string} markdown - Markdown source to filter.
 * @returns {string} The markdown with decorative image tokens replaced by an
 *   empty string; falsy input is returned unchanged.
 */
export function filterDecorativeImages(markdown) {
    if (!markdown)
        return markdown;
    return markdown.replace(/!\[([^\]]*)\]\(([^)]+)\)/g, (match, alt, target) => {
        const parsed = IMAGE_TARGET_PATTERN.exec(target);
        // Fall back to the raw target when it does not look like `url "title"`
        // (for example an unencoded data URI that contains spaces).
        const url = parsed ? parsed[1] : target.trim();
        const title = parsed ? (parsed[2] ?? parsed[3] ?? '').trim() : '';
        const lowerUrl = url.toLowerCase();
        // Tiny animated-GIF tracking pixel / 1x1 beacons
        if (lowerUrl.startsWith('data:image/gif;base64,'))
            return '';
        // Inline SVG icon data URIs (short = tiny, likely decorative glyph)
        if (lowerUrl.startsWith('data:image/svg+xml') && url.length < 200)
            return '';
        // URL marks it as decorative regardless of alt
        if (DECORATIVE_URL_MARKERS.some((marker) => lowerUrl.includes(marker)))
            return '';
        // No alt text + no title = decorative
        if (!alt.trim() && !title)
            return '';
        return match;
    });
}
|
|
145
|
+
/**
 * Resolve relative `[text](path)` and `![alt](path)` targets against baseUrl.
 * Leaves absolute http(s) URLs, mailto:, tel:, javascript:, data: and
 * #fragment targets untouched. Protocol-relative targets (`//host/path`)
 * receive the base URL's protocol.
 *
 * Three passes are made so that every target is visited:
 *   1. image targets;
 *   2. the outer target of a linked image, `[![alt](img)](target)`, which the
 *      plain link pattern cannot reach because its label contains a `]`;
 *   3. plain link targets. A lookbehind (rather than a consumed leading
 *      character) excludes images, so back-to-back links such as
 *      `[a](/x)[b](/y)` are both rewritten.
 * Rewriting is idempotent for targets that are already absolute, so a target
 * seen by more than one pass is not changed twice.
 *
 * @param {string} markdown - Markdown source whose targets should be resolved.
 * @param {string} baseUrl - Absolute URL the relative targets are resolved against.
 * @returns {string} Markdown with relative targets made absolute. The input is
 *   returned unchanged when either argument is falsy, and individual targets
 *   are left as-is when they cannot be resolved (e.g. an invalid baseUrl).
 */
export function resolveRelativeUrls(markdown, baseUrl) {
    if (!markdown || !baseUrl)
        return markdown;
    const rewrite = (path) => {
        const trimmed = path.trim();
        if (!trimmed)
            return path;
        if (/^(?:https?:|mailto:|tel:|javascript:|data:|#)/i.test(trimmed))
            return path;
        try {
            if (trimmed.startsWith('//')) {
                // Protocol-relative: only the scheme comes from the base.
                return `${new URL(baseUrl).protocol}${trimmed}`;
            }
            return new URL(trimmed, baseUrl).href;
        }
        catch {
            // Unparseable base or target: keep the original text.
            return path;
        }
    };
    const substitute = (_m, open, path, close) => `${open}${rewrite(path)}${close}`;
    return markdown
        // Images first.
        .replace(/(!\[[^\]]*\]\()([^)\s]+)(\s*(?:"[^"]*")?\))/g, substitute)
        // Outer target of an image wrapped in a link.
        .replace(/(\[!\[[^\]]*\]\([^)]*\)\]\()([^)\s]+)(\s*(?:"[^"]*")?\))/g, substitute)
        // Plain links; `(?<!!)` skips image tokens without consuming a character.
        .replace(/((?<!!)\[[^\]]*\]\()([^)\s]+)(\s*(?:"[^"]*")?\))/g, substitute);
}
|
|
107
177
|
//# sourceMappingURL=markdown.js.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"markdown.js","sourceRoot":"","sources":["../../src/extraction/markdown.ts"],"names":[],"mappings":"AAAA,OAAO,eAAe,MAAM,UAAU,CAAC;AAEvC,SAAS,aAAa;IACpB,MAAM,EAAE,GAAG,IAAI,eAAe,CAAC,EAAE,YAAY,EAAE,KAAK,EAAE,cAAc,EAAE,QAAQ,EAAE,CAAC,CAAC;IAElF,wCAAwC;IACxC,EAAE,CAAC,MAAM,CAAC,CAAC,QAAQ,EAAE,OAAO,CAAC,CAAC,CAAC;IAE/B,iDAAiD;IACjD,EAAE,CAAC,OAAO,CAAC,OAAO,EAAE;QAClB,MAAM,EAAE,OAAO;QACf,WAAW,CAAC,QAAQ,EAAE,IAAI;YACxB,MAAM,EAAE,GAAG,IAAe,CAAC;YAC3B,MAAM,IAAI,GAAc,KAAK,CAAC,IAAI,CAAC,EAAE,CAAC,gBAAgB,CAAC,IAAI,CAAC,CAAC,CAAC;YAC9D,IAAI,IAAI,CAAC,MAAM,KAAK,CAAC;gBAAE,OAAO,EAAE,CAAC;YAEjC,MAAM,SAAS,GAAG,CAAC,GAAY,EAAU,EAAE;gBACzC,MAAM,KAAK,GAAG,KAAK,CAAC,IAAI,CAAC,GAAG,CAAC,gBAAgB,CAAC,QAAQ,CAAC,CAAC,CAAC;gBACzD,OAAO,IAAI,GAAG,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,WAAW,EAAE,OAAO,CAAC,KAAK,EAAE,GAAG,CAAC,CAAC,IAAI,EAAE,IAAI,EAAE,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,GAAG,IAAI,CAAC;YACnG,CAAC,CAAC;YAEF,MAAM,SAAS,GAAG,IAAI,CAAC,CAAC,CAAC,CAAC;YAC1B,MAAM,WAAW,GAAG,SAAS,CAAC,gBAAgB,CAAC,IAAI,CAAC,CAAC,MAAM,GAAG,CAAC,CAAC;YAChE,MAAM,WAAW,GAAG,KAAK,CAAC,IAAI,CAAC,SAAS,CAAC,gBAAgB,CAAC,QAAQ,CAAC,CAAC,CAAC;YACrE,MAAM,SAAS,GAAG,IAAI,GAAG,WAAW,CAAC,GAAG,CAAC,GAAG,EAAE,CAAC,KAAK,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,GAAG,IAAI,CAAC;YAEzE,IAAI,WAAW,EAAE,CAAC;gBAChB,MAAM,QAAQ,GAAG,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC;gBAC/B,MAAM,KAAK,GAAG,CAAC,SAAS,CAAC,SAAS,CAAC,EAAE,SAAS,EAAE,GAAG,QAAQ,CAAC,GAAG,CAAC,SAAS,CAAC,CAAC,CAAC;gBAC5E,OAAO,MAAM,GAAG,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,GAAG,MAAM,CAAC;YAC5C,CAAC;YAED,MAAM,KAAK,GAAG,CAAC,SAAS,CAAC,SAAS,CAAC,EAAE,SAAS,EAAE,GAAG,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,SAAS,CAAC,CAAC,CAAC;YACjF,OAAO,MAAM,GAAG,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,GAAG,MAAM,CAAC;QAC5C,CAAC;KACF,CAAC,CAAC;IAEH,qFAAqF;IACrF,EAAE,CAAC,OAAO,CAAC,WAAW,EAAE;QACtB,MAAM,EAAE,CAAC,OAAO,EAAE,OAAO,EAAE,OAAO,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI,CAAC;QACrD,WAAW,CAAC,OAAO;YACjB,OAAO,OAAO,CAAC;QACjB,CAAC;KACF,CAAC,CAAC;IAEH,OAAO,EAAE,CAAC;AACZ,CAAC;AAED,MAAM,QAAQ,GAAG,aAAa,EAAE,CAAC;A
AEjC,MAAM,UAAU,cAAc,CAAC,IAAY;IACzC,IAAI,CAAC,IAAI;QAAE,OAAO,EAAE,CAAC;IACrB,OAAO,QAAQ,CAAC,QAAQ,CAAC,IAAI,CAAC,CAAC;AACjC,CAAC;AAQD,SAAS,aAAa,CAAC,KAAe;IACpC,MAAM,QAAQ,GAAc,EAAE,CAAC;IAC/B,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,KAAK,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;QACtC,MAAM,KAAK,GAAG,KAAK,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC,kBAAkB,CAAC,CAAC;QACjD,IAAI,KAAK,EAAE,CAAC;YACV,QAAQ,CAAC,IAAI,CAAC,EAAE,KAAK,EAAE,KAAK,CAAC,CAAC,CAAC,CAAC,MAAM,EAAE,IAAI,EAAE,KAAK,CAAC,CAAC,CAAC,CAAC,IAAI,EAAE,EAAE,SAAS,EAAE,CAAC,EAAE,CAAC,CAAC;QACjF,CAAC;IACH,CAAC;IACD,OAAO,QAAQ,CAAC;AAClB,CAAC;AAED,SAAS,kBAAkB,CAAC,KAAe,EAAE,QAAmB,EAAE,UAAkB;IAClF,MAAM,OAAO,GAAG,QAAQ,CAAC,UAAU,CAAC,CAAC;IACrC,MAAM,KAAK,GAAG,OAAO,CAAC,SAAS,CAAC;IAEhC,0EAA0E;IAC1E,IAAI,GAAG,GAAG,KAAK,CAAC,MAAM,CAAC;IACvB,KAAK,IAAI,CAAC,GAAG,UAAU,GAAG,CAAC,EAAE,CAAC,GAAG,QAAQ,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;QACtD,IAAI,QAAQ,CAAC,CAAC,CAAC,CAAC,KAAK,IAAI,OAAO,CAAC,KAAK,EAAE,CAAC;YACvC,GAAG,GAAG,QAAQ,CAAC,CAAC,CAAC,CAAC,SAAS,CAAC;YAC5B,MAAM;QACR,CAAC;IACH,CAAC;IAED,OAAO,KAAK,CAAC,KAAK,CAAC,KAAK,EAAE,GAAG,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;AAC5C,CAAC;AAED,MAAM,UAAU,cAAc,CAC5B,QAAgB,EAChB,OAAe,EACf,YAAY,GAAG,CAAC;IAEhB,MAAM,KAAK,GAAG,QAAQ,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;IACnC,MAAM,QAAQ,GAAG,aAAa,CAAC,KAAK,CAAC,CAAC;IAEtC,IAAI,QAAQ,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,EAAE,OAAO,EAAE,QAAQ,EAAE,OAAO,EAAE,KAAK,EAAE,CAAC;IAExE,MAAM,KAAK,GAAG,OAAO,CAAC,WAAW,EAAE,CAAC;IACpC,MAAM,OAAO,GAAG,QAAQ,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,EAAE,CAAC,EAAE,CAAC,EAAE,CAAC,CAAC,CAAC;IAEnD,8BAA8B;IAC9B,MAAM,YAAY,GAAG,OAAO,CAAC,MAAM,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC,WAAW,EAAE,KAAK,KAAK,CAAC,CAAC;IAE/E,yDAAyD;IACzD,IAAI,YAAY,CAAC,MAAM,GAAG,CAAC,IAAI,YAAY,GAAG,YAAY,CAAC,MAAM,EAAE,CAAC;QAClE,MAAM,EAAE,CAAC,EAAE,GAAG,YAAY,CAAC,YAAY,CAAC,CAAC;QACzC,OAAO,EAAE,OAAO,EAAE,kBAAkB,CAAC,KAAK,EAAE,QAAQ,EAAE,CAAC,CAAC,EAAE,OAAO,EAAE,IAAI,EAAE,CAAC;IAC5E,CAAC;IAED,4EAA4E;IAC5E,MAAM,gBAAgB,GAAG,OAAO,CAAC,MAAM,CAAC,CAAC,EAAE,CAAC,EAA
E,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC,WAAW,EAAE,CAAC,QAAQ,CAAC,KAAK,CAAC,CAAC,CAAC;IAEzF,IAAI,gBAAgB,CAAC,MAAM,KAAK,CAAC,IAAI,YAAY,IAAI,gBAAgB,CAAC,MAAM,EAAE,CAAC;QAC7E,OAAO,EAAE,OAAO,EAAE,QAAQ,EAAE,OAAO,EAAE,KAAK,EAAE,CAAC;IAC/C,CAAC;IAED,MAAM,EAAE,CAAC,EAAE,GAAG,gBAAgB,CAAC,YAAY,CAAC,CAAC;IAC7C,OAAO,EAAE,OAAO,EAAE,kBAAkB,CAAC,KAAK,EAAE,QAAQ,EAAE,CAAC,CAAC,EAAE,OAAO,EAAE,IAAI,EAAE,CAAC;AAC5E,CAAC;AAED,MAAM,UAAU,qBAAqB,CAAC,QAAgB;IACpD,MAAM,YAAY,GAAG,yBAAyB,CAAC;IAC/C,MAAM,WAAW,GAAG,8BAA8B,CAAC;IAEnD,MAAM,MAAM,GAAG,IAAI,GAAG,EAAU,CAAC;IACjC,MAAM,KAAK,GAAG,IAAI,GAAG,EAAU,CAAC;IAEhC,IAAI,KAA6B,CAAC;IAElC,uBAAuB;IACvB,OAAO,CAAC,KAAK,GAAG,YAAY,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC,KAAK,IAAI,EAAE,CAAC;QACtD,MAAM,CAAC,GAAG,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC;IACvB,CAAC;IAED,4BAA4B;IAC5B,OAAO,CAAC,KAAK,GAAG,WAAW,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC,KAAK,IAAI,EAAE,CAAC;QACrD,KAAK,CAAC,GAAG,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC;IACtB,CAAC;IAED,OAAO,EAAE,KAAK,EAAE,KAAK,CAAC,IAAI,CAAC,KAAK,CAAC,EAAE,MAAM,EAAE,KAAK,CAAC,IAAI,CAAC,MAAM,CAAC,EAAE,CAAC;AAClE,CAAC"}
|
|
1
|
+
{"version":3,"file":"markdown.js","sourceRoot":"","sources":["../../src/extraction/markdown.ts"],"names":[],"mappings":"AAAA,OAAO,eAAe,MAAM,UAAU,CAAC;AAEvC,SAAS,aAAa;IACpB,MAAM,EAAE,GAAG,IAAI,eAAe,CAAC,EAAE,YAAY,EAAE,KAAK,EAAE,cAAc,EAAE,QAAQ,EAAE,CAAC,CAAC;IAElF,wCAAwC;IACxC,EAAE,CAAC,MAAM,CAAC,CAAC,QAAQ,EAAE,OAAO,CAAC,CAAC,CAAC;IAE/B,iDAAiD;IACjD,EAAE,CAAC,OAAO,CAAC,OAAO,EAAE;QAClB,MAAM,EAAE,OAAO;QACf,WAAW,CAAC,QAAQ,EAAE,IAAI;YACxB,MAAM,EAAE,GAAG,IAAe,CAAC;YAC3B,MAAM,IAAI,GAAc,KAAK,CAAC,IAAI,CAAC,EAAE,CAAC,gBAAgB,CAAC,IAAI,CAAC,CAAC,CAAC;YAC9D,IAAI,IAAI,CAAC,MAAM,KAAK,CAAC;gBAAE,OAAO,EAAE,CAAC;YAEjC,MAAM,SAAS,GAAG,CAAC,GAAY,EAAU,EAAE;gBACzC,MAAM,KAAK,GAAG,KAAK,CAAC,IAAI,CAAC,GAAG,CAAC,gBAAgB,CAAC,QAAQ,CAAC,CAAC,CAAC;gBACzD,OAAO,IAAI,GAAG,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,WAAW,EAAE,OAAO,CAAC,KAAK,EAAE,GAAG,CAAC,CAAC,IAAI,EAAE,IAAI,EAAE,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,GAAG,IAAI,CAAC;YACnG,CAAC,CAAC;YAEF,MAAM,SAAS,GAAG,IAAI,CAAC,CAAC,CAAC,CAAC;YAC1B,MAAM,WAAW,GAAG,SAAS,CAAC,gBAAgB,CAAC,IAAI,CAAC,CAAC,MAAM,GAAG,CAAC,CAAC;YAChE,MAAM,WAAW,GAAG,KAAK,CAAC,IAAI,CAAC,SAAS,CAAC,gBAAgB,CAAC,QAAQ,CAAC,CAAC,CAAC;YACrE,MAAM,SAAS,GAAG,IAAI,GAAG,WAAW,CAAC,GAAG,CAAC,GAAG,EAAE,CAAC,KAAK,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,GAAG,IAAI,CAAC;YAEzE,IAAI,WAAW,EAAE,CAAC;gBAChB,MAAM,QAAQ,GAAG,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC;gBAC/B,MAAM,KAAK,GAAG,CAAC,SAAS,CAAC,SAAS,CAAC,EAAE,SAAS,EAAE,GAAG,QAAQ,CAAC,GAAG,CAAC,SAAS,CAAC,CAAC,CAAC;gBAC5E,OAAO,MAAM,GAAG,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,GAAG,MAAM,CAAC;YAC5C,CAAC;YAED,MAAM,KAAK,GAAG,CAAC,SAAS,CAAC,SAAS,CAAC,EAAE,SAAS,EAAE,GAAG,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,SAAS,CAAC,CAAC,CAAC;YACjF,OAAO,MAAM,GAAG,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,GAAG,MAAM,CAAC;QAC5C,CAAC;KACF,CAAC,CAAC;IAEH,qFAAqF;IACrF,EAAE,CAAC,OAAO,CAAC,WAAW,EAAE;QACtB,MAAM,EAAE,CAAC,OAAO,EAAE,OAAO,EAAE,OAAO,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI,CAAC;QACrD,WAAW,CAAC,OAAO;YACjB,OAAO,OAAO,CAAC;QACjB,CAAC;KACF,CAAC,CAAC;IAEH,OAAO,EAAE,CAAC;AACZ,CAAC;AAED,MAAM,QAAQ,GAAG,aAAa,EAAE,CAAC;A
AEjC,MAAM,UAAU,cAAc,CAAC,IAAY;IACzC,IAAI,CAAC,IAAI;QAAE,OAAO,EAAE,CAAC;IACrB,OAAO,QAAQ,CAAC,QAAQ,CAAC,IAAI,CAAC,CAAC;AACjC,CAAC;AAQD,SAAS,aAAa,CAAC,KAAe;IACpC,MAAM,QAAQ,GAAc,EAAE,CAAC;IAC/B,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,KAAK,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;QACtC,MAAM,KAAK,GAAG,KAAK,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC,kBAAkB,CAAC,CAAC;QACjD,IAAI,KAAK,EAAE,CAAC;YACV,QAAQ,CAAC,IAAI,CAAC,EAAE,KAAK,EAAE,KAAK,CAAC,CAAC,CAAC,CAAC,MAAM,EAAE,IAAI,EAAE,KAAK,CAAC,CAAC,CAAC,CAAC,IAAI,EAAE,EAAE,SAAS,EAAE,CAAC,EAAE,CAAC,CAAC;QACjF,CAAC;IACH,CAAC;IACD,OAAO,QAAQ,CAAC;AAClB,CAAC;AAED,SAAS,kBAAkB,CAAC,KAAe,EAAE,QAAmB,EAAE,UAAkB;IAClF,MAAM,OAAO,GAAG,QAAQ,CAAC,UAAU,CAAC,CAAC;IACrC,MAAM,KAAK,GAAG,OAAO,CAAC,SAAS,CAAC;IAEhC,0EAA0E;IAC1E,IAAI,GAAG,GAAG,KAAK,CAAC,MAAM,CAAC;IACvB,KAAK,IAAI,CAAC,GAAG,UAAU,GAAG,CAAC,EAAE,CAAC,GAAG,QAAQ,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;QACtD,IAAI,QAAQ,CAAC,CAAC,CAAC,CAAC,KAAK,IAAI,OAAO,CAAC,KAAK,EAAE,CAAC;YACvC,GAAG,GAAG,QAAQ,CAAC,CAAC,CAAC,CAAC,SAAS,CAAC;YAC5B,MAAM;QACR,CAAC;IACH,CAAC;IAED,OAAO,KAAK,CAAC,KAAK,CAAC,KAAK,EAAE,GAAG,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;AAC5C,CAAC;AAED,MAAM,UAAU,cAAc,CAC5B,QAAgB,EAChB,OAAe,EACf,YAAY,GAAG,CAAC;IAEhB,MAAM,KAAK,GAAG,QAAQ,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;IACnC,MAAM,QAAQ,GAAG,aAAa,CAAC,KAAK,CAAC,CAAC;IAEtC,IAAI,QAAQ,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,EAAE,OAAO,EAAE,QAAQ,EAAE,OAAO,EAAE,KAAK,EAAE,CAAC;IAExE,MAAM,KAAK,GAAG,OAAO,CAAC,WAAW,EAAE,CAAC;IACpC,MAAM,OAAO,GAAG,QAAQ,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,EAAE,CAAC,EAAE,CAAC,EAAE,CAAC,CAAC,CAAC;IAEnD,8BAA8B;IAC9B,MAAM,YAAY,GAAG,OAAO,CAAC,MAAM,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC,WAAW,EAAE,KAAK,KAAK,CAAC,CAAC;IAE/E,yDAAyD;IACzD,IAAI,YAAY,CAAC,MAAM,GAAG,CAAC,IAAI,YAAY,GAAG,YAAY,CAAC,MAAM,EAAE,CAAC;QAClE,MAAM,EAAE,CAAC,EAAE,GAAG,YAAY,CAAC,YAAY,CAAC,CAAC;QACzC,OAAO,EAAE,OAAO,EAAE,kBAAkB,CAAC,KAAK,EAAE,QAAQ,EAAE,CAAC,CAAC,EAAE,OAAO,EAAE,IAAI,EAAE,CAAC;IAC5E,CAAC;IAED,4EAA4E;IAC5E,MAAM,gBAAgB,GAAG,OAAO,CAAC,MAAM,CAAC,CAAC,EAAE,CAAC,EAA
E,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC,WAAW,EAAE,CAAC,QAAQ,CAAC,KAAK,CAAC,CAAC,CAAC;IAEzF,IAAI,gBAAgB,CAAC,MAAM,KAAK,CAAC,IAAI,YAAY,IAAI,gBAAgB,CAAC,MAAM,EAAE,CAAC;QAC7E,OAAO,EAAE,OAAO,EAAE,QAAQ,EAAE,OAAO,EAAE,KAAK,EAAE,CAAC;IAC/C,CAAC;IAED,MAAM,EAAE,CAAC,EAAE,GAAG,gBAAgB,CAAC,YAAY,CAAC,CAAC;IAC7C,OAAO,EAAE,OAAO,EAAE,kBAAkB,CAAC,KAAK,EAAE,QAAQ,EAAE,CAAC,CAAC,EAAE,OAAO,EAAE,IAAI,EAAE,CAAC;AAC5E,CAAC;AAED,MAAM,UAAU,qBAAqB,CAAC,QAAgB;IACpD,MAAM,YAAY,GAAG,yBAAyB,CAAC;IAC/C,MAAM,WAAW,GAAG,8BAA8B,CAAC;IAEnD,MAAM,MAAM,GAAG,IAAI,GAAG,EAAU,CAAC;IACjC,MAAM,KAAK,GAAG,IAAI,GAAG,EAAU,CAAC;IAEhC,IAAI,KAA6B,CAAC;IAElC,uBAAuB;IACvB,OAAO,CAAC,KAAK,GAAG,YAAY,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC,KAAK,IAAI,EAAE,CAAC;QACtD,MAAM,CAAC,GAAG,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC;IACvB,CAAC;IAED,4BAA4B;IAC5B,OAAO,CAAC,KAAK,GAAG,WAAW,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC,KAAK,IAAI,EAAE,CAAC;QACrD,KAAK,CAAC,GAAG,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC;IACtB,CAAC;IAED,OAAO,EAAE,KAAK,EAAE,KAAK,CAAC,IAAI,CAAC,KAAK,CAAC,EAAE,MAAM,EAAE,KAAK,CAAC,IAAI,CAAC,MAAM,CAAC,EAAE,CAAC;AAClE,CAAC;AAED,MAAM,sBAAsB,GAAG;IAC7B,QAAQ;IACR,MAAM;IACN,MAAM;IACN,OAAO;IACP,QAAQ;IACR,UAAU;IACV,OAAO;IACP,QAAQ;IACR,OAAO;IACP,SAAS;CACV,CAAC;AAEF,yEAAyE;AACzE,0EAA0E;AAC1E,wEAAwE;AACxE,MAAM,UAAU,sBAAsB,CAAC,QAAgB;IACrD,IAAI,CAAC,QAAQ;QAAE,OAAO,QAAQ,CAAC;IAC/B,OAAO,QAAQ,CAAC,OAAO,CAAC,2BAA2B,EAAE,CAAC,KAAK,EAAE,GAAW,EAAE,GAAW,EAAE,EAAE;QACvF,MAAM,UAAU,GAAG,GAAG,CAAC,IAAI,EAAE,CAAC;QAC9B,MAAM,QAAQ,GAAG,GAAG,CAAC,WAAW,EAAE,CAAC;QAEnC,iDAAiD;QACjD,IAAI,QAAQ,CAAC,UAAU,CAAC,wBAAwB,CAAC;YAAE,OAAO,EAAE,CAAC;QAE7D,oEAAoE;QACpE,IAAI,QAAQ,CAAC,UAAU,CAAC,oBAAoB,CAAC,IAAI,GAAG,CAAC,MAAM,GAAG,GAAG;YAAE,OAAO,EAAE,CAAC;QAE7E,+CAA+C;QAC/C,KAAK,MAAM,MAAM,IAAI,sBAAsB,EAAE,CAAC;YAC5C,IAAI,QAAQ,CAAC,QAAQ,CAAC,MAAM,CAAC;gBAAE,OAAO,EAAE,CAAC;QAC3C,CAAC;QAED,sCAAsC;QACtC,IAAI,CAAC,UAAU;YAAE,OAAO,EAAE,CAAC;QAE3B,OAAO,KAAK,CAAC;IACf,CAAC,CAAC,CAAC;AACL,CAAC;AAED,8EAA8E;AAC9E,8EAA8E;AAC9E,MAAM,UAAU,mBAAmB,CAAC,QAAgB,EAAE,OAAe;IACnE,IAAI,CAAC,QAAQ,IAAI,CAAC,OAAO;QAAE,OAAO,QAAQ
,CAAC;IAE3C,MAAM,OAAO,GAAG,CAAC,IAAY,EAAU,EAAE;QACvC,MAAM,OAAO,GAAG,IAAI,CAAC,IAAI,EAAE,CAAC;QAC5B,IAAI,CAAC,OAAO;YAAE,OAAO,IAAI,CAAC;QAC1B,IAAI,gDAAgD,CAAC,IAAI,CAAC,OAAO,CAAC;YAAE,OAAO,IAAI,CAAC;QAChF,IAAI,OAAO,CAAC,UAAU,CAAC,IAAI,CAAC,EAAE,CAAC;YAC7B,IAAI,CAAC;gBACH,MAAM,IAAI,GAAG,IAAI,GAAG,CAAC,OAAO,CAAC,CAAC;gBAC9B,OAAO,GAAG,IAAI,CAAC,QAAQ,GAAG,OAAO,EAAE,CAAC;YACtC,CAAC;YAAC,MAAM,CAAC;gBACP,OAAO,IAAI,CAAC;YACd,CAAC;QACH,CAAC;QACD,IAAI,CAAC;YACH,OAAO,IAAI,GAAG,CAAC,OAAO,EAAE,OAAO,CAAC,CAAC,IAAI,CAAC;QACxC,CAAC;QAAC,MAAM,CAAC;YACP,OAAO,IAAI,CAAC;QACd,CAAC;IACH,CAAC,CAAC;IAEF,0EAA0E;IAC1E,IAAI,MAAM,GAAG,QAAQ,CAAC,OAAO,CAC3B,8CAA8C,EAC9C,CAAC,EAAE,EAAE,IAAI,EAAE,IAAI,EAAE,KAAK,EAAE,EAAE,CAAC,GAAG,IAAI,GAAG,OAAO,CAAC,IAAI,CAAC,GAAG,KAAK,EAAE,CAC7D,CAAC;IAEF,MAAM,GAAG,MAAM,CAAC,OAAO,CACrB,qDAAqD,EACrD,CAAC,EAAE,EAAE,GAAG,EAAE,IAAI,EAAE,IAAI,EAAE,KAAK,EAAE,EAAE,CAAC,GAAG,GAAG,GAAG,IAAI,GAAG,OAAO,CAAC,IAAI,CAAC,GAAG,KAAK,EAAE,CACxE,CAAC;IAEF,OAAO,MAAM,CAAC;AAChB,CAAC"}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"pipeline.d.ts","sourceRoot":"","sources":["../../src/extraction/pipeline.ts"],"names":[],"mappings":"
|
|
1
|
+
{"version":3,"file":"pipeline.d.ts","sourceRoot":"","sources":["../../src/extraction/pipeline.ts"],"names":[],"mappings":"AAWA,OAAO,KAAK,EAAE,gBAAgB,EAAE,SAAS,EAAE,MAAM,aAAa,CAAC;AAU/D,MAAM,WAAW,iBAAiB;IAChC,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,YAAY,CAAC,EAAE,MAAM,CAAC;IACtB,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,SAAS,CAAC,EAAE,MAAM,CAAC;CACpB;AASD,wBAAgB,iBAAiB,CAAC,SAAS,EAAE,SAAS,GAAG,IAAI,CAE5D;AAED,wBAAsB,cAAc,CAClC,IAAI,EAAE,MAAM,EACZ,GAAG,EAAE,MAAM,EACX,OAAO,GAAE,iBAAsB,GAC9B,OAAO,CAAC,gBAAgB,CAAC,CAmE3B"}
|
|
@@ -1,7 +1,8 @@
|
|
|
1
1
|
import { defuddleExtract } from './defuddle.js';
|
|
2
2
|
import { readabilityExtract } from './readability.js';
|
|
3
3
|
import { trafilaturaExtract, isTrafilaturaAvailable } from './trafilatura.js';
|
|
4
|
-
import { htmlToMarkdown, extractSection, extractLinksAndImages } from './markdown.js';
|
|
4
|
+
import { htmlToMarkdown, extractSection, extractLinksAndImages, filterDecorativeImages, resolveRelativeUrls, } from './markdown.js';
|
|
5
|
+
import { extractMetadata } from './extract.js';
|
|
5
6
|
import { githubExtractor } from './site-extractors/github.js';
|
|
6
7
|
import { stackoverflowExtractor } from './site-extractors/stackoverflow.js';
|
|
7
8
|
import { mdnExtractor } from './site-extractors/mdn.js';
|
|
@@ -40,14 +41,14 @@ export async function extractContent(html, url, options = {}) {
|
|
|
40
41
|
images: [],
|
|
41
42
|
extractor: 'turndown',
|
|
42
43
|
};
|
|
43
|
-
return applyPostProcessing(result, options);
|
|
44
|
+
return applyPostProcessing(result, url, html, options);
|
|
44
45
|
}
|
|
45
46
|
const siteExtractor = siteExtractors.find((e) => e.canHandle(url, html));
|
|
46
47
|
if (siteExtractor) {
|
|
47
48
|
const extracted = siteExtractor.extract(html, url);
|
|
48
49
|
if (extracted) {
|
|
49
50
|
result = extracted;
|
|
50
|
-
return applyPostProcessing(result, options);
|
|
51
|
+
return applyPostProcessing(result, url, html, options);
|
|
51
52
|
}
|
|
52
53
|
}
|
|
53
54
|
result = await defuddleExtract(html, url);
|
|
@@ -59,7 +60,7 @@ export async function extractContent(html, url, options = {}) {
|
|
|
59
60
|
result = await trafilaturaExtract(html, url);
|
|
60
61
|
if (result) {
|
|
61
62
|
log.info('Trafilatura extraction succeeded', { url, chars: result.markdown.length });
|
|
62
|
-
return applyPostProcessing(result, options);
|
|
63
|
+
return applyPostProcessing(result, url, html, options);
|
|
63
64
|
}
|
|
64
65
|
}
|
|
65
66
|
}
|
|
@@ -78,18 +79,42 @@ export async function extractContent(html, url, options = {}) {
|
|
|
78
79
|
extractor: 'turndown',
|
|
79
80
|
};
|
|
80
81
|
}
|
|
81
|
-
return applyPostProcessing(result, options);
|
|
82
|
+
return applyPostProcessing(result, url, html, options);
|
|
82
83
|
}
|
|
83
|
-
function
|
|
84
|
+
/**
 * Combine extractor-supplied metadata with metadata read from the page HTML.
 *
 * Everything returned by `extractMetadata(html)` is used as the starting
 * point; the fields listed below are then overridden by the extractor's value
 * when it has one. `description`, `author` and `date` fall back on any falsy
 * extractor value (so an empty string also falls back); the remaining fields
 * fall back only on `null`/`undefined`.
 *
 * @param {object} base - Metadata produced by the content extractor.
 * @param {string} html - Raw page HTML passed to `extractMetadata`.
 * @returns {object} The merged metadata, or `base` itself if reading the page
 *   metadata throws.
 */
function mergeMetadata(base, html) {
    try {
        const meta = extractMetadata(html);
        return {
            ...meta,
            // Extractor-provided fields win when set (they already inspected the article body).
            description: base.description || meta.description,
            author: base.author || meta.author,
            date: base.date || meta.date,
            // Fall back to the page-level value like every sibling field, instead
            // of overwriting it with undefined when the extractor has no language.
            language: base.language ?? meta.language,
            og_image: base.og_image ?? meta.og_image,
            og_type: base.og_type ?? meta.og_type,
            canonical_url: base.canonical_url ?? meta.canonical_url,
            keywords: base.keywords ?? meta.keywords,
        };
    }
    catch {
        // Best effort: metadata enrichment must never fail the extraction.
        return base;
    }
}
|
|
104
|
+
/**
 * Apply the shared post-processing steps to an extraction result.
 *
 * Steps, in order: resolve relative link/image targets against `url`, remove
 * decorative images, optionally narrow the markdown to `options.section`,
 * collect links and images from the resulting markdown, merge page metadata,
 * and finally truncate the markdown to `options.maxChars`.
 *
 * Links and images are collected before truncation, so they describe the full
 * (post-section) markdown rather than only the truncated text.
 *
 * @param {object} result - Extraction result; `markdown` and `metadata` are read.
 * @param {string} url - Page URL used as the base for relative targets.
 * @param {string} html - Raw page HTML used for metadata enrichment.
 * @param {object} options - Supports `section`, `sectionIndex` (defaults to 0)
 *   and `maxChars`.
 * @returns {object} A copy of `result` with `markdown`, `links`, `images` and
 *   `metadata` replaced.
 */
function applyPostProcessing(result, url, html, options) {
    // Resolve relative links/images before slicing so downstream consumers get absolute URLs.
    let markdown = filterDecorativeImages(resolveRelativeUrls(result.markdown, url));
    if (options.section) {
        const { content } = extractSection(markdown, options.section, options.sectionIndex ?? 0);
        markdown = content;
    }
    const { links, images } = extractLinksAndImages(markdown);
    const metadata = mergeMetadata(result.metadata, html);
    if (options.maxChars && markdown.length > options.maxChars) {
        let end = options.maxChars;
        // Do not cut an astral character (e.g. an emoji) in half: a trailing
        // high surrogate would leave the output as ill-formed UTF-16.
        const lastUnit = markdown.charCodeAt(end - 1);
        if (lastUnit >= 0xd800 && lastUnit <= 0xdbff)
            end -= 1;
        markdown = markdown.slice(0, end);
    }
    return { ...result, markdown, links, images, metadata };
}
|
|
95
120
|
//# sourceMappingURL=pipeline.js.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"pipeline.js","sourceRoot":"","sources":["../../src/extraction/pipeline.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,eAAe,EAAE,MAAM,eAAe,CAAC;AAChD,OAAO,EAAE,kBAAkB,EAAE,MAAM,kBAAkB,CAAC;AACtD,OAAO,EAAE,kBAAkB,EAAE,sBAAsB,EAAE,MAAM,kBAAkB,CAAC;AAC9E,OAAO,
|
|
1
|
+
{"version":3,"file":"pipeline.js","sourceRoot":"","sources":["../../src/extraction/pipeline.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,eAAe,EAAE,MAAM,eAAe,CAAC;AAChD,OAAO,EAAE,kBAAkB,EAAE,MAAM,kBAAkB,CAAC;AACtD,OAAO,EAAE,kBAAkB,EAAE,sBAAsB,EAAE,MAAM,kBAAkB,CAAC;AAC9E,OAAO,EACL,cAAc,EACd,cAAc,EACd,qBAAqB,EACrB,sBAAsB,EACtB,mBAAmB,GACpB,MAAM,eAAe,CAAC;AACvB,OAAO,EAAE,eAAe,EAAE,MAAM,cAAc,CAAC;AAE/C,OAAO,EAAE,eAAe,EAAE,MAAM,6BAA6B,CAAC;AAC9D,OAAO,EAAE,sBAAsB,EAAE,MAAM,oCAAoC,CAAC;AAC5E,OAAO,EAAE,YAAY,EAAE,MAAM,0BAA0B,CAAC;AACxD,OAAO,EAAE,oBAAoB,EAAE,MAAM,mCAAmC,CAAC;AACzE,OAAO,EAAE,YAAY,EAAE,MAAM,cAAc,CAAC;AAC5C,OAAO,EAAE,SAAS,EAAE,MAAM,cAAc,CAAC;AAEzC,MAAM,GAAG,GAAG,YAAY,CAAC,SAAS,CAAC,CAAC;AAUpC,MAAM,cAAc,GAAgB;IAClC,eAAe;IACf,sBAAsB;IACtB,YAAY;IACZ,oBAAoB;CACrB,CAAC;AAEF,MAAM,UAAU,iBAAiB,CAAC,SAAoB;IACpD,cAAc,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC;AACjC,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,cAAc,CAClC,IAAY,EACZ,GAAW,EACX,UAA6B,EAAE;IAE/B,IAAI,MAAM,GAA4B,IAAI,CAAC;IAE3C,IAAI,OAAO,CAAC,WAAW,KAAK,iBAAiB,EAAE,CAAC;QAC9C,IAAI,OAAO,GAAG,EAAE,CAAC;QACjB,IAAI,OAAO,CAAC,SAAS,EAAE,CAAC;YACtB,IAAI,CAAC;gBACH,MAAM,QAAQ,GAAG,CAAC,MAAM,MAAM,CAAC,WAAW,CAAC,CAAC,CAAC,OAAO,CAAC;gBACrD,MAAM,MAAM,GAAG,MAAM,QAAQ,CAAC,OAAO,CAAC,SAAS,CAAC,CAAC;gBACjD,OAAO,GAAG,MAAM,CAAC,IAAI,IAAI,EAAE,CAAC;YAC9B,CAAC;YAAC,OAAO,GAAG,EAAE,CAAC;gBACb,GAAG,CAAC,IAAI,CAAC,kBAAkB,EAAE,EAAE,GAAG,EAAE,KAAK,EAAE,MAAM,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC;YAC5D,CAAC;QACH,CAAC;QACD,MAAM,GAAG;YACP,KAAK,EAAE,EAAE;YACT,QAAQ,EAAE,OAAO;YACjB,QAAQ,EAAE,EAAE;YACZ,KAAK,EAAE,EAAE;YACT,MAAM,EAAE,EAAE;YACV,SAAS,EAAE,UAAU;SACtB,CAAC;QACF,OAAO,mBAAmB,CAAC,MAAM,EAAE,GAAG,EAAE,IAAI,EAAE,OAAO,CAAC,CAAC;IACzD,CAAC;IAED,MAAM,aAAa,GAAG,cAAc,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,SAAS,CAAC,GAAG,EAAE,IAAI,CAAC,CAAC,CAAC;IACzE,IAAI,aAAa,EAAE,CAAC;QAClB,MAAM,SAAS,GAAG,aAAa,CAAC,OAAO,CAAC,IAAI,EAAE,GAAG,CAAC,CAAC;QACnD,IAAI,SAAS,EAAE,CAAC;YACd,MAAM,GAAG,SAAS,CAAC;YACnB,OAAO,mBAAmB,CAAC,MAAM,EAAE,GAAG,EAAE,IAAI,EAAE,OAAO,CAAC,CAAC;QACzD,CAAC;IACH,CAAC;IA
ED,MAAM,GAAG,MAAM,eAAe,CAAC,IAAI,EAAE,GAAG,CAAC,CAAC;IAE1C,IAAI,CAAC,MAAM,EAAE,CAAC;QACZ,MAAM,MAAM,GAAG,SAAS,EAAE,CAAC;QAC3B,IAAI,MAAM,CAAC,WAAW,KAAK,OAAO,EAAE,CAAC;YACnC,MAAM,aAAa,GAAG,MAAM,sBAAsB,EAAE,CAAC;YACrD,IAAI,aAAa,EAAE,CAAC;gBAClB,MAAM,GAAG,MAAM,kBAAkB,CAAC,IAAI,EAAE,GAAG,CAAC,CAAC;gBAC7C,IAAI,MAAM,EAAE,CAAC;oBACX,GAAG,CAAC,IAAI,CAAC,kCAAkC,EAAE,EAAE,GAAG,EAAE,KAAK,EAAE,MAAM,CAAC,QAAQ,CAAC,MAAM,EAAE,CAAC,CAAC;oBACrF,OAAO,mBAAmB,CAAC,MAAM,EAAE,GAAG,EAAE,IAAI,EAAE,OAAO,CAAC,CAAC;gBACzD,CAAC;YACH,CAAC;QACH,CAAC;IACH,CAAC;IAED,IAAI,CAAC,MAAM,EAAE,CAAC;QACZ,MAAM,GAAG,kBAAkB,CAAC,IAAI,EAAE,GAAG,CAAC,CAAC;IACzC,CAAC;IAED,IAAI,CAAC,MAAM,EAAE,CAAC;QACZ,MAAM,QAAQ,GAAG,cAAc,CAAC,IAAI,CAAC,CAAC;QACtC,MAAM,GAAG;YACP,KAAK,EAAE,EAAE;YACT,QAAQ;YACR,QAAQ,EAAE,EAAE;YACZ,KAAK,EAAE,EAAE;YACT,MAAM,EAAE,EAAE;YACV,SAAS,EAAE,UAAU;SACtB,CAAC;IACJ,CAAC;IAED,OAAO,mBAAmB,CAAC,MAAM,EAAE,GAAG,EAAE,IAAI,EAAE,OAAO,CAAC,CAAC;AACzD,CAAC;AAED,SAAS,aAAa,CACpB,IAAkC,EAClC,IAAY;IAEZ,IAAI,CAAC;QACH,MAAM,IAAI,GAAG,eAAe,CAAC,IAAI,CAAC,CAAC;QACnC,OAAO;YACL,GAAG,IAAI;YACP,oFAAoF;YACpF,WAAW,EAAE,IAAI,CAAC,WAAW,IAAI,IAAI,CAAC,WAAW;YACjD,MAAM,EAAE,IAAI,CAAC,MAAM,IAAI,IAAI,CAAC,MAAM;YAClC,IAAI,EAAE,IAAI,CAAC,IAAI,IAAI,IAAI,CAAC,IAAI;YAC5B,QAAQ,EAAE,IAAI,CAAC,QAAQ;YACvB,QAAQ,EAAE,IAAI,CAAC,QAAQ,IAAI,IAAI,CAAC,QAAQ;YACxC,OAAO,EAAE,IAAI,CAAC,OAAO,IAAI,IAAI,CAAC,OAAO;YACrC,aAAa,EAAE,IAAI,CAAC,aAAa,IAAI,IAAI,CAAC,aAAa;YACvD,QAAQ,EAAE,IAAI,CAAC,QAAQ,IAAI,IAAI,CAAC,QAAQ;SACzC,CAAC;IACJ,CAAC;IAAC,MAAM,CAAC;QACP,OAAO,IAAI,CAAC;IACd,CAAC;AACH,CAAC;AAED,SAAS,mBAAmB,CAC1B,MAAwB,EACxB,GAAW,EACX,IAAY,EACZ,OAA0B;IAE1B,IAAI,QAAQ,GAAG,MAAM,CAAC,QAAQ,CAAC;IAE/B,0FAA0F;IAC1F,QAAQ,GAAG,mBAAmB,CAAC,QAAQ,EAAE,GAAG,CAAC,CAAC;IAC9C,QAAQ,GAAG,sBAAsB,CAAC,QAAQ,CAAC,CAAC;IAE5C,IAAI,OAAO,CAAC,OAAO,EAAE,CAAC;QACpB,MAAM,EAAE,OAAO,EAAE,GAAG,cAAc,CAAC,QAAQ,EAAE,OAAO,CAAC,OAAO,EAAE,OAAO,CAAC,YAAY,IAAI,CAAC,CAAC,CAAC;QACzF,QAAQ,GAAG,OAAO,CAAC;IACrB,CAAC;IAED,MAAM,EAAE,KAAK,EAAE,MAAM,EAAE,GAAG,qBAAqB,CAAC,QAAQ,CAAC,CAAC;IAC1D,MAAM,QAAQ,
GAAG,aAAa,CAAC,MAAM,CAAC,QAAQ,EAAE,IAAI,CAAC,CAAC;IAEtD,IAAI,OAAO,CAAC,QAAQ,IAAI,QAAQ,CAAC,MAAM,GAAG,OAAO,CAAC,QAAQ,EAAE,CAAC;QAC3D,QAAQ,GAAG,QAAQ,CAAC,KAAK,CAAC,CAAC,EAAE,OAAO,CAAC,QAAQ,CAAC,CAAC;IACjD,CAAC;IAED,OAAO,EAAE,GAAG,MAAM,EAAE,QAAQ,EAAE,KAAK,EAAE,MAAM,EAAE,QAAQ,EAAE,CAAC;AAC1D,CAAC"}
|
|
@@ -1,3 +1,3 @@
|
|
|
1
1
|
import type { ExtractionResult } from '../types.js';
|
|
2
|
-
export declare function readabilityExtract(html: string,
|
|
2
|
+
export declare function readabilityExtract(html: string, _url: string): ExtractionResult | null;
|
|
3
3
|
//# sourceMappingURL=readability.d.ts.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"readability.d.ts","sourceRoot":"","sources":["../../src/extraction/readability.ts"],"names":[],"mappings":"AAGA,OAAO,KAAK,EAAE,gBAAgB,EAAE,MAAM,aAAa,CAAC;AAIpD,wBAAgB,kBAAkB,CAAC,IAAI,EAAE,MAAM,EAAE,
|
|
1
|
+
{"version":3,"file":"readability.d.ts","sourceRoot":"","sources":["../../src/extraction/readability.ts"],"names":[],"mappings":"AAGA,OAAO,KAAK,EAAE,gBAAgB,EAAE,MAAM,aAAa,CAAC;AAIpD,wBAAgB,kBAAkB,CAAC,IAAI,EAAE,MAAM,EAAE,IAAI,EAAE,MAAM,GAAG,gBAAgB,GAAG,IAAI,CA0BtF"}
|
|
@@ -2,7 +2,7 @@ import { Readability } from '@mozilla/readability';
|
|
|
2
2
|
import { parseHTML } from 'linkedom';
|
|
3
3
|
import TurndownService from 'turndown';
|
|
4
4
|
const MIN_CONTENT_THRESHOLD = 100;
|
|
5
|
-
export function readabilityExtract(html,
|
|
5
|
+
export function readabilityExtract(html, _url) {
|
|
6
6
|
try {
|
|
7
7
|
const { document } = parseHTML(html);
|
|
8
8
|
const reader = new Readability(document);
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"readability.js","sourceRoot":"","sources":["../../src/extraction/readability.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,WAAW,EAAE,MAAM,sBAAsB,CAAC;AACnD,OAAO,EAAE,SAAS,EAAE,MAAM,UAAU,CAAC;AACrC,OAAO,eAAe,MAAM,UAAU,CAAC;AAGvC,MAAM,qBAAqB,GAAG,GAAG,CAAC;AAElC,MAAM,UAAU,kBAAkB,CAAC,IAAY,EAAE,
|
|
1
|
+
{"version":3,"file":"readability.js","sourceRoot":"","sources":["../../src/extraction/readability.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,WAAW,EAAE,MAAM,sBAAsB,CAAC;AACnD,OAAO,EAAE,SAAS,EAAE,MAAM,UAAU,CAAC;AACrC,OAAO,eAAe,MAAM,UAAU,CAAC;AAGvC,MAAM,qBAAqB,GAAG,GAAG,CAAC;AAElC,MAAM,UAAU,kBAAkB,CAAC,IAAY,EAAE,IAAY;IAC3D,IAAI,CAAC;QACH,MAAM,EAAE,QAAQ,EAAE,GAAG,SAAS,CAAC,IAAI,CAAC,CAAC;QACrC,MAAM,MAAM,GAAG,IAAI,WAAW,CAAC,QAAe,CAAC,CAAC;QAChD,MAAM,OAAO,GAAG,MAAM,CAAC,KAAK,EAAE,CAAC;QAC/B,IAAI,CAAC,OAAO,IAAI,CAAC,OAAO,CAAC,OAAO;YAAE,OAAO,IAAI,CAAC;QAE9C,MAAM,QAAQ,GAAG,IAAI,eAAe,CAAC,EAAE,YAAY,EAAE,KAAK,EAAE,cAAc,EAAE,QAAQ,EAAE,CAAC,CAAC;QACxF,MAAM,QAAQ,GAAG,QAAQ,CAAC,QAAQ,CAAC,OAAO,CAAC,OAAO,CAAC,CAAC;QAEpD,IAAI,QAAQ,CAAC,MAAM,GAAG,qBAAqB;YAAE,OAAO,IAAI,CAAC;QAEzD,OAAO;YACL,KAAK,EAAE,OAAO,CAAC,KAAK,IAAI,EAAE;YAC1B,QAAQ;YACR,QAAQ,EAAE;gBACR,MAAM,EAAE,OAAO,CAAC,MAAM,IAAI,SAAS;gBACnC,QAAQ,EAAE,OAAO,CAAC,IAAI,IAAI,SAAS;aACpC;YACD,KAAK,EAAE,EAAE;YACT,MAAM,EAAE,EAAE;YACV,SAAS,EAAE,aAAa;SACzB,CAAC;IACJ,CAAC;IAAC,MAAM,CAAC;QACP,OAAO,IAAI,CAAC;IACd,CAAC;AACH,CAAC"}
|
|
@@ -7,7 +7,7 @@ function isIssueOrPR(url) {
|
|
|
7
7
|
function isBlob(url) {
|
|
8
8
|
return /\/blob\//.test(url);
|
|
9
9
|
}
|
|
10
|
-
function extractIssue(document,
|
|
10
|
+
function extractIssue(document, _url) {
|
|
11
11
|
const titleEl = document.querySelector('.js-issue-title') ?? document.querySelector('.gh-header-title');
|
|
12
12
|
if (!titleEl)
|
|
13
13
|
return null;
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"github.js","sourceRoot":"","sources":["../../../src/extraction/site-extractors/github.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,SAAS,EAAE,MAAM,UAAU,CAAC;AACrC,OAAO,eAAe,MAAM,UAAU,CAAC;AAGvC,MAAM,QAAQ,GAAG,IAAI,eAAe,CAAC,EAAE,YAAY,EAAE,KAAK,EAAE,cAAc,EAAE,QAAQ,EAAE,CAAC,CAAC;AAExF,SAAS,WAAW,CAAC,GAAW;IAC9B,OAAO,2BAA2B,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;AAC/C,CAAC;AAED,SAAS,MAAM,CAAC,GAAW;IACzB,OAAO,UAAU,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;AAC9B,CAAC;AAED,SAAS,YAAY,CAAC,QAAkB,EAAE,
|
|
1
|
+
{"version":3,"file":"github.js","sourceRoot":"","sources":["../../../src/extraction/site-extractors/github.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,SAAS,EAAE,MAAM,UAAU,CAAC;AACrC,OAAO,eAAe,MAAM,UAAU,CAAC;AAGvC,MAAM,QAAQ,GAAG,IAAI,eAAe,CAAC,EAAE,YAAY,EAAE,KAAK,EAAE,cAAc,EAAE,QAAQ,EAAE,CAAC,CAAC;AAExF,SAAS,WAAW,CAAC,GAAW;IAC9B,OAAO,2BAA2B,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;AAC/C,CAAC;AAED,SAAS,MAAM,CAAC,GAAW;IACzB,OAAO,UAAU,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;AAC9B,CAAC;AAED,SAAS,YAAY,CAAC,QAAkB,EAAE,IAAY;IACpD,MAAM,OAAO,GAAG,QAAQ,CAAC,aAAa,CAAC,iBAAiB,CAAC,IAAI,QAAQ,CAAC,aAAa,CAAC,kBAAkB,CAAC,CAAC;IACxG,IAAI,CAAC,OAAO;QAAE,OAAO,IAAI,CAAC;IAE1B,MAAM,KAAK,GAAG,OAAO,CAAC,WAAW,EAAE,IAAI,EAAE,IAAI,EAAE,CAAC;IAEhD,MAAM,QAAQ,GAAG,QAAQ,CAAC,gBAAgB,CAAC,aAAa,CAAC,CAAC;IAC1D,MAAM,MAAM,GAAG,KAAK,CAAC,IAAI,CAAC,QAAQ,CAAC;SAChC,GAAG,CAAC,CAAC,EAAE,EAAE,EAAE,CAAC,EAAE,CAAC,WAAW,EAAE,IAAI,EAAE,IAAI,EAAE,CAAC;SACzC,MAAM,CAAC,OAAO,CAAC,CAAC;IAEnB,MAAM,aAAa,GAAG,QAAQ,CAAC,gBAAgB,CAAC,uBAAuB,CAAC,CAAC;IACzE,IAAI,aAAa,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,IAAI,CAAC;IAE5C,MAAM,QAAQ,GAAa,EAAE,CAAC;IAE9B,IAAI,MAAM,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QACtB,QAAQ,CAAC,IAAI,CAAC,eAAe,MAAM,CAAC,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;IACtD,CAAC;IAED,KAAK,CAAC,IAAI,CAAC,aAAa,CAAC,CAAC,OAAO,CAAC,CAAC,IAAI,EAAE,CAAC,EAAE,EAAE;QAC5C,MAAM,IAAI,GAAI,IAAgB,CAAC,SAAS,CAAC;QACzC,MAAM,EAAE,GAAG,QAAQ,CAAC,QAAQ,CAAC,IAAI,CAAC,CAAC,IAAI,EAAE,CAAC;QAC1C,IAAI,EAAE,EAAE,CAAC;YACP,QAAQ,CAAC,IAAI,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,UAAU,EAAE,EAAE,CAAC,CAAC;QAC/C,CAAC;IACH,CAAC,CAAC,CAAC;IAEH,MAAM,QAAQ,GAAG,QAAQ,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;IAEvC,OAAO;QACL,KAAK;QACL,QAAQ;QACR,QAAQ,EAAE,EAAE;QACZ,KAAK,EAAE,EAAE;QACT,MAAM,EAAE,EAAE;QACV,SAAS,EAAE,eAAe;KAC3B,CAAC;AACJ,CAAC;AAED,SAAS,aAAa,CAAC,QAAkB;IACvC,MAAM,OAAO,GAAG,QAAQ,CAAC,aAAa,CAAC,OAAO,CAAC,CAAC;IAChD,MAAM,QAAQ,GAAG,OAAO,EAAE,WAAW,EAAE,IAAI,EAAE,IAAI,EAAE,CAAC;IACpD,MAAM,KAAK,GAAG,QAAQ,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,EAAE,IAAI,EAAE,IAAI,QAAQ,CAAC;IAEzD,MAAM,
UAAU,GACd,QAAQ,CAAC,aAAa,CAAC,wBAAwB,CAAC;QAChD,QAAQ,CAAC,aAAa,CAAC,gBAAgB,CAAC,CAAC;IAE3C,IAAI,CAAC,UAAU;QAAE,OAAO,IAAI,CAAC;IAE7B,MAAM,QAAQ,GAAG,QAAQ,CAAC,QAAQ,CAAE,UAAsB,CAAC,SAAS,CAAC,CAAC,IAAI,EAAE,CAAC;IAC7E,IAAI,CAAC,QAAQ;QAAE,OAAO,IAAI,CAAC;IAE3B,OAAO;QACL,KAAK;QACL,QAAQ;QACR,QAAQ,EAAE,EAAE;QACZ,KAAK,EAAE,EAAE;QACT,MAAM,EAAE,EAAE;QACV,SAAS,EAAE,eAAe;KAC3B,CAAC;AACJ,CAAC;AAED,SAAS,WAAW,CAAC,QAAkB;IACrC,MAAM,OAAO,GAAG,QAAQ,CAAC,aAAa,CAAC,OAAO,CAAC,CAAC;IAChD,MAAM,KAAK,GAAG,OAAO,EAAE,WAAW,EAAE,IAAI,EAAE,IAAI,EAAE,CAAC;IAEjD,MAAM,SAAS,GACb,QAAQ,CAAC,aAAa,CAAC,oBAAoB,CAAC;QAC5C,QAAQ,CAAC,aAAa,CAAC,YAAY,CAAC;QACpC,QAAQ,CAAC,aAAa,CAAC,gBAAgB,CAAC,CAAC;IAE3C,IAAI,CAAC,SAAS;QAAE,OAAO,IAAI,CAAC;IAE5B,MAAM,QAAQ,GAAG,QAAQ,CAAC,QAAQ,CAAE,SAAqB,CAAC,SAAS,CAAC,CAAC,IAAI,EAAE,CAAC;IAC5E,IAAI,CAAC,QAAQ;QAAE,OAAO,IAAI,CAAC;IAE3B,OAAO;QACL,KAAK;QACL,QAAQ;QACR,QAAQ,EAAE,EAAE;QACZ,KAAK,EAAE,EAAE;QACT,MAAM,EAAE,EAAE;QACV,SAAS,EAAE,eAAe;KAC3B,CAAC;AACJ,CAAC;AAED,MAAM,CAAC,MAAM,eAAe,GAAc;IACxC,IAAI,EAAE,QAAQ;IAEd,SAAS,CAAC,GAAW;QACnB,IAAI,CAAC;YACH,MAAM,QAAQ,GAAG,IAAI,GAAG,CAAC,GAAG,CAAC,CAAC,QAAQ,CAAC;YACvC,OAAO,QAAQ,KAAK,YAAY,IAAI,QAAQ,CAAC,QAAQ,CAAC,aAAa,CAAC,CAAC;QACvE,CAAC;QAAC,MAAM,CAAC;YACP,OAAO,KAAK,CAAC;QACf,CAAC;IACH,CAAC;IAED,OAAO,CAAC,IAAY,EAAE,GAAW;QAC/B,IAAI,CAAC,IAAI;YAAE,OAAO,IAAI,CAAC;QAEvB,MAAM,EAAE,QAAQ,EAAE,GAAG,SAAS,CAAC,IAAI,CAAC,CAAC;QAErC,IAAI,WAAW,CAAC,GAAG,CAAC,EAAE,CAAC;YACrB,OAAO,YAAY,CAAC,QAAQ,EAAE,GAAG,CAAC,CAAC;QACrC,CAAC;QAED,IAAI,MAAM,CAAC,GAAG,CAAC,EAAE,CAAC;YAChB,OAAO,WAAW,CAAC,QAAQ,CAAC,CAAC;QAC/B,CAAC;QAED,OAAO,aAAa,CAAC,QAAQ,CAAC,CAAC;IACjC,CAAC;CACF,CAAC"}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"mdn.js","sourceRoot":"","sources":["../../../src/extraction/site-extractors/mdn.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,SAAS,EAAE,MAAM,UAAU,CAAC;AACrC,OAAO,eAAe,MAAM,UAAU,CAAC;AAGvC,MAAM,QAAQ,GAAG,IAAI,eAAe,CAAC,EAAE,YAAY,EAAE,KAAK,EAAE,cAAc,EAAE,QAAQ,EAAE,CAAC,CAAC;AAExF,MAAM,eAAe,GAAG;IACtB,KAAK;IACL,UAAU;IACV,QAAQ;IACR,QAAQ;IACR,UAAU;IACV,WAAW;CACZ,CAAC;AAEF,MAAM,CAAC,MAAM,YAAY,GAAc;IACrC,IAAI,EAAE,KAAK;IAEX,SAAS,CAAC,GAAW;QACnB,IAAI,CAAC;YACH,MAAM,QAAQ,GAAG,IAAI,GAAG,CAAC,GAAG,CAAC,CAAC,QAAQ,CAAC;YACvC,OAAO,QAAQ,KAAK,uBAAuB,CAAC;QAC9C,CAAC;QAAC,MAAM,CAAC;YACP,OAAO,KAAK,CAAC;QACf,CAAC;IACH,CAAC;IAED,OAAO,CAAC,IAAY,EAAE,
|
|
1
|
+
{"version":3,"file":"mdn.js","sourceRoot":"","sources":["../../../src/extraction/site-extractors/mdn.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,SAAS,EAAE,MAAM,UAAU,CAAC;AACrC,OAAO,eAAe,MAAM,UAAU,CAAC;AAGvC,MAAM,QAAQ,GAAG,IAAI,eAAe,CAAC,EAAE,YAAY,EAAE,KAAK,EAAE,cAAc,EAAE,QAAQ,EAAE,CAAC,CAAC;AAExF,MAAM,eAAe,GAAG;IACtB,KAAK;IACL,UAAU;IACV,QAAQ;IACR,QAAQ;IACR,UAAU;IACV,WAAW;CACZ,CAAC;AAEF,MAAM,CAAC,MAAM,YAAY,GAAc;IACrC,IAAI,EAAE,KAAK;IAEX,SAAS,CAAC,GAAW;QACnB,IAAI,CAAC;YACH,MAAM,QAAQ,GAAG,IAAI,GAAG,CAAC,GAAG,CAAC,CAAC,QAAQ,CAAC;YACvC,OAAO,QAAQ,KAAK,uBAAuB,CAAC;QAC9C,CAAC;QAAC,MAAM,CAAC;YACP,OAAO,KAAK,CAAC;QACf,CAAC;IACH,CAAC;IAED,OAAO,CAAC,IAAY,EAAE,IAAY;QAChC,IAAI,CAAC,IAAI;YAAE,OAAO,IAAI,CAAC;QAEvB,MAAM,EAAE,QAAQ,EAAE,GAAG,SAAS,CAAC,IAAI,CAAC,CAAC;QAErC,MAAM,OAAO,GACX,QAAQ,CAAC,aAAa,CAAC,2BAA2B,CAAC;YACnD,QAAQ,CAAC,aAAa,CAAC,kBAAkB,CAAC;YAC1C,QAAQ,CAAC,aAAa,CAAC,SAAS,CAAC,CAAC;QAEpC,IAAI,CAAC,OAAO;YAAE,OAAO,IAAI,CAAC;QAE1B,KAAK,MAAM,QAAQ,IAAI,eAAe,EAAE,CAAC;YACvC,KAAK,MAAM,EAAE,IAAI,KAAK,CAAC,IAAI,CAAC,OAAO,CAAC,gBAAgB,CAAC,QAAQ,CAAC,CAAC,EAAE,CAAC;gBAChE,EAAE,CAAC,UAAU,EAAE,WAAW,CAAC,EAAE,CAAC,CAAC;YACjC,CAAC;QACH,CAAC;QAED,MAAM,OAAO,GACX,OAAO,CAAC,aAAa,CAAC,IAAI,CAAC;YAC3B,QAAQ,CAAC,aAAa,CAAC,OAAO,CAAC,CAAC;QAElC,MAAM,QAAQ,GAAG,OAAO,EAAE,WAAW,EAAE,IAAI,EAAE,IAAI,EAAE,CAAC;QACpD,MAAM,KAAK,GAAG,QAAQ,CAAC,QAAQ,CAAC,GAAG,CAAC;YAClC,CAAC,CAAC,QAAQ,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,CAAE,CAAC,IAAI,EAAE;YAChC,CAAC,CAAC,QAAQ,CAAC;QAEb,IAAI,CAAC,KAAK;YAAE,OAAO,IAAI,CAAC;QAExB,MAAM,QAAQ,GAAG,QAAQ,CAAC,QAAQ,CAAE,OAAmB,CAAC,SAAS,CAAC,CAAC,IAAI,EAAE,CAAC;QAC1E,IAAI,CAAC,QAAQ;YAAE,OAAO,IAAI,CAAC;QAE3B,OAAO;YACL,KAAK;YACL,QAAQ;YACR,QAAQ,EAAE,EAAE;YACZ,KAAK,EAAE,EAAE;YACT,MAAM,EAAE,EAAE;YACV,SAAS,EAAE,eAAe;SAC3B,CAAC;IACJ,CAAC;CACF,CAAC"}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"stackoverflow.js","sourceRoot":"","sources":["../../../src/extraction/site-extractors/stackoverflow.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,SAAS,EAAE,MAAM,UAAU,CAAC;AACrC,OAAO,eAAe,MAAM,UAAU,CAAC;AAGvC,MAAM,QAAQ,GAAG,IAAI,eAAe,CAAC,EAAE,YAAY,EAAE,KAAK,EAAE,cAAc,EAAE,QAAQ,EAAE,CAAC,CAAC;AAQxF,SAAS,UAAU,CAAC,EAAkB;IACpC,IAAI,CAAC,EAAE;QAAE,OAAO,CAAC,CAAC;IAClB,MAAM,MAAM,GAAG,EAAE,CAAC,aAAa,CAAC,gBAAgB,CAAC,CAAC;IAClD,MAAM,GAAG,GAAG,MAAM,EAAE,YAAY,CAAC,YAAY,CAAC,IAAI,MAAM,EAAE,WAAW,EAAE,IAAI,EAAE,IAAI,GAAG,CAAC;IACrF,OAAO,QAAQ,CAAC,GAAG,EAAE,EAAE,CAAC,IAAI,CAAC,CAAC;AAChC,CAAC;AAED,SAAS,YAAY,CAAC,QAAkB;IACtC,MAAM,SAAS,GAAG,QAAQ,CAAC,gBAAgB,CAAC,kBAAkB,CAAC,CAAC;IAChE,MAAM,OAAO,GAAa,EAAE,CAAC;IAE7B,KAAK,MAAM,EAAE,IAAI,KAAK,CAAC,IAAI,CAAC,SAAS,CAAC,EAAE,CAAC;QACvC,MAAM,QAAQ,GAAG,EAAE,CAAC,SAAS,CAAC,QAAQ,CAAC,iBAAiB,CAAC,CAAC;QAC1D,MAAM,KAAK,GAAG,UAAU,CAAC,EAAa,CAAC,CAAC;QACxC,MAAM,MAAM,GAAG,EAAE,CAAC,aAAa,CAAC,qCAAqC,CAAC,CAAC;QACvE,MAAM,QAAQ,GAAG,MAAM,CAAC,CAAC,CAAE,MAAkB,CAAC,SAAS,CAAC,CAAC,CAAC,EAAE,CAAC;QAC7D,OAAO,CAAC,IAAI,CAAC,EAAE,QAAQ,EAAE,KAAK,EAAE,QAAQ,EAAE,CAAC,CAAC;IAC9C,CAAC;IAED,OAAO,OAAO,CAAC;AACjB,CAAC;AAED,SAAS,aAAa,CACpB,KAAa,EACb,IAAc,EACd,KAAa,EACb,YAAoB,EACpB,OAAiB;IAEjB,MAAM,OAAO,GAAG,SAAS,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,aAAa,KAAK,EAAE,CAAC;IAC7D,MAAM,UAAU,GAAG,QAAQ,CAAC,QAAQ,CAAC,YAAY,CAAC,CAAC,IAAI,EAAE,CAAC;IAE1D,MAAM,QAAQ,GAAa;QACzB,KAAK,KAAK,EAAE;QACZ,OAAO;QACP,EAAE;QACF,UAAU;KACX,CAAC;IAEF,MAAM,QAAQ,GAAG,OAAO,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC;IACnD,MAAM,MAAM,GAAG,OAAO,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,KAAK,GAAG,CAAC,CAAC,KAAK,CAAC,CAAC;IACpF,MAAM,OAAO,GAAG,CAAC,GAAG,QAAQ,EAAE,GAAG,MAAM,CAAC,CAAC;IAEzC,KAAK,MAAM,MAAM,IAAI,OAAO,EAAE,CAAC;QAC7B,MAAM,OAAO,GAAG,MAAM,CAAC,QAAQ;YAC7B,CAAC,CAAC,8BAA8B,MAAM,CAAC,KAAK,GAAG;YAC/C,CAAC,CAAC,qBAAqB,MAAM,CAAC,KAAK,GAAG,CAAC;QACzC,MAAM,MAAM,GAAG,QAAQ,CAAC,QAAQ,CAAC,M
AAM,CAAC,QAAQ,CAAC,CAAC,IAAI,EAAE,CAAC;QACzD,QAAQ,CAAC,IAAI,CAAC,KAAK,EAAE,EAAE,EAAE,OAAO,EAAE,EAAE,EAAE,MAAM,CAAC,CAAC;IAChD,CAAC;IAED,OAAO,QAAQ,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;AAC/B,CAAC;AAED,MAAM,CAAC,MAAM,sBAAsB,GAAc;IAC/C,IAAI,EAAE,eAAe;IAErB,SAAS,CAAC,GAAW;QACnB,IAAI,CAAC;YACH,MAAM,QAAQ,GAAG,IAAI,GAAG,CAAC,GAAG,CAAC,CAAC,QAAQ,CAAC;YACvC,OAAO,QAAQ,KAAK,mBAAmB;gBACrC,QAAQ,CAAC,QAAQ,CAAC,oBAAoB,CAAC;gBACvC,QAAQ,KAAK,mBAAmB;gBAChC,QAAQ,CAAC,QAAQ,CAAC,oBAAoB,CAAC,CAAC;QAC5C,CAAC;QAAC,MAAM,CAAC;YACP,OAAO,KAAK,CAAC;QACf,CAAC;IACH,CAAC;IAED,OAAO,CAAC,IAAY,EAAE,
|
|
1
|
+
{"version":3,"file":"stackoverflow.js","sourceRoot":"","sources":["../../../src/extraction/site-extractors/stackoverflow.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,SAAS,EAAE,MAAM,UAAU,CAAC;AACrC,OAAO,eAAe,MAAM,UAAU,CAAC;AAGvC,MAAM,QAAQ,GAAG,IAAI,eAAe,CAAC,EAAE,YAAY,EAAE,KAAK,EAAE,cAAc,EAAE,QAAQ,EAAE,CAAC,CAAC;AAQxF,SAAS,UAAU,CAAC,EAAkB;IACpC,IAAI,CAAC,EAAE;QAAE,OAAO,CAAC,CAAC;IAClB,MAAM,MAAM,GAAG,EAAE,CAAC,aAAa,CAAC,gBAAgB,CAAC,CAAC;IAClD,MAAM,GAAG,GAAG,MAAM,EAAE,YAAY,CAAC,YAAY,CAAC,IAAI,MAAM,EAAE,WAAW,EAAE,IAAI,EAAE,IAAI,GAAG,CAAC;IACrF,OAAO,QAAQ,CAAC,GAAG,EAAE,EAAE,CAAC,IAAI,CAAC,CAAC;AAChC,CAAC;AAED,SAAS,YAAY,CAAC,QAAkB;IACtC,MAAM,SAAS,GAAG,QAAQ,CAAC,gBAAgB,CAAC,kBAAkB,CAAC,CAAC;IAChE,MAAM,OAAO,GAAa,EAAE,CAAC;IAE7B,KAAK,MAAM,EAAE,IAAI,KAAK,CAAC,IAAI,CAAC,SAAS,CAAC,EAAE,CAAC;QACvC,MAAM,QAAQ,GAAG,EAAE,CAAC,SAAS,CAAC,QAAQ,CAAC,iBAAiB,CAAC,CAAC;QAC1D,MAAM,KAAK,GAAG,UAAU,CAAC,EAAa,CAAC,CAAC;QACxC,MAAM,MAAM,GAAG,EAAE,CAAC,aAAa,CAAC,qCAAqC,CAAC,CAAC;QACvE,MAAM,QAAQ,GAAG,MAAM,CAAC,CAAC,CAAE,MAAkB,CAAC,SAAS,CAAC,CAAC,CAAC,EAAE,CAAC;QAC7D,OAAO,CAAC,IAAI,CAAC,EAAE,QAAQ,EAAE,KAAK,EAAE,QAAQ,EAAE,CAAC,CAAC;IAC9C,CAAC;IAED,OAAO,OAAO,CAAC;AACjB,CAAC;AAED,SAAS,aAAa,CACpB,KAAa,EACb,IAAc,EACd,KAAa,EACb,YAAoB,EACpB,OAAiB;IAEjB,MAAM,OAAO,GAAG,SAAS,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,aAAa,KAAK,EAAE,CAAC;IAC7D,MAAM,UAAU,GAAG,QAAQ,CAAC,QAAQ,CAAC,YAAY,CAAC,CAAC,IAAI,EAAE,CAAC;IAE1D,MAAM,QAAQ,GAAa;QACzB,KAAK,KAAK,EAAE;QACZ,OAAO;QACP,EAAE;QACF,UAAU;KACX,CAAC;IAEF,MAAM,QAAQ,GAAG,OAAO,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC;IACnD,MAAM,MAAM,GAAG,OAAO,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,KAAK,GAAG,CAAC,CAAC,KAAK,CAAC,CAAC;IACpF,MAAM,OAAO,GAAG,CAAC,GAAG,QAAQ,EAAE,GAAG,MAAM,CAAC,CAAC;IAEzC,KAAK,MAAM,MAAM,IAAI,OAAO,EAAE,CAAC;QAC7B,MAAM,OAAO,GAAG,MAAM,CAAC,QAAQ;YAC7B,CAAC,CAAC,8BAA8B,MAAM,CAAC,KAAK,GAAG;YAC/C,CAAC,CAAC,qBAAqB,MAAM,CAAC,KAAK,GAAG,CAAC;QACzC,MAAM,MAAM,GAAG,QAAQ,CAAC,QAAQ,CAAC,M
AAM,CAAC,QAAQ,CAAC,CAAC,IAAI,EAAE,CAAC;QACzD,QAAQ,CAAC,IAAI,CAAC,KAAK,EAAE,EAAE,EAAE,OAAO,EAAE,EAAE,EAAE,MAAM,CAAC,CAAC;IAChD,CAAC;IAED,OAAO,QAAQ,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;AAC/B,CAAC;AAED,MAAM,CAAC,MAAM,sBAAsB,GAAc;IAC/C,IAAI,EAAE,eAAe;IAErB,SAAS,CAAC,GAAW;QACnB,IAAI,CAAC;YACH,MAAM,QAAQ,GAAG,IAAI,GAAG,CAAC,GAAG,CAAC,CAAC,QAAQ,CAAC;YACvC,OAAO,QAAQ,KAAK,mBAAmB;gBACrC,QAAQ,CAAC,QAAQ,CAAC,oBAAoB,CAAC;gBACvC,QAAQ,KAAK,mBAAmB;gBAChC,QAAQ,CAAC,QAAQ,CAAC,oBAAoB,CAAC,CAAC;QAC5C,CAAC;QAAC,MAAM,CAAC;YACP,OAAO,KAAK,CAAC;QACf,CAAC;IACH,CAAC;IAED,OAAO,CAAC,IAAY,EAAE,IAAY;QAChC,IAAI,CAAC,IAAI;YAAE,OAAO,IAAI,CAAC;QAEvB,MAAM,EAAE,QAAQ,EAAE,GAAG,SAAS,CAAC,IAAI,CAAC,CAAC;QAErC,MAAM,OAAO,GAAG,QAAQ,CAAC,aAAa,CAAC,qBAAqB,CAAC,CAAC;QAC9D,IAAI,CAAC,OAAO;YAAE,OAAO,IAAI,CAAC;QAE1B,MAAM,KAAK,GAAG,OAAO,CAAC,WAAW,EAAE,IAAI,EAAE,IAAI,EAAE,CAAC;QAChD,IAAI,CAAC,KAAK;YAAE,OAAO,IAAI,CAAC;QAExB,MAAM,cAAc,GAAG,QAAQ,CAAC,aAAa,CAAC,mEAAmE,CAAC,CAAC;QACnH,IAAI,CAAC,cAAc;YAAE,OAAO,IAAI,CAAC;QAEjC,MAAM,YAAY,GAAI,cAA0B,CAAC,SAAS,CAAC;QAE3D,MAAM,MAAM,GAAG,QAAQ,CAAC,gBAAgB,CAAC,8DAA8D,CAAC,CAAC;QACzG,MAAM,IAAI,GAAG,KAAK,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC,GAAG,CAAC,CAAC,EAAE,EAAE,EAAE,CAAC,EAAE,CAAC,WAAW,EAAE,IAAI,EAAE,IAAI,EAAE,CAAC,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC;QAE1F,MAAM,UAAU,GAAG,QAAQ,CAAC,aAAa,CAAC,WAAW,CAAC,CAAC;QACvD,MAAM,KAAK,GAAG,UAAU,CAAC,UAA4B,CAAC,CAAC;QAEvD,MAAM,OAAO,GAAG,YAAY,CAAC,QAAQ,CAAC,CAAC;QAEvC,MAAM,QAAQ,GAAG,aAAa,CAAC,KAAK,EAAE,IAAI,EAAE,KAAK,EAAE,YAAY,EAAE,OAAO,CAAC,CAAC;QAE1E,OAAO;YACL,KAAK;YACL,QAAQ;YACR,QAAQ,EAAE,EAAE;YACZ,KAAK,EAAE,EAAE;YACT,MAAM,EAAE,EAAE;YACV,SAAS,EAAE,eAAe;SAC3B,CAAC;IACJ,CAAC;CACF,CAAC"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"structured.d.ts","sourceRoot":"","sources":["../../src/extraction/structured.ts"],"names":[],"mappings":"AACA,OAAO,KAAK,EAAE,cAAc,EAA2C,MAAM,aAAa,CAAC;AAU3F,wBAAgB,iBAAiB,CAAC,IAAI,EAAE,MAAM,GAAG,cAAc,CAgB9D;AAuLD,YAAY,EAAE,SAAS,EAAE,MAAM,aAAa,CAAC"}
|