scrapex 1.0.0-alpha.1 → 1.0.0-beta.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (50)
  1. package/README.md +164 -5
  2. package/dist/enhancer-ByjRD-t5.mjs +769 -0
  3. package/dist/enhancer-ByjRD-t5.mjs.map +1 -0
  4. package/dist/enhancer-j0xqKDJm.cjs +847 -0
  5. package/dist/enhancer-j0xqKDJm.cjs.map +1 -0
  6. package/dist/index-CDgcRnig.d.cts +268 -0
  7. package/dist/index-CDgcRnig.d.cts.map +1 -0
  8. package/dist/index-piS5wtki.d.mts +268 -0
  9. package/dist/index-piS5wtki.d.mts.map +1 -0
  10. package/dist/index.cjs +1192 -37
  11. package/dist/index.cjs.map +1 -1
  12. package/dist/index.d.cts +318 -2
  13. package/dist/index.d.cts.map +1 -1
  14. package/dist/index.d.mts +318 -2
  15. package/dist/index.d.mts.map +1 -1
  16. package/dist/index.mjs +1164 -6
  17. package/dist/index.mjs.map +1 -1
  18. package/dist/llm/index.cjs +250 -232
  19. package/dist/llm/index.cjs.map +1 -1
  20. package/dist/llm/index.d.cts +132 -85
  21. package/dist/llm/index.d.cts.map +1 -1
  22. package/dist/llm/index.d.mts +132 -85
  23. package/dist/llm/index.d.mts.map +1 -1
  24. package/dist/llm/index.mjs +243 -236
  25. package/dist/llm/index.mjs.map +1 -1
  26. package/dist/parsers/index.cjs +10 -199
  27. package/dist/parsers/index.d.cts +2 -133
  28. package/dist/parsers/index.d.mts +2 -133
  29. package/dist/parsers/index.mjs +2 -191
  30. package/dist/parsers-Bneuws8x.cjs +569 -0
  31. package/dist/parsers-Bneuws8x.cjs.map +1 -0
  32. package/dist/parsers-CwkYnyWY.mjs +482 -0
  33. package/dist/parsers-CwkYnyWY.mjs.map +1 -0
  34. package/dist/types-CadAXrme.d.mts +674 -0
  35. package/dist/types-CadAXrme.d.mts.map +1 -0
  36. package/dist/types-DPEtPihB.d.cts +674 -0
  37. package/dist/types-DPEtPihB.d.cts.map +1 -0
  38. package/package.json +15 -16
  39. package/dist/enhancer-Q6CSc1gA.mjs +0 -220
  40. package/dist/enhancer-Q6CSc1gA.mjs.map +0 -1
  41. package/dist/enhancer-oM4BhYYS.cjs +0 -268
  42. package/dist/enhancer-oM4BhYYS.cjs.map +0 -1
  43. package/dist/parsers/index.cjs.map +0 -1
  44. package/dist/parsers/index.d.cts.map +0 -1
  45. package/dist/parsers/index.d.mts.map +0 -1
  46. package/dist/parsers/index.mjs.map +0 -1
  47. package/dist/types-CNQZVW36.d.mts +0 -150
  48. package/dist/types-CNQZVW36.d.mts.map +0 -1
  49. package/dist/types-D0HYR95H.d.cts +0 -150
  50. package/dist/types-D0HYR95H.d.cts.map +0 -1
package/package.json CHANGED
@@ -1,6 +1,6 @@
  {
  "name": "scrapex",
- "version": "1.0.0-alpha.1",
+ "version": "1.0.0-beta.1",
  "description": "Modern web scraper with LLM-enhanced extraction, extensible pipeline, and pluggable parsers",
  "type": "module",
  "exports": {
@@ -18,6 +18,11 @@
  "types": "./dist/llm/index.d.mts",
  "import": "./dist/llm/index.mjs",
  "require": "./dist/llm/index.cjs"
+ },
+ "./embeddings": {
+ "types": "./dist/embeddings/index.d.mts",
+ "import": "./dist/embeddings/index.mjs",
+ "require": "./dist/embeddings/index.cjs"
  }
  },
  "main": "./dist/index.cjs",
@@ -53,7 +58,9 @@
  "extraction",
  "readability",
  "markdown",
- "parser"
+ "parser",
+ "embeddings",
+ "vector-search"
  ],
  "author": "Rakesh Paul <https://binaryroute.com/authors/rk-paul/>",
  "license": "MIT",
@@ -63,35 +70,27 @@
  "dependencies": {
  "@mozilla/readability": "^0.6.0",
  "cheerio": "^1.1.2",
- "jsdom": "^27.2.0",
+ "jsdom": "^27.4.0",
  "mdast-util-from-markdown": "^2.0.2",
  "mdast-util-to-string": "^4.0.0",
  "turndown": "^7.2.2",
  "unist-util-visit": "^5.0.0",
- "zod": "^4.1.13"
+ "zod": "^4.3.4"
  },
  "devDependencies": {
- "@biomejs/biome": "^2.3.8",
+ "@biomejs/biome": "^2.3.10",
  "@types/jsdom": "^27.0.0",
  "@types/mdast": "^4.0.4",
  "@types/node": "^22.10.0",
  "@types/turndown": "^5.0.6",
- "tsdown": "^0.17.0",
+ "tsdown": "^0.18.4",
  "typescript": "^5.9.3",
- "vitest": "^4.0.15"
+ "vitest": "^4.0.16"
  },
  "peerDependencies": {
- "@anthropic-ai/sdk": ">=0.30.0",
- "openai": ">=4.0.0",
- "puppeteer": ">=23.0.0"
+ "puppeteer": "^24.34.0"
  },
  "peerDependenciesMeta": {
- "@anthropic-ai/sdk": {
- "optional": true
- },
- "openai": {
- "optional": true
- },
  "puppeteer": {
  "optional": true
  }
package/dist/enhancer-Q6CSc1gA.mjs DELETED
@@ -1,220 +0,0 @@
- import { z } from "zod";
-
- //#region src/core/errors.ts
- /**
- * Custom error class for scraping failures with structured error codes
- */
- var ScrapeError = class ScrapeError extends Error {
- code;
- statusCode;
- constructor(message, code, statusCode, cause) {
- super(message, { cause });
- this.name = "ScrapeError";
- this.code = code;
- this.statusCode = statusCode;
- if (Error.captureStackTrace) Error.captureStackTrace(this, ScrapeError);
- }
- /**
- * Create a ScrapeError from an unknown error
- */
- static from(error, code = "FETCH_FAILED") {
- if (error instanceof ScrapeError) return error;
- if (error instanceof Error) return new ScrapeError(error.message, code, void 0, error);
- return new ScrapeError(String(error), code);
- }
- /**
- * Check if error is retryable (network issues, timeouts)
- */
- isRetryable() {
- return this.code === "FETCH_FAILED" || this.code === "TIMEOUT";
- }
- /**
- * Convert to a plain object for serialization
- */
- toJSON() {
- return {
- name: this.name,
- message: this.message,
- code: this.code,
- statusCode: this.statusCode,
- stack: this.stack
- };
- }
- };
-
- //#endregion
- //#region src/llm/types.ts
- /**
- * Zod schemas for LLM outputs
- */
- const SummarySchema = z.object({ summary: z.string().describe("A concise 2-3 sentence summary of the content") });
- const TagsSchema = z.object({ tags: z.array(z.string()).describe("5-10 relevant tags/keywords") });
- const EntitiesSchema = z.object({
- people: z.array(z.string()).describe("People mentioned"),
- organizations: z.array(z.string()).describe("Organizations/companies"),
- technologies: z.array(z.string()).describe("Technologies/tools/frameworks"),
- locations: z.array(z.string()).describe("Locations/places"),
- concepts: z.array(z.string()).describe("Key concepts/topics")
- });
- const ClassifySchema = z.object({
- contentType: z.enum([
- "article",
- "repo",
- "docs",
- "package",
- "video",
- "tool",
- "product",
- "unknown"
- ]).describe("The type of content"),
- confidence: z.number().min(0).max(1).describe("Confidence score 0-1")
- });
-
- //#endregion
- //#region src/llm/enhancer.ts
- /**
- * Enhance scraped data with LLM-powered features
- */
- async function enhance(data, provider, types) {
- const results = {};
- const content = data.excerpt || data.textContent.slice(0, 1e4);
- const context = `Title: ${data.title}\nURL: ${data.url}\n\nContent:\n${content}`;
- const promises = [];
- if (types.includes("summarize")) promises.push(summarize(context, provider).then((summary) => {
- results.summary = summary;
- }));
- if (types.includes("tags")) promises.push(extractTags(context, provider).then((tags) => {
- results.suggestedTags = tags;
- }));
- if (types.includes("entities")) promises.push(extractEntities(context, provider).then((entities) => {
- results.entities = entities;
- }));
- if (types.includes("classify")) promises.push(classify(context, provider).then((classification) => {
- if (classification.confidence > .7) results.contentType = classification.contentType;
- }));
- await Promise.all(promises);
- return results;
- }
- /**
- * Ask a custom question about the scraped content
- * Results are stored in the `custom` field of ScrapedData
- */
- async function ask(data, provider, prompt, options) {
- const key = options?.key || "response";
- const content = data.excerpt || data.textContent.slice(0, 1e4);
- const processedPrompt = applyPlaceholders(prompt, data, content);
- if (options?.schema) {
- const result = await extract(data, provider, options.schema, processedPrompt);
- return { custom: { [key]: result } };
- }
- const fullPrompt = prompt.includes("{{content}}") ? processedPrompt : `${processedPrompt}\n\nTitle: ${data.title}\nURL: ${data.url}\n\nContent:\n${content}`;
- const response = await provider.complete(fullPrompt);
- return { custom: { [key]: response } };
- }
- /**
- * Apply placeholder replacements to a prompt template
- */
- function applyPlaceholders(prompt, data, content) {
- const domain = (() => {
- try {
- return new URL(data.url).hostname;
- } catch {
- return "";
- }
- })();
- return prompt.replace(/\{\{title\}\}/g, data.title).replace(/\{\{url\}\}/g, data.url).replace(/\{\{content\}\}/g, content).replace(/\{\{description\}\}/g, data.description || "").replace(/\{\{excerpt\}\}/g, data.excerpt || "").replace(/\{\{domain\}\}/g, domain);
- }
- /**
- * Extract structured data using LLM and a custom schema
- */
- async function extract(data, provider, schema, promptTemplate) {
- const zodShape = {};
- for (const [key, type] of Object.entries(schema)) {
- const isOptional = type.endsWith("?");
- const baseType = isOptional ? type.slice(0, -1) : type;
- let zodType;
- switch (baseType) {
- case "string":
- zodType = z.string();
- break;
- case "number":
- zodType = z.number();
- break;
- case "boolean":
- zodType = z.boolean();
- break;
- case "string[]":
- zodType = z.array(z.string());
- break;
- case "number[]":
- zodType = z.array(z.number());
- break;
- default: zodType = z.string();
- }
- zodShape[key] = isOptional ? zodType.optional() : zodType;
- }
- const zodSchema = z.object(zodShape);
- const content = data.textContent.slice(0, 4e3);
- let prompt;
- if (promptTemplate) {
- prompt = applyPlaceholders(promptTemplate, data, content);
- if (!promptTemplate.includes("{{content}}")) prompt += `\n\nContext:\n${content}`;
- } else prompt = `Extract the following information from this content:
-
- Title: ${data.title}
- URL: ${data.url}
-
- Content:
- ${content}
-
- Extract these fields:
- ${Object.entries(schema).map(([key, type]) => `- ${key} (${type})`).join("\n")}`;
- return provider.completeJSON(prompt, zodSchema);
- }
- /**
- * Generate a summary of the content
- */
- async function summarize(context, provider) {
- const prompt = `Summarize the following content in 2-3 concise sentences:
-
- ${context}`;
- return (await provider.completeJSON(prompt, SummarySchema)).summary;
- }
- /**
- * Extract relevant tags/keywords
- */
- async function extractTags(context, provider) {
- const prompt = `Extract 5-10 relevant tags or keywords from the following content. Focus on technologies, concepts, and topics mentioned:
-
- ${context}`;
- return (await provider.completeJSON(prompt, TagsSchema)).tags;
- }
- /**
- * Extract named entities from content
- */
- async function extractEntities(context, provider) {
- const prompt = `Extract named entities from the following content. Identify people, organizations, technologies, locations, and key concepts:
-
- ${context}`;
- return provider.completeJSON(prompt, EntitiesSchema);
- }
- /**
- * Classify content type using LLM
- */
- async function classify(context, provider) {
- const prompt = `Classify the following content into one of these categories:
- - article: Blog post, news article, essay
- - repo: Code repository, open source project
- - docs: Documentation, API reference, guides
- - package: npm/pip package page
- - video: Video content, YouTube
- - tool: Software tool, web application
- - product: Commercial product, e-commerce
-
- ${context}`;
- return provider.completeJSON(prompt, ClassifySchema);
- }
-
- //#endregion
- export { EntitiesSchema as a, ScrapeError as c, ClassifySchema as i, enhance as n, SummarySchema as o, extract as r, TagsSchema as s, ask as t };
- //# sourceMappingURL=enhancer-Q6CSc1gA.mjs.map
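Since the deleted chunk above is the clearest view of the LLM enhancer in this diff, a small usage sketch may help: `applyPlaceholders` supports `{{title}}`, `{{url}}`, `{{content}}`, `{{description}}`, `{{excerpt}}`, and `{{domain}}`, and `extract` maps a flat string schema (`"string"`, `"number"`, `"boolean"`, `"string[]"`, `"number[]"`, with a trailing `?` for optional) onto Zod. The import path and the stub provider below are assumptions for illustration; the chunk itself only shows mangled re-exports (`ask as t`, `extract as r`):

```ts
import { ask, extract } from "scrapex/llm"; // assumed public entry point, not confirmed by this diff

// Hypothetical stand-ins for ScrapedData and LLMProvider.
const data = {
  title: "Example Post",
  url: "https://example.com/post",
  textContent: "Full extracted text of the page...",
  excerpt: "Short excerpt of the page.",
  description: "",
};
const provider = {
  name: "stub",
  complete: async (prompt) => `stub answer for: ${prompt.slice(0, 40)}`,
  // A real provider would return model output validated against the schema.
  completeJSON: async (_prompt, _schema) => ({ topics: ["example"], readingMinutes: 3 }),
};

// {{title}} and {{domain}} are substituted ({{domain}} = new URL(data.url).hostname);
// the string response lands under custom.gist.
const answered = await ask(data, provider, "One line: what is {{title}} on {{domain}} about?", { key: "gist" });

// "author" is optional via the trailing "?"; "string[]" becomes z.array(z.string()).
const fields = await extract(data, provider, { author: "string?", topics: "string[]", readingMinutes: "number" });
```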
package/dist/enhancer-Q6CSc1gA.mjs.map DELETED
@@ -1 +0,0 @@
- {"version":3,"file":"enhancer-Q6CSc1gA.mjs","names":["results: Partial<ScrapedData>","promises: Promise<void>[]","zodShape: Record<string, z.ZodTypeAny>","zodType: z.ZodTypeAny","prompt: string"],"sources":["../src/core/errors.ts","../src/llm/types.ts","../src/llm/enhancer.ts"],"sourcesContent":["/**\n * Error codes for scraping failures\n */\nexport type ScrapeErrorCode =\n | 'FETCH_FAILED'\n | 'TIMEOUT'\n | 'INVALID_URL'\n | 'BLOCKED'\n | 'NOT_FOUND'\n | 'ROBOTS_BLOCKED'\n | 'PARSE_ERROR'\n | 'LLM_ERROR'\n | 'VALIDATION_ERROR';\n\n/**\n * Custom error class for scraping failures with structured error codes\n */\nexport class ScrapeError extends Error {\n public readonly code: ScrapeErrorCode;\n public readonly statusCode?: number;\n\n constructor(message: string, code: ScrapeErrorCode, statusCode?: number, cause?: Error) {\n super(message, { cause });\n this.name = 'ScrapeError';\n this.code = code;\n this.statusCode = statusCode;\n\n // Maintains proper stack trace in V8 environments\n if (Error.captureStackTrace) {\n Error.captureStackTrace(this, ScrapeError);\n }\n }\n\n /**\n * Create a ScrapeError from an unknown error\n */\n static from(error: unknown, code: ScrapeErrorCode = 'FETCH_FAILED'): ScrapeError {\n if (error instanceof ScrapeError) {\n return error;\n }\n\n if (error instanceof Error) {\n return new ScrapeError(error.message, code, undefined, error);\n }\n\n return new ScrapeError(String(error), code);\n }\n\n /**\n * Check if error is retryable (network issues, timeouts)\n */\n isRetryable(): boolean {\n return this.code === 'FETCH_FAILED' || this.code === 'TIMEOUT';\n }\n\n /**\n * Convert to a plain object for serialization\n */\n toJSON(): Record<string, unknown> {\n return {\n name: this.name,\n message: this.message,\n code: this.code,\n statusCode: this.statusCode,\n stack: this.stack,\n };\n }\n}\n","import { z } from 'zod';\n\n/**\n * LLM completion options\n */\nexport interface CompletionOptions {\n maxTokens?: number;\n temperature?: number;\n systemPrompt?: string;\n}\n\n/**\n * LLM Provider interface - implemented by all providers\n */\nexport interface LLMProvider {\n readonly name: string;\n\n /**\n * Generate a text completion\n */\n complete(prompt: string, options?: CompletionOptions): Promise<string>;\n\n /**\n * Generate a structured JSON completion with Zod validation\n */\n completeJSON<T>(prompt: string, schema: z.ZodType<T>, options?: CompletionOptions): Promise<T>;\n}\n\n/**\n * Provider configuration for Anthropic\n */\nexport interface AnthropicConfig {\n apiKey?: string; // Falls back to ANTHROPIC_API_KEY env var\n model?: string; // Default: claude-3-haiku-20240307\n baseUrl?: string;\n}\n\n/**\n * Provider configuration for OpenAI-compatible APIs\n * Works with: OpenAI, Ollama, LM Studio, LocalAI, vLLM, etc.\n */\nexport interface OpenAICompatibleConfig {\n apiKey?: string; // Falls back to OPENAI_API_KEY env var\n model?: string; // Default: gpt-4o-mini\n baseUrl?: string; // Default: https://api.openai.com/v1\n}\n\n/**\n * Enhancement result types\n */\nexport interface SummaryResult {\n summary: string;\n}\n\nexport interface TagsResult {\n tags: string[];\n}\n\nexport interface EntitiesResult {\n people: string[];\n organizations: string[];\n technologies: string[];\n locations: string[];\n concepts: string[];\n}\n\nexport interface ClassifyResult {\n contentType: string;\n confidence: number;\n}\n\n/**\n * Zod schemas for LLM outputs\n */\nexport const SummarySchema = z.object({\n summary: z.string().describe('A concise 2-3 sentence 
summary of the content'),\n});\n\nexport const TagsSchema = z.object({\n tags: z.array(z.string()).describe('5-10 relevant tags/keywords'),\n});\n\nexport const EntitiesSchema = z.object({\n people: z.array(z.string()).describe('People mentioned'),\n organizations: z.array(z.string()).describe('Organizations/companies'),\n technologies: z.array(z.string()).describe('Technologies/tools/frameworks'),\n locations: z.array(z.string()).describe('Locations/places'),\n concepts: z.array(z.string()).describe('Key concepts/topics'),\n});\n\nexport const ClassifySchema = z.object({\n contentType: z\n .enum(['article', 'repo', 'docs', 'package', 'video', 'tool', 'product', 'unknown'])\n .describe('The type of content'),\n confidence: z.number().min(0).max(1).describe('Confidence score 0-1'),\n});\n","import { z } from 'zod';\nimport type {\n EnhancementType,\n ExtractedEntities,\n ExtractionSchema,\n ScrapedData,\n} from '@/core/types.js';\nimport type { LLMProvider } from './types.js';\nimport { ClassifySchema, EntitiesSchema, SummarySchema, TagsSchema } from './types.js';\n\n/**\n * Enhance scraped data with LLM-powered features\n */\nexport async function enhance(\n data: ScrapedData,\n provider: LLMProvider,\n types: EnhancementType[]\n): Promise<Partial<ScrapedData>> {\n const results: Partial<ScrapedData> = {};\n\n // Prepare content for LLM (use excerpt/textContent to save tokens)\n const content = data.excerpt || data.textContent.slice(0, 10000);\n const context = `Title: ${data.title}\\nURL: ${data.url}\\n\\nContent:\\n${content}`;\n\n // Run enhancements in parallel\n const promises: Promise<void>[] = [];\n\n if (types.includes('summarize')) {\n promises.push(\n summarize(context, provider).then((summary) => {\n results.summary = summary;\n })\n );\n }\n\n if (types.includes('tags')) {\n promises.push(\n extractTags(context, provider).then((tags) => {\n results.suggestedTags = tags;\n })\n );\n }\n\n if (types.includes('entities')) {\n promises.push(\n extractEntities(context, provider).then((entities) => {\n results.entities = entities;\n })\n );\n }\n\n if (types.includes('classify')) {\n promises.push(\n classify(context, provider).then((classification) => {\n if (classification.confidence > 0.7) {\n results.contentType = classification.contentType as ScrapedData['contentType'];\n }\n })\n );\n }\n\n await Promise.all(promises);\n\n return results;\n}\n\n/**\n * Options for the ask() function\n */\nexport interface AskOptions {\n /** Key to store the result under in custom field */\n key?: string;\n /** Schema for structured response */\n schema?: ExtractionSchema;\n}\n\n/**\n * Ask a custom question about the scraped content\n * Results are stored in the `custom` field of ScrapedData\n */\nexport async function ask(\n data: ScrapedData,\n provider: LLMProvider,\n prompt: string,\n options?: AskOptions\n): Promise<Partial<ScrapedData>> {\n const key = options?.key || 'response';\n const content = data.excerpt || data.textContent.slice(0, 10000);\n\n // Apply placeholder replacements\n const processedPrompt = applyPlaceholders(prompt, data, content);\n\n if (options?.schema) {\n // Use structured extraction\n const result = await extract(data, provider, options.schema, processedPrompt);\n return { custom: { [key]: result } };\n }\n\n // Simple string response\n const fullPrompt = prompt.includes('{{content}}')\n ? 
processedPrompt\n : `${processedPrompt}\\n\\nTitle: ${data.title}\\nURL: ${data.url}\\n\\nContent:\\n${content}`;\n\n const response = await provider.complete(fullPrompt);\n return { custom: { [key]: response } };\n}\n\n/**\n * Apply placeholder replacements to a prompt template\n */\nfunction applyPlaceholders(prompt: string, data: ScrapedData, content: string): string {\n const domain = (() => {\n try {\n return new URL(data.url).hostname;\n } catch {\n return '';\n }\n })();\n\n return prompt\n .replace(/\\{\\{title\\}\\}/g, data.title)\n .replace(/\\{\\{url\\}\\}/g, data.url)\n .replace(/\\{\\{content\\}\\}/g, content)\n .replace(/\\{\\{description\\}\\}/g, data.description || '')\n .replace(/\\{\\{excerpt\\}\\}/g, data.excerpt || '')\n .replace(/\\{\\{domain\\}\\}/g, domain);\n}\n\n/**\n * Extract structured data using LLM and a custom schema\n */\nexport async function extract<T>(\n data: ScrapedData,\n provider: LLMProvider,\n schema: ExtractionSchema,\n promptTemplate?: string\n): Promise<T> {\n // Convert simple schema to Zod schema\n const zodShape: Record<string, z.ZodTypeAny> = {};\n\n for (const [key, type] of Object.entries(schema)) {\n const isOptional = type.endsWith('?');\n const baseType = isOptional ? type.slice(0, -1) : type;\n\n let zodType: z.ZodTypeAny;\n switch (baseType) {\n case 'string':\n zodType = z.string();\n break;\n case 'number':\n zodType = z.number();\n break;\n case 'boolean':\n zodType = z.boolean();\n break;\n case 'string[]':\n zodType = z.array(z.string());\n break;\n case 'number[]':\n zodType = z.array(z.number());\n break;\n default:\n zodType = z.string();\n }\n\n zodShape[key] = isOptional ? zodType.optional() : zodType;\n }\n\n const zodSchema = z.object(zodShape) as unknown as z.ZodType<T>;\n\n const content = data.textContent.slice(0, 4000);\n\n let prompt: string;\n\n if (promptTemplate) {\n // Apply all placeholder replacements\n prompt = applyPlaceholders(promptTemplate, data, content);\n\n // If content wasn't included via placeholder, append it\n if (!promptTemplate.includes('{{content}}')) {\n prompt += `\\n\\nContext:\\n${content}`;\n }\n } else {\n prompt = `Extract the following information from this content:\n\nTitle: ${data.title}\nURL: ${data.url}\n\nContent:\n${content}\n\nExtract these fields:\n${Object.entries(schema)\n .map(([key, type]) => `- ${key} (${type})`)\n .join('\\n')}`;\n }\n\n return provider.completeJSON<T>(prompt, zodSchema as z.ZodType<T>);\n}\n\n/**\n * Generate a summary of the content\n */\nasync function summarize(context: string, provider: LLMProvider): Promise<string> {\n const prompt = `Summarize the following content in 2-3 concise sentences:\n\n${context}`;\n\n const result = await provider.completeJSON(prompt, SummarySchema);\n return result.summary;\n}\n\n/**\n * Extract relevant tags/keywords\n */\nasync function extractTags(context: string, provider: LLMProvider): Promise<string[]> {\n const prompt = `Extract 5-10 relevant tags or keywords from the following content. Focus on technologies, concepts, and topics mentioned:\n\n${context}`;\n\n const result = await provider.completeJSON(prompt, TagsSchema);\n return result.tags;\n}\n\n/**\n * Extract named entities from content\n */\nasync function extractEntities(context: string, provider: LLMProvider): Promise<ExtractedEntities> {\n const prompt = `Extract named entities from the following content. 
Identify people, organizations, technologies, locations, and key concepts:\n\n${context}`;\n\n return provider.completeJSON(prompt, EntitiesSchema);\n}\n\n/**\n * Classify content type using LLM\n */\nasync function classify(\n context: string,\n provider: LLMProvider\n): Promise<{ contentType: string; confidence: number }> {\n const prompt = `Classify the following content into one of these categories:\n- article: Blog post, news article, essay\n- repo: Code repository, open source project\n- docs: Documentation, API reference, guides\n- package: npm/pip package page\n- video: Video content, YouTube\n- tool: Software tool, web application\n- product: Commercial product, e-commerce\n\n${context}`;\n\n return provider.completeJSON(prompt, ClassifySchema);\n}\n"],"mappings":";;;;;;AAiBA,IAAa,cAAb,MAAa,oBAAoB,MAAM;CACrC,AAAgB;CAChB,AAAgB;CAEhB,YAAY,SAAiB,MAAuB,YAAqB,OAAe;AACtF,QAAM,SAAS,EAAE,OAAO,CAAC;AACzB,OAAK,OAAO;AACZ,OAAK,OAAO;AACZ,OAAK,aAAa;AAGlB,MAAI,MAAM,kBACR,OAAM,kBAAkB,MAAM,YAAY;;;;;CAO9C,OAAO,KAAK,OAAgB,OAAwB,gBAA6B;AAC/E,MAAI,iBAAiB,YACnB,QAAO;AAGT,MAAI,iBAAiB,MACnB,QAAO,IAAI,YAAY,MAAM,SAAS,MAAM,QAAW,MAAM;AAG/D,SAAO,IAAI,YAAY,OAAO,MAAM,EAAE,KAAK;;;;;CAM7C,cAAuB;AACrB,SAAO,KAAK,SAAS,kBAAkB,KAAK,SAAS;;;;;CAMvD,SAAkC;AAChC,SAAO;GACL,MAAM,KAAK;GACX,SAAS,KAAK;GACd,MAAM,KAAK;GACX,YAAY,KAAK;GACjB,OAAO,KAAK;GACb;;;;;;;;;ACSL,MAAa,gBAAgB,EAAE,OAAO,EACpC,SAAS,EAAE,QAAQ,CAAC,SAAS,gDAAgD,EAC9E,CAAC;AAEF,MAAa,aAAa,EAAE,OAAO,EACjC,MAAM,EAAE,MAAM,EAAE,QAAQ,CAAC,CAAC,SAAS,8BAA8B,EAClE,CAAC;AAEF,MAAa,iBAAiB,EAAE,OAAO;CACrC,QAAQ,EAAE,MAAM,EAAE,QAAQ,CAAC,CAAC,SAAS,mBAAmB;CACxD,eAAe,EAAE,MAAM,EAAE,QAAQ,CAAC,CAAC,SAAS,0BAA0B;CACtE,cAAc,EAAE,MAAM,EAAE,QAAQ,CAAC,CAAC,SAAS,gCAAgC;CAC3E,WAAW,EAAE,MAAM,EAAE,QAAQ,CAAC,CAAC,SAAS,mBAAmB;CAC3D,UAAU,EAAE,MAAM,EAAE,QAAQ,CAAC,CAAC,SAAS,sBAAsB;CAC9D,CAAC;AAEF,MAAa,iBAAiB,EAAE,OAAO;CACrC,aAAa,EACV,KAAK;EAAC;EAAW;EAAQ;EAAQ;EAAW;EAAS;EAAQ;EAAW;EAAU,CAAC,CACnF,SAAS,sBAAsB;CAClC,YAAY,EAAE,QAAQ,CAAC,IAAI,EAAE,CAAC,IAAI,EAAE,CAAC,SAAS,uBAAuB;CACtE,CAAC;;;;;;;AClFF,eAAsB,QACpB,MACA,UACA,OAC+B;CAC/B,MAAMA,UAAgC,EAAE;CAGxC,MAAM,UAAU,KAAK,WAAW,KAAK,YAAY,MAAM,GAAG,IAAM;CAChE,MAAM,UAAU,UAAU,KAAK,MAAM,SAAS,KAAK,IAAI,gBAAgB;CAGvE,MAAMC,WAA4B,EAAE;AAEpC,KAAI,MAAM,SAAS,YAAY,CAC7B,UAAS,KACP,UAAU,SAAS,SAAS,CAAC,MAAM,YAAY;AAC7C,UAAQ,UAAU;GAClB,CACH;AAGH,KAAI,MAAM,SAAS,OAAO,CACxB,UAAS,KACP,YAAY,SAAS,SAAS,CAAC,MAAM,SAAS;AAC5C,UAAQ,gBAAgB;GACxB,CACH;AAGH,KAAI,MAAM,SAAS,WAAW,CAC5B,UAAS,KACP,gBAAgB,SAAS,SAAS,CAAC,MAAM,aAAa;AACpD,UAAQ,WAAW;GACnB,CACH;AAGH,KAAI,MAAM,SAAS,WAAW,CAC5B,UAAS,KACP,SAAS,SAAS,SAAS,CAAC,MAAM,mBAAmB;AACnD,MAAI,eAAe,aAAa,GAC9B,SAAQ,cAAc,eAAe;GAEvC,CACH;AAGH,OAAM,QAAQ,IAAI,SAAS;AAE3B,QAAO;;;;;;AAiBT,eAAsB,IACpB,MACA,UACA,QACA,SAC+B;CAC/B,MAAM,MAAM,SAAS,OAAO;CAC5B,MAAM,UAAU,KAAK,WAAW,KAAK,YAAY,MAAM,GAAG,IAAM;CAGhE,MAAM,kBAAkB,kBAAkB,QAAQ,MAAM,QAAQ;AAEhE,KAAI,SAAS,QAAQ;EAEnB,MAAM,SAAS,MAAM,QAAQ,MAAM,UAAU,QAAQ,QAAQ,gBAAgB;AAC7E,SAAO,EAAE,QAAQ,GAAG,MAAM,QAAQ,EAAE;;CAItC,MAAM,aAAa,OAAO,SAAS,cAAc,GAC7C,kBACA,GAAG,gBAAgB,aAAa,KAAK,MAAM,SAAS,KAAK,IAAI,gBAAgB;CAEjF,MAAM,WAAW,MAAM,SAAS,SAAS,WAAW;AACpD,QAAO,EAAE,QAAQ,GAAG,MAAM,UAAU,EAAE;;;;;AAMxC,SAAS,kBAAkB,QAAgB,MAAmB,SAAyB;CACrF,MAAM,gBAAgB;AACpB,MAAI;AACF,UAAO,IAAI,IAAI,KAAK,IAAI,CAAC;UACnB;AACN,UAAO;;KAEP;AAEJ,QAAO,OACJ,QAAQ,kBAAkB,KAAK,MAAM,CACrC,QAAQ,gBAAgB,KAAK,IAAI,CACjC,QAAQ,oBAAoB,QAAQ,CACpC,QAAQ,wBAAwB,KAAK,eAAe,GAAG,CACvD,QAAQ,oBAAoB,KAAK,WAAW,GAAG,CAC/C,QAAQ,mBAAmB,OAAO;;;;;AAMvC,eAAsB,QACpB,MACA,UACA,QACA,gBACY;CAEZ,MAAMC,WAAyC,EAAE;AAEjD,MAAK,MAAM,CAAC,KAAK,SAAS,OAAO,QAAQ,OAAO,EAAE;EAChD,MAAM,aAAa,KAAK,SAAS,IAAI;E
ACrC,MAAM,WAAW,aAAa,KAAK,MAAM,GAAG,GAAG,GAAG;EAElD,IAAIC;AACJ,UAAQ,UAAR;GACE,KAAK;AACH,cAAU,EAAE,QAAQ;AACpB;GACF,KAAK;AACH,cAAU,EAAE,QAAQ;AACpB;GACF,KAAK;AACH,cAAU,EAAE,SAAS;AACrB;GACF,KAAK;AACH,cAAU,EAAE,MAAM,EAAE,QAAQ,CAAC;AAC7B;GACF,KAAK;AACH,cAAU,EAAE,MAAM,EAAE,QAAQ,CAAC;AAC7B;GACF,QACE,WAAU,EAAE,QAAQ;;AAGxB,WAAS,OAAO,aAAa,QAAQ,UAAU,GAAG;;CAGpD,MAAM,YAAY,EAAE,OAAO,SAAS;CAEpC,MAAM,UAAU,KAAK,YAAY,MAAM,GAAG,IAAK;CAE/C,IAAIC;AAEJ,KAAI,gBAAgB;AAElB,WAAS,kBAAkB,gBAAgB,MAAM,QAAQ;AAGzD,MAAI,CAAC,eAAe,SAAS,cAAc,CACzC,WAAU,iBAAiB;OAG7B,UAAS;;SAEJ,KAAK,MAAM;OACb,KAAK,IAAI;;;EAGd,QAAQ;;;EAGR,OAAO,QAAQ,OAAO,CACrB,KAAK,CAAC,KAAK,UAAU,KAAK,IAAI,IAAI,KAAK,GAAG,CAC1C,KAAK,KAAK;AAGX,QAAO,SAAS,aAAgB,QAAQ,UAA0B;;;;;AAMpE,eAAe,UAAU,SAAiB,UAAwC;CAChF,MAAM,SAAS;;EAEf;AAGA,SADe,MAAM,SAAS,aAAa,QAAQ,cAAc,EACnD;;;;;AAMhB,eAAe,YAAY,SAAiB,UAA0C;CACpF,MAAM,SAAS;;EAEf;AAGA,SADe,MAAM,SAAS,aAAa,QAAQ,WAAW,EAChD;;;;;AAMhB,eAAe,gBAAgB,SAAiB,UAAmD;CACjG,MAAM,SAAS;;EAEf;AAEA,QAAO,SAAS,aAAa,QAAQ,eAAe;;;;;AAMtD,eAAe,SACb,SACA,UACsD;CACtD,MAAM,SAAS;;;;;;;;;EASf;AAEA,QAAO,SAAS,aAAa,QAAQ,eAAe"}
package/dist/enhancer-oM4BhYYS.cjs DELETED
@@ -1,268 +0,0 @@
- const require_index = require('./index.cjs');
- let zod = require("zod");
-
- //#region src/core/errors.ts
- /**
- * Custom error class for scraping failures with structured error codes
- */
- var ScrapeError = class ScrapeError extends Error {
- code;
- statusCode;
- constructor(message, code, statusCode, cause) {
- super(message, { cause });
- this.name = "ScrapeError";
- this.code = code;
- this.statusCode = statusCode;
- if (Error.captureStackTrace) Error.captureStackTrace(this, ScrapeError);
- }
- /**
- * Create a ScrapeError from an unknown error
- */
- static from(error, code = "FETCH_FAILED") {
- if (error instanceof ScrapeError) return error;
- if (error instanceof Error) return new ScrapeError(error.message, code, void 0, error);
- return new ScrapeError(String(error), code);
- }
- /**
- * Check if error is retryable (network issues, timeouts)
- */
- isRetryable() {
- return this.code === "FETCH_FAILED" || this.code === "TIMEOUT";
- }
- /**
- * Convert to a plain object for serialization
- */
- toJSON() {
- return {
- name: this.name,
- message: this.message,
- code: this.code,
- statusCode: this.statusCode,
- stack: this.stack
- };
- }
- };
-
- //#endregion
- //#region src/llm/types.ts
- /**
- * Zod schemas for LLM outputs
- */
- const SummarySchema = zod.z.object({ summary: zod.z.string().describe("A concise 2-3 sentence summary of the content") });
- const TagsSchema = zod.z.object({ tags: zod.z.array(zod.z.string()).describe("5-10 relevant tags/keywords") });
- const EntitiesSchema = zod.z.object({
- people: zod.z.array(zod.z.string()).describe("People mentioned"),
- organizations: zod.z.array(zod.z.string()).describe("Organizations/companies"),
- technologies: zod.z.array(zod.z.string()).describe("Technologies/tools/frameworks"),
- locations: zod.z.array(zod.z.string()).describe("Locations/places"),
- concepts: zod.z.array(zod.z.string()).describe("Key concepts/topics")
- });
- const ClassifySchema = zod.z.object({
- contentType: zod.z.enum([
- "article",
- "repo",
- "docs",
- "package",
- "video",
- "tool",
- "product",
- "unknown"
- ]).describe("The type of content"),
- confidence: zod.z.number().min(0).max(1).describe("Confidence score 0-1")
- });
-
- //#endregion
- //#region src/llm/enhancer.ts
- /**
- * Enhance scraped data with LLM-powered features
- */
- async function enhance(data, provider, types) {
- const results = {};
- const content = data.excerpt || data.textContent.slice(0, 1e4);
- const context = `Title: ${data.title}\nURL: ${data.url}\n\nContent:\n${content}`;
- const promises = [];
- if (types.includes("summarize")) promises.push(summarize(context, provider).then((summary) => {
- results.summary = summary;
- }));
- if (types.includes("tags")) promises.push(extractTags(context, provider).then((tags) => {
- results.suggestedTags = tags;
- }));
- if (types.includes("entities")) promises.push(extractEntities(context, provider).then((entities) => {
- results.entities = entities;
- }));
- if (types.includes("classify")) promises.push(classify(context, provider).then((classification) => {
- if (classification.confidence > .7) results.contentType = classification.contentType;
- }));
- await Promise.all(promises);
- return results;
- }
- /**
- * Ask a custom question about the scraped content
- * Results are stored in the `custom` field of ScrapedData
- */
- async function ask(data, provider, prompt, options) {
- const key = options?.key || "response";
- const content = data.excerpt || data.textContent.slice(0, 1e4);
- const processedPrompt = applyPlaceholders(prompt, data, content);
- if (options?.schema) {
- const result = await extract(data, provider, options.schema, processedPrompt);
- return { custom: { [key]: result } };
- }
- const fullPrompt = prompt.includes("{{content}}") ? processedPrompt : `${processedPrompt}\n\nTitle: ${data.title}\nURL: ${data.url}\n\nContent:\n${content}`;
- const response = await provider.complete(fullPrompt);
- return { custom: { [key]: response } };
- }
- /**
- * Apply placeholder replacements to a prompt template
- */
- function applyPlaceholders(prompt, data, content) {
- const domain = (() => {
- try {
- return new URL(data.url).hostname;
- } catch {
- return "";
- }
- })();
- return prompt.replace(/\{\{title\}\}/g, data.title).replace(/\{\{url\}\}/g, data.url).replace(/\{\{content\}\}/g, content).replace(/\{\{description\}\}/g, data.description || "").replace(/\{\{excerpt\}\}/g, data.excerpt || "").replace(/\{\{domain\}\}/g, domain);
- }
- /**
- * Extract structured data using LLM and a custom schema
- */
- async function extract(data, provider, schema, promptTemplate) {
- const zodShape = {};
- for (const [key, type] of Object.entries(schema)) {
- const isOptional = type.endsWith("?");
- const baseType = isOptional ? type.slice(0, -1) : type;
- let zodType;
- switch (baseType) {
- case "string":
- zodType = zod.z.string();
- break;
- case "number":
- zodType = zod.z.number();
- break;
- case "boolean":
- zodType = zod.z.boolean();
- break;
- case "string[]":
- zodType = zod.z.array(zod.z.string());
- break;
- case "number[]":
- zodType = zod.z.array(zod.z.number());
- break;
- default: zodType = zod.z.string();
- }
- zodShape[key] = isOptional ? zodType.optional() : zodType;
- }
- const zodSchema = zod.z.object(zodShape);
- const content = data.textContent.slice(0, 4e3);
- let prompt;
- if (promptTemplate) {
- prompt = applyPlaceholders(promptTemplate, data, content);
- if (!promptTemplate.includes("{{content}}")) prompt += `\n\nContext:\n${content}`;
- } else prompt = `Extract the following information from this content:
-
- Title: ${data.title}
- URL: ${data.url}
-
- Content:
- ${content}
-
- Extract these fields:
- ${Object.entries(schema).map(([key, type]) => `- ${key} (${type})`).join("\n")}`;
- return provider.completeJSON(prompt, zodSchema);
- }
- /**
- * Generate a summary of the content
- */
- async function summarize(context, provider) {
- const prompt = `Summarize the following content in 2-3 concise sentences:
-
- ${context}`;
- return (await provider.completeJSON(prompt, SummarySchema)).summary;
- }
- /**
- * Extract relevant tags/keywords
- */
- async function extractTags(context, provider) {
- const prompt = `Extract 5-10 relevant tags or keywords from the following content. Focus on technologies, concepts, and topics mentioned:
-
- ${context}`;
- return (await provider.completeJSON(prompt, TagsSchema)).tags;
- }
- /**
- * Extract named entities from content
- */
- async function extractEntities(context, provider) {
- const prompt = `Extract named entities from the following content. Identify people, organizations, technologies, locations, and key concepts:
-
- ${context}`;
- return provider.completeJSON(prompt, EntitiesSchema);
- }
- /**
- * Classify content type using LLM
- */
- async function classify(context, provider) {
- const prompt = `Classify the following content into one of these categories:
- - article: Blog post, news article, essay
- - repo: Code repository, open source project
- - docs: Documentation, API reference, guides
- - package: npm/pip package page
- - video: Video content, YouTube
- - tool: Software tool, web application
- - product: Commercial product, e-commerce
-
- ${context}`;
- return provider.completeJSON(prompt, ClassifySchema);
- }
-
- //#endregion
- Object.defineProperty(exports, 'ClassifySchema', {
- enumerable: true,
- get: function () {
- return ClassifySchema;
- }
- });
- Object.defineProperty(exports, 'EntitiesSchema', {
- enumerable: true,
- get: function () {
- return EntitiesSchema;
- }
- });
- Object.defineProperty(exports, 'ScrapeError', {
- enumerable: true,
- get: function () {
- return ScrapeError;
- }
- });
- Object.defineProperty(exports, 'SummarySchema', {
- enumerable: true,
- get: function () {
- return SummarySchema;
- }
- });
- Object.defineProperty(exports, 'TagsSchema', {
- enumerable: true,
- get: function () {
- return TagsSchema;
- }
- });
- Object.defineProperty(exports, 'ask', {
- enumerable: true,
- get: function () {
- return ask;
- }
- });
- Object.defineProperty(exports, 'enhance', {
- enumerable: true,
- get: function () {
- return enhance;
- }
- });
- Object.defineProperty(exports, 'extract', {
- enumerable: true,
- get: function () {
- return extract;
- }
- });
- //# sourceMappingURL=enhancer-oM4BhYYS.cjs.map
package/dist/enhancer-oM4BhYYS.cjs.map DELETED
@@ -1 +0,0 @@
- {"version":3,"file":"enhancer-oM4BhYYS.cjs","names":["z","results: Partial<ScrapedData>","promises: Promise<void>[]","zodShape: Record<string, z.ZodTypeAny>","zodType: z.ZodTypeAny","z","prompt: string"],"sources":["../src/core/errors.ts","../src/llm/types.ts","../src/llm/enhancer.ts"],"sourcesContent":["/**\n * Error codes for scraping failures\n */\nexport type ScrapeErrorCode =\n | 'FETCH_FAILED'\n | 'TIMEOUT'\n | 'INVALID_URL'\n | 'BLOCKED'\n | 'NOT_FOUND'\n | 'ROBOTS_BLOCKED'\n | 'PARSE_ERROR'\n | 'LLM_ERROR'\n | 'VALIDATION_ERROR';\n\n/**\n * Custom error class for scraping failures with structured error codes\n */\nexport class ScrapeError extends Error {\n public readonly code: ScrapeErrorCode;\n public readonly statusCode?: number;\n\n constructor(message: string, code: ScrapeErrorCode, statusCode?: number, cause?: Error) {\n super(message, { cause });\n this.name = 'ScrapeError';\n this.code = code;\n this.statusCode = statusCode;\n\n // Maintains proper stack trace in V8 environments\n if (Error.captureStackTrace) {\n Error.captureStackTrace(this, ScrapeError);\n }\n }\n\n /**\n * Create a ScrapeError from an unknown error\n */\n static from(error: unknown, code: ScrapeErrorCode = 'FETCH_FAILED'): ScrapeError {\n if (error instanceof ScrapeError) {\n return error;\n }\n\n if (error instanceof Error) {\n return new ScrapeError(error.message, code, undefined, error);\n }\n\n return new ScrapeError(String(error), code);\n }\n\n /**\n * Check if error is retryable (network issues, timeouts)\n */\n isRetryable(): boolean {\n return this.code === 'FETCH_FAILED' || this.code === 'TIMEOUT';\n }\n\n /**\n * Convert to a plain object for serialization\n */\n toJSON(): Record<string, unknown> {\n return {\n name: this.name,\n message: this.message,\n code: this.code,\n statusCode: this.statusCode,\n stack: this.stack,\n };\n }\n}\n","import { z } from 'zod';\n\n/**\n * LLM completion options\n */\nexport interface CompletionOptions {\n maxTokens?: number;\n temperature?: number;\n systemPrompt?: string;\n}\n\n/**\n * LLM Provider interface - implemented by all providers\n */\nexport interface LLMProvider {\n readonly name: string;\n\n /**\n * Generate a text completion\n */\n complete(prompt: string, options?: CompletionOptions): Promise<string>;\n\n /**\n * Generate a structured JSON completion with Zod validation\n */\n completeJSON<T>(prompt: string, schema: z.ZodType<T>, options?: CompletionOptions): Promise<T>;\n}\n\n/**\n * Provider configuration for Anthropic\n */\nexport interface AnthropicConfig {\n apiKey?: string; // Falls back to ANTHROPIC_API_KEY env var\n model?: string; // Default: claude-3-haiku-20240307\n baseUrl?: string;\n}\n\n/**\n * Provider configuration for OpenAI-compatible APIs\n * Works with: OpenAI, Ollama, LM Studio, LocalAI, vLLM, etc.\n */\nexport interface OpenAICompatibleConfig {\n apiKey?: string; // Falls back to OPENAI_API_KEY env var\n model?: string; // Default: gpt-4o-mini\n baseUrl?: string; // Default: https://api.openai.com/v1\n}\n\n/**\n * Enhancement result types\n */\nexport interface SummaryResult {\n summary: string;\n}\n\nexport interface TagsResult {\n tags: string[];\n}\n\nexport interface EntitiesResult {\n people: string[];\n organizations: string[];\n technologies: string[];\n locations: string[];\n concepts: string[];\n}\n\nexport interface ClassifyResult {\n contentType: string;\n confidence: number;\n}\n\n/**\n * Zod schemas for LLM outputs\n */\nexport const SummarySchema = z.object({\n summary: z.string().describe('A concise 2-3 
sentence summary of the content'),\n});\n\nexport const TagsSchema = z.object({\n tags: z.array(z.string()).describe('5-10 relevant tags/keywords'),\n});\n\nexport const EntitiesSchema = z.object({\n people: z.array(z.string()).describe('People mentioned'),\n organizations: z.array(z.string()).describe('Organizations/companies'),\n technologies: z.array(z.string()).describe('Technologies/tools/frameworks'),\n locations: z.array(z.string()).describe('Locations/places'),\n concepts: z.array(z.string()).describe('Key concepts/topics'),\n});\n\nexport const ClassifySchema = z.object({\n contentType: z\n .enum(['article', 'repo', 'docs', 'package', 'video', 'tool', 'product', 'unknown'])\n .describe('The type of content'),\n confidence: z.number().min(0).max(1).describe('Confidence score 0-1'),\n});\n","import { z } from 'zod';\nimport type {\n EnhancementType,\n ExtractedEntities,\n ExtractionSchema,\n ScrapedData,\n} from '@/core/types.js';\nimport type { LLMProvider } from './types.js';\nimport { ClassifySchema, EntitiesSchema, SummarySchema, TagsSchema } from './types.js';\n\n/**\n * Enhance scraped data with LLM-powered features\n */\nexport async function enhance(\n data: ScrapedData,\n provider: LLMProvider,\n types: EnhancementType[]\n): Promise<Partial<ScrapedData>> {\n const results: Partial<ScrapedData> = {};\n\n // Prepare content for LLM (use excerpt/textContent to save tokens)\n const content = data.excerpt || data.textContent.slice(0, 10000);\n const context = `Title: ${data.title}\\nURL: ${data.url}\\n\\nContent:\\n${content}`;\n\n // Run enhancements in parallel\n const promises: Promise<void>[] = [];\n\n if (types.includes('summarize')) {\n promises.push(\n summarize(context, provider).then((summary) => {\n results.summary = summary;\n })\n );\n }\n\n if (types.includes('tags')) {\n promises.push(\n extractTags(context, provider).then((tags) => {\n results.suggestedTags = tags;\n })\n );\n }\n\n if (types.includes('entities')) {\n promises.push(\n extractEntities(context, provider).then((entities) => {\n results.entities = entities;\n })\n );\n }\n\n if (types.includes('classify')) {\n promises.push(\n classify(context, provider).then((classification) => {\n if (classification.confidence > 0.7) {\n results.contentType = classification.contentType as ScrapedData['contentType'];\n }\n })\n );\n }\n\n await Promise.all(promises);\n\n return results;\n}\n\n/**\n * Options for the ask() function\n */\nexport interface AskOptions {\n /** Key to store the result under in custom field */\n key?: string;\n /** Schema for structured response */\n schema?: ExtractionSchema;\n}\n\n/**\n * Ask a custom question about the scraped content\n * Results are stored in the `custom` field of ScrapedData\n */\nexport async function ask(\n data: ScrapedData,\n provider: LLMProvider,\n prompt: string,\n options?: AskOptions\n): Promise<Partial<ScrapedData>> {\n const key = options?.key || 'response';\n const content = data.excerpt || data.textContent.slice(0, 10000);\n\n // Apply placeholder replacements\n const processedPrompt = applyPlaceholders(prompt, data, content);\n\n if (options?.schema) {\n // Use structured extraction\n const result = await extract(data, provider, options.schema, processedPrompt);\n return { custom: { [key]: result } };\n }\n\n // Simple string response\n const fullPrompt = prompt.includes('{{content}}')\n ? 
processedPrompt\n : `${processedPrompt}\\n\\nTitle: ${data.title}\\nURL: ${data.url}\\n\\nContent:\\n${content}`;\n\n const response = await provider.complete(fullPrompt);\n return { custom: { [key]: response } };\n}\n\n/**\n * Apply placeholder replacements to a prompt template\n */\nfunction applyPlaceholders(prompt: string, data: ScrapedData, content: string): string {\n const domain = (() => {\n try {\n return new URL(data.url).hostname;\n } catch {\n return '';\n }\n })();\n\n return prompt\n .replace(/\\{\\{title\\}\\}/g, data.title)\n .replace(/\\{\\{url\\}\\}/g, data.url)\n .replace(/\\{\\{content\\}\\}/g, content)\n .replace(/\\{\\{description\\}\\}/g, data.description || '')\n .replace(/\\{\\{excerpt\\}\\}/g, data.excerpt || '')\n .replace(/\\{\\{domain\\}\\}/g, domain);\n}\n\n/**\n * Extract structured data using LLM and a custom schema\n */\nexport async function extract<T>(\n data: ScrapedData,\n provider: LLMProvider,\n schema: ExtractionSchema,\n promptTemplate?: string\n): Promise<T> {\n // Convert simple schema to Zod schema\n const zodShape: Record<string, z.ZodTypeAny> = {};\n\n for (const [key, type] of Object.entries(schema)) {\n const isOptional = type.endsWith('?');\n const baseType = isOptional ? type.slice(0, -1) : type;\n\n let zodType: z.ZodTypeAny;\n switch (baseType) {\n case 'string':\n zodType = z.string();\n break;\n case 'number':\n zodType = z.number();\n break;\n case 'boolean':\n zodType = z.boolean();\n break;\n case 'string[]':\n zodType = z.array(z.string());\n break;\n case 'number[]':\n zodType = z.array(z.number());\n break;\n default:\n zodType = z.string();\n }\n\n zodShape[key] = isOptional ? zodType.optional() : zodType;\n }\n\n const zodSchema = z.object(zodShape) as unknown as z.ZodType<T>;\n\n const content = data.textContent.slice(0, 4000);\n\n let prompt: string;\n\n if (promptTemplate) {\n // Apply all placeholder replacements\n prompt = applyPlaceholders(promptTemplate, data, content);\n\n // If content wasn't included via placeholder, append it\n if (!promptTemplate.includes('{{content}}')) {\n prompt += `\\n\\nContext:\\n${content}`;\n }\n } else {\n prompt = `Extract the following information from this content:\n\nTitle: ${data.title}\nURL: ${data.url}\n\nContent:\n${content}\n\nExtract these fields:\n${Object.entries(schema)\n .map(([key, type]) => `- ${key} (${type})`)\n .join('\\n')}`;\n }\n\n return provider.completeJSON<T>(prompt, zodSchema as z.ZodType<T>);\n}\n\n/**\n * Generate a summary of the content\n */\nasync function summarize(context: string, provider: LLMProvider): Promise<string> {\n const prompt = `Summarize the following content in 2-3 concise sentences:\n\n${context}`;\n\n const result = await provider.completeJSON(prompt, SummarySchema);\n return result.summary;\n}\n\n/**\n * Extract relevant tags/keywords\n */\nasync function extractTags(context: string, provider: LLMProvider): Promise<string[]> {\n const prompt = `Extract 5-10 relevant tags or keywords from the following content. Focus on technologies, concepts, and topics mentioned:\n\n${context}`;\n\n const result = await provider.completeJSON(prompt, TagsSchema);\n return result.tags;\n}\n\n/**\n * Extract named entities from content\n */\nasync function extractEntities(context: string, provider: LLMProvider): Promise<ExtractedEntities> {\n const prompt = `Extract named entities from the following content. 
Identify people, organizations, technologies, locations, and key concepts:\n\n${context}`;\n\n return provider.completeJSON(prompt, EntitiesSchema);\n}\n\n/**\n * Classify content type using LLM\n */\nasync function classify(\n context: string,\n provider: LLMProvider\n): Promise<{ contentType: string; confidence: number }> {\n const prompt = `Classify the following content into one of these categories:\n- article: Blog post, news article, essay\n- repo: Code repository, open source project\n- docs: Documentation, API reference, guides\n- package: npm/pip package page\n- video: Video content, YouTube\n- tool: Software tool, web application\n- product: Commercial product, e-commerce\n\n${context}`;\n\n return provider.completeJSON(prompt, ClassifySchema);\n}\n"],"mappings":";;;;;;;AAiBA,IAAa,cAAb,MAAa,oBAAoB,MAAM;CACrC,AAAgB;CAChB,AAAgB;CAEhB,YAAY,SAAiB,MAAuB,YAAqB,OAAe;AACtF,QAAM,SAAS,EAAE,OAAO,CAAC;AACzB,OAAK,OAAO;AACZ,OAAK,OAAO;AACZ,OAAK,aAAa;AAGlB,MAAI,MAAM,kBACR,OAAM,kBAAkB,MAAM,YAAY;;;;;CAO9C,OAAO,KAAK,OAAgB,OAAwB,gBAA6B;AAC/E,MAAI,iBAAiB,YACnB,QAAO;AAGT,MAAI,iBAAiB,MACnB,QAAO,IAAI,YAAY,MAAM,SAAS,MAAM,QAAW,MAAM;AAG/D,SAAO,IAAI,YAAY,OAAO,MAAM,EAAE,KAAK;;;;;CAM7C,cAAuB;AACrB,SAAO,KAAK,SAAS,kBAAkB,KAAK,SAAS;;;;;CAMvD,SAAkC;AAChC,SAAO;GACL,MAAM,KAAK;GACX,SAAS,KAAK;GACd,MAAM,KAAK;GACX,YAAY,KAAK;GACjB,OAAO,KAAK;GACb;;;;;;;;;ACSL,MAAa,gBAAgBA,MAAE,OAAO,EACpC,SAASA,MAAE,QAAQ,CAAC,SAAS,gDAAgD,EAC9E,CAAC;AAEF,MAAa,aAAaA,MAAE,OAAO,EACjC,MAAMA,MAAE,MAAMA,MAAE,QAAQ,CAAC,CAAC,SAAS,8BAA8B,EAClE,CAAC;AAEF,MAAa,iBAAiBA,MAAE,OAAO;CACrC,QAAQA,MAAE,MAAMA,MAAE,QAAQ,CAAC,CAAC,SAAS,mBAAmB;CACxD,eAAeA,MAAE,MAAMA,MAAE,QAAQ,CAAC,CAAC,SAAS,0BAA0B;CACtE,cAAcA,MAAE,MAAMA,MAAE,QAAQ,CAAC,CAAC,SAAS,gCAAgC;CAC3E,WAAWA,MAAE,MAAMA,MAAE,QAAQ,CAAC,CAAC,SAAS,mBAAmB;CAC3D,UAAUA,MAAE,MAAMA,MAAE,QAAQ,CAAC,CAAC,SAAS,sBAAsB;CAC9D,CAAC;AAEF,MAAa,iBAAiBA,MAAE,OAAO;CACrC,aAAaA,MACV,KAAK;EAAC;EAAW;EAAQ;EAAQ;EAAW;EAAS;EAAQ;EAAW;EAAU,CAAC,CACnF,SAAS,sBAAsB;CAClC,YAAYA,MAAE,QAAQ,CAAC,IAAI,EAAE,CAAC,IAAI,EAAE,CAAC,SAAS,uBAAuB;CACtE,CAAC;;;;;;;AClFF,eAAsB,QACpB,MACA,UACA,OAC+B;CAC/B,MAAMC,UAAgC,EAAE;CAGxC,MAAM,UAAU,KAAK,WAAW,KAAK,YAAY,MAAM,GAAG,IAAM;CAChE,MAAM,UAAU,UAAU,KAAK,MAAM,SAAS,KAAK,IAAI,gBAAgB;CAGvE,MAAMC,WAA4B,EAAE;AAEpC,KAAI,MAAM,SAAS,YAAY,CAC7B,UAAS,KACP,UAAU,SAAS,SAAS,CAAC,MAAM,YAAY;AAC7C,UAAQ,UAAU;GAClB,CACH;AAGH,KAAI,MAAM,SAAS,OAAO,CACxB,UAAS,KACP,YAAY,SAAS,SAAS,CAAC,MAAM,SAAS;AAC5C,UAAQ,gBAAgB;GACxB,CACH;AAGH,KAAI,MAAM,SAAS,WAAW,CAC5B,UAAS,KACP,gBAAgB,SAAS,SAAS,CAAC,MAAM,aAAa;AACpD,UAAQ,WAAW;GACnB,CACH;AAGH,KAAI,MAAM,SAAS,WAAW,CAC5B,UAAS,KACP,SAAS,SAAS,SAAS,CAAC,MAAM,mBAAmB;AACnD,MAAI,eAAe,aAAa,GAC9B,SAAQ,cAAc,eAAe;GAEvC,CACH;AAGH,OAAM,QAAQ,IAAI,SAAS;AAE3B,QAAO;;;;;;AAiBT,eAAsB,IACpB,MACA,UACA,QACA,SAC+B;CAC/B,MAAM,MAAM,SAAS,OAAO;CAC5B,MAAM,UAAU,KAAK,WAAW,KAAK,YAAY,MAAM,GAAG,IAAM;CAGhE,MAAM,kBAAkB,kBAAkB,QAAQ,MAAM,QAAQ;AAEhE,KAAI,SAAS,QAAQ;EAEnB,MAAM,SAAS,MAAM,QAAQ,MAAM,UAAU,QAAQ,QAAQ,gBAAgB;AAC7E,SAAO,EAAE,QAAQ,GAAG,MAAM,QAAQ,EAAE;;CAItC,MAAM,aAAa,OAAO,SAAS,cAAc,GAC7C,kBACA,GAAG,gBAAgB,aAAa,KAAK,MAAM,SAAS,KAAK,IAAI,gBAAgB;CAEjF,MAAM,WAAW,MAAM,SAAS,SAAS,WAAW;AACpD,QAAO,EAAE,QAAQ,GAAG,MAAM,UAAU,EAAE;;;;;AAMxC,SAAS,kBAAkB,QAAgB,MAAmB,SAAyB;CACrF,MAAM,gBAAgB;AACpB,MAAI;AACF,UAAO,IAAI,IAAI,KAAK,IAAI,CAAC;UACnB;AACN,UAAO;;KAEP;AAEJ,QAAO,OACJ,QAAQ,kBAAkB,KAAK,MAAM,CACrC,QAAQ,gBAAgB,KAAK,IAAI,CACjC,QAAQ,oBAAoB,QAAQ,CACpC,QAAQ,wBAAwB,KAAK,eAAe,GAAG,CACvD,QAAQ,oBAAoB,KAAK,WAAW,GAAG,CAC/C,QAAQ,mBAAmB,OAAO;;;;;AAMvC,eAAsB,QACpB,MACA,UACA,QACA,gBACY;CAEZ,MAAMC,WAAyC,EAAE;AAEjD,MAAK,MAAM,CAAC,KAAK,SAAS,OAAO,QAAQ,OAAO,EAAE;EAChD,MAAM,a
AAa,KAAK,SAAS,IAAI;EACrC,MAAM,WAAW,aAAa,KAAK,MAAM,GAAG,GAAG,GAAG;EAElD,IAAIC;AACJ,UAAQ,UAAR;GACE,KAAK;AACH,cAAUC,MAAE,QAAQ;AACpB;GACF,KAAK;AACH,cAAUA,MAAE,QAAQ;AACpB;GACF,KAAK;AACH,cAAUA,MAAE,SAAS;AACrB;GACF,KAAK;AACH,cAAUA,MAAE,MAAMA,MAAE,QAAQ,CAAC;AAC7B;GACF,KAAK;AACH,cAAUA,MAAE,MAAMA,MAAE,QAAQ,CAAC;AAC7B;GACF,QACE,WAAUA,MAAE,QAAQ;;AAGxB,WAAS,OAAO,aAAa,QAAQ,UAAU,GAAG;;CAGpD,MAAM,YAAYA,MAAE,OAAO,SAAS;CAEpC,MAAM,UAAU,KAAK,YAAY,MAAM,GAAG,IAAK;CAE/C,IAAIC;AAEJ,KAAI,gBAAgB;AAElB,WAAS,kBAAkB,gBAAgB,MAAM,QAAQ;AAGzD,MAAI,CAAC,eAAe,SAAS,cAAc,CACzC,WAAU,iBAAiB;OAG7B,UAAS;;SAEJ,KAAK,MAAM;OACb,KAAK,IAAI;;;EAGd,QAAQ;;;EAGR,OAAO,QAAQ,OAAO,CACrB,KAAK,CAAC,KAAK,UAAU,KAAK,IAAI,IAAI,KAAK,GAAG,CAC1C,KAAK,KAAK;AAGX,QAAO,SAAS,aAAgB,QAAQ,UAA0B;;;;;AAMpE,eAAe,UAAU,SAAiB,UAAwC;CAChF,MAAM,SAAS;;EAEf;AAGA,SADe,MAAM,SAAS,aAAa,QAAQ,cAAc,EACnD;;;;;AAMhB,eAAe,YAAY,SAAiB,UAA0C;CACpF,MAAM,SAAS;;EAEf;AAGA,SADe,MAAM,SAAS,aAAa,QAAQ,WAAW,EAChD;;;;;AAMhB,eAAe,gBAAgB,SAAiB,UAAmD;CACjG,MAAM,SAAS;;EAEf;AAEA,QAAO,SAAS,aAAa,QAAQ,eAAe;;;;;AAMtD,eAAe,SACb,SACA,UACsD;CACtD,MAAM,SAAS;;;;;;;;;EASf;AAEA,QAAO,SAAS,aAAa,QAAQ,eAAe"}
package/dist/parsers/index.cjs.map DELETED
@@ -1 +0,0 @@
- {"version":3,"file":"index.cjs","names":["sections: MarkdownSection[]","allLinks: MarkdownLink[]","codeBlocks: CodeBlock[]","frontmatter: Record<string, unknown> | undefined","currentSection: MarkdownSection | null","linkData: MarkdownLink","result: Record<string, unknown>","value: string | boolean | number","links: MarkdownLink[]"],"sources":["../../src/parsers/github.ts","../../src/parsers/markdown.ts"],"sourcesContent":["import type { GitHubMeta, MarkdownLink } from './types.js';\n\n/**\n * GitHub-specific utilities for parsing repositories.\n */\n\n/**\n * Check if a URL is a GitHub repository\n */\nexport function isGitHubRepo(url: string): boolean {\n return /^https?:\\/\\/(www\\.)?github\\.com\\/[^/]+\\/[^/]+\\/?$/.test(url);\n}\n\n/**\n * Extract GitHub repo info from URL\n */\nexport function parseGitHubUrl(url: string): { owner: string; repo: string } | null {\n const match = url.match(/github\\.com\\/([^/]+)\\/([^/]+)/);\n if (!match || !match[1] || !match[2]) return null;\n return {\n owner: match[1],\n repo: match[2].replace(/\\.git$/, ''),\n };\n}\n\n/**\n * Convert a GitHub repo URL to raw content URL\n */\nexport function toRawUrl(url: string, branch = 'main', file = 'README.md'): string {\n const info = parseGitHubUrl(url);\n if (!info) return url;\n return `https://raw.githubusercontent.com/${info.owner}/${info.repo}/${branch}/${file}`;\n}\n\n/**\n * Fetch GitHub API metadata for a repository\n * Note: This is a placeholder - actual implementation would need GitHub API access\n */\nexport async function fetchRepoMeta(\n owner: string,\n repo: string,\n _token?: string\n): Promise<GitHubMeta> {\n // This would make actual API calls in a full implementation\n // For now, return basic info\n return {\n repoOwner: owner,\n repoName: repo,\n };\n}\n\n/**\n * Group links by their category/section\n */\nexport function groupByCategory(links: MarkdownLink[]): Map<string, MarkdownLink[]> {\n const groups = new Map<string, MarkdownLink[]>();\n\n for (const link of links) {\n const category = link.context || 'Uncategorized';\n const existing = groups.get(category) || [];\n existing.push(link);\n groups.set(category, existing);\n }\n\n return groups;\n}\n","import type { Code, Heading, Link, ListItem, Root } from 'mdast';\nimport { fromMarkdown } from 'mdast-util-from-markdown';\nimport { toString as mdastToString } from 'mdast-util-to-string';\nimport { visit } from 'unist-util-visit';\nimport type {\n CodeBlock,\n MarkdownLink,\n MarkdownSection,\n ParsedMarkdown,\n ParserResult,\n SourceParser,\n} from './types.js';\n\n/**\n * Generic Markdown parser.\n * Extracts structure, links, and code blocks from markdown content.\n *\n * @example\n * ```ts\n * const parser = new MarkdownParser();\n * const result = parser.parse(markdownContent);\n * console.log(result.data.sections);\n * console.log(result.data.links);\n * ```\n */\nexport class MarkdownParser implements SourceParser<ParsedMarkdown> {\n readonly name = 'markdown';\n\n canParse(content: string): boolean {\n // Check for common markdown patterns\n return (\n content.includes('# ') ||\n content.includes('## ') ||\n content.includes('- [') ||\n content.includes('* [') ||\n content.includes('```')\n );\n }\n\n parse(content: string): ParserResult<ParsedMarkdown> {\n const tree = fromMarkdown(content);\n const sections: MarkdownSection[] = [];\n const allLinks: MarkdownLink[] = [];\n const codeBlocks: CodeBlock[] = [];\n let frontmatter: Record<string, unknown> | undefined;\n\n // Extract frontmatter if present\n if (content.startsWith('---')) {\n const endIndex = content.indexOf('---', 3);\n if (endIndex !== -1) {\n const frontmatterContent = content.slice(3, endIndex).trim();\n frontmatter = this.parseFrontmatter(frontmatterContent);\n }\n }\n\n // Track current section\n let currentSection: MarkdownSection | null = null;\n\n // Process the AST\n visit(tree, (node) => {\n // Handle headings\n if (node.type === 'heading') {\n const heading = node as Heading;\n const title = mdastToString(heading);\n\n // Finalize previous section\n if (currentSection) {\n sections.push(currentSection);\n }\n\n currentSection = {\n level: heading.depth,\n title,\n content: '',\n links: [],\n };\n }\n\n // Handle links\n if (node.type === 'link') {\n const link = node as Link;\n const text = mdastToString(link);\n const linkData: MarkdownLink = {\n url: link.url,\n text,\n title: link.title ?? undefined,\n context: currentSection?.title,\n };\n\n allLinks.push(linkData);\n if (currentSection) {\n currentSection.links.push(linkData);\n }\n }\n\n // Handle code blocks\n if (node.type === 'code') {\n const code = node as Code;\n codeBlocks.push({\n language: code.lang ?? undefined,\n code: code.value,\n meta: code.meta ?? undefined,\n });\n }\n\n // Accumulate content for current section\n if (currentSection && node.type === 'paragraph') {\n const text = mdastToString(node);\n currentSection.content += (currentSection.content ? '\\n\\n' : '') + text;\n }\n });\n\n // Finalize last section\n if (currentSection) {\n sections.push(currentSection);\n }\n\n // Extract title from first h1 or frontmatter\n const title = (frontmatter?.title as string) ?? sections.find((s) => s.level === 1)?.title;\n\n // Extract description from frontmatter or first paragraph before any heading\n const description = (frontmatter?.description as string) ?? this.extractDescription(tree);\n\n return {\n data: {\n title,\n description,\n sections,\n links: allLinks,\n codeBlocks,\n frontmatter,\n },\n };\n }\n\n private parseFrontmatter(content: string): Record<string, unknown> {\n const result: Record<string, unknown> = {};\n const lines = content.split('\\n');\n\n for (const line of lines) {\n const colonIndex = line.indexOf(':');\n if (colonIndex > 0) {\n const key = line.slice(0, colonIndex).trim();\n let value: string | boolean | number = line.slice(colonIndex + 1).trim();\n\n // Parse simple types\n if (value === 'true') value = true;\n else if (value === 'false') value = false;\n else if (/^-?\\d+(\\.\\d+)?$/.test(value)) value = Number(value);\n else if (value.startsWith('\"') && value.endsWith('\"')) value = value.slice(1, -1);\n else if (value.startsWith(\"'\") && value.endsWith(\"'\")) value = value.slice(1, -1);\n\n result[key] = value;\n }\n }\n\n return result;\n }\n\n private extractDescription(tree: Root): string | undefined {\n // Find first paragraph before any heading\n for (const node of tree.children) {\n if (node.type === 'heading') break;\n if (node.type === 'paragraph') {\n return mdastToString(node);\n }\n }\n return undefined;\n }\n}\n\n/**\n * Extract links from a list-based markdown structure (like awesome lists)\n */\nexport function extractListLinks(markdown: string): MarkdownLink[] {\n const tree = fromMarkdown(markdown);\n const links: MarkdownLink[] = [];\n let currentHeading = '';\n\n visit(tree, (node) => {\n if (node.type === 'heading') {\n currentHeading = mdastToString(node as Heading);\n }\n\n if (node.type === 'listItem') {\n const listItem = node as ListItem;\n\n // Find links in this list item\n visit(listItem, 'link', (linkNode: Link) => {\n links.push({\n url: linkNode.url,\n text: mdastToString(linkNode),\n title: linkNode.title ?? undefined,\n context: currentHeading || undefined,\n });\n });\n }\n });\n\n return links;\n}\n\n/**\n * Parse markdown into sections by heading level\n */\nexport function parseByHeadings(markdown: string, minLevel = 2): MarkdownSection[] {\n const parser = new MarkdownParser();\n const result = parser.parse(markdown);\n return result.data.sections.filter((s) => s.level >= minLevel);\n}\n"],"mappings":";;;;;;;;;;;;AASA,SAAgB,aAAa,KAAsB;AACjD,QAAO,oDAAoD,KAAK,IAAI;;;;;AAMtE,SAAgB,eAAe,KAAqD;CAClF,MAAM,QAAQ,IAAI,MAAM,gCAAgC;AACxD,KAAI,CAAC,SAAS,CAAC,MAAM,MAAM,CAAC,MAAM,GAAI,QAAO;AAC7C,QAAO;EACL,OAAO,MAAM;EACb,MAAM,MAAM,GAAG,QAAQ,UAAU,GAAG;EACrC;;;;;AAMH,SAAgB,SAAS,KAAa,SAAS,QAAQ,OAAO,aAAqB;CACjF,MAAM,OAAO,eAAe,IAAI;AAChC,KAAI,CAAC,KAAM,QAAO;AAClB,QAAO,qCAAqC,KAAK,MAAM,GAAG,KAAK,KAAK,GAAG,OAAO,GAAG;;;;;;AAOnF,eAAsB,cACpB,OACA,MACA,QACqB;AAGrB,QAAO;EACL,WAAW;EACX,UAAU;EACX;;;;;AAMH,SAAgB,gBAAgB,OAAoD;CAClF,MAAM,yBAAS,IAAI,KAA6B;AAEhD,MAAK,MAAM,QAAQ,OAAO;EACxB,MAAM,WAAW,KAAK,WAAW;EACjC,MAAM,WAAW,OAAO,IAAI,SAAS,IAAI,EAAE;AAC3C,WAAS,KAAK,KAAK;AACnB,SAAO,IAAI,UAAU,SAAS;;AAGhC,QAAO;;;;;;;;;;;;;;;;;ACvCT,IAAa,iBAAb,MAAoE;CAClE,AAAS,OAAO;CAEhB,SAAS,SAA0B;AAEjC,SACE,QAAQ,SAAS,KAAK,IACtB,QAAQ,SAAS,MAAM,IACvB,QAAQ,SAAS,MAAM,IACvB,QAAQ,SAAS,MAAM,IACvB,QAAQ,SAAS,MAAM;;CAI3B,MAAM,SAA+C;EACnD,MAAM,kDAAoB,QAAQ;EAClC,MAAMA,WAA8B,EAAE;EACtC,MAAMC,WAA2B,EAAE;EACnC,MAAMC,aAA0B,EAAE;EAClC,IAAIC;AAGJ,MAAI,QAAQ,WAAW,MAAM,EAAE;GAC7B,MAAM,WAAW,QAAQ,QAAQ,OAAO,EAAE;AAC1C,OAAI,aAAa,IAAI;IACnB,MAAM,qBAAqB,QAAQ,MAAM,GAAG,SAAS,CAAC,MAAM;AAC5D,kBAAc,KAAK,iBAAiB,mBAAmB;;;EAK3D,IAAIC,iBAAyC;AAG7C,8BAAM,OAAO,SAAS;AAEpB,OAAI,KAAK,SAAS,WAAW;IAC3B,MAAM,UAAU;IAChB,MAAM,2CAAsB,QAAQ;AAGpC,QAAI,eACF,UAAS,KAAK,eAAe;AAG/B,qBAAiB;KACf,OAAO,QAAQ;KACf;KACA,SAAS;KACT,OAAO,EAAE;KACV;;AAIH,OAAI,KAAK,SAAS,QAAQ;IACxB,MAAM,OAAO;IACb,MAAM,0CAAqB,KAAK;IAChC,MAAMC,WAAyB;KAC7B,KAAK,KAAK;KACV;KACA,OAAO,KAAK,SAAS;KACrB,SAAS,gBAAgB;KAC1B;AAED,aAAS,KAAK,SAAS;AACvB,QAAI,eACF,gBAAe,MAAM,KAAK,SAAS;;AAKvC,OAAI,KAAK,SAAS,QAAQ;IACxB,MAAM,OAAO;AACb,eAAW,KAAK;KACd,UAAU,KAAK,QAAQ;KACvB,MAAM,KAAK;KACX,MAAM,KAAK,QAAQ;KACpB,CAAC;;AAIJ,OAAI,kBAAkB,KAAK,SAAS,aAAa;IAC/C,MAAM,0CAAqB,KAAK;AAChC,mBAAe,YAAY,eAAe,UAAU,SAAS,MAAM;;IAErE;AAGF,MAAI,eACF,UAAS,KAAK,eAAe;AAS/B,SAAO,EACL,MAAM;GACJ,OAPW,aAAa,SAAoB,SAAS,MAAM,MAAM,EAAE,UAAU,EAAE,EAAE;GAQjF,aALiB,aAAa,eAA0B,KAAK,mBAAmB,KAAK;GAMrF;GACA,OAAO;GACP;GACA;GACD,EACF;;CAGH,AAAQ,iBAAiB,SAA0C;EACjE,MAAMC,SAAkC,EAAE;EAC1C,MAAM,QAAQ,QAAQ,MAAM,KAAK;AAEjC,OAAK,MAAM,QAAQ,OAAO;GACxB,MAAM,aAAa,KAAK,QAAQ,IAAI;AACpC,OAAI,aAAa,GAAG;IAClB,MAAM,MAAM,KAAK,MAAM,GAAG,WAAW,CAAC,MAAM;IAC5C,IAAIC,QAAmC,KAAK,MAAM,aAAa,EAAE,CAAC,MAAM;AAGxE,QAAI,UAAU,OAAQ,SAAQ;aACrB,UAAU,QAAS,SAAQ;aAC3B,kBAAkB,KAAK,MAAM,CAAE,SAAQ,OAAO,MAAM;aACpD,MAAM,WAAW,KAAI,IAAI,MAAM,SAAS,KAAI,CAAE,SAAQ,MAAM,MAAM,GAAG,GAAG;aACxE,MAAM,WAAW,IAAI,IAAI,MAAM,SAAS,IAAI,CAAE,SAAQ,MAAM,MAAM,GAAG,GAAG;AAEjF,WAAO,OAAO;;;AAIlB,SAAO;;CAGT,AAAQ,mBAAmB,MAAgC;AAEzD,OAAK,MAAM,QAAQ,KAAK,UAAU;AAChC,OAAI,KAAK,SAAS,UAAW;AAC7B,OAAI,KAAK,SAAS,YAChB,2CAAqB,KAAK;;;;;;;AAUlC,SAAgB,iBAAiB,UAAkC;CACjE,MAAM,kDAAoB,SAAS;CACnC,MAAMC,QAAwB,EAAE;CAChC,IAAI,iBAAiB;AAErB,6BAAM,OAAO,SAAS;AACpB,MAAI,KAAK,SAAS,UAChB,qDAA+B,KAAgB;AAGjD,MAAI,KAAK,SAAS,WAIhB,6BAHiB,MAGD,SAAS,aAAmB;AAC1C,SAAM,KAAK;IACT,KAAK,SAAS;IACd,yCAAoB,SAAS;IAC7B,OAAO,SAAS,SAAS;IACzB,SAAS,kBAAkB;IAC5B,CAAC;IACF;GAEJ;AAEF,QAAO;;;;;AAMT,SAAgB,gBAAgB,UAAkB,WAAW,GAAsB;AAGjF,QAFe,IAAI,gBAAgB,CACb,MAAM,SAAS,CACvB,KAAK,SAAS,QAAQ,MAAM,EAAE,SAAS,SAAS"}
package/dist/parsers/index.d.cts.map DELETED
@@ -1 +0,0 @@
1
- {"version":3,"file":"index.d.cts","names":[],"sources":["../../src/parsers/types.ts","../../src/parsers/github.ts","../../src/parsers/markdown.ts"],"sourcesContent":[],"mappings":";;AAOA;;;;;AAiBA;AAQiB,UAzBA,YAyBY,CAAA,KAAA,EAAA,QAAA,OAAA,CAAA,CAAA;EAUZ,SAAA,IAAA,EAAA,MAAe;EAUf;;;EAKH,QAAA,CAAA,OAAA,EAAA,MAAA,EAAA,GAAA,CAAA,EAAA,MAAA,CAAA,EAAA,OAAA;EACE;;AAMhB;EASiB,KAAA,CAAA,OAAU,EAAA,MAAA,EAAA,GAAA,CAAA,EAAA,MAAA,CAAA,EAvDa,YAuDb,CAvD0B,KAuD1B,EAvDiC,KAuDjC,CAAA;;;;AChE3B;AAOgB,UDQC,YCRa,CAAA,KAAA,EAAA,QAAA,OAAA,CAAA,CAAA;EAYd,IAAA,EDHR,KCGgB;EAUF,IAAA,CAAA,EDZb,KCYa;AAgBtB;;;;AAA2D,UDtB1C,YAAA,CCsB0C;;;;EC7B9C,OAAA,CAAA,EAAA,MAAA;;;;;AAAsC,UFiBlC,eAAA,CEjBkC;EAqJnC,KAAA,EAAA,MAAA;EA+BA,KAAA,EAAA,MAAA;;SF/JP;;;;;UAMQ,cAAA;;;YAGL;SACH;cACK;gBACE;;;;;UAMC,SAAA;;;;;;;;UASA,UAAA;;;;;;;;AAlEjB;;;;;AAiBA;AAQiB,iBCvBD,YAAA,CDuBa,GAAA,EAAA,MAAA,CAAA,EAAA,OAAA;AAU7B;AAUA;;AAIS,iBCxCO,cAAA,CDwCP,GAAA,EAAA,MAAA,CAAA,EAAA;EACK,KAAA,EAAA,MAAA;EACE,IAAA,EAAA,MAAA;CAAM,GAAA,IAAA;AAMtB;AASA;;iBC7CgB,QAAA;;AAnBhB;AAOA;AAYA;AAUsB,iBAAA,aAAA,CAIX,KAAA,EAAR,MAAA,EAAO,IAAA,EAAA,MAAA,EAAA,MAAA,CAAA,EAAA,MAAA,CAAA,EAAP,OAAO,CAAC,UAAD,CAAA;AAYV;;;AAAwD,iBAAxC,eAAA,CAAwC,KAAA,EAAjB,YAAiB,EAAA,CAAA,EAAA,GAAA,CAAA,MAAA,EAAY,YAAZ,EAAA,CAAA;;;AD/CxD;;;;;AAiBA;AAQA;AAUA;AAUA;;;;AAMgB,cEjCH,cAAA,YAA0B,YFiCvB,CEjCoC,cFiCpC,CAAA,CAAA;EAAM,SAAA,IAAA,GAAA,UAAA;EAML,QAAA,CAAA,OAAS,EAAA,MAAA,CAAA,EAAA,OAAA;EAST,KAAA,CAAA,OAAU,EAAA,MAAA,CAAA,EElCD,YFkCC,CElCY,cFkCZ,CAAA;;;;AChE3B;AAOA;AAYA;AAUsB,iBCwIN,gBAAA,CDpIL,QAAR,EAAA,MAAO,CAAA,ECoI0C,YDpI1C,EAAA;AAYV;;;AAAwD,iBCuJxC,eAAA,CDvJwC,QAAA,EAAA,MAAA,EAAA,QAAA,CAAA,EAAA,MAAA,CAAA,ECuJS,eDvJT,EAAA"}