scrapex 1.0.0-alpha.1 → 1.0.0-beta.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (50) hide show
  1. package/README.md +164 -5
  2. package/dist/enhancer-ByjRD-t5.mjs +769 -0
  3. package/dist/enhancer-ByjRD-t5.mjs.map +1 -0
  4. package/dist/enhancer-j0xqKDJm.cjs +847 -0
  5. package/dist/enhancer-j0xqKDJm.cjs.map +1 -0
  6. package/dist/index-CDgcRnig.d.cts +268 -0
  7. package/dist/index-CDgcRnig.d.cts.map +1 -0
  8. package/dist/index-piS5wtki.d.mts +268 -0
  9. package/dist/index-piS5wtki.d.mts.map +1 -0
  10. package/dist/index.cjs +1192 -37
  11. package/dist/index.cjs.map +1 -1
  12. package/dist/index.d.cts +318 -2
  13. package/dist/index.d.cts.map +1 -1
  14. package/dist/index.d.mts +318 -2
  15. package/dist/index.d.mts.map +1 -1
  16. package/dist/index.mjs +1164 -6
  17. package/dist/index.mjs.map +1 -1
  18. package/dist/llm/index.cjs +250 -232
  19. package/dist/llm/index.cjs.map +1 -1
  20. package/dist/llm/index.d.cts +132 -85
  21. package/dist/llm/index.d.cts.map +1 -1
  22. package/dist/llm/index.d.mts +132 -85
  23. package/dist/llm/index.d.mts.map +1 -1
  24. package/dist/llm/index.mjs +243 -236
  25. package/dist/llm/index.mjs.map +1 -1
  26. package/dist/parsers/index.cjs +10 -199
  27. package/dist/parsers/index.d.cts +2 -133
  28. package/dist/parsers/index.d.mts +2 -133
  29. package/dist/parsers/index.mjs +2 -191
  30. package/dist/parsers-Bneuws8x.cjs +569 -0
  31. package/dist/parsers-Bneuws8x.cjs.map +1 -0
  32. package/dist/parsers-CwkYnyWY.mjs +482 -0
  33. package/dist/parsers-CwkYnyWY.mjs.map +1 -0
  34. package/dist/types-CadAXrme.d.mts +674 -0
  35. package/dist/types-CadAXrme.d.mts.map +1 -0
  36. package/dist/types-DPEtPihB.d.cts +674 -0
  37. package/dist/types-DPEtPihB.d.cts.map +1 -0
  38. package/package.json +15 -16
  39. package/dist/enhancer-Q6CSc1gA.mjs +0 -220
  40. package/dist/enhancer-Q6CSc1gA.mjs.map +0 -1
  41. package/dist/enhancer-oM4BhYYS.cjs +0 -268
  42. package/dist/enhancer-oM4BhYYS.cjs.map +0 -1
  43. package/dist/parsers/index.cjs.map +0 -1
  44. package/dist/parsers/index.d.cts.map +0 -1
  45. package/dist/parsers/index.d.mts.map +0 -1
  46. package/dist/parsers/index.mjs.map +0 -1
  47. package/dist/types-CNQZVW36.d.mts +0 -150
  48. package/dist/types-CNQZVW36.d.mts.map +0 -1
  49. package/dist/types-D0HYR95H.d.cts +0 -150
  50. package/dist/types-D0HYR95H.d.cts.map +0 -1
@@ -1,200 +1,11 @@
1
- const require_index = require('../index.cjs');
2
- let mdast_util_from_markdown = require("mdast-util-from-markdown");
3
- let mdast_util_to_string = require("mdast-util-to-string");
4
- let unist_util_visit = require("unist-util-visit");
1
+ const require_parsers = require('../parsers-Bneuws8x.cjs');
5
2
 
6
- //#region src/parsers/github.ts
7
- /**
8
- * GitHub-specific utilities for parsing repositories.
9
- */
10
- /**
11
- * Check if a URL is a GitHub repository
12
- */
13
- function isGitHubRepo(url) {
14
- return /^https?:\/\/(www\.)?github\.com\/[^/]+\/[^/]+\/?$/.test(url);
15
- }
16
- /**
17
- * Extract GitHub repo info from URL
18
- */
19
- function parseGitHubUrl(url) {
20
- const match = url.match(/github\.com\/([^/]+)\/([^/]+)/);
21
- if (!match || !match[1] || !match[2]) return null;
22
- return {
23
- owner: match[1],
24
- repo: match[2].replace(/\.git$/, "")
25
- };
26
- }
27
- /**
28
- * Convert a GitHub repo URL to raw content URL
29
- */
30
- function toRawUrl(url, branch = "main", file = "README.md") {
31
- const info = parseGitHubUrl(url);
32
- if (!info) return url;
33
- return `https://raw.githubusercontent.com/${info.owner}/${info.repo}/${branch}/${file}`;
34
- }
35
- /**
36
- * Fetch GitHub API metadata for a repository
37
- * Note: This is a placeholder - actual implementation would need GitHub API access
38
- */
39
- async function fetchRepoMeta(owner, repo, _token) {
40
- return {
41
- repoOwner: owner,
42
- repoName: repo
43
- };
44
- }
45
- /**
46
- * Group links by their category/section
47
- */
48
- function groupByCategory(links) {
49
- const groups = /* @__PURE__ */ new Map();
50
- for (const link of links) {
51
- const category = link.context || "Uncategorized";
52
- const existing = groups.get(category) || [];
53
- existing.push(link);
54
- groups.set(category, existing);
55
- }
56
- return groups;
57
- }
58
-
59
- //#endregion
60
- //#region src/parsers/markdown.ts
61
- /**
62
- * Generic Markdown parser.
63
- * Extracts structure, links, and code blocks from markdown content.
64
- *
65
- * @example
66
- * ```ts
67
- * const parser = new MarkdownParser();
68
- * const result = parser.parse(markdownContent);
69
- * console.log(result.data.sections);
70
- * console.log(result.data.links);
71
- * ```
72
- */
73
- var MarkdownParser = class {
74
- name = "markdown";
75
- canParse(content) {
76
- return content.includes("# ") || content.includes("## ") || content.includes("- [") || content.includes("* [") || content.includes("```");
77
- }
78
- parse(content) {
79
- const tree = (0, mdast_util_from_markdown.fromMarkdown)(content);
80
- const sections = [];
81
- const allLinks = [];
82
- const codeBlocks = [];
83
- let frontmatter;
84
- if (content.startsWith("---")) {
85
- const endIndex = content.indexOf("---", 3);
86
- if (endIndex !== -1) {
87
- const frontmatterContent = content.slice(3, endIndex).trim();
88
- frontmatter = this.parseFrontmatter(frontmatterContent);
89
- }
90
- }
91
- let currentSection = null;
92
- (0, unist_util_visit.visit)(tree, (node) => {
93
- if (node.type === "heading") {
94
- const heading = node;
95
- const title = (0, mdast_util_to_string.toString)(heading);
96
- if (currentSection) sections.push(currentSection);
97
- currentSection = {
98
- level: heading.depth,
99
- title,
100
- content: "",
101
- links: []
102
- };
103
- }
104
- if (node.type === "link") {
105
- const link = node;
106
- const text = (0, mdast_util_to_string.toString)(link);
107
- const linkData = {
108
- url: link.url,
109
- text,
110
- title: link.title ?? void 0,
111
- context: currentSection?.title
112
- };
113
- allLinks.push(linkData);
114
- if (currentSection) currentSection.links.push(linkData);
115
- }
116
- if (node.type === "code") {
117
- const code = node;
118
- codeBlocks.push({
119
- language: code.lang ?? void 0,
120
- code: code.value,
121
- meta: code.meta ?? void 0
122
- });
123
- }
124
- if (currentSection && node.type === "paragraph") {
125
- const text = (0, mdast_util_to_string.toString)(node);
126
- currentSection.content += (currentSection.content ? "\n\n" : "") + text;
127
- }
128
- });
129
- if (currentSection) sections.push(currentSection);
130
- return { data: {
131
- title: frontmatter?.title ?? sections.find((s) => s.level === 1)?.title,
132
- description: frontmatter?.description ?? this.extractDescription(tree),
133
- sections,
134
- links: allLinks,
135
- codeBlocks,
136
- frontmatter
137
- } };
138
- }
139
- parseFrontmatter(content) {
140
- const result = {};
141
- const lines = content.split("\n");
142
- for (const line of lines) {
143
- const colonIndex = line.indexOf(":");
144
- if (colonIndex > 0) {
145
- const key = line.slice(0, colonIndex).trim();
146
- let value = line.slice(colonIndex + 1).trim();
147
- if (value === "true") value = true;
148
- else if (value === "false") value = false;
149
- else if (/^-?\d+(\.\d+)?$/.test(value)) value = Number(value);
150
- else if (value.startsWith("\"") && value.endsWith("\"")) value = value.slice(1, -1);
151
- else if (value.startsWith("'") && value.endsWith("'")) value = value.slice(1, -1);
152
- result[key] = value;
153
- }
154
- }
155
- return result;
156
- }
157
- extractDescription(tree) {
158
- for (const node of tree.children) {
159
- if (node.type === "heading") break;
160
- if (node.type === "paragraph") return (0, mdast_util_to_string.toString)(node);
161
- }
162
- }
163
- };
164
- /**
165
- * Extract links from a list-based markdown structure (like awesome lists)
166
- */
167
- function extractListLinks(markdown) {
168
- const tree = (0, mdast_util_from_markdown.fromMarkdown)(markdown);
169
- const links = [];
170
- let currentHeading = "";
171
- (0, unist_util_visit.visit)(tree, (node) => {
172
- if (node.type === "heading") currentHeading = (0, mdast_util_to_string.toString)(node);
173
- if (node.type === "listItem") (0, unist_util_visit.visit)(node, "link", (linkNode) => {
174
- links.push({
175
- url: linkNode.url,
176
- text: (0, mdast_util_to_string.toString)(linkNode),
177
- title: linkNode.title ?? void 0,
178
- context: currentHeading || void 0
179
- });
180
- });
181
- });
182
- return links;
183
- }
184
- /**
185
- * Parse markdown into sections by heading level
186
- */
187
- function parseByHeadings(markdown, minLevel = 2) {
188
- return new MarkdownParser().parse(markdown).data.sections.filter((s) => s.level >= minLevel);
189
- }
190
-
191
- //#endregion
192
- exports.MarkdownParser = MarkdownParser;
193
- exports.extractListLinks = extractListLinks;
194
- exports.fetchRepoMeta = fetchRepoMeta;
195
- exports.groupByCategory = groupByCategory;
196
- exports.isGitHubRepo = isGitHubRepo;
197
- exports.parseByHeadings = parseByHeadings;
198
- exports.parseGitHubUrl = parseGitHubUrl;
199
- exports.toRawUrl = toRawUrl;
200
- //# sourceMappingURL=index.cjs.map
3
+ exports.MarkdownParser = require_parsers.MarkdownParser;
4
+ exports.RSSParser = require_parsers.RSSParser;
5
+ exports.extractListLinks = require_parsers.extractListLinks;
6
+ exports.fetchRepoMeta = require_parsers.fetchRepoMeta;
7
+ exports.groupByCategory = require_parsers.groupByCategory;
8
+ exports.isGitHubRepo = require_parsers.isGitHubRepo;
9
+ exports.parseByHeadings = require_parsers.parseByHeadings;
10
+ exports.parseGitHubUrl = require_parsers.parseGitHubUrl;
11
+ exports.toRawUrl = require_parsers.toRawUrl;
@@ -1,133 +1,2 @@
1
- //#region src/parsers/types.d.ts
2
- /**
3
- * Generic source parser interface.
4
- * Parsers transform raw content into structured data with metadata.
5
- *
6
- * @template TData - The main data type (e.g., array of links)
7
- * @template TMeta - Optional metadata type
8
- */
9
- interface SourceParser<TData, TMeta = unknown> {
10
- readonly name: string;
11
- /**
12
- * Check if this parser can handle the given content
13
- */
14
- canParse(content: string, url?: string): boolean;
15
- /**
16
- * Parse the content and extract structured data
17
- */
18
- parse(content: string, url?: string): ParserResult<TData, TMeta>;
19
- }
20
- /**
21
- * Result from a parser
22
- */
23
- interface ParserResult<TData, TMeta = unknown> {
24
- data: TData;
25
- meta?: TMeta;
26
- }
27
- /**
28
- * Markdown link extracted from content
29
- */
30
- interface MarkdownLink {
31
- url: string;
32
- text: string;
33
- title?: string;
34
- context?: string;
35
- }
36
- /**
37
- * Markdown section (heading + content)
38
- */
39
- interface MarkdownSection {
40
- level: number;
41
- title: string;
42
- content: string;
43
- links: MarkdownLink[];
44
- }
45
- /**
46
- * Parsed markdown structure
47
- */
48
- interface ParsedMarkdown {
49
- title?: string;
50
- description?: string;
51
- sections: MarkdownSection[];
52
- links: MarkdownLink[];
53
- codeBlocks: CodeBlock[];
54
- frontmatter?: Record<string, unknown>;
55
- }
56
- /**
57
- * Code block from markdown
58
- */
59
- interface CodeBlock {
60
- language?: string;
61
- code: string;
62
- meta?: string;
63
- }
64
- /**
65
- * GitHub repository metadata
66
- */
67
- interface GitHubMeta {
68
- repoOwner?: string;
69
- repoName?: string;
70
- stars?: number;
71
- lastUpdated?: string;
72
- }
73
- //#endregion
74
- //#region src/parsers/github.d.ts
75
- /**
76
- * GitHub-specific utilities for parsing repositories.
77
- */
78
- /**
79
- * Check if a URL is a GitHub repository
80
- */
81
- declare function isGitHubRepo(url: string): boolean;
82
- /**
83
- * Extract GitHub repo info from URL
84
- */
85
- declare function parseGitHubUrl(url: string): {
86
- owner: string;
87
- repo: string;
88
- } | null;
89
- /**
90
- * Convert a GitHub repo URL to raw content URL
91
- */
92
- declare function toRawUrl(url: string, branch?: string, file?: string): string;
93
- /**
94
- * Fetch GitHub API metadata for a repository
95
- * Note: This is a placeholder - actual implementation would need GitHub API access
96
- */
97
- declare function fetchRepoMeta(owner: string, repo: string, _token?: string): Promise<GitHubMeta>;
98
- /**
99
- * Group links by their category/section
100
- */
101
- declare function groupByCategory(links: MarkdownLink[]): Map<string, MarkdownLink[]>;
102
- //#endregion
103
- //#region src/parsers/markdown.d.ts
104
- /**
105
- * Generic Markdown parser.
106
- * Extracts structure, links, and code blocks from markdown content.
107
- *
108
- * @example
109
- * ```ts
110
- * const parser = new MarkdownParser();
111
- * const result = parser.parse(markdownContent);
112
- * console.log(result.data.sections);
113
- * console.log(result.data.links);
114
- * ```
115
- */
116
- declare class MarkdownParser implements SourceParser<ParsedMarkdown> {
117
- readonly name = "markdown";
118
- canParse(content: string): boolean;
119
- parse(content: string): ParserResult<ParsedMarkdown>;
120
- private parseFrontmatter;
121
- private extractDescription;
122
- }
123
- /**
124
- * Extract links from a list-based markdown structure (like awesome lists)
125
- */
126
- declare function extractListLinks(markdown: string): MarkdownLink[];
127
- /**
128
- * Parse markdown into sections by heading level
129
- */
130
- declare function parseByHeadings(markdown: string, minLevel?: number): MarkdownSection[];
131
- //#endregion
132
- export { type CodeBlock, type GitHubMeta, type MarkdownLink, MarkdownParser, type MarkdownSection, type ParsedMarkdown, type ParserResult, type SourceParser, extractListLinks, fetchRepoMeta, groupByCategory, isGitHubRepo, parseByHeadings, parseGitHubUrl, toRawUrl };
133
- //# sourceMappingURL=index.d.cts.map
1
+ import { _ as MarkdownSection, a as parseByHeadings, b as ParserResult, c as isGitHubRepo, d as CodeBlock, f as FeedEnclosure, g as MarkdownLink, h as GitHubMeta, i as extractListLinks, l as parseGitHubUrl, m as FeedMeta, n as RSSParserOptions, o as fetchRepoMeta, p as FeedItem, r as MarkdownParser, s as groupByCategory, t as RSSParser, u as toRawUrl, v as ParsedFeed, x as SourceParser, y as ParsedMarkdown } from "../index-CDgcRnig.cjs";
2
+ export { CodeBlock, FeedEnclosure, FeedItem, FeedMeta, GitHubMeta, MarkdownLink, MarkdownParser, MarkdownSection, ParsedFeed, ParsedMarkdown, ParserResult, RSSParser, RSSParserOptions, SourceParser, extractListLinks, fetchRepoMeta, groupByCategory, isGitHubRepo, parseByHeadings, parseGitHubUrl, toRawUrl };
@@ -1,133 +1,2 @@
1
- //#region src/parsers/types.d.ts
2
- /**
3
- * Generic source parser interface.
4
- * Parsers transform raw content into structured data with metadata.
5
- *
6
- * @template TData - The main data type (e.g., array of links)
7
- * @template TMeta - Optional metadata type
8
- */
9
- interface SourceParser<TData, TMeta = unknown> {
10
- readonly name: string;
11
- /**
12
- * Check if this parser can handle the given content
13
- */
14
- canParse(content: string, url?: string): boolean;
15
- /**
16
- * Parse the content and extract structured data
17
- */
18
- parse(content: string, url?: string): ParserResult<TData, TMeta>;
19
- }
20
- /**
21
- * Result from a parser
22
- */
23
- interface ParserResult<TData, TMeta = unknown> {
24
- data: TData;
25
- meta?: TMeta;
26
- }
27
- /**
28
- * Markdown link extracted from content
29
- */
30
- interface MarkdownLink {
31
- url: string;
32
- text: string;
33
- title?: string;
34
- context?: string;
35
- }
36
- /**
37
- * Markdown section (heading + content)
38
- */
39
- interface MarkdownSection {
40
- level: number;
41
- title: string;
42
- content: string;
43
- links: MarkdownLink[];
44
- }
45
- /**
46
- * Parsed markdown structure
47
- */
48
- interface ParsedMarkdown {
49
- title?: string;
50
- description?: string;
51
- sections: MarkdownSection[];
52
- links: MarkdownLink[];
53
- codeBlocks: CodeBlock[];
54
- frontmatter?: Record<string, unknown>;
55
- }
56
- /**
57
- * Code block from markdown
58
- */
59
- interface CodeBlock {
60
- language?: string;
61
- code: string;
62
- meta?: string;
63
- }
64
- /**
65
- * GitHub repository metadata
66
- */
67
- interface GitHubMeta {
68
- repoOwner?: string;
69
- repoName?: string;
70
- stars?: number;
71
- lastUpdated?: string;
72
- }
73
- //#endregion
74
- //#region src/parsers/github.d.ts
75
- /**
76
- * GitHub-specific utilities for parsing repositories.
77
- */
78
- /**
79
- * Check if a URL is a GitHub repository
80
- */
81
- declare function isGitHubRepo(url: string): boolean;
82
- /**
83
- * Extract GitHub repo info from URL
84
- */
85
- declare function parseGitHubUrl(url: string): {
86
- owner: string;
87
- repo: string;
88
- } | null;
89
- /**
90
- * Convert a GitHub repo URL to raw content URL
91
- */
92
- declare function toRawUrl(url: string, branch?: string, file?: string): string;
93
- /**
94
- * Fetch GitHub API metadata for a repository
95
- * Note: This is a placeholder - actual implementation would need GitHub API access
96
- */
97
- declare function fetchRepoMeta(owner: string, repo: string, _token?: string): Promise<GitHubMeta>;
98
- /**
99
- * Group links by their category/section
100
- */
101
- declare function groupByCategory(links: MarkdownLink[]): Map<string, MarkdownLink[]>;
102
- //#endregion
103
- //#region src/parsers/markdown.d.ts
104
- /**
105
- * Generic Markdown parser.
106
- * Extracts structure, links, and code blocks from markdown content.
107
- *
108
- * @example
109
- * ```ts
110
- * const parser = new MarkdownParser();
111
- * const result = parser.parse(markdownContent);
112
- * console.log(result.data.sections);
113
- * console.log(result.data.links);
114
- * ```
115
- */
116
- declare class MarkdownParser implements SourceParser<ParsedMarkdown> {
117
- readonly name = "markdown";
118
- canParse(content: string): boolean;
119
- parse(content: string): ParserResult<ParsedMarkdown>;
120
- private parseFrontmatter;
121
- private extractDescription;
122
- }
123
- /**
124
- * Extract links from a list-based markdown structure (like awesome lists)
125
- */
126
- declare function extractListLinks(markdown: string): MarkdownLink[];
127
- /**
128
- * Parse markdown into sections by heading level
129
- */
130
- declare function parseByHeadings(markdown: string, minLevel?: number): MarkdownSection[];
131
- //#endregion
132
- export { type CodeBlock, type GitHubMeta, type MarkdownLink, MarkdownParser, type MarkdownSection, type ParsedMarkdown, type ParserResult, type SourceParser, extractListLinks, fetchRepoMeta, groupByCategory, isGitHubRepo, parseByHeadings, parseGitHubUrl, toRawUrl };
133
- //# sourceMappingURL=index.d.mts.map
1
+ import { _ as MarkdownSection, a as parseByHeadings, b as ParserResult, c as isGitHubRepo, d as CodeBlock, f as FeedEnclosure, g as MarkdownLink, h as GitHubMeta, i as extractListLinks, l as parseGitHubUrl, m as FeedMeta, n as RSSParserOptions, o as fetchRepoMeta, p as FeedItem, r as MarkdownParser, s as groupByCategory, t as RSSParser, u as toRawUrl, v as ParsedFeed, x as SourceParser, y as ParsedMarkdown } from "../index-piS5wtki.mjs";
2
+ export { CodeBlock, FeedEnclosure, FeedItem, FeedMeta, GitHubMeta, MarkdownLink, MarkdownParser, MarkdownSection, ParsedFeed, ParsedMarkdown, ParserResult, RSSParser, RSSParserOptions, SourceParser, extractListLinks, fetchRepoMeta, groupByCategory, isGitHubRepo, parseByHeadings, parseGitHubUrl, toRawUrl };
@@ -1,192 +1,3 @@
1
- import { fromMarkdown } from "mdast-util-from-markdown";
2
- import { toString } from "mdast-util-to-string";
3
- import { visit } from "unist-util-visit";
1
+ import { a as fetchRepoMeta, c as parseGitHubUrl, i as parseByHeadings, l as toRawUrl, n as MarkdownParser, o as groupByCategory, r as extractListLinks, s as isGitHubRepo, t as RSSParser } from "../parsers-CwkYnyWY.mjs";
4
2
 
5
- //#region src/parsers/github.ts
6
- /**
7
- * GitHub-specific utilities for parsing repositories.
8
- */
9
- /**
10
- * Check if a URL is a GitHub repository
11
- */
12
- function isGitHubRepo(url) {
13
- return /^https?:\/\/(www\.)?github\.com\/[^/]+\/[^/]+\/?$/.test(url);
14
- }
15
- /**
16
- * Extract GitHub repo info from URL
17
- */
18
- function parseGitHubUrl(url) {
19
- const match = url.match(/github\.com\/([^/]+)\/([^/]+)/);
20
- if (!match || !match[1] || !match[2]) return null;
21
- return {
22
- owner: match[1],
23
- repo: match[2].replace(/\.git$/, "")
24
- };
25
- }
26
- /**
27
- * Convert a GitHub repo URL to raw content URL
28
- */
29
- function toRawUrl(url, branch = "main", file = "README.md") {
30
- const info = parseGitHubUrl(url);
31
- if (!info) return url;
32
- return `https://raw.githubusercontent.com/${info.owner}/${info.repo}/${branch}/${file}`;
33
- }
34
- /**
35
- * Fetch GitHub API metadata for a repository
36
- * Note: This is a placeholder - actual implementation would need GitHub API access
37
- */
38
- async function fetchRepoMeta(owner, repo, _token) {
39
- return {
40
- repoOwner: owner,
41
- repoName: repo
42
- };
43
- }
44
- /**
45
- * Group links by their category/section
46
- */
47
- function groupByCategory(links) {
48
- const groups = /* @__PURE__ */ new Map();
49
- for (const link of links) {
50
- const category = link.context || "Uncategorized";
51
- const existing = groups.get(category) || [];
52
- existing.push(link);
53
- groups.set(category, existing);
54
- }
55
- return groups;
56
- }
57
-
58
- //#endregion
59
- //#region src/parsers/markdown.ts
60
- /**
61
- * Generic Markdown parser.
62
- * Extracts structure, links, and code blocks from markdown content.
63
- *
64
- * @example
65
- * ```ts
66
- * const parser = new MarkdownParser();
67
- * const result = parser.parse(markdownContent);
68
- * console.log(result.data.sections);
69
- * console.log(result.data.links);
70
- * ```
71
- */
72
- var MarkdownParser = class {
73
- name = "markdown";
74
- canParse(content) {
75
- return content.includes("# ") || content.includes("## ") || content.includes("- [") || content.includes("* [") || content.includes("```");
76
- }
77
- parse(content) {
78
- const tree = fromMarkdown(content);
79
- const sections = [];
80
- const allLinks = [];
81
- const codeBlocks = [];
82
- let frontmatter;
83
- if (content.startsWith("---")) {
84
- const endIndex = content.indexOf("---", 3);
85
- if (endIndex !== -1) {
86
- const frontmatterContent = content.slice(3, endIndex).trim();
87
- frontmatter = this.parseFrontmatter(frontmatterContent);
88
- }
89
- }
90
- let currentSection = null;
91
- visit(tree, (node) => {
92
- if (node.type === "heading") {
93
- const heading = node;
94
- const title = toString(heading);
95
- if (currentSection) sections.push(currentSection);
96
- currentSection = {
97
- level: heading.depth,
98
- title,
99
- content: "",
100
- links: []
101
- };
102
- }
103
- if (node.type === "link") {
104
- const link = node;
105
- const text = toString(link);
106
- const linkData = {
107
- url: link.url,
108
- text,
109
- title: link.title ?? void 0,
110
- context: currentSection?.title
111
- };
112
- allLinks.push(linkData);
113
- if (currentSection) currentSection.links.push(linkData);
114
- }
115
- if (node.type === "code") {
116
- const code = node;
117
- codeBlocks.push({
118
- language: code.lang ?? void 0,
119
- code: code.value,
120
- meta: code.meta ?? void 0
121
- });
122
- }
123
- if (currentSection && node.type === "paragraph") {
124
- const text = toString(node);
125
- currentSection.content += (currentSection.content ? "\n\n" : "") + text;
126
- }
127
- });
128
- if (currentSection) sections.push(currentSection);
129
- return { data: {
130
- title: frontmatter?.title ?? sections.find((s) => s.level === 1)?.title,
131
- description: frontmatter?.description ?? this.extractDescription(tree),
132
- sections,
133
- links: allLinks,
134
- codeBlocks,
135
- frontmatter
136
- } };
137
- }
138
- parseFrontmatter(content) {
139
- const result = {};
140
- const lines = content.split("\n");
141
- for (const line of lines) {
142
- const colonIndex = line.indexOf(":");
143
- if (colonIndex > 0) {
144
- const key = line.slice(0, colonIndex).trim();
145
- let value = line.slice(colonIndex + 1).trim();
146
- if (value === "true") value = true;
147
- else if (value === "false") value = false;
148
- else if (/^-?\d+(\.\d+)?$/.test(value)) value = Number(value);
149
- else if (value.startsWith("\"") && value.endsWith("\"")) value = value.slice(1, -1);
150
- else if (value.startsWith("'") && value.endsWith("'")) value = value.slice(1, -1);
151
- result[key] = value;
152
- }
153
- }
154
- return result;
155
- }
156
- extractDescription(tree) {
157
- for (const node of tree.children) {
158
- if (node.type === "heading") break;
159
- if (node.type === "paragraph") return toString(node);
160
- }
161
- }
162
- };
163
- /**
164
- * Extract links from a list-based markdown structure (like awesome lists)
165
- */
166
- function extractListLinks(markdown) {
167
- const tree = fromMarkdown(markdown);
168
- const links = [];
169
- let currentHeading = "";
170
- visit(tree, (node) => {
171
- if (node.type === "heading") currentHeading = toString(node);
172
- if (node.type === "listItem") visit(node, "link", (linkNode) => {
173
- links.push({
174
- url: linkNode.url,
175
- text: toString(linkNode),
176
- title: linkNode.title ?? void 0,
177
- context: currentHeading || void 0
178
- });
179
- });
180
- });
181
- return links;
182
- }
183
- /**
184
- * Parse markdown into sections by heading level
185
- */
186
- function parseByHeadings(markdown, minLevel = 2) {
187
- return new MarkdownParser().parse(markdown).data.sections.filter((s) => s.level >= minLevel);
188
- }
189
-
190
- //#endregion
191
- export { MarkdownParser, extractListLinks, fetchRepoMeta, groupByCategory, isGitHubRepo, parseByHeadings, parseGitHubUrl, toRawUrl };
192
- //# sourceMappingURL=index.mjs.map
3
+ export { MarkdownParser, RSSParser, extractListLinks, fetchRepoMeta, groupByCategory, isGitHubRepo, parseByHeadings, parseGitHubUrl, toRawUrl };