research-powerpack-mcp 3.0.0 → 3.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,7 +1,3 @@
1
- /**
2
- * Simple markdown cleaner service
3
- * Converts HTML to markdown and cleans up scraped content
4
- */
5
1
  export declare class MarkdownCleaner {
6
2
  processContent(htmlContent: string): string;
7
3
  }
@@ -1 +1 @@
1
- {"version":3,"file":"markdown-cleaner.d.ts","sourceRoot":"","sources":["../../src/services/markdown-cleaner.ts"],"names":[],"mappings":"AAAA;;;GAGG;AACH,qBAAa,eAAe;IAC1B,cAAc,CAAC,WAAW,EAAE,MAAM,GAAG,MAAM;CA+D5C"}
1
+ {"version":3,"file":"markdown-cleaner.d.ts","sourceRoot":"","sources":["../../src/services/markdown-cleaner.ts"],"names":[],"mappings":"AAcA,qBAAa,eAAe;IAC1B,cAAc,CAAC,WAAW,EAAE,MAAM,GAAG,MAAM;CAsB5C"}
@@ -1,52 +1,27 @@
1
1
  /**
2
- * Simple markdown cleaner service
3
- * Converts HTML to markdown and cleans up scraped content
2
+ * Markdown cleaner service using Turndown for HTML to Markdown conversion
4
3
  */
4
+ import TurndownService from 'turndown';
5
+ const turndown = new TurndownService({
6
+ headingStyle: 'atx',
7
+ codeBlockStyle: 'fenced',
8
+ bulletListMarker: '-',
9
+ });
10
+ // Remove script, style, nav, footer, aside, and noscript elements
11
+ turndown.remove(['script', 'style', 'nav', 'footer', 'aside', 'noscript']);
5
12
  export class MarkdownCleaner {
6
13
  processContent(htmlContent) {
7
14
  if (!htmlContent || typeof htmlContent !== 'string') {
8
15
  return htmlContent;
9
16
  }
10
- // Basic HTML to markdown conversion
11
- let content = htmlContent;
12
- // Remove script and style tags
13
- content = content.replace(/<script\b[^<]*(?:(?!<\/script>)<[^<]*)*<\/script>/gi, '');
14
- content = content.replace(/<style\b[^<]*(?:(?!<\/style>)<[^<]*)*<\/style>/gi, '');
15
- // Remove HTML comments
16
- content = content.replace(/<!--[\s\S]*?-->/g, '');
17
- // Basic tag cleanup (preserve structure if already markdown)
18
- if (!content.includes('<')) {
19
- return content;
17
+ // If the input contains no '<' (so no HTML tags), treat it as already markdown and return it trimmed
18
+ if (!htmlContent.includes('<')) {
19
+ return htmlContent.trim();
20
20
  }
21
- // Simple conversions
22
- content = content.replace(/<br\s*\/?>/gi, '\n');
23
- content = content.replace(/<\/p>/gi, '\n\n');
24
- content = content.replace(/<p[^>]*>/gi, '');
25
- content = content.replace(/<\/div>/gi, '\n');
26
- content = content.replace(/<div[^>]*>/gi, '');
27
- // Headers
28
- content = content.replace(/<h1[^>]*>(.*?)<\/h1>/gi, '# $1\n');
29
- content = content.replace(/<h2[^>]*>(.*?)<\/h2>/gi, '## $1\n');
30
- content = content.replace(/<h3[^>]*>(.*?)<\/h3>/gi, '### $1\n');
31
- // Lists
32
- content = content.replace(/<li[^>]*>(.*?)<\/li>/gi, '- $1\n');
33
- content = content.replace(/<\/ul>/gi, '\n');
34
- content = content.replace(/<ul[^>]*>/gi, '');
35
- // Links
36
- content = content.replace(/<a[^>]*href=["']([^"']*)["'][^>]*>(.*?)<\/a>/gi, '[$2]($1)');
37
- // Strong/bold
38
- content = content.replace(/<(strong|b)[^>]*>(.*?)<\/\1>/gi, '**$2**');
39
- // Emphasis/italic
40
- content = content.replace(/<(em|i)[^>]*>(.*?)<\/\1>/gi, '*$2*');
41
- // Remove remaining HTML tags
42
- content = content.replace(/<[^>]+>/g, '');
43
- // Decode HTML entities
44
- content = content.replace(/&nbsp;/g, ' ');
45
- content = content.replace(/&quot;/g, '"');
46
- content = content.replace(/&apos;/g, "'");
47
- content = content.replace(/&lt;/g, '<');
48
- content = content.replace(/&gt;/g, '>');
49
- content = content.replace(/&amp;/g, '&');
21
+ // Remove HTML comments before conversion
22
+ let content = htmlContent.replace(/<!--[\s\S]*?-->/g, '');
23
+ // Convert HTML to Markdown using Turndown
24
+ content = turndown.turndown(content);
50
25
  // Clean up whitespace
51
26
  content = content.replace(/\n{3,}/g, '\n\n');
52
27
  content = content.trim();
@@ -1 +1 @@
1
- {"version":3,"file":"markdown-cleaner.js","sourceRoot":"","sources":["../../src/services/markdown-cleaner.ts"],"names":[],"mappings":"AAAA;;;GAGG;AACH,MAAM,OAAO,eAAe;IAC1B,cAAc,CAAC,WAAmB;QAChC,IAAI,CAAC,WAAW,IAAI,OAAO,WAAW,KAAK,QAAQ,EAAE,CAAC;YACpD,OAAO,WAAW,CAAC;QACrB,CAAC;QAED,oCAAoC;QACpC,IAAI,OAAO,GAAG,WAAW,CAAC;QAE1B,+BAA+B;QAC/B,OAAO,GAAG,OAAO,CAAC,OAAO,CAAC,qDAAqD,EAAE,EAAE,CAAC,CAAC;QACrF,OAAO,GAAG,OAAO,CAAC,OAAO,CAAC,kDAAkD,EAAE,EAAE,CAAC,CAAC;QAElF,uBAAuB;QACvB,OAAO,GAAG,OAAO,CAAC,OAAO,CAAC,kBAAkB,EAAE,EAAE,CAAC,CAAC;QAElD,6DAA6D;QAC7D,IAAI,CAAC,OAAO,CAAC,QAAQ,CAAC,GAAG,CAAC,EAAE,CAAC;YAC3B,OAAO,OAAO,CAAC;QACjB,CAAC;QAED,qBAAqB;QACrB,OAAO,GAAG,OAAO,CAAC,OAAO,CAAC,cAAc,EAAE,IAAI,CAAC,CAAC;QAChD,OAAO,GAAG,OAAO,CAAC,OAAO,CAAC,SAAS,EAAE,MAAM,CAAC,CAAC;QAC7C,OAAO,GAAG,OAAO,CAAC,OAAO,CAAC,YAAY,EAAE,EAAE,CAAC,CAAC;QAC5C,OAAO,GAAG,OAAO,CAAC,OAAO,CAAC,WAAW,EAAE,IAAI,CAAC,CAAC;QAC7C,OAAO,GAAG,OAAO,CAAC,OAAO,CAAC,cAAc,EAAE,EAAE,CAAC,CAAC;QAE9C,UAAU;QACV,OAAO,GAAG,OAAO,CAAC,OAAO,CAAC,wBAAwB,EAAE,QAAQ,CAAC,CAAC;QAC9D,OAAO,GAAG,OAAO,CAAC,OAAO,CAAC,wBAAwB,EAAE,SAAS,CAAC,CAAC;QAC/D,OAAO,GAAG,OAAO,CAAC,OAAO,CAAC,wBAAwB,EAAE,UAAU,CAAC,CAAC;QAEhE,QAAQ;QACR,OAAO,GAAG,OAAO,CAAC,OAAO,CAAC,wBAAwB,EAAE,QAAQ,CAAC,CAAC;QAC9D,OAAO,GAAG,OAAO,CAAC,OAAO,CAAC,UAAU,EAAE,IAAI,CAAC,CAAC;QAC5C,OAAO,GAAG,OAAO,CAAC,OAAO,CAAC,aAAa,EAAE,EAAE,CAAC,CAAC;QAE7C,QAAQ;QACR,OAAO,GAAG,OAAO,CAAC,OAAO,CAAC,gDAAgD,EAAE,UAAU,CAAC,CAAC;QAExF,cAAc;QACd,OAAO,GAAG,OAAO,CAAC,OAAO,CAAC,gCAAgC,EAAE,QAAQ,CAAC,CAAC;QAEtE,kBAAkB;QAClB,OAAO,GAAG,OAAO,CAAC,OAAO,CAAC,4BAA4B,EAAE,MAAM,CAAC,CAAC;QAEhE,6BAA6B;QAC7B,OAAO,GAAG,OAAO,CAAC,OAAO,CAAC,UAAU,EAAE,EAAE,CAAC,CAAC;QAE1C,uBAAuB;QACvB,OAAO,GAAG,OAAO,CAAC,OAAO,CAAC,SAAS,EAAE,GAAG,CAAC,CAAC;QAC1C,OAAO,GAAG,OAAO,CAAC,OAAO,CAAC,SAAS,EAAE,GAAG,CAAC,CAAC;QAC1C,OAAO,GAAG,OAAO,CAAC,OAAO,CAAC,SAAS,EAAE,GAAG,CAAC,CAAC;QAC1C,OAAO,GAAG,OAAO,CAAC,OAAO,CAAC,OAAO,EAAE,GAAG,CAAC,CAAC;QACxC,OAAO,GAAG,OAAO,CAAC,OAAO,CAAC,OAAO,EAAE,GAAG,CAAC,CAAC;QACxC,OAAO,GAAG,OAAO,CAAC,OAAO,CAAC,QA
AQ,EAAE,GAAG,CAAC,CAAC;QAEzC,sBAAsB;QACtB,OAAO,GAAG,OAAO,CAAC,OAAO,CAAC,SAAS,EAAE,MAAM,CAAC,CAAC;QAC7C,OAAO,GAAG,OAAO,CAAC,IAAI,EAAE,CAAC;QAEzB,OAAO,OAAO,CAAC;IACjB,CAAC;CACF"}
1
+ {"version":3,"file":"markdown-cleaner.js","sourceRoot":"","sources":["../../src/services/markdown-cleaner.ts"],"names":[],"mappings":"AAAA;;GAEG;AACH,OAAO,eAAe,MAAM,UAAU,CAAC;AAEvC,MAAM,QAAQ,GAAG,IAAI,eAAe,CAAC;IACnC,YAAY,EAAE,KAAK;IACnB,cAAc,EAAE,QAAQ;IACxB,gBAAgB,EAAE,GAAG;CACtB,CAAC,CAAC;AAEH,oDAAoD;AACpD,QAAQ,CAAC,MAAM,CAAC,CAAC,QAAQ,EAAE,OAAO,EAAE,KAAK,EAAE,QAAQ,EAAE,OAAO,EAAE,UAAU,CAAC,CAAC,CAAC;AAE3E,MAAM,OAAO,eAAe;IAC1B,cAAc,CAAC,WAAmB;QAChC,IAAI,CAAC,WAAW,IAAI,OAAO,WAAW,KAAK,QAAQ,EAAE,CAAC;YACpD,OAAO,WAAW,CAAC;QACrB,CAAC;QAED,mDAAmD;QACnD,IAAI,CAAC,WAAW,CAAC,QAAQ,CAAC,GAAG,CAAC,EAAE,CAAC;YAC/B,OAAO,WAAW,CAAC,IAAI,EAAE,CAAC;QAC5B,CAAC;QAED,yCAAyC;QACzC,IAAI,OAAO,GAAG,WAAW,CAAC,OAAO,CAAC,kBAAkB,EAAE,EAAE,CAAC,CAAC;QAE1D,0CAA0C;QAC1C,OAAO,GAAG,QAAQ,CAAC,QAAQ,CAAC,OAAO,CAAC,CAAC;QAErC,sBAAsB;QACtB,OAAO,GAAG,OAAO,CAAC,OAAO,CAAC,SAAS,EAAE,MAAM,CAAC,CAAC;QAC7C,OAAO,GAAG,OAAO,CAAC,IAAI,EAAE,CAAC;QAEzB,OAAO,OAAO,CAAC;IACjB,CAAC;CACF"}
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "research-powerpack-mcp",
3
- "version": "3.0.0",
3
+ "version": "3.0.2",
4
4
  "description": "The ultimate research MCP toolkit: Reddit mining, web search with CTR aggregation, AI-powered deep research, and intelligent web scraping - all in one modular package",
5
5
  "type": "module",
6
6
  "main": "dist/index.js",
@@ -42,11 +42,13 @@
42
42
  "dependencies": {
43
43
  "@modelcontextprotocol/sdk": "^1.18.1",
44
44
  "openai": "^4.77.0",
45
+ "turndown": "^7.2.2",
45
46
  "zod": "^3.24.1",
46
47
  "zod-to-json-schema": "^3.24.1"
47
48
  },
48
49
  "devDependencies": {
49
50
  "@types/node": "^22.0.0",
51
+ "@types/turndown": "^5.0.6",
50
52
  "tsx": "^4.19.0",
51
53
  "typescript": "^5.6.0"
52
54
  },