@mz1999/defuddle 0.14.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (123) hide show
  1. package/LICENSE +21 -0
  2. package/README.md +371 -0
  3. package/dist/cli.d.ts +2 -0
  4. package/dist/cli.js +145 -0
  5. package/dist/cli.js.map +1 -0
  6. package/dist/constants.d.ts +24 -0
  7. package/dist/constants.js +950 -0
  8. package/dist/constants.js.map +1 -0
  9. package/dist/defuddle.d.ts +136 -0
  10. package/dist/defuddle.js +1816 -0
  11. package/dist/defuddle.js.map +1 -0
  12. package/dist/elements/callouts.d.ts +6 -0
  13. package/dist/elements/callouts.js +74 -0
  14. package/dist/elements/callouts.js.map +1 -0
  15. package/dist/elements/code.d.ts +5 -0
  16. package/dist/elements/code.js +346 -0
  17. package/dist/elements/code.js.map +1 -0
  18. package/dist/elements/footnotes.d.ts +5 -0
  19. package/dist/elements/footnotes.js +619 -0
  20. package/dist/elements/footnotes.js.map +1 -0
  21. package/dist/elements/headings.d.ts +11 -0
  22. package/dist/elements/headings.js +100 -0
  23. package/dist/elements/headings.js.map +1 -0
  24. package/dist/elements/images.d.ts +8 -0
  25. package/dist/elements/images.js +877 -0
  26. package/dist/elements/images.js.map +1 -0
  27. package/dist/elements/math.base.d.ts +9 -0
  28. package/dist/elements/math.base.js +195 -0
  29. package/dist/elements/math.base.js.map +1 -0
  30. package/dist/elements/math.core.d.ts +7 -0
  31. package/dist/elements/math.core.js +52 -0
  32. package/dist/elements/math.core.js.map +1 -0
  33. package/dist/elements/math.d.ts +2 -0
  34. package/dist/elements/math.full.d.ts +8 -0
  35. package/dist/elements/math.js +7 -0
  36. package/dist/elements/math.js.map +1 -0
  37. package/dist/extractor-registry.d.ts +16 -0
  38. package/dist/extractor-registry.js +140 -0
  39. package/dist/extractor-registry.js.map +1 -0
  40. package/dist/extractors/_base.d.ts +22 -0
  41. package/dist/extractors/_base.js +27 -0
  42. package/dist/extractors/_base.js.map +1 -0
  43. package/dist/extractors/_conversation.d.ts +9 -0
  44. package/dist/extractors/_conversation.js +78 -0
  45. package/dist/extractors/_conversation.js.map +1 -0
  46. package/dist/extractors/chatgpt.d.ts +14 -0
  47. package/dist/extractors/chatgpt.js +138 -0
  48. package/dist/extractors/chatgpt.js.map +1 -0
  49. package/dist/extractors/claude.d.ts +10 -0
  50. package/dist/extractors/claude.js +91 -0
  51. package/dist/extractors/claude.js.map +1 -0
  52. package/dist/extractors/gemini.d.ts +14 -0
  53. package/dist/extractors/gemini.js +111 -0
  54. package/dist/extractors/gemini.js.map +1 -0
  55. package/dist/extractors/github.d.ts +20 -0
  56. package/dist/extractors/github.js +251 -0
  57. package/dist/extractors/github.js.map +1 -0
  58. package/dist/extractors/grok.d.ts +15 -0
  59. package/dist/extractors/grok.js +142 -0
  60. package/dist/extractors/grok.js.map +1 -0
  61. package/dist/extractors/hackernews.d.ts +21 -0
  62. package/dist/extractors/hackernews.js +155 -0
  63. package/dist/extractors/hackernews.js.map +1 -0
  64. package/dist/extractors/reddit.d.ts +22 -0
  65. package/dist/extractors/reddit.js +197 -0
  66. package/dist/extractors/reddit.js.map +1 -0
  67. package/dist/extractors/twitter.d.ts +16 -0
  68. package/dist/extractors/twitter.js +204 -0
  69. package/dist/extractors/twitter.js.map +1 -0
  70. package/dist/extractors/x-article.d.ts +24 -0
  71. package/dist/extractors/x-article.js +267 -0
  72. package/dist/extractors/x-article.js.map +1 -0
  73. package/dist/extractors/x-oembed.d.ts +20 -0
  74. package/dist/extractors/x-oembed.js +350 -0
  75. package/dist/extractors/x-oembed.js.map +1 -0
  76. package/dist/extractors/youtube.d.ts +87 -0
  77. package/dist/extractors/youtube.js +869 -0
  78. package/dist/extractors/youtube.js.map +1 -0
  79. package/dist/fetch.d.ts +18 -0
  80. package/dist/fetch.js +265 -0
  81. package/dist/fetch.js.map +1 -0
  82. package/dist/index.d.ts +3 -0
  83. package/dist/index.full.d.ts +12 -0
  84. package/dist/index.full.js +1 -0
  85. package/dist/index.js +1 -0
  86. package/dist/index.js.map +1 -0
  87. package/dist/markdown.d.ts +30 -0
  88. package/dist/markdown.js +661 -0
  89. package/dist/markdown.js.map +1 -0
  90. package/dist/metadata.d.ts +25 -0
  91. package/dist/metadata.js +426 -0
  92. package/dist/metadata.js.map +1 -0
  93. package/dist/node.d.ts +19 -0
  94. package/dist/node.js +78 -0
  95. package/dist/node.js.map +1 -0
  96. package/dist/scoring.d.ts +31 -0
  97. package/dist/scoring.js +472 -0
  98. package/dist/scoring.js.map +1 -0
  99. package/dist/standardize.d.ts +2 -0
  100. package/dist/standardize.js +1101 -0
  101. package/dist/standardize.js.map +1 -0
  102. package/dist/types/extractors.d.ts +41 -0
  103. package/dist/types/extractors.js +3 -0
  104. package/dist/types/extractors.js.map +1 -0
  105. package/dist/types.d.ts +135 -0
  106. package/dist/types.js +3 -0
  107. package/dist/types.js.map +1 -0
  108. package/dist/utils/comments.d.ts +44 -0
  109. package/dist/utils/comments.js +103 -0
  110. package/dist/utils/comments.js.map +1 -0
  111. package/dist/utils/dom.d.ts +42 -0
  112. package/dist/utils/dom.js +104 -0
  113. package/dist/utils/dom.js.map +1 -0
  114. package/dist/utils/linkedom-compat.d.ts +5 -0
  115. package/dist/utils/linkedom-compat.js +23 -0
  116. package/dist/utils/linkedom-compat.js.map +1 -0
  117. package/dist/utils/transcript.d.ts +37 -0
  118. package/dist/utils/transcript.js +61 -0
  119. package/dist/utils/transcript.js.map +1 -0
  120. package/dist/utils.d.ts +13 -0
  121. package/dist/utils.js +98 -0
  122. package/dist/utils.js.map +1 -0
  123. package/package.json +107 -0
@@ -0,0 +1,61 @@
1
+ "use strict";
2
+ /**
3
+ * Standardized transcript HTML and text construction.
4
+ *
5
+ * Used by YouTube (and potentially other video/audio extractors)
6
+ * to produce consistent transcript markup.
7
+ */
8
+ Object.defineProperty(exports, "__esModule", { value: true });
9
+ exports.formatTimestamp = formatTimestamp;
10
+ exports.buildTranscript = buildTranscript;
11
+ const dom_1 = require("./dom");
12
/**
 * Format a duration in seconds as a human-readable timestamp.
 *
 * Produces `M:SS` for durations under one hour and `H:MM:SS` otherwise.
 * Fractional seconds are truncated. Values that are negative or cannot be
 * interpreted as a finite number are rendered as `0:00` rather than leaking
 * strings such as `NaN:NaN` or `-1:-5` into the transcript markup.
 *
 * @param {number} seconds - Offset from the start of the media, in seconds.
 * @returns {string} The formatted timestamp.
 */
function formatTimestamp(seconds) {
    // Number() keeps the implicit coercion the arithmetic used to perform
    // (e.g. numeric strings), while letting us reject NaN/Infinity up front.
    const value = Number(seconds);
    const total = Number.isFinite(value) && value > 0 ? Math.floor(value) : 0;
    const h = Math.floor(total / 3600);
    const m = Math.floor((total % 3600) / 60);
    const s = total % 60;
    const ss = String(s).padStart(2, '0');
    if (h > 0) {
        return `${h}:${String(m).padStart(2, '0')}:${ss}`;
    }
    return `${m}:${ss}`;
}
24
/**
 * Assemble the HTML and plain-text renderings of a transcript.
 *
 * Chapters are sorted by start time (the input array is not mutated) and each
 * chapter heading is emitted immediately before the first segment whose start
 * is at or after the chapter start. Chapters that start after the last
 * segment's start are never emitted.
 *
 * @param site - Site identifier used as a class on the wrapper div (e.g. "youtube")
 * @param segments - Transcript segments; each provides `start`, `text`, and
 *   optionally `speaker` and `speakerChange`
 * @param chapters - Optional chapter entries providing `start` and `title`
 * @returns An object with `html` (wrapped transcript markup) and `text`
 *   (markdown-style lines joined with newlines)
 */
function buildTranscript(site, segments, chapters = []) {
    const escape = dom_1.escapeHtml;
    const orderedChapters = [...chapters].sort((left, right) => left.start - right.start);
    const htmlLines = [];
    const textLines = [];
    let cursor = 0;
    for (const { start, text, speaker, speakerChange } of segments) {
        // Flush every chapter heading that begins at or before this segment.
        for (; cursor < orderedChapters.length && orderedChapters[cursor].start <= start; cursor++) {
            const { title } = orderedChapters[cursor];
            htmlLines.push(`<h3>${escape(title)}</h3>`);
            // Separate the heading from preceding text with a blank line.
            if (textLines.length > 0) {
                textLines.push('');
            }
            textLines.push(`### ${title}`, '');
        }
        const label = formatTimestamp(start);
        const speakerSuffix = speaker === undefined ? '' : ` speaker-${speaker}`;
        const stamp = `<strong><span class="timestamp" data-timestamp="${start}">${label}</span></strong>`;
        htmlLines.push(`<p class="transcript-segment${speakerSuffix}">${stamp} · ${escape(text)}</p>`);
        // A speaker change starts a new paragraph in the text rendering.
        if (speakerChange && textLines.length > 0) {
            textLines.push('');
        }
        textLines.push(`**${label}** · ${text}`);
    }
    const html = `<div class="${site} transcript">\n<h2>Transcript</h2>\n${htmlLines.join('\n')}\n</div>`;
    const text = textLines.join('\n');
    return { html, text };
}
61
+ //# sourceMappingURL=transcript.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"transcript.js","sourceRoot":"","sources":["../../src/utils/transcript.ts"],"names":[],"mappings":";AAAA;;;;;GAKG;;AA6BH,0CASC;AASD,0CAqCC;AAlFD,+BAAmC;AAwBnC;;GAEG;AACH,SAAgB,eAAe,CAAC,OAAe;IAC9C,MAAM,CAAC,GAAG,IAAI,CAAC,KAAK,CAAC,OAAO,GAAG,IAAI,CAAC,CAAC;IACrC,MAAM,CAAC,GAAG,IAAI,CAAC,KAAK,CAAC,CAAC,OAAO,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC,CAAC;IAC5C,MAAM,CAAC,GAAG,IAAI,CAAC,KAAK,CAAC,OAAO,GAAG,EAAE,CAAC,CAAC;IAEnC,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC;QACX,OAAO,GAAG,CAAC,IAAI,MAAM,CAAC,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC,EAAE,GAAG,CAAC,IAAI,MAAM,CAAC,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC,EAAE,GAAG,CAAC,EAAE,CAAC;IAC3E,CAAC;IACD,OAAO,GAAG,CAAC,IAAI,MAAM,CAAC,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC,EAAE,GAAG,CAAC,EAAE,CAAC;AAC7C,CAAC;AAED;;;;;;GAMG;AACH,SAAgB,eAAe,CAC9B,IAAY,EACZ,QAA6B,EAC7B,WAAgC,EAAE;IAElC,MAAM,cAAc,GAAG,CAAC,GAAG,QAAQ,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,KAAK,GAAG,CAAC,CAAC,KAAK,CAAC,CAAC;IACvE,IAAI,UAAU,GAAG,CAAC,CAAC;IAEnB,MAAM,SAAS,GAAa,EAAE,CAAC;IAC/B,MAAM,SAAS,GAAa,EAAE,CAAC;IAE/B,KAAK,MAAM,OAAO,IAAI,QAAQ,EAAE,CAAC;QAChC,8CAA8C;QAC9C,OAAO,UAAU,GAAG,cAAc,CAAC,MAAM,IAAI,cAAc,CAAC,UAAU,CAAC,CAAC,KAAK,IAAI,OAAO,CAAC,KAAK,EAAE,CAAC;YAChG,MAAM,KAAK,GAAG,cAAc,CAAC,UAAU,CAAC,CAAC,KAAK,CAAC;YAC/C,SAAS,CAAC,IAAI,CAAC,OAAO,IAAA,gBAAU,EAAC,KAAK,CAAC,OAAO,CAAC,CAAC;YAChD,IAAI,SAAS,CAAC,MAAM,GAAG,CAAC;gBAAE,SAAS,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;YAC7C,SAAS,CAAC,IAAI,CAAC,OAAO,KAAK,EAAE,CAAC,CAAC;YAC/B,SAAS,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;YACnB,UAAU,EAAE,CAAC;QACd,CAAC;QAED,MAAM,SAAS,GAAG,eAAe,CAAC,OAAO,CAAC,KAAK,CAAC,CAAC;QACjD,MAAM,YAAY,GAAG,OAAO,CAAC,OAAO,KAAK,SAAS,CAAC,CAAC,CAAC,YAAY,OAAO,CAAC,OAAO,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC;QACxF,MAAM,MAAM,GAAG,mDAAmD,OAAO,CAAC,KAAK,KAAK,SAAS,kBAAkB,CAAC;QAChH,SAAS,CAAC,IAAI,CAAC,+BAA+B,YAAY,KAAK,MAAM,MAAM,IAAA,gBAAU,EAAC,OAAO,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;QAE3G,IAAI,OAAO,CAAC,aAAa,IAAI,SAAS,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YACnD,SAAS,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;QACpB,CAAC;QACD,SAAS,CAAC,IAAI,CAAC,KAAK,SAAS,Q
AAQ,OAAO,CAAC,IAAI,EAAE,CAAC,CAAC;IACtD,CAAC;IAED,OAAO;QACN,IAAI,EAAE,eAAe,IAAI,uCAAuC,SAAS,CAAC,IAAI,CAAC,IAAI,CAAC,UAAU;QAC9F,IAAI,EAAE,SAAS,CAAC,IAAI,CAAC,IAAI,CAAC;KAC1B,CAAC;AACH,CAAC"}
@@ -0,0 +1,13 @@
1
+ export declare function isElement(node: Node): node is Element;
2
+ export declare function isTextNode(node: Node): node is Text;
3
+ export declare function isCommentNode(node: Node): node is Comment;
4
+ export declare function getComputedStyle(element: Element): CSSStyleDeclaration | null;
5
+ export declare function getWindow(doc: Document): Window | null;
6
+ export declare function textPreview(el: Element): string;
7
+ export declare function logDebug(debug: boolean, message: string, ...args: any[]): void;
8
+ /**
9
+ * Count words in text, handling CJK characters (Chinese, Japanese, Korean).
10
+ * CJK characters are counted individually since they don't use spaces between words.
11
+ * Non-CJK text is counted by splitting on whitespace.
12
+ */
13
+ export declare function countWords(text: string): number;
package/dist/utils.js ADDED
@@ -0,0 +1,98 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
3
+ exports.isElement = isElement;
4
+ exports.isTextNode = isTextNode;
5
+ exports.isCommentNode = isCommentNode;
6
+ exports.getComputedStyle = getComputedStyle;
7
+ exports.getWindow = getWindow;
8
+ exports.textPreview = textPreview;
9
+ exports.logDebug = logDebug;
10
+ exports.countWords = countWords;
11
/**
 * Numeric values of the DOM `Node.nodeType` constants, keyed by their
 * standard names. The node-type predicates in this module compare against
 * these values.
 */
const NODE_TYPE = {
    // Node kinds checked by the predicates in this module
    ELEMENT_NODE: 1,
    ATTRIBUTE_NODE: 2,
    TEXT_NODE: 3,
    CDATA_SECTION_NODE: 4,
    ENTITY_REFERENCE_NODE: 5,
    ENTITY_NODE: 6,
    PROCESSING_INSTRUCTION_NODE: 7,
    COMMENT_NODE: 8,
    // Document-level node kinds
    DOCUMENT_NODE: 9,
    DOCUMENT_TYPE_NODE: 10,
    DOCUMENT_FRAGMENT_NODE: 11,
    NOTATION_NODE: 12,
};
25
/**
 * Report whether `node` is an element node, judged by its `nodeType`.
 *
 * @param node - A DOM node (any object exposing `nodeType`).
 * @returns `true` when `node.nodeType` equals `NODE_TYPE.ELEMENT_NODE`.
 */
function isElement(node) {
    const { nodeType } = node;
    return nodeType === NODE_TYPE.ELEMENT_NODE;
}
28
/**
 * Report whether `node` is a text node, judged by its `nodeType`.
 *
 * @param node - A DOM node (any object exposing `nodeType`).
 * @returns `true` when `node.nodeType` equals `NODE_TYPE.TEXT_NODE`.
 */
function isTextNode(node) {
    const { nodeType } = node;
    return nodeType === NODE_TYPE.TEXT_NODE;
}
31
/**
 * Report whether `node` is a comment node, judged by its `nodeType`.
 *
 * @param node - A DOM node (any object exposing `nodeType`).
 * @returns `true` when `node.nodeType` equals `NODE_TYPE.COMMENT_NODE`.
 */
function isCommentNode(node) {
    const { nodeType } = node;
    return nodeType === NODE_TYPE.COMMENT_NODE;
}
34
/**
 * Resolve the computed style of an element through the window that owns its
 * document.
 *
 * @param element - Element whose computed style is requested.
 * @returns The result of the owning window's `getComputedStyle(element)`, or
 *   `null` when `getWindow` finds no window for the element's document.
 */
function getComputedStyle(element) {
    const view = getWindow(element.ownerDocument);
    return view ? view.getComputedStyle(element) : null;
}
40
/**
 * Find the window object associated with a document.
 *
 * Tries, in order, the document's `defaultView`, `ownerWindow`, and `window`
 * properties and returns the first truthy one.
 *
 * @param doc - Document to inspect.
 * @returns The first truthy candidate, or `null` when none is set.
 */
function getWindow(doc) {
    return doc.defaultView || doc.ownerWindow || doc.window || null;
}
55
/**
 * Produce a short preview of an element's text.
 *
 * @param el - Element whose `textContent` is read.
 * @returns The trimmed text content, cut to at most 200 characters; an empty
 *   string when the element has no text content.
 */
function textPreview(el) {
    const raw = el.textContent || '';
    const trimmed = raw.trim();
    return trimmed.slice(0, 200);
}
58
/**
 * Write a message to the console when debugging is enabled.
 *
 * @param debug - When falsy, the call is a no-op.
 * @param message - Message logged after the `Defuddle:` prefix.
 * @param args - Additional values forwarded to `console.log`.
 */
function logDebug(debug, message, ...args) {
    if (!debug) {
        return;
    }
    console.log('Defuddle:', message, ...args);
}
63
/**
 * Report whether a UTF-16 code unit separates words.
 *
 * Covers ASCII control characters and space (<= 0x20) plus the remaining
 * code units matched by the regular-expression class `\s`, so that
 * non-breaking and ideographic spaces split words instead of being counted.
 */
function isWordSeparatorCode(code) {
    return (code <= 32 ||
        code === 0x00a0 || // No-break space
        code === 0x1680 || // Ogham space mark
        (code >= 0x2000 && code <= 0x200a) || // En quad .. hair space
        code === 0x2028 || // Line separator
        code === 0x2029 || // Paragraph separator
        code === 0x202f || // Narrow no-break space
        code === 0x205f || // Medium mathematical space
        code === 0x3000 || // Ideographic space
        code === 0xfeff // Zero-width no-break space / BOM
    );
}
/**
 * Report whether a UTF-16 code unit is a CJK character that counts as a word
 * on its own. BMP only: Extension B+ ideographs are surrogate pairs and are
 * not recognised here.
 */
function isCjkCode(code) {
    return ((code >= 0x3040 && code <= 0x309f) || // Hiragana
        (code >= 0x30a0 && code <= 0x30ff) || // Katakana
        (code >= 0x3400 && code <= 0x4dbf) || // CJK Extension A
        (code >= 0x4e00 && code <= 0x9fff) || // CJK Unified Ideographs
        (code >= 0xf900 && code <= 0xfaff) || // CJK Compatibility Ideographs
        (code >= 0xac00 && code <= 0xd7af) // Korean Hangul
    );
}
/**
 * Count words in text, handling CJK characters (Chinese, Japanese, Korean).
 * CJK characters are counted individually since they don't use spaces between words.
 * Non-CJK text is counted by splitting on whitespace, including Unicode
 * whitespace such as the no-break space (U+00A0) and ideographic space (U+3000).
 *
 * @param {string} text - Text to measure; falsy input counts as zero words.
 * @returns {number} Number of CJK characters plus number of whitespace-delimited runs.
 */
function countWords(text) {
    if (!text)
        return 0;
    let cjkCount = 0;
    let wordCount = 0;
    // True while scanning inside a non-CJK, non-whitespace run.
    let inWord = false;
    for (let i = 0; i < text.length; i++) {
        const code = text.charCodeAt(i);
        if (isCjkCode(code)) {
            cjkCount++;
            // A CJK character also terminates any adjacent Latin run.
            inWord = false;
        }
        else if (isWordSeparatorCode(code)) {
            inWord = false;
        }
        else if (!inWord) {
            wordCount++;
            inWord = true;
        }
    }
    return cjkCount + wordCount;
}
98
+ //# sourceMappingURL=utils.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"utils.js","sourceRoot":"","sources":["../src/utils.ts"],"names":[],"mappings":";;AAeA,8BAEC;AAED,gCAEC;AAED,sCAEC;AAED,4CAIC;AAED,8BAiBC;AAED,kCAEC;AAED,4BAIC;AAOD,gCA+BC;AAlGD,MAAM,SAAS,GAAG;IACjB,YAAY,EAAE,CAAC;IACf,cAAc,EAAE,CAAC;IACjB,SAAS,EAAE,CAAC;IACZ,kBAAkB,EAAE,CAAC;IACrB,qBAAqB,EAAE,CAAC;IACxB,WAAW,EAAE,CAAC;IACd,2BAA2B,EAAE,CAAC;IAC9B,YAAY,EAAE,CAAC;IACf,aAAa,EAAE,CAAC;IAChB,kBAAkB,EAAE,EAAE;IACtB,sBAAsB,EAAE,EAAE;IAC1B,aAAa,EAAE,EAAE;CACjB,CAAC;AAEF,SAAgB,SAAS,CAAC,IAAU;IACnC,OAAO,IAAI,CAAC,QAAQ,KAAK,SAAS,CAAC,YAAY,CAAC;AACjD,CAAC;AAED,SAAgB,UAAU,CAAC,IAAU;IACpC,OAAO,IAAI,CAAC,QAAQ,KAAK,SAAS,CAAC,SAAS,CAAC;AAC9C,CAAC;AAED,SAAgB,aAAa,CAAC,IAAU;IACvC,OAAO,IAAI,CAAC,QAAQ,KAAK,SAAS,CAAC,YAAY,CAAC;AACjD,CAAC;AAED,SAAgB,gBAAgB,CAAC,OAAgB;IAChD,MAAM,GAAG,GAAG,SAAS,CAAC,OAAO,CAAC,aAAa,CAAC,CAAC;IAC7C,IAAI,CAAC,GAAG;QAAE,OAAO,IAAI,CAAC;IACtB,OAAO,GAAG,CAAC,gBAAgB,CAAC,OAAO,CAAC,CAAC;AACtC,CAAC;AAED,SAAgB,SAAS,CAAC,GAAa;IACtC,wBAAwB;IACxB,IAAI,GAAG,CAAC,WAAW,EAAE,CAAC;QACrB,OAAO,GAAG,CAAC,WAAW,CAAC;IACxB,CAAC;IAED,uBAAuB;IACvB,IAAK,GAAW,CAAC,WAAW,EAAE,CAAC;QAC9B,OAAQ,GAAW,CAAC,WAAW,CAAC;IACjC,CAAC;IAED,0CAA0C;IAC1C,IAAK,GAAW,CAAC,MAAM,EAAE,CAAC;QACzB,OAAQ,GAAW,CAAC,MAAM,CAAC;IAC5B,CAAC;IAED,OAAO,IAAI,CAAC;AACb,CAAC;AAED,SAAgB,WAAW,CAAC,EAAW;IACtC,OAAO,CAAC,EAAE,CAAC,WAAW,IAAI,EAAE,CAAC,CAAC,IAAI,EAAE,CAAC,SAAS,CAAC,CAAC,EAAE,GAAG,CAAC,CAAC;AACxD,CAAC;AAED,SAAgB,QAAQ,CAAC,KAAc,EAAE,OAAe,EAAE,GAAG,IAAW;IACvE,IAAI,KAAK,EAAE,CAAC;QACX,OAAO,CAAC,GAAG,CAAC,WAAW,EAAE,OAAO,EAAE,GAAG,IAAI,CAAC,CAAC;IAC5C,CAAC;AACF,CAAC;AAED;;;;GAIG;AACH,SAAgB,UAAU,CAAC,IAAY;IACtC,IAAI,CAAC,IAAI;QAAE,OAAO,CAAC,CAAC;IAEpB,IAAI,QAAQ,GAAG,CAAC,CAAC;IACjB,IAAI,SAAS,GAAG,CAAC,CAAC;IAClB,IAAI,MAAM,GAAG,KAAK,CAAC;IAEnB,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,IAAI,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;QACtC,MAAM,IAAI,GAAG,IAAI,CAAC,UAAU,CAAC,CAAC,CAAC,CAAC;QAEhC,8DAA8D;QAC9D,gEAAgE;QAChE,IACC,CAAC,IAAI,IAAI,MAAM,IAAI,IAAI,IAAI,MAAM,CAAC,IAAI,WAAW;YACjD,CAAC,IAAI,IAAI,MAAM,IAAI,I
AAI,IAAI,MAAM,CAAC,IAAI,WAAW;YACjD,CAAC,IAAI,IAAI,MAAM,IAAI,IAAI,IAAI,MAAM,CAAC,IAAI,kBAAkB;YACxD,CAAC,IAAI,IAAI,MAAM,IAAI,IAAI,IAAI,MAAM,CAAC,IAAI,yBAAyB;YAC/D,CAAC,IAAI,IAAI,MAAM,IAAI,IAAI,IAAI,MAAM,CAAC,IAAI,+BAA+B;YACrE,CAAC,IAAI,IAAI,MAAM,IAAI,IAAI,IAAI,MAAM,CAAC,CAAI,gBAAgB;UACrD,CAAC;YACF,QAAQ,EAAE,CAAC;YACX,MAAM,GAAG,KAAK,CAAC;QAChB,CAAC;aAAM,IAAI,IAAI,IAAI,EAAE,EAAE,CAAC;YACvB,MAAM,GAAG,KAAK,CAAC;QAChB,CAAC;aAAM,IAAI,CAAC,MAAM,EAAE,CAAC;YACpB,SAAS,EAAE,CAAC;YACZ,MAAM,GAAG,IAAI,CAAC;QACf,CAAC;IACF,CAAC;IAED,OAAO,QAAQ,GAAG,SAAS,CAAC;AAC7B,CAAC"}
package/package.json ADDED
@@ -0,0 +1,107 @@
1
+ {
2
+ "name": "@mz1999/defuddle",
3
+ "version": "0.14.1",
4
+ "description": "Extract article content and metadata from web pages (with HTTP proxy support).",
5
+ "publishConfig": {
6
+ "access": "public"
7
+ },
8
+ "main": "dist/index.js",
9
+ "bin": {
10
+ "defuddle": "dist/cli.js"
11
+ },
12
+ "module": "dist/index.js",
13
+ "types": "dist/index.d.ts",
14
+ "typesVersions": {
15
+ "*": {
16
+ "*": [
17
+ "dist/index.d.ts"
18
+ ],
19
+ "full": [
20
+ "dist/index.full.d.ts"
21
+ ],
22
+ "node": [
23
+ "dist/node.d.ts"
24
+ ]
25
+ }
26
+ },
27
+ "exports": {
28
+ ".": {
29
+ "types": "./dist/index.d.ts",
30
+ "import": "./dist/index.js",
31
+ "require": "./dist/index.js"
32
+ },
33
+ "./full": {
34
+ "types": "./dist/index.full.d.ts",
35
+ "import": "./dist/index.full.js",
36
+ "require": "./dist/index.full.js"
37
+ },
38
+ "./node": {
39
+ "types": "./dist/node.d.ts",
40
+ "import": "./dist/node.js"
41
+ }
42
+ },
43
+ "scripts": {
44
+ "clean": "rm -rf dist",
45
+ "build:types": "tsc --project tsconfig.declarations.json",
46
+ "build:js": "webpack",
47
+ "build:node": "tsc --project tsconfig.node.json",
48
+ "build": "npm run clean && npm run build:types && npm run build:node && npm run build:js",
49
+ "prepublishOnly": "npm run build",
50
+ "dev:types": "tsc --project tsconfig.declarations.json --watch",
51
+ "dev:js": "webpack --watch --mode development",
52
+ "dev:node": "tsc --project tsconfig.node.json --watch",
53
+ "dev": "npm run clean && npm run build:types && concurrently \"npm run dev:types\" \"npm run dev:node\" \"npm run dev:js\"",
54
+ "test": "TZ=UTC vitest run",
55
+ "test:jsdom": "TZ=UTC DOM=jsdom vitest run",
56
+ "playground": "node playground/server.js"
57
+ },
58
+ "keywords": [
59
+ "readability",
60
+ "content-extraction",
61
+ "article-extraction",
62
+ "web-scraping",
63
+ "html-cleanup",
64
+ "content-parser",
65
+ "article-parser",
66
+ "dom"
67
+ ],
68
+ "author": "kepano (original), mazhen (proxy fork)",
69
+ "license": "MIT",
70
+ "repository": {
71
+ "type": "git",
72
+ "url": "git+https://github.com/mazhen/defuddle.git"
73
+ },
74
+ "bugs": {
75
+ "url": "https://github.com/mazhen/defuddle/issues"
76
+ },
77
+ "homepage": "https://github.com/mazhen/defuddle",
78
+ "dependencies": {
79
+ "commander": "^12.1.0",
80
+ "undici": "^6.0.0"
81
+ },
82
+ "optionalDependencies": {
83
+ "linkedom": "^0.18.12",
84
+ "mathml-to-latex": "^1.5.0",
85
+ "temml": "^0.13.1",
86
+ "turndown": "^7.2.0"
87
+ },
88
+ "devDependencies": {
89
+ "@types/jsdom": "^21.1.6",
90
+ "@types/node": "^20.19.0",
91
+ "@types/turndown": "^5.0.5",
92
+ "concurrently": "^8.2.2",
93
+ "jsdom": "^24.0.0",
94
+ "terser-webpack-plugin": "^5.3.14",
95
+ "ts-loader": "^9.5.1",
96
+ "typescript": "^5.3.3",
97
+ "undici-types": "^6.0.0",
98
+ "vitest": "^3.2.4",
99
+ "webpack": "^5.90.3",
100
+ "webpack-cli": "^5.1.4"
101
+ },
102
+ "files": [
103
+ "dist",
104
+ "README.md",
105
+ "LICENSE"
106
+ ]
107
+ }