@mz1999/defuddle 0.14.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +371 -0
- package/dist/cli.d.ts +2 -0
- package/dist/cli.js +145 -0
- package/dist/cli.js.map +1 -0
- package/dist/constants.d.ts +24 -0
- package/dist/constants.js +950 -0
- package/dist/constants.js.map +1 -0
- package/dist/defuddle.d.ts +136 -0
- package/dist/defuddle.js +1816 -0
- package/dist/defuddle.js.map +1 -0
- package/dist/elements/callouts.d.ts +6 -0
- package/dist/elements/callouts.js +74 -0
- package/dist/elements/callouts.js.map +1 -0
- package/dist/elements/code.d.ts +5 -0
- package/dist/elements/code.js +346 -0
- package/dist/elements/code.js.map +1 -0
- package/dist/elements/footnotes.d.ts +5 -0
- package/dist/elements/footnotes.js +619 -0
- package/dist/elements/footnotes.js.map +1 -0
- package/dist/elements/headings.d.ts +11 -0
- package/dist/elements/headings.js +100 -0
- package/dist/elements/headings.js.map +1 -0
- package/dist/elements/images.d.ts +8 -0
- package/dist/elements/images.js +877 -0
- package/dist/elements/images.js.map +1 -0
- package/dist/elements/math.base.d.ts +9 -0
- package/dist/elements/math.base.js +195 -0
- package/dist/elements/math.base.js.map +1 -0
- package/dist/elements/math.core.d.ts +7 -0
- package/dist/elements/math.core.js +52 -0
- package/dist/elements/math.core.js.map +1 -0
- package/dist/elements/math.d.ts +2 -0
- package/dist/elements/math.full.d.ts +8 -0
- package/dist/elements/math.js +7 -0
- package/dist/elements/math.js.map +1 -0
- package/dist/extractor-registry.d.ts +16 -0
- package/dist/extractor-registry.js +140 -0
- package/dist/extractor-registry.js.map +1 -0
- package/dist/extractors/_base.d.ts +22 -0
- package/dist/extractors/_base.js +27 -0
- package/dist/extractors/_base.js.map +1 -0
- package/dist/extractors/_conversation.d.ts +9 -0
- package/dist/extractors/_conversation.js +78 -0
- package/dist/extractors/_conversation.js.map +1 -0
- package/dist/extractors/chatgpt.d.ts +14 -0
- package/dist/extractors/chatgpt.js +138 -0
- package/dist/extractors/chatgpt.js.map +1 -0
- package/dist/extractors/claude.d.ts +10 -0
- package/dist/extractors/claude.js +91 -0
- package/dist/extractors/claude.js.map +1 -0
- package/dist/extractors/gemini.d.ts +14 -0
- package/dist/extractors/gemini.js +111 -0
- package/dist/extractors/gemini.js.map +1 -0
- package/dist/extractors/github.d.ts +20 -0
- package/dist/extractors/github.js +251 -0
- package/dist/extractors/github.js.map +1 -0
- package/dist/extractors/grok.d.ts +15 -0
- package/dist/extractors/grok.js +142 -0
- package/dist/extractors/grok.js.map +1 -0
- package/dist/extractors/hackernews.d.ts +21 -0
- package/dist/extractors/hackernews.js +155 -0
- package/dist/extractors/hackernews.js.map +1 -0
- package/dist/extractors/reddit.d.ts +22 -0
- package/dist/extractors/reddit.js +197 -0
- package/dist/extractors/reddit.js.map +1 -0
- package/dist/extractors/twitter.d.ts +16 -0
- package/dist/extractors/twitter.js +204 -0
- package/dist/extractors/twitter.js.map +1 -0
- package/dist/extractors/x-article.d.ts +24 -0
- package/dist/extractors/x-article.js +267 -0
- package/dist/extractors/x-article.js.map +1 -0
- package/dist/extractors/x-oembed.d.ts +20 -0
- package/dist/extractors/x-oembed.js +350 -0
- package/dist/extractors/x-oembed.js.map +1 -0
- package/dist/extractors/youtube.d.ts +87 -0
- package/dist/extractors/youtube.js +869 -0
- package/dist/extractors/youtube.js.map +1 -0
- package/dist/fetch.d.ts +18 -0
- package/dist/fetch.js +265 -0
- package/dist/fetch.js.map +1 -0
- package/dist/index.d.ts +3 -0
- package/dist/index.full.d.ts +12 -0
- package/dist/index.full.js +1 -0
- package/dist/index.js +1 -0
- package/dist/index.js.map +1 -0
- package/dist/markdown.d.ts +30 -0
- package/dist/markdown.js +661 -0
- package/dist/markdown.js.map +1 -0
- package/dist/metadata.d.ts +25 -0
- package/dist/metadata.js +426 -0
- package/dist/metadata.js.map +1 -0
- package/dist/node.d.ts +19 -0
- package/dist/node.js +78 -0
- package/dist/node.js.map +1 -0
- package/dist/scoring.d.ts +31 -0
- package/dist/scoring.js +472 -0
- package/dist/scoring.js.map +1 -0
- package/dist/standardize.d.ts +2 -0
- package/dist/standardize.js +1101 -0
- package/dist/standardize.js.map +1 -0
- package/dist/types/extractors.d.ts +41 -0
- package/dist/types/extractors.js +3 -0
- package/dist/types/extractors.js.map +1 -0
- package/dist/types.d.ts +135 -0
- package/dist/types.js +3 -0
- package/dist/types.js.map +1 -0
- package/dist/utils/comments.d.ts +44 -0
- package/dist/utils/comments.js +103 -0
- package/dist/utils/comments.js.map +1 -0
- package/dist/utils/dom.d.ts +42 -0
- package/dist/utils/dom.js +104 -0
- package/dist/utils/dom.js.map +1 -0
- package/dist/utils/linkedom-compat.d.ts +5 -0
- package/dist/utils/linkedom-compat.js +23 -0
- package/dist/utils/linkedom-compat.js.map +1 -0
- package/dist/utils/transcript.d.ts +37 -0
- package/dist/utils/transcript.js +61 -0
- package/dist/utils/transcript.js.map +1 -0
- package/dist/utils.d.ts +13 -0
- package/dist/utils.js +98 -0
- package/dist/utils.js.map +1 -0
- package/package.json +107 -0
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
/**
 * Strips permalink anchors that sit inside heading elements (`h1`–`h6`)
 * beneath `element`. Matching links are removed from the DOM in place.
 *
 * @param element Root whose descendant headings are cleaned.
 */
export declare function removeHeadingAnchors(element: Element): void;
|
|
6
|
+
/**
 * Reports whether `node` is an `<a>` element that looks like a heading
 * permalink: its `href` contains `#`, its `title` mentions "permalink", its
 * `class` contains "permalink", "heading-anchor" or "anchor-link", or its
 * trimmed text is a lone permalink symbol such as `#`, `¶` or `§`.
 * Non-anchor elements always yield `false`.
 */
export declare function isPermalinkAnchor(node: Element): boolean;
|
|
7
|
+
/**
 * Standardization rules applied to heading elements.
 */
export declare const headingRules: Array<{
    /** CSS selector choosing the elements the rule applies to. */
    selector: string;
    /** Target element directive; the heading rule uses `'keep'`. */
    element: string;
    /** Receives a matched element and returns the element to use in its place. */
    transform: (el: Element) => Element;
}>;
|
|
@@ -0,0 +1,100 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.headingRules = void 0;
|
|
4
|
+
exports.removeHeadingAnchors = removeHeadingAnchors;
|
|
5
|
+
exports.isPermalinkAnchor = isPermalinkAnchor;
|
|
6
|
+
const constants_1 = require("../constants");
|
|
7
|
+
/**
 * Strip permalink anchors out of the headings found beneath `element`.
 *
 * Every `<a>` nested in an `h1`–`h6` is checked with `isPermalinkAnchor`;
 * links that qualify are detached from the DOM in place.
 *
 * @param element Root element whose descendant headings are cleaned.
 */
function removeHeadingAnchors(element) {
    const headingLinks = Array.from(element.querySelectorAll('h1 a, h2 a, h3 a, h4 a, h5 a, h6 a'));
    for (const anchor of headingLinks) {
        if (!isPermalinkAnchor(anchor)) {
            continue;
        }
        anchor.remove();
    }
}
|
|
18
|
+
/**
 * Decide whether `node` is a heading permalink anchor rather than content.
 *
 * Only `<a>` elements qualify. An anchor counts as a permalink when any of
 * the following holds:
 *  - its `href` contains a `#`;
 *  - its `title` contains "permalink" (case-insensitive);
 *  - its `class` attribute contains "permalink", "heading-anchor" or
 *    "anchor-link" (case-insensitive substring match);
 *  - its trimmed text is exactly one of the symbols #, ¶, § or 🔗.
 *
 * @param node Element to inspect.
 * @returns `true` when the element looks like a permalink anchor.
 */
function isPermalinkAnchor(node) {
    if (node.tagName.toLowerCase() !== 'a')
        return false;
    const href = node.getAttribute('href') || '';
    const title = (node.getAttribute('title') || '').toLowerCase();
    const className = (node.getAttribute('class') || '').toLowerCase();
    const text = (node.textContent || '').trim();
    // `includes` also covers hrefs that start with '#'.
    if (href.includes('#'))
        return true;
    if (title.includes('permalink'))
        return true;
    if (className.includes('permalink') || className.includes('heading-anchor') || className.includes('anchor-link'))
        return true;
    // The `u` flag is required: without it 🔗 (U+1F517) is read as two
    // surrogate code units, so a lone "🔗" can never satisfy `^[...]$`.
    if (/^[#¶§🔗]$/u.test(text))
        return true;
    return false;
}
|
|
35
|
+
/**
 * Report whether `node` is navigation chrome inside a heading rather than
 * part of the heading's text.
 *
 * An element qualifies when it is a `<button>`, a permalink `<a>` (per
 * `isPermalinkAnchor`), carries the class `anchor` or `permalink-widget`,
 * or is a `<span>`/`<div>` that contains at least one permalink anchor.
 *
 * @param node Element to classify.
 * @returns `true` when the element should be dropped from the heading.
 */
function isHeadingNavElement(node) {
    const tagName = node.tagName.toLowerCase();
    if (tagName === 'button') {
        return true;
    }
    if (tagName === 'a' && isPermalinkAnchor(node)) {
        return true;
    }
    const classes = node.classList;
    if (classes.contains('anchor') || classes.contains('permalink-widget')) {
        return true;
    }
    // Only generic wrappers are inspected for nested permalink anchors.
    const isWrapper = tagName === 'span' || tagName === 'div';
    if (!isWrapper) {
        return false;
    }
    for (const link of Array.from(node.querySelectorAll('a'))) {
        if (isPermalinkAnchor(link)) {
            return true;
        }
    }
    return false;
}
|
|
48
|
+
exports.headingRules = [
|
|
49
|
+
// Simplify headings by removing internal navigation elements
|
|
50
|
+
{
|
|
51
|
+
selector: 'h1, h2, h3, h4, h5, h6',
|
|
52
|
+
element: 'keep',
|
|
53
|
+
transform: (el) => {
|
|
54
|
+
// Get document from element's owner document
|
|
55
|
+
const doc = el.ownerDocument;
|
|
56
|
+
if (!doc) {
|
|
57
|
+
console.warn('No document available');
|
|
58
|
+
return el;
|
|
59
|
+
}
|
|
60
|
+
// Create new heading of same level
|
|
61
|
+
const newHeading = doc.createElement(el.tagName);
|
|
62
|
+
// Copy allowed attributes from original heading
|
|
63
|
+
Array.from(el.attributes).forEach(attr => {
|
|
64
|
+
if (constants_1.ALLOWED_ATTRIBUTES.has(attr.name)) {
|
|
65
|
+
newHeading.setAttribute(attr.name, attr.value);
|
|
66
|
+
}
|
|
67
|
+
});
|
|
68
|
+
// Clone the element so we can modify it without affecting the original
|
|
69
|
+
const clone = el.cloneNode(true);
|
|
70
|
+
// Single pass: collect navigation text and build removal list
|
|
71
|
+
const navigationText = new Map();
|
|
72
|
+
const toRemove = [];
|
|
73
|
+
Array.from(clone.querySelectorAll('*')).forEach(child => {
|
|
74
|
+
if (!isHeadingNavElement(child))
|
|
75
|
+
return;
|
|
76
|
+
navigationText.set(child, child.textContent?.trim() || '');
|
|
77
|
+
// If this element contains the only text content of its parent,
|
|
78
|
+
// store its text to be used for the parent
|
|
79
|
+
const parent = child.parentElement;
|
|
80
|
+
if (parent && parent !== clone &&
|
|
81
|
+
parent.textContent?.trim() === child.textContent?.trim()) {
|
|
82
|
+
navigationText.set(parent, child.textContent?.trim() || '');
|
|
83
|
+
}
|
|
84
|
+
toRemove.push(child);
|
|
85
|
+
});
|
|
86
|
+
// Remove navigation elements
|
|
87
|
+
toRemove.forEach(element => element.remove());
|
|
88
|
+
// Get the text content after removing navigation elements
|
|
89
|
+
let textContent = clone.textContent?.trim() || '';
|
|
90
|
+
// If we lost all text content but had navigation text, use that instead
|
|
91
|
+
if (!textContent && navigationText.size > 0) {
|
|
92
|
+
textContent = Array.from(navigationText.values())[0];
|
|
93
|
+
}
|
|
94
|
+
// Set the clean text content
|
|
95
|
+
newHeading.textContent = textContent;
|
|
96
|
+
return newHeading;
|
|
97
|
+
}
|
|
98
|
+
}
|
|
99
|
+
];
|
|
100
|
+
//# sourceMappingURL=headings.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"headings.js","sourceRoot":"","sources":["../../src/elements/headings.ts"],"names":[],"mappings":";;;AAMA,oDAMC;AAED,8CAaC;AA3BD,4CAAkD;AAElD;;;GAGG;AACH,SAAgB,oBAAoB,CAAC,OAAgB;IACpD,KAAK,CAAC,IAAI,CAAC,OAAO,CAAC,gBAAgB,CAAC,oCAAoC,CAAC,CAAC,CAAC,OAAO,CAAC,IAAI,CAAC,EAAE;QACzF,IAAI,iBAAiB,CAAC,IAAI,CAAC,EAAE,CAAC;YAC7B,IAAI,CAAC,MAAM,EAAE,CAAC;QACf,CAAC;IACF,CAAC,CAAC,CAAC;AACJ,CAAC;AAED,SAAgB,iBAAiB,CAAC,IAAa;IAC9C,IAAI,IAAI,CAAC,OAAO,CAAC,WAAW,EAAE,KAAK,GAAG;QAAE,OAAO,KAAK,CAAC;IACrD,MAAM,IAAI,GAAG,IAAI,CAAC,YAAY,CAAC,MAAM,CAAC,IAAI,EAAE,CAAC;IAC7C,MAAM,KAAK,GAAG,CAAC,IAAI,CAAC,YAAY,CAAC,OAAO,CAAC,IAAI,EAAE,CAAC,CAAC,WAAW,EAAE,CAAC;IAC/D,MAAM,SAAS,GAAG,CAAC,IAAI,CAAC,YAAY,CAAC,OAAO,CAAC,IAAI,EAAE,CAAC,CAAC,WAAW,EAAE,CAAC;IACnE,MAAM,IAAI,GAAG,CAAC,IAAI,CAAC,WAAW,IAAI,EAAE,CAAC,CAAC,IAAI,EAAE,CAAC;IAE7C,IAAI,IAAI,CAAC,UAAU,CAAC,GAAG,CAAC,IAAI,IAAI,CAAC,QAAQ,CAAC,GAAG,CAAC;QAAE,OAAO,IAAI,CAAC;IAC5D,IAAI,KAAK,CAAC,QAAQ,CAAC,WAAW,CAAC;QAAE,OAAO,IAAI,CAAC;IAC7C,IAAI,SAAS,CAAC,QAAQ,CAAC,WAAW,CAAC,IAAI,SAAS,CAAC,QAAQ,CAAC,gBAAgB,CAAC,IAAI,SAAS,CAAC,QAAQ,CAAC,aAAa,CAAC;QAAE,OAAO,IAAI,CAAC;IAC9H,IAAI,WAAW,CAAC,IAAI,CAAC,IAAI,CAAC;QAAE,OAAO,IAAI,CAAC;IAExC,OAAO,KAAK,CAAC;AACd,CAAC;AAED,SAAS,mBAAmB,CAAC,IAAa;IACzC,MAAM,GAAG,GAAG,IAAI,CAAC,OAAO,CAAC,WAAW,EAAE,CAAC;IACvC,IAAI,GAAG,KAAK,QAAQ;QAAE,OAAO,IAAI,CAAC;IAClC,IAAI,GAAG,KAAK,GAAG,IAAI,iBAAiB,CAAC,IAAI,CAAC;QAAE,OAAO,IAAI,CAAC;IACxD,IAAI,IAAI,CAAC,SAAS,CAAC,QAAQ,CAAC,QAAQ,CAAC,IAAI,IAAI,CAAC,SAAS,CAAC,QAAQ,CAAC,kBAAkB,CAAC;QAAE,OAAO,IAAI,CAAC;IAClG,IAAI,CAAC,GAAG,KAAK,MAAM,IAAI,GAAG,KAAK,KAAK,CAAC,IAAI,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,gBAAgB,CAAC,GAAG,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,iBAAiB,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC;QACjH,OAAO,IAAI,CAAC;IACb,CAAC;IACD,OAAO,KAAK,CAAC;AACd,CAAC;AAEY,QAAA,YAAY,GAAG;IACxB,6DAA6D;IAChE;QACC,QAAQ,EAAE,wBAAwB;QAClC,OAAO,EAAE,MAAM;QACf,SAAS,EAAE,CAAC,EAAW,EAAW,EAAE;YACnC,6CAA6C;YAC7C,MAAM,GAAG,GAAG,EAAE,CAAC,aAAa,CAAC;YAC7B,IAAI,CAAC,GAAG,EAAE,CAAC;gBACV,OAAO,CAAC
,IAAI,CAAC,uBAAuB,CAAC,CAAC;gBACtC,OAAO,EAAE,CAAC;YACX,CAAC;YAED,mCAAmC;YACnC,MAAM,UAAU,GAAG,GAAG,CAAC,aAAa,CAAC,EAAE,CAAC,OAAO,CAAC,CAAC;YAEjD,gDAAgD;YAChD,KAAK,CAAC,IAAI,CAAC,EAAE,CAAC,UAAU,CAAC,CAAC,OAAO,CAAC,IAAI,CAAC,EAAE;gBACxC,IAAI,8BAAkB,CAAC,GAAG,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC;oBACvC,UAAU,CAAC,YAAY,CAAC,IAAI,CAAC,IAAI,EAAE,IAAI,CAAC,KAAK,CAAC,CAAC;gBAChD,CAAC;YACF,CAAC,CAAC,CAAC;YAEH,uEAAuE;YACvE,MAAM,KAAK,GAAG,EAAE,CAAC,SAAS,CAAC,IAAI,CAAY,CAAC;YAE5C,8DAA8D;YAC9D,MAAM,cAAc,GAAG,IAAI,GAAG,EAAmB,CAAC;YAClD,MAAM,QAAQ,GAAc,EAAE,CAAC;YAE/B,KAAK,CAAC,IAAI,CAAC,KAAK,CAAC,gBAAgB,CAAC,GAAG,CAAC,CAAC,CAAC,OAAO,CAAC,KAAK,CAAC,EAAE;gBACvD,IAAI,CAAC,mBAAmB,CAAC,KAAK,CAAC;oBAAE,OAAO;gBAExC,cAAc,CAAC,GAAG,CAAC,KAAK,EAAE,KAAK,CAAC,WAAW,EAAE,IAAI,EAAE,IAAI,EAAE,CAAC,CAAC;gBAE3D,gEAAgE;gBAChE,2CAA2C;gBAC3C,MAAM,MAAM,GAAG,KAAK,CAAC,aAAa,CAAC;gBACnC,IAAI,MAAM,IAAI,MAAM,KAAK,KAAK;oBAC7B,MAAM,CAAC,WAAW,EAAE,IAAI,EAAE,KAAK,KAAK,CAAC,WAAW,EAAE,IAAI,EAAE,EAAE,CAAC;oBAC3D,cAAc,CAAC,GAAG,CAAC,MAAM,EAAE,KAAK,CAAC,WAAW,EAAE,IAAI,EAAE,IAAI,EAAE,CAAC,CAAC;gBAC7D,CAAC;gBAED,QAAQ,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;YACtB,CAAC,CAAC,CAAC;YAEH,6BAA6B;YAC7B,QAAQ,CAAC,OAAO,CAAC,OAAO,CAAC,EAAE,CAAC,OAAO,CAAC,MAAM,EAAE,CAAC,CAAC;YAE9C,0DAA0D;YAC1D,IAAI,WAAW,GAAG,KAAK,CAAC,WAAW,EAAE,IAAI,EAAE,IAAI,EAAE,CAAC;YAElD,wEAAwE;YACxE,IAAI,CAAC,WAAW,IAAI,cAAc,CAAC,IAAI,GAAG,CAAC,EAAE,CAAC;gBAC7C,WAAW,GAAG,KAAK,CAAC,IAAI,CAAC,cAAc,CAAC,MAAM,EAAE,CAAC,CAAC,CAAC,CAAC,CAAC;YACtD,CAAC;YAED,6BAA6B;YAC7B,UAAU,CAAC,WAAW,GAAG,WAAW,CAAC;YAErC,OAAO,UAAU,CAAC;QACnB,CAAC;KACD;CACD,CAAC"}
|