defuddle 0.6.1 → 0.6.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,141 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
3
+ exports.GrokExtractor = void 0;
4
+ const _conversation_1 = require("./_conversation");
5
/**
 * Escape the characters that are significant in HTML character data.
 *
 * Used for user messages, which are read as plain text but stored in the same
 * `content` field as the assistant's HTML.
 *
 * @param {string} text - Plain text taken from the page.
 * @returns {string} The text with `&`, `<` and `>` replaced by entities.
 */
function escapeGrokPlainText(text) {
    return text
        .replace(/&/g, '&amp;')
        .replace(/</g, '&lt;')
        .replace(/>/g, '&gt;');
}

/**
 * Extracts a Grok conversation (messages, footnotes and metadata) from a page.
 *
 * Message detection depends on CSS utility classes used by Grok's UI and may
 * break if that markup changes.
 */
class GrokExtractor extends _conversation_1.ConversationExtractor {
    /**
     * @param {Document} document - Page to extract from; queried immediately for message containers.
     * @param {string} url - Page URL, forwarded to the base constructor.
     */
    constructor(document, url) {
        super(document, url);
        // Note: This selector relies heavily on CSS utility classes and may break if Grok's UI changes.
        this.messageContainerSelector = '.relative.group.flex.flex-col.justify-center.w-full';
        this.messageBubbles = document.querySelectorAll(this.messageContainerSelector);
        this.footnotes = [];
        this.footnoteCounter = 0;
    }

    /**
     * @returns {boolean} True when at least one message container was found at construction time.
     */
    canExtract() {
        return !!this.messageBubbles && this.messageBubbles.length > 0;
    }

    /**
     * Build the ordered list of conversation messages and, as a side effect,
     * rebuild `this.footnotes` from the links found in Grok's replies.
     *
     * @returns {Array<{author: string, content: string, metadata: {role: string}}>}
     *   One entry per non-empty user or Grok message; `content` is HTML.
     */
    extractMessages() {
        const messages = [];
        // Start from a clean slate so repeated calls do not accumulate footnotes.
        this.footnotes = [];
        this.footnoteCounter = 0;
        if (!this.messageBubbles || this.messageBubbles.length === 0) {
            return messages;
        }
        this.messageBubbles.forEach((container) => {
            // Note: Relies on layout classes 'items-end' and 'items-start' which might change.
            const isUserMessage = container.classList.contains('items-end');
            const isGrokMessage = container.classList.contains('items-start');
            if (!isUserMessage && !isGrokMessage) {
                return; // Skip elements that aren't clearly user or Grok messages
            }
            const messageBubble = container.querySelector('.message-bubble');
            if (!messageBubble) {
                return; // Skip if the core message bubble isn't found
            }
            let content;
            let role;
            let author;
            if (isUserMessage) {
                // The assistant branch below stores HTML in `content`, so the user's
                // plain text is escaped to keep typed '<', '>' and '&' literal instead
                // of letting them be read as markup.
                content = escapeGrokPlainText(messageBubble.textContent || '');
                role = 'user';
                author = 'You'; // Or potentially extract from an attribute if available later
            }
            else {
                role = 'assistant';
                author = 'Grok'; // Or potentially extract from an attribute if available later
                // Clone the bubble to modify it without affecting the original page
                const clonedBubble = messageBubble.cloneNode(true);
                // Remove every known non-content element (e.g. DeepSearch artifacts),
                // not just the first one in the bubble.
                clonedBubble
                    .querySelectorAll('.relative.border.border-border-l1.bg-surface-base')
                    .forEach((artifact) => artifact.remove());
                // Add selectors here for any other known elements to remove (e.g., buttons, toolbars within the bubble)
                // Convert external links in the cleaned HTML into footnote references
                content = this.processFootnotes(clonedBubble.innerHTML);
            }
            const trimmedContent = content.trim();
            if (trimmedContent) {
                messages.push({
                    author,
                    content: trimmedContent,
                    metadata: {
                        role
                    }
                });
            }
        });
        return messages;
    }

    /**
     * @returns {Array<{url: string, text: string}>} Footnotes collected by the most
     *   recent `extractMessages()` / `processFootnotes()` run.
     */
    getFootnotes() {
        return this.footnotes;
    }

    /**
     * @returns {{title: string, site: string, url: *, messageCount: number, description: string}}
     *   Conversation metadata. `messageCount` is the number of matched containers,
     *   which can exceed the number of messages `extractMessages()` returns because
     *   that method skips containers without a recognised role, bubble or content.
     */
    getMetadata() {
        const title = this.getTitle();
        const messageCount = this.messageBubbles?.length || 0;
        return {
            title,
            site: 'Grok',
            url: this.url, // presumably set by the base constructor; verify against ConversationExtractor
            messageCount, // Estimated count (see above)
            description: `Grok conversation with ${messageCount} messages`
        };
    }

    /**
     * Pick a title: the page title (minus a trailing " - Grok") when it is
     * specific, otherwise the first user message truncated to 50 characters,
     * otherwise a fixed default.
     *
     * @returns {string} A non-empty title.
     */
    getTitle() {
        // Try to get the page title first (more reliable)
        const pageTitle = this.document.title?.trim();
        if (pageTitle && pageTitle !== 'Grok' && !pageTitle.startsWith('Grok by ')) {
            // Remove ' - Grok' suffix if present
            return pageTitle.replace(/\s-\s*Grok$/, '').trim();
        }
        // Fallback: Find the first user message bubble and use its text content
        // Note: Still relies on 'items-end' class.
        const firstUserContainer = this.document.querySelector(`${this.messageContainerSelector}.items-end`);
        const messageBubble = firstUserContainer?.querySelector('.message-bubble');
        const text = messageBubble?.textContent?.trim() || '';
        if (text) {
            // Truncate by code points so a surrogate pair (e.g. an emoji) is never cut in half.
            const codePoints = Array.from(text);
            return codePoints.length > 50 ? `${codePoints.slice(0, 50).join('')}...` : text;
        }
        // An empty bubble falls through here rather than yielding an empty title.
        return 'Grok Conversation'; // Default fallback
    }

    /**
     * Replace each external `<a href="http(s)://...">` in an HTML string with its
     * link text followed by a numbered footnote reference, recording one footnote
     * per distinct URL in `this.footnotes`.
     *
     * @param {string} content - HTML string (as produced by `innerHTML`).
     * @returns {string} The HTML with external links rewritten; other links are left untouched.
     */
    processFootnotes(content) {
        // Capture href and inner HTML. `[\s\S]*?` (rather than `.*?`) lets the link
        // text span line breaks, so multi-line anchors are also converted.
        const linkPattern = /<a\s+(?:[^>]*?\s+)?href="([^"]*)"[^>]*>([\s\S]*?)<\/a>/gi;
        return content.replace(linkPattern, (match, url, linkText) => {
            // Only absolute http(s) URLs become footnotes; this also leaves empty
            // hrefs and in-page '#...' anchors as they are.
            if (!/^https?:\/\//i.test(url)) {
                return match;
            }
            // 1-based index of an existing footnote for this URL, or 0 when there is none.
            let footnoteIndex = this.footnotes.findIndex((fn) => fn.url === url) + 1;
            if (footnoteIndex === 0) {
                this.footnoteCounter++;
                footnoteIndex = this.footnoteCounter;
                let label;
                try {
                    label = new URL(url).hostname.replace(/^www\./, '');
                }
                catch {
                    // Fall back to showing the raw URL when it cannot be parsed
                    label = url;
                    console.warn(`GrokExtractor: Could not parse URL for footnote: ${url}`);
                }
                this.footnotes.push({
                    url,
                    text: `<a href="${url}" target="_blank" rel="noopener noreferrer">${label}</a>` // Store the link HTML directly
                });
            }
            // NOTE(review): a URL cited more than once yields several <sup> elements
            // with the same id; confirm whether the consumer needs unique ids.
            // Return the original link text (no longer clickable) followed by the footnote reference
            return `${linkText}<sup id="fnref:${footnoteIndex}" class="footnote-ref"><a href="#fn:${footnoteIndex}" class="footnote-link">${footnoteIndex}</a></sup>`;
        });
    }
}
140
+ exports.GrokExtractor = GrokExtractor;
141
+ //# sourceMappingURL=grok.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"grok.js","sourceRoot":"","sources":["../../src/extractors/grok.ts"],"names":[],"mappings":";;;AAAA,mDAAwD;AAGxD,MAAa,aAAc,SAAQ,qCAAqB;IAOvD,YAAY,QAAkB,EAAE,GAAW;QAC1C,KAAK,CAAC,QAAQ,EAAE,GAAG,CAAC,CAAC;QAPtB,gGAAgG;QACxF,6BAAwB,GAAG,qDAAqD,CAAC;QAOxF,IAAI,CAAC,cAAc,GAAG,QAAQ,CAAC,gBAAgB,CAAC,IAAI,CAAC,wBAAwB,CAAC,CAAC;QAC/E,IAAI,CAAC,SAAS,GAAG,EAAE,CAAC;QACpB,IAAI,CAAC,eAAe,GAAG,CAAC,CAAC;IAC1B,CAAC;IAED,UAAU;QACT,OAAO,CAAC,CAAC,IAAI,CAAC,cAAc,IAAI,IAAI,CAAC,cAAc,CAAC,MAAM,GAAG,CAAC,CAAC;IAChE,CAAC;IAES,eAAe;QACxB,MAAM,QAAQ,GAA0B,EAAE,CAAC;QAC3C,IAAI,CAAC,SAAS,GAAG,EAAE,CAAC;QACpB,IAAI,CAAC,eAAe,GAAG,CAAC,CAAC;QAEzB,IAAI,CAAC,IAAI,CAAC,cAAc,IAAI,IAAI,CAAC,cAAc,CAAC,MAAM,KAAK,CAAC;YAAE,OAAO,QAAQ,CAAC;QAE9E,IAAI,CAAC,cAAc,CAAC,OAAO,CAAC,CAAC,SAAS,EAAE,EAAE;YACzC,mFAAmF;YACnF,MAAM,aAAa,GAAG,SAAS,CAAC,SAAS,CAAC,QAAQ,CAAC,WAAW,CAAC,CAAC;YAChE,MAAM,aAAa,GAAG,SAAS,CAAC,SAAS,CAAC,QAAQ,CAAC,aAAa,CAAC,CAAC;YAElE,IAAI,CAAC,aAAa,IAAI,CAAC,aAAa;gBAAE,OAAO,CAAC,0DAA0D;YAExG,MAAM,aAAa,GAAG,SAAS,CAAC,aAAa,CAAC,iBAAiB,CAAC,CAAC;YACjE,IAAI,CAAC,aAAa;gBAAE,OAAO,CAAC,8CAA8C;YAE1E,IAAI,OAAO,GAAW,EAAE,CAAC;YACzB,IAAI,IAAI,GAAW,EAAE,CAAC;YACtB,IAAI,MAAM,GAAW,EAAE,CAAC;YAExB,IAAI,aAAa,EAAE,CAAC;gBACnB,mEAAmE;gBACnE,8EAA8E;gBAC9E,OAAO,GAAG,aAAa,CAAC,WAAW,IAAI,EAAE,CAAC;gBAC1C,IAAI,GAAG,MAAM,CAAC;gBACd,MAAM,GAAG,KAAK,CAAC,CAAC,8DAA8D;YAC/E,CAAC;iBAAM,IAAI,aAAa,EAAE,CAAC;gBAC1B,IAAI,GAAG,WAAW,CAAC;gBACnB,MAAM,GAAG,MAAM,CAAC,CAAC,8DAA8D;gBAE/E,oEAAoE;gBACpE,MAAM,YAAY,GAAG,aAAa,CAAC,SAAS,CAAC,IAAI,CAAY,CAAC;gBAE9D,iEAAiE;gBACjE,YAAY,CAAC,aAAa,CAAC,mDAAmD,CAAC,EAAE,MAAM,EAAE,CAAC;gBAC1F,wGAAwG;gBAExG,OAAO,GAAG,YAAY,CAAC,SAAS,CAAC;gBAEjC,iDAAiD;gBACjD,OAAO,GAAG,IAAI,CAAC,gBAAgB,CAAC,OAAO,CAAC,CAAC;YAC1C,CAAC;YAED,IAAI,OAAO,CAAC,IAAI,EAAE,EAAE,CAAC;gBACpB,QAAQ,CAAC,IAAI,CAAC;oBACb,MAAM,EAAE,MAAM;oBACd,OAAO,EAAE,OAAO,CAAC,IAAI,EAAE;oBACvB,QAAQ,EAAE;wBACT,IAAI,EAAE,IAAI;qBACV;iBACD,CAAC,CAAC;YACJ,CAAC;QACF,CAAC,CAAC,CAAC;QAEH,OAAO,QAAQ,CAAC;IACjB,CAAC;IAES,YAAY;QACrB,OAAO,IAAI
,CAAC,SAAS,CAAC;IACvB,CAAC;IAES,WAAW;QACpB,MAAM,KAAK,GAAG,IAAI,CAAC,QAAQ,EAAE,CAAC;QAC9B,MAAM,YAAY,GAAG,IAAI,CAAC,cAAc,EAAE,MAAM,IAAI,CAAC,CAAC;QAEtD,OAAO;YACN,KAAK;YACL,IAAI,EAAE,MAAM;YACZ,GAAG,EAAE,IAAI,CAAC,GAAG;YACb,YAAY,EAAE,YAAY,EAAE,sBAAsB;YAClD,WAAW,EAAE,0BAA0B,YAAY,WAAW;SAC9D,CAAC;IACH,CAAC;IAEO,QAAQ;QACf,kDAAkD;QAClD,MAAM,SAAS,GAAG,IAAI,CAAC,QAAQ,CAAC,KAAK,EAAE,IAAI,EAAE,CAAC;QAC9C,IAAI,SAAS,IAAI,SAAS,KAAK,MAAM,IAAI,CAAC,SAAS,CAAC,UAAU,CAAC,UAAU,CAAC,EAAE,CAAC;YAC5E,qCAAqC;YACrC,OAAO,SAAS,CAAC,OAAO,CAAC,aAAa,EAAE,EAAE,CAAC,CAAC,IAAI,EAAE,CAAC;QACpD,CAAC;QAED,wEAAwE;QACxE,2CAA2C;QAC3C,MAAM,kBAAkB,GAAG,IAAI,CAAC,QAAQ,CAAC,aAAa,CAAC,GAAG,IAAI,CAAC,wBAAwB,YAAY,CAAC,CAAC;QACrG,IAAI,kBAAkB,EAAE,CAAC;YACxB,MAAM,aAAa,GAAG,kBAAkB,CAAC,aAAa,CAAC,iBAAiB,CAAC,CAAC;YAC1E,IAAI,aAAa,EAAE,CAAC;gBACnB,MAAM,IAAI,GAAG,aAAa,CAAC,WAAW,EAAE,IAAI,EAAE,IAAI,EAAE,CAAC;gBACrD,4CAA4C;gBAC5C,OAAO,IAAI,CAAC,MAAM,GAAG,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,EAAE,EAAE,CAAC,GAAG,KAAK,CAAC,CAAC,CAAC,IAAI,CAAC;YAC5D,CAAC;QACF,CAAC;QAED,OAAO,mBAAmB,CAAC,CAAC,mBAAmB;IAChD,CAAC;IAEO,gBAAgB,CAAC,OAAe;QACvC,qDAAqD;QACrD,MAAM,WAAW,GAAG,qDAAqD,CAAC,CAAC,wBAAwB;QAEnG,OAAO,OAAO,CAAC,OAAO,CAAC,WAAW,EAAE,CAAC,KAAK,EAAE,GAAG,EAAE,QAAQ,EAAE,EAAE;YAC3D,kFAAkF;YACnF,IAAI,CAAC,GAAG,IAAI,GAAG,CAAC,UAAU,CAAC,GAAG,CAAC,IAAI,CAAC,GAAG,CAAC,KAAK,CAAC,eAAe,CAAC,EAAE,CAAC;gBAChE,OAAO,KAAK,CAAC;YACd,CAAC;YAED,oDAAoD;YACpD,IAAI,QAAQ,GAAG,IAAI,CAAC,SAAS,CAAC,IAAI,CAAC,EAAE,CAAC,EAAE,CAAC,EAAE,CAAC,GAAG,KAAK,GAAG,CAAC,CAAC;YACzD,IAAI,aAAqB,CAAC;YAE1B,IAAI,CAAC,QAAQ,EAAE,CAAC;gBACf,6CAA6C;gBAC7C,IAAI,CAAC,eAAe,EAAE,CAAC;gBACvB,aAAa,GAAG,IAAI,CAAC,eAAe,CAAC;gBAErC,IAAI,UAAU,GAAG,GAAG,CAAC,CAAC,uCAAuC;gBAC7D,IAAI,CAAC;oBACJ,MAAM,MAAM,GAAG,IAAI,GAAG,CAAC,GAAG,CAAC,CAAC,QAAQ,CAAC,OAAO,CAAC,QAAQ,EAAE,EAAE,CAAC,CAAC;oBAC3D,UAAU,GAAG,YAAY,GAAG,+CAA+C,MAAM,MAAM,CAAC;gBACzF,CAAC;gBAAC,OAAO,CAAC,EAAE,CAAC;oBACZ,uDAAuD;oBACvD,UAAU,GAAG,YAAY,GAAG,+CAA+C,GAAG,MAAM,CAAC;oBACrF,OAAO,CAAC,IAAI,CAAC,oDAAoD,GAAG,EAAE,CAAC,CAAC;gB
ACzE,CAAC;gBAED,IAAI,CAAC,SAAS,CAAC,IAAI,CAAC;oBACnB,GAAG;oBACH,IAAI,EAAE,UAAU,CAAC,+BAA+B;iBAChD,CAAC,CAAC;YACJ,CAAC;iBAAM,CAAC;gBACP,kDAAkD;gBAClD,aAAa,GAAG,IAAI,CAAC,SAAS,CAAC,SAAS,CAAC,EAAE,CAAC,EAAE,CAAC,EAAE,CAAC,GAAG,KAAK,GAAG,CAAC,GAAG,CAAC,CAAC;YACpE,CAAC;YAED,kEAAkE;YAClE,0FAA0F;YAC1F,OAAO,GAAG,QAAQ,kBAAkB,aAAa,uCAAuC,aAAa,2BAA2B,aAAa,YAAY,CAAC;QAC3J,CAAC,CAAC,CAAC;IACJ,CAAC;CACD;AA/JD,sCA+JC"}