@mz1999/defuddle 0.14.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (123) hide show
  1. package/LICENSE +21 -0
  2. package/README.md +371 -0
  3. package/dist/cli.d.ts +2 -0
  4. package/dist/cli.js +145 -0
  5. package/dist/cli.js.map +1 -0
  6. package/dist/constants.d.ts +24 -0
  7. package/dist/constants.js +950 -0
  8. package/dist/constants.js.map +1 -0
  9. package/dist/defuddle.d.ts +136 -0
  10. package/dist/defuddle.js +1816 -0
  11. package/dist/defuddle.js.map +1 -0
  12. package/dist/elements/callouts.d.ts +6 -0
  13. package/dist/elements/callouts.js +74 -0
  14. package/dist/elements/callouts.js.map +1 -0
  15. package/dist/elements/code.d.ts +5 -0
  16. package/dist/elements/code.js +346 -0
  17. package/dist/elements/code.js.map +1 -0
  18. package/dist/elements/footnotes.d.ts +5 -0
  19. package/dist/elements/footnotes.js +619 -0
  20. package/dist/elements/footnotes.js.map +1 -0
  21. package/dist/elements/headings.d.ts +11 -0
  22. package/dist/elements/headings.js +100 -0
  23. package/dist/elements/headings.js.map +1 -0
  24. package/dist/elements/images.d.ts +8 -0
  25. package/dist/elements/images.js +877 -0
  26. package/dist/elements/images.js.map +1 -0
  27. package/dist/elements/math.base.d.ts +9 -0
  28. package/dist/elements/math.base.js +195 -0
  29. package/dist/elements/math.base.js.map +1 -0
  30. package/dist/elements/math.core.d.ts +7 -0
  31. package/dist/elements/math.core.js +52 -0
  32. package/dist/elements/math.core.js.map +1 -0
  33. package/dist/elements/math.d.ts +2 -0
  34. package/dist/elements/math.full.d.ts +8 -0
  35. package/dist/elements/math.js +7 -0
  36. package/dist/elements/math.js.map +1 -0
  37. package/dist/extractor-registry.d.ts +16 -0
  38. package/dist/extractor-registry.js +140 -0
  39. package/dist/extractor-registry.js.map +1 -0
  40. package/dist/extractors/_base.d.ts +22 -0
  41. package/dist/extractors/_base.js +27 -0
  42. package/dist/extractors/_base.js.map +1 -0
  43. package/dist/extractors/_conversation.d.ts +9 -0
  44. package/dist/extractors/_conversation.js +78 -0
  45. package/dist/extractors/_conversation.js.map +1 -0
  46. package/dist/extractors/chatgpt.d.ts +14 -0
  47. package/dist/extractors/chatgpt.js +138 -0
  48. package/dist/extractors/chatgpt.js.map +1 -0
  49. package/dist/extractors/claude.d.ts +10 -0
  50. package/dist/extractors/claude.js +91 -0
  51. package/dist/extractors/claude.js.map +1 -0
  52. package/dist/extractors/gemini.d.ts +14 -0
  53. package/dist/extractors/gemini.js +111 -0
  54. package/dist/extractors/gemini.js.map +1 -0
  55. package/dist/extractors/github.d.ts +20 -0
  56. package/dist/extractors/github.js +251 -0
  57. package/dist/extractors/github.js.map +1 -0
  58. package/dist/extractors/grok.d.ts +15 -0
  59. package/dist/extractors/grok.js +142 -0
  60. package/dist/extractors/grok.js.map +1 -0
  61. package/dist/extractors/hackernews.d.ts +21 -0
  62. package/dist/extractors/hackernews.js +155 -0
  63. package/dist/extractors/hackernews.js.map +1 -0
  64. package/dist/extractors/reddit.d.ts +22 -0
  65. package/dist/extractors/reddit.js +197 -0
  66. package/dist/extractors/reddit.js.map +1 -0
  67. package/dist/extractors/twitter.d.ts +16 -0
  68. package/dist/extractors/twitter.js +204 -0
  69. package/dist/extractors/twitter.js.map +1 -0
  70. package/dist/extractors/x-article.d.ts +24 -0
  71. package/dist/extractors/x-article.js +267 -0
  72. package/dist/extractors/x-article.js.map +1 -0
  73. package/dist/extractors/x-oembed.d.ts +20 -0
  74. package/dist/extractors/x-oembed.js +350 -0
  75. package/dist/extractors/x-oembed.js.map +1 -0
  76. package/dist/extractors/youtube.d.ts +87 -0
  77. package/dist/extractors/youtube.js +869 -0
  78. package/dist/extractors/youtube.js.map +1 -0
  79. package/dist/fetch.d.ts +18 -0
  80. package/dist/fetch.js +265 -0
  81. package/dist/fetch.js.map +1 -0
  82. package/dist/index.d.ts +3 -0
  83. package/dist/index.full.d.ts +12 -0
  84. package/dist/index.full.js +1 -0
  85. package/dist/index.js +1 -0
  86. package/dist/index.js.map +1 -0
  87. package/dist/markdown.d.ts +30 -0
  88. package/dist/markdown.js +661 -0
  89. package/dist/markdown.js.map +1 -0
  90. package/dist/metadata.d.ts +25 -0
  91. package/dist/metadata.js +426 -0
  92. package/dist/metadata.js.map +1 -0
  93. package/dist/node.d.ts +19 -0
  94. package/dist/node.js +78 -0
  95. package/dist/node.js.map +1 -0
  96. package/dist/scoring.d.ts +31 -0
  97. package/dist/scoring.js +472 -0
  98. package/dist/scoring.js.map +1 -0
  99. package/dist/standardize.d.ts +2 -0
  100. package/dist/standardize.js +1101 -0
  101. package/dist/standardize.js.map +1 -0
  102. package/dist/types/extractors.d.ts +41 -0
  103. package/dist/types/extractors.js +3 -0
  104. package/dist/types/extractors.js.map +1 -0
  105. package/dist/types.d.ts +135 -0
  106. package/dist/types.js +3 -0
  107. package/dist/types.js.map +1 -0
  108. package/dist/utils/comments.d.ts +44 -0
  109. package/dist/utils/comments.js +103 -0
  110. package/dist/utils/comments.js.map +1 -0
  111. package/dist/utils/dom.d.ts +42 -0
  112. package/dist/utils/dom.js +104 -0
  113. package/dist/utils/dom.js.map +1 -0
  114. package/dist/utils/linkedom-compat.d.ts +5 -0
  115. package/dist/utils/linkedom-compat.js +23 -0
  116. package/dist/utils/linkedom-compat.js.map +1 -0
  117. package/dist/utils/transcript.d.ts +37 -0
  118. package/dist/utils/transcript.js +61 -0
  119. package/dist/utils/transcript.js.map +1 -0
  120. package/dist/utils.d.ts +13 -0
  121. package/dist/utils.js +98 -0
  122. package/dist/utils.js.map +1 -0
  123. package/package.json +107 -0
@@ -0,0 +1,78 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
3
+ exports.ConversationExtractor = void 0;
4
+ const _base_1 = require("./_base");
5
+ const defuddle_1 = require("../defuddle");
6
+ const dom_1 = require("../utils/dom");
7
/**
 * Escapes `&`, `<`, `>` and `"` so that a raw text value can be placed inside
 * element content or inside a double-quoted attribute without altering the
 * surrounding markup.
 */
function escapeHtml(value) {
    return String(value)
        .replace(/&/g, '&amp;')
        .replace(/</g, '&lt;')
        .replace(/>/g, '&gt;')
        .replace(/"/g, '&quot;');
}
/**
 * Shared base for chat-transcript extractors.
 *
 * Subclasses supply `extractMessages()` and `getMetadata()` (and may override
 * `getFootnotes()`); this class turns those into one HTML document, runs
 * Defuddle over it, and returns the cleaned result.
 */
class ConversationExtractor extends _base_1.BaseExtractor {
    /**
     * Footnotes to append after the messages. The default is none.
     */
    getFootnotes() {
        return [];
    }
    /**
     * Builds the extraction result for the conversation.
     *
     * The call order (messages, then metadata, then footnotes) is kept as-is:
     * subclasses populate their footnote list while extracting messages.
     *
     * @returns An object with the cleaned HTML (`content` / `contentHtml`),
     *          the message count, and title/site/description/wordCount variables.
     */
    extract() {
        const messages = this.extractMessages();
        const metadata = this.getMetadata();
        const footnotes = this.getFootnotes();
        const rawContentHtml = this.createContentHtml(messages, footnotes);
        // Create a temporary document to run Defuddle on our content
        const tempDoc = this.document.implementation.createHTMLDocument();
        const container = tempDoc.createElement('article');
        container.appendChild((0, dom_1.parseHTML)(tempDoc, rawContentHtml));
        tempDoc.body.appendChild(container);
        // Run Defuddle on our formatted content
        const defuddled = new defuddle_1.Defuddle(tempDoc).parse();
        const contentHtml = defuddled.content;
        return {
            content: contentHtml,
            contentHtml: contentHtml,
            extractedContent: {
                messageCount: messages.length.toString(),
            },
            variables: {
                title: metadata.title || 'Conversation',
                site: metadata.site,
                description: metadata.description || `${metadata.site} conversation with ${messages.length} messages`,
                wordCount: defuddled.wordCount?.toString() || '',
            }
        };
    }
    /**
     * Renders messages and footnotes as an HTML string.
     *
     * `message.content` and `footnote.text` are inserted as HTML. The author
     * name and metadata values are treated as plain text and escaped, because
     * they are read from the source page and would otherwise be able to break
     * out of the attribute or element they are placed in.
     *
     * @param messages  Messages with `author`, `content`, and optional
     *                  `timestamp` / `metadata`.
     * @param footnotes Footnotes with `url` and `text`.
     * @returns The combined, trimmed HTML.
     */
    createContentHtml(messages, footnotes) {
        const messagesHtml = messages.map((message, index) => {
            // NOTE(review): timestamp is inserted unescaped, as before; its producers
            // are not visible here, so it is unclear whether it may carry markup.
            const timestampHtml = message.timestamp ?
                `<div class="message-timestamp">${message.timestamp}</div>` : '';
            // Check if content already has paragraph tags
            const hasParagraphs = /<p[^>]*>[\s\S]*?<\/p>/i.test(message.content);
            const contentHtml = hasParagraphs ? message.content : `<p>${message.content}</p>`;
            // Add metadata to data attributes
            const dataAttributes = message.metadata ?
                Object.entries(message.metadata)
                    .map(([key, value]) => `data-${key}="${escapeHtml(value)}"`)
                    .join(' ') : '';
            return `
			<div class="message message-${escapeHtml(message.author.toLowerCase())}" ${dataAttributes}>
				<div class="message-header">
					<p class="message-author"><strong>${escapeHtml(message.author)}</strong></p>
					${timestampHtml}
				</div>
				<div class="message-content">
					${contentHtml}
				</div>
			</div>${index < messages.length - 1 ? '\n<hr>' : ''}`;
        }).join('\n').trim();
        // Add footnotes section if we have any
        const footnotesHtml = footnotes.length > 0 ? `
			<div id="footnotes">
				<ol>
					${footnotes.map((footnote, index) => {
            // Only the quote is escaped: some producers pass URLs that are already
            // entity-encoded, and escaping `&` again would corrupt them.
            const href = String(footnote.url).replace(/"/g, '&quot;');
            return `
						<li class="footnote" id="fn:${index + 1}">
							<p>
								<a href="${href}" target="_blank">${footnote.text}</a>&nbsp;<a href="#fnref:${index + 1}" class="footnote-backref">↩</a>
							</p>
						</li>
					`;
        }).join('')}
				</ol>
			</div>` : '';
        return `${messagesHtml}\n${footnotesHtml}`.trim();
    }
}
77
+ exports.ConversationExtractor = ConversationExtractor;
78
+ //# sourceMappingURL=_conversation.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"_conversation.js","sourceRoot":"","sources":["../../src/extractors/_conversation.ts"],"names":[],"mappings":";;;AAAA,mCAAwC;AAExC,0CAAuC;AACvC,sCAAyC;AAEzC,MAAsB,qBAAsB,SAAQ,qBAAa;IAGtD,YAAY;QACrB,OAAO,EAAE,CAAC;IACX,CAAC;IAED,OAAO;QACN,MAAM,QAAQ,GAAG,IAAI,CAAC,eAAe,EAAE,CAAC;QACxC,MAAM,QAAQ,GAAG,IAAI,CAAC,WAAW,EAAE,CAAC;QACpC,MAAM,SAAS,GAAG,IAAI,CAAC,YAAY,EAAE,CAAC;QACtC,MAAM,cAAc,GAAG,IAAI,CAAC,iBAAiB,CAAC,QAAQ,EAAE,SAAS,CAAC,CAAC;QAEnE,6DAA6D;QAC7D,MAAM,OAAO,GAAG,IAAI,CAAC,QAAQ,CAAC,cAAc,CAAC,kBAAkB,EAAE,CAAC;QAClE,MAAM,SAAS,GAAG,OAAO,CAAC,aAAa,CAAC,SAAS,CAAC,CAAC;QACnD,SAAS,CAAC,WAAW,CAAC,IAAA,eAAS,EAAC,OAAO,EAAE,cAAc,CAAC,CAAC,CAAC;QAC1D,OAAO,CAAC,IAAI,CAAC,WAAW,CAAC,SAAS,CAAC,CAAC;QAEpC,wCAAwC;QACxC,MAAM,SAAS,GAAG,IAAI,mBAAQ,CAAC,OAAO,CAAC,CAAC,KAAK,EAAE,CAAC;QAChD,MAAM,WAAW,GAAG,SAAS,CAAC,OAAO,CAAC;QAEtC,OAAO;YACN,OAAO,EAAE,WAAW;YACpB,WAAW,EAAE,WAAW;YACxB,gBAAgB,EAAE;gBACjB,YAAY,EAAE,QAAQ,CAAC,MAAM,CAAC,QAAQ,EAAE;aACxC;YACD,SAAS,EAAE;gBACV,KAAK,EAAE,QAAQ,CAAC,KAAK,IAAI,cAAc;gBACvC,IAAI,EAAE,QAAQ,CAAC,IAAI;gBACnB,WAAW,EAAE,QAAQ,CAAC,WAAW,IAAI,GAAG,QAAQ,CAAC,IAAI,sBAAsB,QAAQ,CAAC,MAAM,WAAW;gBACrG,SAAS,EAAE,SAAS,CAAC,SAAS,EAAE,QAAQ,EAAE,IAAI,EAAE;aAChD;SACD,CAAC;IACH,CAAC;IAES,iBAAiB,CAAC,QAA+B,EAAE,SAAqB;QACjF,MAAM,YAAY,GAAG,QAAQ,CAAC,GAAG,CAAC,CAAC,OAAO,EAAE,KAAK,EAAE,EAAE;YACpD,MAAM,aAAa,GAAG,OAAO,CAAC,SAAS,CAAC,CAAC;gBACxC,kCAAkC,OAAO,CAAC,SAAS,QAAQ,CAAC,CAAC,CAAC,EAAE,CAAC;YAElE,8CAA8C;YAC9C,MAAM,aAAa,GAAG,wBAAwB,CAAC,IAAI,CAAC,OAAO,CAAC,OAAO,CAAC,CAAC;YACrE,MAAM,WAAW,GAAG,aAAa,CAAC,CAAC,CAAC,OAAO,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,OAAO,CAAC,OAAO,MAAM,CAAC;YAElF,kCAAkC;YAClC,MAAM,cAAc,GAAG,OAAO,CAAC,QAAQ,CAAC,CAAC;gBACxC,MAAM,CAAC,OAAO,CAAC,OAAO,CAAC,QAAQ,CAAC;qBAC9B,GAAG,CAAC,CAAC,CAAC,GAAG,EAAE,KAAK,CAAC,EAAE,EAAE,CAAC,QAAQ,GAAG,KAAK,KAAK,GAAG,CAAC;qBAC/C,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC;YAElB,OAAO;iCACuB,OAAO,CAAC,MAAM,CAAC,WAAW,EAAE,KAAK,cAAc;;yCAEvC,OAAO,CAAC,MAAM;OAChD,aAAa;;;OAGb,WAAW;;WAEP,KAAK,GAAG,QAAQ,CAAC,MAAM,GAAG,
CAAC,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC;QACvD,CAAC,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,IAAI,EAAE,CAAC;QAErB,uCAAuC;QACvC,MAAM,aAAa,GAAG,SAAS,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC;;;OAGxC,SAAS,CAAC,GAAG,CAAC,CAAC,QAAQ,EAAE,KAAK,EAAE,EAAE,CAAC;oCACN,KAAK,GAAG,CAAC;;mBAE1B,QAAQ,CAAC,GAAG,qBAAqB,QAAQ,CAAC,IAAI,6BAA6B,KAAK,GAAG,CAAC;;;MAGjG,CAAC,CAAC,IAAI,CAAC,EAAE,CAAC;;UAEN,CAAC,CAAC,CAAC,EAAE,CAAC;QAEd,OAAO,GAAG,YAAY,KAAK,aAAa,EAAE,CAAC,IAAI,EAAE,CAAC;IACnD,CAAC;CACD;AAjFD,sDAiFC"}
@@ -0,0 +1,14 @@
1
+ import { ConversationExtractor } from './_conversation';
2
+ import { ConversationMessage, ConversationMetadata, Footnote } from '../types/extractors';
3
+ export declare class ChatGPTExtractor extends ConversationExtractor { // Conversation extractor for ChatGPT pages.
4
+ private articles; // Elements matching article[data-testid^="conversation-turn-"], collected in the constructor.
5
+ private footnotes; // Citation footnotes gathered while extracting messages.
6
+ private footnoteCounter; // Number assigned to the most recently added footnote.
7
+ private cachedMessages; // Result of the first extractMessages() call; null until then.
8
+ constructor(document: Document, url: string);
9
+ canExtract(): boolean; // True when at least one conversation-turn article was found.
10
+ protected extractMessages(): ConversationMessage[]; // One message per article; inline citations become footnote references.
11
+ protected getFootnotes(): Footnote[]; // Returns the footnotes collected by extractMessages().
12
+ protected getMetadata(): ConversationMetadata; // Title, site 'ChatGPT', url, message count and description.
13
+ private getTitle; // Page title, else the first turn's text truncated to 50 characters, else a fixed default.
14
+ }
@@ -0,0 +1,138 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
3
+ exports.ChatGPTExtractor = void 0;
4
+ const _conversation_1 = require("./_conversation");
5
+ const dom_1 = require("../utils/dom");
6
/**
 * Derives the display parts of a citation footnote from its URL.
 *
 * @param url The citation link's `href` as captured from the serialized HTML.
 * @returns `domain` — the hostname without a leading `www.` (or the raw URL
 *          when it cannot be parsed); `fragmentText` — a ` — …` suffix built
 *          from the URL's `#:~:text=` fragment, or an empty string.
 */
function describeCitationUrl(url) {
    let domain;
    try {
        // Extract domain without www.
        domain = new URL(url).hostname.replace(/^www\./, '');
    }
    catch (e) {
        console.error(`Failed to parse URL: ${url}`, e);
        return { domain: url, fragmentText: '' };
    }
    // Extract and decode the fragment text if it exists
    const hashParts = url.split('#:~:text=');
    if (hashParts.length < 2) {
        return { domain, fragmentText: '' };
    }
    let decoded;
    try {
        decoded = decodeURIComponent(hashParts[1]);
    }
    catch {
        // A malformed percent-escape only invalidates the fragment; the domain
        // parsed above is still good, so keep it and drop the fragment text.
        return { domain, fragmentText: '' };
    }
    decoded = decoded.replace(/%2C/g, ',');
    const parts = decoded.split(',');
    const head = parts[0].trim();
    if (!head) {
        return { domain, fragmentText: '' };
    }
    // With several comma-separated parts only the first is shown, followed by an ellipsis.
    const fragmentText = parts.length > 1 ? ` — ${head}...` : ` — ${decoded.trim()}`;
    return { domain, fragmentText };
}
/**
 * Conversation extractor for ChatGPT pages. Each
 * `article[data-testid^="conversation-turn-"]` element becomes one message,
 * and inline citation links are replaced by numbered footnote references.
 */
class ChatGPTExtractor extends _conversation_1.ConversationExtractor {
    constructor(document, url) {
        super(document, url);
        this.cachedMessages = null;
        this.articles = document.querySelectorAll('article[data-testid^="conversation-turn-"]');
        this.footnotes = [];
        this.footnoteCounter = 0;
    }
    /** True when the page contains at least one conversation turn. */
    canExtract() {
        return !!this.articles && this.articles.length > 0;
    }
    /**
     * Converts every conversation turn into a message and collects citation
     * footnotes as a side effect. The result is cached, so repeated calls
     * return the same array without rebuilding the footnote list.
     */
    extractMessages() {
        if (this.cachedMessages)
            return this.cachedMessages;
        const messages = [];
        this.footnotes = [];
        this.footnoteCounter = 0;
        if (!this.articles)
            return messages;
        this.articles.forEach((article) => {
            // Get the localized author text from the sr-only heading and clean it
            const authorElement = article.querySelector('h5.sr-only, h6.sr-only');
            const authorText = authorElement?.textContent
                ?.trim()
                ?.replace(/:\s*$/, '') // Remove colon and any trailing whitespace
                || '';
            const currentAuthorRole = article.getAttribute('data-message-author-role') || '';
            let messageContent = (0, dom_1.serializeHTML)(article);
            messageContent = messageContent.replace(/\u200B/g, '');
            // Remove specific elements from the message content
            const tempDiv = this.document.createElement('div');
            tempDiv.appendChild((0, dom_1.parseHTML)(this.document, messageContent));
            tempDiv.querySelectorAll('h5.sr-only, h6.sr-only, span[data-state="closed"]').forEach(el => el.remove());
            messageContent = (0, dom_1.serializeHTML)(tempDiv);
            // Process inline references using regex to find the containers
            // Look for spans containing citation links (a[target=_blank][rel=noopener]), replacing entire structure
            // Also capture optional preceding ZeroWidthSpace
            const citationPattern = /(&ZeroWidthSpace;)?(<span[^>]*?>\s*<a(?=[^>]*?href="([^"]+)")(?=[^>]*?target="_blank")(?=[^>]*?rel="noopener")[^>]*?>[\s\S]*?<\/a>\s*<\/span>)/gi;
            // url is captured group 3; the earlier groups are matched only so they get replaced
            messageContent = messageContent.replace(citationPattern, (_match, _zws, _spanStructure, url) => {
                // Check if this URL already exists in our footnotes
                const footnoteIndex = this.footnotes.findIndex(fn => fn.url === url);
                let footnoteNumber;
                if (footnoteIndex === -1) {
                    const { domain, fragmentText } = describeCitationUrl(url);
                    this.footnoteCounter++;
                    footnoteNumber = this.footnoteCounter;
                    this.footnotes.push({
                        url,
                        text: `<a href="${url}">${domain}</a>${fragmentText}`
                    });
                }
                else {
                    footnoteNumber = footnoteIndex + 1;
                }
                // Return just the footnote reference, replacing the ZWS (if captured) and the entire span structure
                return `<sup id="fnref:${footnoteNumber}"><a href="#fn:${footnoteNumber}">${footnoteNumber}</a></sup>`;
            });
            // Clean up any stray empty paragraph tags
            messageContent = messageContent
                .replace(/<p[^>]*>\s*<\/p>/g, '');
            messages.push({
                author: authorText,
                content: messageContent.trim(),
                metadata: {
                    role: currentAuthorRole || 'unknown'
                }
            });
        });
        this.cachedMessages = messages;
        return messages;
    }
    /** Footnotes collected by `extractMessages()`. */
    getFootnotes() {
        return this.footnotes;
    }
    /** Title, site name, URL, message count and a generated description. */
    getMetadata() {
        const title = this.getTitle();
        const messages = this.extractMessages();
        return {
            title,
            site: 'ChatGPT',
            url: this.url,
            messageCount: messages.length,
            description: `ChatGPT conversation with ${messages.length} messages`
        };
    }
    /**
     * Picks a title: the page title unless it is just "ChatGPT", otherwise the
     * text of the first turn's `.text-message` element (truncated to 50
     * characters), otherwise a fixed default.
     */
    getTitle() {
        // Try to get the page title first
        const pageTitle = this.document.title?.trim();
        if (pageTitle && pageTitle !== 'ChatGPT') {
            return pageTitle;
        }
        // Fall back to first user message
        const firstUserTurn = this.articles?.item(0)?.querySelector('.text-message');
        if (firstUserTurn) {
            const text = firstUserTurn.textContent || '';
            // Truncate to first 50 characters if longer
            return text.length > 50 ? text.slice(0, 50) + '...' : text;
        }
        return 'ChatGPT Conversation';
    }
}
137
+ exports.ChatGPTExtractor = ChatGPTExtractor;
138
+ //# sourceMappingURL=chatgpt.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"chatgpt.js","sourceRoot":"","sources":["../../src/extractors/chatgpt.ts"],"names":[],"mappings":";;;AAAA,mDAAwD;AAExD,sCAAwD;AAExD,MAAa,gBAAiB,SAAQ,qCAAqB;IAM1D,YAAY,QAAkB,EAAE,GAAW;QAC1C,KAAK,CAAC,QAAQ,EAAE,GAAG,CAAC,CAAC;QAHd,mBAAc,GAAiC,IAAI,CAAC;QAI3D,IAAI,CAAC,QAAQ,GAAG,QAAQ,CAAC,gBAAgB,CAAC,4CAA4C,CAAC,CAAC;QACxF,IAAI,CAAC,SAAS,GAAG,EAAE,CAAC;QACpB,IAAI,CAAC,eAAe,GAAG,CAAC,CAAC;IAC1B,CAAC;IAED,UAAU;QACT,OAAO,CAAC,CAAC,IAAI,CAAC,QAAQ,IAAI,IAAI,CAAC,QAAQ,CAAC,MAAM,GAAG,CAAC,CAAC;IACpD,CAAC;IAES,eAAe;QACxB,IAAI,IAAI,CAAC,cAAc;YAAE,OAAO,IAAI,CAAC,cAAc,CAAC;QAEpD,MAAM,QAAQ,GAA0B,EAAE,CAAC;QAC3C,IAAI,CAAC,SAAS,GAAG,EAAE,CAAC;QACpB,IAAI,CAAC,eAAe,GAAG,CAAC,CAAC;QAEzB,IAAI,CAAC,IAAI,CAAC,QAAQ;YAAE,OAAO,QAAQ,CAAC;QAEpC,IAAI,CAAC,QAAQ,CAAC,OAAO,CAAC,CAAC,OAAO,EAAE,EAAE;YACjC,sEAAsE;YACtE,MAAM,aAAa,GAAG,OAAO,CAAC,aAAa,CAAC,wBAAwB,CAAC,CAAC;YACtE,MAAM,UAAU,GAAG,aAAa,EAAE,WAAW;gBAC5C,EAAE,IAAI,EAAE;gBACR,EAAE,OAAO,CAAC,OAAO,EAAE,EAAE,CAAC,CAAC,2CAA2C;mBAC/D,EAAE,CAAC;YAEP,IAAI,iBAAiB,GAAG,EAAE,CAAC;YAE3B,MAAM,UAAU,GAAG,OAAO,CAAC,YAAY,CAAC,0BAA0B,CAAC,CAAC;YACpE,IAAI,UAAU,EAAE,CAAC;gBAChB,iBAAiB,GAAG,UAAU,CAAC;YAChC,CAAC;YAED,IAAI,cAAc,GAAG,IAAA,mBAAa,EAAC,OAAO,CAAC,CAAC;YAC5C,cAAc,GAAG,cAAc,CAAC,OAAO,CAAC,SAAS,EAAE,EAAE,CAAC,CAAC;YAEvD,oDAAoD;YACpD,MAAM,OAAO,GAAG,IAAI,CAAC,QAAQ,CAAC,aAAa,CAAC,KAAK,CAAC,CAAC;YACnD,OAAO,CAAC,WAAW,CAAC,IAAA,eAAS,EAAC,IAAI,CAAC,QAAQ,EAAE,cAAc,CAAC,CAAC,CAAC;YAC9D,OAAO,CAAC,gBAAgB,CAAC,mDAAmD,CAAC,CAAC,OAAO,CAAC,EAAE,CAAC,EAAE,CAAC,EAAE,CAAC,MAAM,EAAE,CAAC,CAAC;YACzG,cAAc,GAAG,IAAA,mBAAa,EAAC,OAAO,CAAC,CAAC;YAExC,+DAA+D;YAC/D,wGAAwG;YACxG,iDAAiD;YACjD,MAAM,eAAe,GAAG,kJAAkJ,CAAC;YAE3K,cAAc,GAAG,cAAc,CAAC,OAAO,CAAC,eAAe,EAAE,CAAC,KAAK,EAAE,GAAG,EAAE,aAAa,EAAE,GAAG,EAAE,EAAE;gBAC3F,0BAA0B;gBAC1B,IAAI,MAAM,GAAG,EAAE,CAAC;gBAChB,IAAI,YAAY,GAAG,EAAE,CAAC;gBAEtB,IAAI,CAAC;oBACJ,8BAA8B;oBAC9B,MAAM,GAAG,IAAI,GAAG,CAAC,GAAG,CAAC,CAAC,QAAQ,CAAC,OAAO,CAAC,QAAQ,EAAE,EAAE,CAAC,CAAC;oBAErD,oDAAoD;oBACpD,MAAM,SAAS,GAAG,GAAG,CAAC,KAAK,CAA
C,WAAW,CAAC,CAAC;oBACzC,IAAI,SAAS,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;wBAC1B,YAAY,GAAG,kBAAkB,CAAC,SAAS,CAAC,CAAC,CAAC,CAAC,CAAC;wBAChD,YAAY,GAAG,YAAY,CAAC,OAAO,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC;wBAEjD,MAAM,KAAK,GAAG,YAAY,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC;wBACtC,IAAI,KAAK,CAAC,MAAM,GAAG,CAAC,IAAI,KAAK,CAAC,CAAC,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC;4BACzC,YAAY,GAAG,MAAM,KAAK,CAAC,CAAC,CAAC,CAAC,IAAI,EAAE,KAAK,CAAC;wBAC3C,CAAC;6BAAM,IAAI,KAAK,CAAC,CAAC,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC;4BAC5B,YAAY,GAAG,MAAM,YAAY,CAAC,IAAI,EAAE,EAAE,CAAC;wBAC5C,CAAC;6BAAM,CAAC;4BACP,YAAY,GAAG,EAAE,CAAC;wBACnB,CAAC;oBACF,CAAC;gBACF,CAAC;gBAAC,OAAO,CAAC,EAAE,CAAC;oBACZ,OAAO,CAAC,KAAK,CAAC,wBAAwB,GAAG,EAAE,EAAE,CAAC,CAAC,CAAC;oBAChD,MAAM,GAAG,GAAG,CAAC;gBACd,CAAC;gBAED,oDAAoD;gBACpD,IAAI,aAAa,GAAG,IAAI,CAAC,SAAS,CAAC,SAAS,CAAC,EAAE,CAAC,EAAE,CAAC,EAAE,CAAC,GAAG,KAAK,GAAG,CAAC,CAAC;gBACnE,IAAI,cAAsB,CAAC;gBAE3B,IAAI,aAAa,KAAK,CAAC,CAAC,EAAE,CAAC;oBAC1B,IAAI,CAAC,eAAe,EAAE,CAAC;oBACvB,cAAc,GAAG,IAAI,CAAC,eAAe,CAAC;oBACtC,IAAI,CAAC,SAAS,CAAC,IAAI,CAAC;wBACnB,GAAG;wBACH,IAAI,EAAE,YAAY,GAAG,KAAK,MAAM,OAAO,YAAY,EAAE;qBACrD,CAAC,CAAC;gBACJ,CAAC;qBAAM,CAAC;oBACP,cAAc,GAAG,aAAa,GAAG,CAAC,CAAC;gBACpC,CAAC;gBAED,oGAAoG;gBACpG,OAAO,kBAAkB,cAAc,kBAAkB,cAAc,KAAK,cAAc,YAAY,CAAC;YACxG,CAAC,CAAC,CAAC;YAEH,0CAA0C;YAC1C,cAAc,GAAG,cAAc;iBAC7B,OAAO,CAAC,mBAAmB,EAAE,EAAE,CAAC,CAAC;YAEnC,QAAQ,CAAC,IAAI,CAAC;gBACb,MAAM,EAAE,UAAU;gBAClB,OAAO,EAAE,cAAc,CAAC,IAAI,EAAE;gBAC9B,QAAQ,EAAE;oBACT,IAAI,EAAE,iBAAiB,IAAI,SAAS;iBACpC;aACD,CAAC,CAAC;QAEJ,CAAC,CAAC,CAAC;QAEH,IAAI,CAAC,cAAc,GAAG,QAAQ,CAAC;QAC/B,OAAO,QAAQ,CAAC;IACjB,CAAC;IAES,YAAY;QACrB,OAAO,IAAI,CAAC,SAAS,CAAC;IACvB,CAAC;IAES,WAAW;QACpB,MAAM,KAAK,GAAG,IAAI,CAAC,QAAQ,EAAE,CAAC;QAC9B,MAAM,QAAQ,GAAG,IAAI,CAAC,eAAe,EAAE,CAAC;QAExC,OAAO;YACN,KAAK;YACL,IAAI,EAAE,SAAS;YACf,GAAG,EAAE,IAAI,CAAC,GAAG;YACb,YAAY,EAAE,QAAQ,CAAC,MAAM;YAC7B,WAAW,EAAE,6BAA6B,QAAQ,CAAC,MAAM,WAAW;SACpE,CAAC;IACH,CAAC;IAEO,QAAQ;QACf,kCAAkC;QAClC,MAAM,SAAS,GAAG,IAAI,CAAC,QAAQ,CAAC,KAAK,EAAE,IAAI,EAAE,CAAC;QAC9C,IAAI,SAAS,
IAAI,SAAS,KAAK,SAAS,EAAE,CAAC;YAC1C,OAAO,SAAS,CAAC;QAClB,CAAC;QAED,kCAAkC;QAClC,MAAM,aAAa,GAAG,IAAI,CAAC,QAAQ,EAAE,IAAI,CAAC,CAAC,CAAC,EAAE,aAAa,CAAC,eAAe,CAAC,CAAC;QAC7E,IAAI,aAAa,EAAE,CAAC;YACnB,MAAM,IAAI,GAAG,aAAa,CAAC,WAAW,IAAI,EAAE,CAAC;YAC7C,4CAA4C;YAC5C,OAAO,IAAI,CAAC,MAAM,GAAG,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,EAAE,EAAE,CAAC,GAAG,KAAK,CAAC,CAAC,CAAC,IAAI,CAAC;QAC5D,CAAC;QAED,OAAO,sBAAsB,CAAC;IAC/B,CAAC;CACD;AA3JD,4CA2JC"}
@@ -0,0 +1,10 @@
1
+ import { ConversationExtractor } from './_conversation';
2
+ import { ConversationMessage, ConversationMetadata } from '../types/extractors';
3
+ export declare class ClaudeExtractor extends ConversationExtractor { // Conversation extractor for Claude pages.
4
+ private articles; // User-message, assistant-message and .font-claude-response divs collected in the constructor.
5
+ constructor(document: Document, url: string);
6
+ canExtract(): boolean; // True when at least one matching element was found.
7
+ protected extractMessages(): ConversationMessage[]; // Messages authored 'You' (user-message divs) or 'Claude' (.font-claude-response divs).
8
+ protected getMetadata(): ConversationMetadata; // Title, site 'Claude', url, message count and description.
9
+ private getTitle; // Page title without a ' - Claude' suffix, else header text, else first user message, else a fixed default.
10
+ }
@@ -0,0 +1,91 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
3
+ exports.ClaudeExtractor = void 0;
4
+ const _conversation_1 = require("./_conversation");
5
+ const dom_1 = require("../utils/dom");
6
/**
 * Conversation extractor for Claude pages. User turns are read from
 * `div[data-testid="user-message"]` and assistant turns from
 * `div.font-claude-response`.
 */
class ClaudeExtractor extends _conversation_1.ConversationExtractor {
    constructor(document, url) {
        super(document, url);
        // Find all message blocks - both user and assistant messages
        this.articles = document.querySelectorAll('div[data-testid="user-message"], div[data-testid="assistant-message"], div.font-claude-response');
    }
    /** True when the page contains at least one candidate message element. */
    canExtract() {
        return !!this.articles && this.articles.length > 0;
    }
    /**
     * Converts the collected elements into messages.
     *
     * Elements carrying a `data-testid` other than `user-message` are skipped,
     * as are elements that are neither user messages nor `.font-claude-response`.
     */
    extractMessages() {
        const messages = [];
        if (!this.articles)
            return messages;
        this.articles.forEach((article) => {
            let role;
            let content;
            if (article.hasAttribute('data-testid')) {
                // Only user messages are taken from data-testid elements
                if (article.getAttribute('data-testid') !== 'user-message') {
                    return;
                }
                role = 'you';
                content = (0, dom_1.serializeHTML)(article);
            }
            else if (article.classList.contains('font-claude-response')) {
                // Claude messages: prefer the rendered markdown body when present
                role = 'assistant';
                const assistantBody = article.querySelector('.standard-markdown') || article;
                content = (0, dom_1.serializeHTML)(assistantBody);
            }
            else {
                // Skip unknown elements
                return;
            }
            if (!content) {
                return;
            }
            // Normalize content similar to ChatGPT extractor:
            // drop zero-width spaces and empty paragraphs
            content = content.replace(/\u200B/g, '').replace(/<p[^>]*>\s*<\/p>/g, '');
            messages.push({
                author: role === 'you' ? 'You' : 'Claude',
                content: content.trim(),
                metadata: {
                    role: role
                }
            });
        });
        return messages;
    }
    /** Title, site name, URL, message count and a generated description. */
    getMetadata() {
        const title = this.getTitle();
        const messages = this.extractMessages();
        return {
            title,
            site: 'Claude',
            url: this.url,
            messageCount: messages.length,
            description: `Claude conversation with ${messages.length} messages`
        };
    }
    /**
     * Picks a title, in order of preference: the page title (minus a trailing
     * " - Claude"), the header title, the first user message truncated to 50
     * characters, and finally a fixed default.
     */
    getTitle() {
        // Try to get the page title first
        const pageTitle = this.document.title?.trim();
        if (pageTitle && pageTitle !== 'Claude') {
            // Remove ' - Claude' suffix if present
            return pageTitle.replace(/ - Claude$/, '');
        }
        // Try to get title from header
        const headerTitle = this.document.querySelector('header .font-tiempos')?.textContent?.trim();
        if (headerTitle) {
            return headerTitle;
        }
        // Fall back to first user message. The collected elements are the
        // user-message divs themselves, and querySelector() only searches
        // descendants, so look at the elements directly first.
        let firstUserMessage = null;
        const total = this.articles ? this.articles.length : 0;
        for (let i = 0; i < total; i++) {
            const candidate = this.articles.item(i);
            if (candidate?.getAttribute('data-testid') === 'user-message') {
                firstUserMessage = candidate;
                break;
            }
        }
        if (!firstUserMessage) {
            // Previous lookup, kept for markup where the user message is nested
            firstUserMessage = this.articles?.item(0)?.querySelector('[data-testid="user-message"]') || null;
        }
        const text = firstUserMessage?.textContent?.trim() || '';
        if (text) {
            // Truncate to first 50 characters if longer
            return text.length > 50 ? text.slice(0, 50) + '...' : text;
        }
        return 'Claude Conversation';
    }
}
90
+ exports.ClaudeExtractor = ClaudeExtractor;
91
+ //# sourceMappingURL=claude.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"claude.js","sourceRoot":"","sources":["../../src/extractors/claude.ts"],"names":[],"mappings":";;;AAAA,mDAAwD;AAExD,sCAA6C;AAE7C,MAAa,eAAgB,SAAQ,qCAAqB;IAGzD,YAAY,QAAkB,EAAE,GAAW;QAC1C,KAAK,CAAC,QAAQ,EAAE,GAAG,CAAC,CAAC;QACrB,6DAA6D;QAC7D,IAAI,CAAC,QAAQ,GAAG,QAAQ,CAAC,gBAAgB,CAAC,iGAAiG,CAAC,CAAC;IAC9I,CAAC;IAED,UAAU;QACT,OAAO,CAAC,CAAC,IAAI,CAAC,QAAQ,IAAI,IAAI,CAAC,QAAQ,CAAC,MAAM,GAAG,CAAC,CAAC;IACpD,CAAC;IAES,eAAe;QACxB,MAAM,QAAQ,GAA0B,EAAE,CAAC;QAE3C,IAAI,CAAC,IAAI,CAAC,QAAQ;YAAE,OAAO,QAAQ,CAAC;QAEpC,IAAI,CAAC,QAAQ,CAAC,OAAO,CAAC,CAAC,OAAO,EAAE,EAAE;YACjC,IAAI,IAAY,CAAC;YACjB,IAAI,OAAe,CAAC;YAEpB,IAAI,OAAO,CAAC,YAAY,CAAC,aAAa,CAAC,EAAE,CAAC;gBACzC,uBAAuB;gBACvB,IAAI,OAAO,CAAC,YAAY,CAAC,aAAa,CAAC,KAAK,cAAc,EAAE,CAAC;oBAC5D,IAAI,GAAG,KAAK,CAAC;oBACb,OAAO,GAAG,IAAA,mBAAa,EAAC,OAAO,CAAC,CAAC;gBAClC,CAAC;gBACD,4BAA4B;qBACvB,CAAC;oBACL,OAAO;gBACR,CAAC;YACF,CAAC;iBAAM,IAAI,OAAO,CAAC,SAAS,CAAC,QAAQ,CAAC,sBAAsB,CAAC,EAAE,CAAC;gBAC/D,yBAAyB;gBACzB,IAAI,GAAG,WAAW,CAAC;gBACnB,MAAM,aAAa,GAAI,OAAO,CAAC,aAAa,CAAC,oBAAoB,CAAiB,IAAK,OAAuB,CAAC;gBAC/G,OAAO,GAAG,IAAA,mBAAa,EAAC,aAAa,CAAC,CAAC;YACxC,CAAC;iBAAM,CAAC;gBACP,wBAAwB;gBACxB,OAAO;YACR,CAAC;YAED,IAAI,OAAO,EAAE,CAAC;gBACb,iDAAiD;gBACjD,OAAO,GAAG,OAAO,CAAC,OAAO,CAAC,SAAS,EAAE,EAAE,CAAC,CAAC,OAAO,CAAC,mBAAmB,EAAE,EAAE,CAAC,CAAC;gBAC1E,QAAQ,CAAC,IAAI,CAAC;oBACb,MAAM,EAAE,IAAI,KAAK,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,QAAQ;oBACzC,OAAO,EAAE,OAAO,CAAC,IAAI,EAAE;oBACvB,QAAQ,EAAE;wBACT,IAAI,EAAE,IAAI;qBACV;iBACD,CAAC,CAAC;YACJ,CAAC;QACF,CAAC,CAAC,CAAC;QAEH,OAAO,QAAQ,CAAC;IACjB,CAAC;IAES,WAAW;QACpB,MAAM,KAAK,GAAG,IAAI,CAAC,QAAQ,EAAE,CAAC;QAC9B,MAAM,QAAQ,GAAG,IAAI,CAAC,eAAe,EAAE,CAAC;QAExC,OAAO;YACN,KAAK;YACL,IAAI,EAAE,QAAQ;YACd,GAAG,EAAE,IAAI,CAAC,GAAG;YACb,YAAY,EAAE,QAAQ,CAAC,MAAM;YAC7B,WAAW,EAAE,4BAA4B,QAAQ,CAAC,MAAM,WAAW;SACnE,CAAC;IACH,CAAC;IAEO,QAAQ;QACf,kCAAkC;QAClC,MAAM,SAAS,GAAG,IAAI,CAAC,QAAQ,CAAC,KAAK,EAAE,IAAI,EAAE,CAAC;QAC9C,IAAI,SAAS,IAAI,SAAS,KAAK,QAAQ,EAAE,CAAC;YACzC,uCAAuC;YACvC,OAAO,SAAS,CAA
C,OAAO,CAAC,YAAY,EAAE,EAAE,CAAC,CAAC;QAC5C,CAAC;QAED,+BAA+B;QAC/B,MAAM,WAAW,GAAG,IAAI,CAAC,QAAQ,CAAC,aAAa,CAAC,sBAAsB,CAAC,EAAE,WAAW,EAAE,IAAI,EAAE,CAAC;QAC7F,IAAI,WAAW,EAAE,CAAC;YACjB,OAAO,WAAW,CAAC;QACpB,CAAC;QAED,kCAAkC;QAClC,MAAM,gBAAgB,GAAG,IAAI,CAAC,QAAQ,EAAE,IAAI,CAAC,CAAC,CAAC,EAAE,aAAa,CAAC,8BAA8B,CAAC,CAAC;QAC/F,IAAI,gBAAgB,EAAE,CAAC;YACtB,MAAM,IAAI,GAAG,gBAAgB,CAAC,WAAW,IAAI,EAAE,CAAC;YAChD,4CAA4C;YAC5C,OAAO,IAAI,CAAC,MAAM,GAAG,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,EAAE,EAAE,CAAC,GAAG,KAAK,CAAC,CAAC,CAAC,IAAI,CAAC;QAC5D,CAAC;QAED,OAAO,qBAAqB,CAAC;IAC9B,CAAC;CACD;AA/FD,0CA+FC"}
@@ -0,0 +1,14 @@
1
+ import { ConversationExtractor } from './_conversation';
2
+ import { ConversationMessage, ConversationMetadata, Footnote } from '../types/extractors';
3
+ export declare class GeminiExtractor extends ConversationExtractor { // Conversation extractor for Gemini pages.
4
+ private conversationContainers; // div.conversation-container elements collected in the constructor.
5
+ private footnotes; // Source links gathered from browse-item elements.
6
+ private messageCount; // Message count from the last extractMessages() call; null before the first call.
7
+ constructor(document: Document, url: string);
8
+ canExtract(): boolean; // True when at least one conversation container was found.
9
+ protected extractMessages(): ConversationMessage[]; // Up to one user and one model message per container.
10
+ private extractSources; // Fills footnotes from the links inside browse-item elements.
11
+ protected getFootnotes(): Footnote[]; // Returns the footnotes collected by extractSources().
12
+ protected getMetadata(): ConversationMetadata; // Title, site 'Gemini', url, message count and description.
13
+ private getTitle; // Page title if it does not mention Gemini, else .title-text, else first query, else a fixed default.
14
+ }
@@ -0,0 +1,111 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
3
+ exports.GeminiExtractor = void 0;
4
+ const _conversation_1 = require("./_conversation");
5
+ const dom_1 = require("../utils/dom");
6
/**
 * Conversation extractor for Gemini pages. Each `div.conversation-container`
 * contributes the user's query and the model's response; links inside
 * `browse-item` elements become footnotes.
 */
class GeminiExtractor extends _conversation_1.ConversationExtractor {
    constructor(document, url) {
        super(document, url);
        this.messageCount = null;
        this.conversationContainers = document.querySelectorAll('div.conversation-container');
        this.footnotes = [];
    }
    /** True when the page contains at least one conversation container. */
    canExtract() {
        return !!this.conversationContainers && this.conversationContainers.length > 0;
    }
    /**
     * Builds the message list and, as a side effect, refreshes the footnote
     * list and `messageCount`.
     */
    extractMessages() {
        this.messageCount = 0;
        // Start from a clean list so a repeated call does not append the same
        // sources a second time.
        this.footnotes = [];
        const messages = [];
        if (!this.conversationContainers)
            return messages;
        this.extractSources();
        this.conversationContainers.forEach((container) => {
            const queryText = container.querySelector('user-query')?.querySelector('.query-text');
            if (queryText) {
                messages.push({
                    author: 'You',
                    content: (0, dom_1.serializeHTML)(queryText).trim(),
                    metadata: { role: 'user' }
                });
            }
            const modelResponse = container.querySelector('model-response');
            if (!modelResponse) {
                return;
            }
            // The extended response, when present, takes precedence over the regular one
            const contentElement = modelResponse.querySelector('#extended-response-markdown-content')
                || modelResponse.querySelector('.model-response-text .markdown');
            if (!contentElement) {
                return;
            }
            const tempDiv = this.document.createElement('div');
            tempDiv.appendChild((0, dom_1.parseHTML)(this.document, (0, dom_1.serializeHTML)(contentElement)));
            tempDiv.querySelectorAll('.table-content').forEach(el => {
                // `table-content` is a PARTIAL selector in defuddle (table of contents, will be removed), but a real table in Gemini (should be kept).
                el.classList.remove('table-content');
            });
            messages.push({
                author: 'Gemini',
                content: (0, dom_1.serializeHTML)(tempDiv).trim(),
                metadata: { role: 'assistant' }
            });
        });
        this.messageCount = messages.length;
        return messages;
    }
    /**
     * Appends one footnote per `browse-item` link that has a URL and at least
     * a domain or a title.
     */
    extractSources() {
        const browseItems = this.document.querySelectorAll('browse-item');
        if (!browseItems || browseItems.length === 0) {
            return;
        }
        browseItems.forEach(item => {
            const link = item.querySelector('a');
            if (!link) {
                return;
            }
            // Avoid `instanceof HTMLAnchorElement`: that global is not defined in
            // every runtime, and referencing it there throws a ReferenceError.
            const url = typeof link.href === 'string' ? link.href : (link.getAttribute('href') || '');
            const domain = link.querySelector('.domain')?.textContent?.trim() || '';
            const title = link.querySelector('.title')?.textContent?.trim() || '';
            if (!url || !(domain || title)) {
                return;
            }
            this.footnotes.push({
                url,
                // Join with ': ' only when both parts exist, so a missing domain
                // does not leave a dangling ": title".
                text: domain && title ? `${domain}: ${title}` : (title || domain)
            });
        });
    }
    /** Footnotes collected by the last `extractMessages()` call. */
    getFootnotes() {
        return this.footnotes;
    }
    /**
     * Title, site name, URL, message count and a generated description.
     * Messages are extracted here only if that has not happened yet.
     */
    getMetadata() {
        const title = this.getTitle();
        const messageCount = this.messageCount ?? this.extractMessages().length;
        return {
            title,
            site: 'Gemini',
            url: this.url,
            messageCount,
            description: `Gemini conversation with ${messageCount} messages`
        };
    }
    /**
     * Picks a title, in order of preference: the page title when it does not
     * mention "Gemini", the `.title-text` element, the first user query
     * truncated to 50 characters, and finally a fixed default.
     */
    getTitle() {
        const pageTitle = this.document.title?.trim();
        if (pageTitle && !pageTitle.includes('Gemini')) {
            return pageTitle;
        }
        const researchTitle = this.document.querySelector('.title-text')?.textContent?.trim();
        if (researchTitle) {
            return researchTitle;
        }
        const firstUserQuery = this.conversationContainers?.item(0)?.querySelector('.query-text');
        if (firstUserQuery) {
            const text = firstUserQuery.textContent || '';
            return text.length > 50 ? text.slice(0, 50) + '...' : text;
        }
        return 'Gemini Conversation';
    }
}
+ exports.GeminiExtractor = GeminiExtractor;
111
+ //# sourceMappingURL=gemini.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"gemini.js","sourceRoot":"","sources":["../../src/extractors/gemini.ts"],"names":[],"mappings":";;;AAAA,mDAAwD;AAExD,sCAAwD;AAExD,MAAa,eAAgB,SAAQ,qCAAqB;IAKzD,YAAY,QAAkB,EAAE,GAAW;QAC1C,KAAK,CAAC,QAAQ,EAAE,GAAG,CAAC,CAAC;QAHd,iBAAY,GAAkB,IAAI,CAAC;QAI1C,IAAI,CAAC,sBAAsB,GAAG,QAAQ,CAAC,gBAAgB,CAAC,4BAA4B,CAAC,CAAC;QACtF,IAAI,CAAC,SAAS,GAAG,EAAE,CAAC;IACrB,CAAC;IAED,UAAU;QACT,OAAO,CAAC,CAAC,IAAI,CAAC,sBAAsB,IAAI,IAAI,CAAC,sBAAsB,CAAC,MAAM,GAAG,CAAC,CAAC;IAChF,CAAC;IAES,eAAe;QACxB,IAAI,CAAC,YAAY,GAAG,CAAC,CAAC;QACtB,MAAM,QAAQ,GAA0B,EAAE,CAAC;QAE3C,IAAI,CAAC,IAAI,CAAC,sBAAsB;YAAE,OAAO,QAAQ,CAAC;QAElD,IAAI,CAAC,cAAc,EAAE,CAAC;QAEtB,IAAI,CAAC,sBAAsB,CAAC,OAAO,CAAC,CAAC,SAAS,EAAE,EAAE;YACjD,MAAM,SAAS,GAAG,SAAS,CAAC,aAAa,CAAC,YAAY,CAAC,CAAC;YACxD,IAAI,SAAS,EAAE,CAAC;gBACf,MAAM,SAAS,GAAG,SAAS,CAAC,aAAa,CAAC,aAAa,CAAC,CAAC;gBACzD,IAAI,SAAS,EAAE,CAAC;oBACf,MAAM,OAAO,GAAG,IAAA,mBAAa,EAAC,SAAS,CAAC,CAAC;oBACzC,QAAQ,CAAC,IAAI,CAAC;wBACb,MAAM,EAAE,KAAK;wBACb,OAAO,EAAE,OAAO,CAAC,IAAI,EAAE;wBACvB,QAAQ,EAAE,EAAE,IAAI,EAAE,MAAM,EAAE;qBAC1B,CAAC,CAAC;gBACJ,CAAC;YACF,CAAC;YAED,MAAM,aAAa,GAAG,SAAS,CAAC,aAAa,CAAC,gBAAgB,CAAC,CAAC;YAChE,IAAI,aAAa,EAAE,CAAC;gBACnB,MAAM,cAAc,GAAG,aAAa,CAAC,aAAa,CAAC,gCAAgC,CAAC,CAAC;gBACrF,MAAM,eAAe,GAAG,aAAa,CAAC,aAAa,CAAC,qCAAqC,CAAC,CAAC;gBAC3F,MAAM,cAAc,GAAG,eAAe,IAAI,cAAc,CAAC;gBAEzD,IAAI,cAAc,EAAE,CAAC;oBACpB,IAAI,OAAO,GAAG,IAAA,mBAAa,EAAC,cAAc,CAAC,CAAC;oBAE5C,MAAM,OAAO,GAAG,IAAI,CAAC,QAAQ,CAAC,aAAa,CAAC,KAAK,CAAC,CAAC;oBACnD,OAAO,CAAC,WAAW,CAAC,IAAA,eAAS,EAAC,IAAI,CAAC,QAAQ,EAAE,OAAO,CAAC,CAAC,CAAC;oBAEvD,OAAO,CAAC,gBAAgB,CAAC,gBAAgB,CAAC,CAAC,OAAO,CAAC,EAAE,CAAC,EAAE;wBACvD,uIAAuI;wBACvI,EAAE,CAAC,SAAS,CAAC,MAAM,CAAC,eAAe,CAAC,CAAC;oBACtC,CAAC,CAAC,CAAC;oBAEH,OAAO,GAAG,IAAA,mBAAa,EAAC,OAAO,CAAC,CAAC;oBAEjC,QAAQ,CAAC,IAAI,CAAC;wBACb,MAAM,EAAE,QAAQ;wBAChB,OAAO,EAAE,OAAO,CAAC,IAAI,EAAE;wBACvB,QAAQ,EAAE,EAAE,IAAI,EAAE,WAAW,EAAE;qBAC/B,CAAC,CAAC;gBACJ,CAAC;YACF,CAAC;QACF,CAAC,CAAC,CAAC;QACH,IAAI,CAAC,YAAY,GAAG,QAAQ,CAAC,MAAM,CAAC;QA
CpC,OAAO,QAAQ,CAAC;IACjB,CAAC;IAEO,cAAc;QACrB,MAAM,WAAW,GAAG,IAAI,CAAC,QAAQ,CAAC,gBAAgB,CAAC,aAAa,CAAC,CAAC;QAElE,IAAI,WAAW,IAAI,WAAW,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YAC3C,WAAW,CAAC,OAAO,CAAC,IAAI,CAAC,EAAE;gBAC1B,MAAM,IAAI,GAAG,IAAI,CAAC,aAAa,CAAC,GAAG,CAAC,CAAC;gBACrC,IAAI,IAAI,YAAY,iBAAiB,EAAE,CAAC;oBACvC,MAAM,GAAG,GAAG,IAAI,CAAC,IAAI,CAAC;oBACtB,MAAM,MAAM,GAAG,IAAI,CAAC,aAAa,CAAC,SAAS,CAAC,EAAE,WAAW,EAAE,IAAI,EAAE,IAAI,EAAE,CAAC;oBACxE,MAAM,KAAK,GAAG,IAAI,CAAC,aAAa,CAAC,QAAQ,CAAC,EAAE,WAAW,EAAE,IAAI,EAAE,IAAI,EAAE,CAAC;oBAEtE,IAAI,GAAG,IAAI,CAAC,MAAM,IAAI,KAAK,CAAC,EAAE,CAAC;wBAC9B,IAAI,CAAC,SAAS,CAAC,IAAI,CAAC;4BACnB,GAAG;4BACH,IAAI,EAAE,KAAK,CAAC,CAAC,CAAC,GAAG,MAAM,KAAK,KAAK,EAAE,CAAC,CAAC,CAAC,MAAM;yBAC5C,CAAC,CAAC;oBACJ,CAAC;gBACF,CAAC;YACF,CAAC,CAAC,CAAC;QACJ,CAAC;IACF,CAAC;IAES,YAAY;QACrB,OAAO,IAAI,CAAC,SAAS,CAAC;IACvB,CAAC;IAES,WAAW;QACpB,MAAM,KAAK,GAAG,IAAI,CAAC,QAAQ,EAAE,CAAC;QAC9B,MAAM,YAAY,GAAG,IAAI,CAAC,YAAY,IAAI,IAAI,CAAC,eAAe,EAAE,CAAC,MAAM,CAAC;QACxE,OAAO;YACN,KAAK;YACL,IAAI,EAAE,QAAQ;YACd,GAAG,EAAE,IAAI,CAAC,GAAG;YACb,YAAY;YACZ,WAAW,EAAE,4BAA4B,YAAY,WAAW;SAChE,CAAC;IACH,CAAC;IAEO,QAAQ;QACf,MAAM,SAAS,GAAG,IAAI,CAAC,QAAQ,CAAC,KAAK,EAAE,IAAI,EAAE,CAAC;QAC9C,IAAI,SAAS,IAAI,SAAS,KAAK,QAAQ,IAAI,CAAC,SAAS,CAAC,QAAQ,CAAC,QAAQ,CAAC,EAAE,CAAC;YAC1E,OAAO,SAAS,CAAC;QAClB,CAAC;QAED,MAAM,aAAa,GAAG,IAAI,CAAC,QAAQ,CAAC,aAAa,CAAC,aAAa,CAAC,EAAE,WAAW,EAAE,IAAI,EAAE,CAAC;QACtF,IAAI,aAAa,EAAE,CAAC;YACnB,OAAO,aAAa,CAAC;QACtB,CAAC;QAED,MAAM,cAAc,GAAG,IAAI,CAAC,sBAAsB,EAAE,IAAI,CAAC,CAAC,CAAC,EAAE,aAAa,CAAC,aAAa,CAAC,CAAC;QAC1F,IAAI,cAAc,EAAE,CAAC;YACpB,MAAM,IAAI,GAAG,cAAc,CAAC,WAAW,IAAI,EAAE,CAAC;YAC9C,OAAO,IAAI,CAAC,MAAM,GAAG,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,EAAE,EAAE,CAAC,GAAG,KAAK,CAAC,CAAC,CAAC,IAAI,CAAC;QAC5D,CAAC;QAED,OAAO,qBAAqB,CAAC;IAC9B,CAAC;CACD;AA7HD,0CA6HC"}
@@ -0,0 +1,20 @@
1
+ import { BaseExtractor } from './_base';
2
+ import { ExtractorResult } from '../types/extractors';
3
/**
 * Type declaration for the GitHub extractor, a `BaseExtractor` subclass.
 *
 * Public surface visible here: the constructor, `canExtract()` and
 * `extract()`. All other members are private and emitted without types, so
 * their exact roles cannot be read from this declaration.
 */
export declare class GitHubExtractor extends BaseExtractor {
    private isIssue;
    private isPR;

    /**
     * @param document the DOM document handed to the extractor
     * @param url a URL string associated with that document
     */
    constructor(document: Document, url: string);

    /** Returns a boolean; presumably whether this extractor applies to the page. */
    canExtract(): boolean;

    /** Produces the extraction result. */
    extract(): ExtractorResult;

    private createContentHtml;
    private getIssueContent;
    private extractComments;
    private getPRBody;
    private getPRContent;
    private extractPRComments;
    private extractAuthor;
    private cleanBodyContent;
    private extractNumber;
    private extractRepoInfo;
    private createDescription;
}