illuma-agents 1.0.8 → 1.0.10
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +1 -5
- package/dist/cjs/common/enum.cjs +1 -2
- package/dist/cjs/common/enum.cjs.map +1 -1
- package/dist/cjs/events.cjs +11 -0
- package/dist/cjs/events.cjs.map +1 -1
- package/dist/cjs/graphs/Graph.cjs +2 -1
- package/dist/cjs/graphs/Graph.cjs.map +1 -1
- package/dist/cjs/instrumentation.cjs +3 -1
- package/dist/cjs/instrumentation.cjs.map +1 -1
- package/dist/cjs/llm/anthropic/types.cjs.map +1 -1
- package/dist/cjs/llm/anthropic/utils/message_inputs.cjs +79 -2
- package/dist/cjs/llm/anthropic/utils/message_inputs.cjs.map +1 -1
- package/dist/cjs/llm/anthropic/utils/tools.cjs.map +1 -1
- package/dist/cjs/llm/bedrock/index.cjs +99 -0
- package/dist/cjs/llm/bedrock/index.cjs.map +1 -0
- package/dist/cjs/llm/fake.cjs.map +1 -1
- package/dist/cjs/llm/openai/index.cjs +102 -0
- package/dist/cjs/llm/openai/index.cjs.map +1 -1
- package/dist/cjs/llm/openai/utils/index.cjs +87 -1
- package/dist/cjs/llm/openai/utils/index.cjs.map +1 -1
- package/dist/cjs/llm/openrouter/index.cjs +175 -1
- package/dist/cjs/llm/openrouter/index.cjs.map +1 -1
- package/dist/cjs/llm/providers.cjs +13 -16
- package/dist/cjs/llm/providers.cjs.map +1 -1
- package/dist/cjs/llm/text.cjs.map +1 -1
- package/dist/cjs/messages/core.cjs +14 -14
- package/dist/cjs/messages/core.cjs.map +1 -1
- package/dist/cjs/messages/ids.cjs.map +1 -1
- package/dist/cjs/messages/prune.cjs.map +1 -1
- package/dist/cjs/run.cjs +18 -1
- package/dist/cjs/run.cjs.map +1 -1
- package/dist/cjs/splitStream.cjs.map +1 -1
- package/dist/cjs/stream.cjs +24 -1
- package/dist/cjs/stream.cjs.map +1 -1
- package/dist/cjs/tools/ToolNode.cjs +20 -1
- package/dist/cjs/tools/ToolNode.cjs.map +1 -1
- package/dist/cjs/tools/handlers.cjs +29 -25
- package/dist/cjs/tools/handlers.cjs.map +1 -1
- package/dist/cjs/tools/search/anthropic.cjs.map +1 -1
- package/dist/cjs/tools/search/content.cjs.map +1 -1
- package/dist/cjs/tools/search/firecrawl.cjs.map +1 -1
- package/dist/cjs/tools/search/format.cjs.map +1 -1
- package/dist/cjs/tools/search/highlights.cjs.map +1 -1
- package/dist/cjs/tools/search/rerankers.cjs.map +1 -1
- package/dist/cjs/tools/search/schema.cjs +27 -25
- package/dist/cjs/tools/search/schema.cjs.map +1 -1
- package/dist/cjs/tools/search/search.cjs +6 -1
- package/dist/cjs/tools/search/search.cjs.map +1 -1
- package/dist/cjs/tools/search/serper-scraper.cjs.map +1 -1
- package/dist/cjs/tools/search/tool.cjs +182 -35
- package/dist/cjs/tools/search/tool.cjs.map +1 -1
- package/dist/cjs/tools/search/utils.cjs.map +1 -1
- package/dist/cjs/utils/graph.cjs.map +1 -1
- package/dist/cjs/utils/llm.cjs +0 -1
- package/dist/cjs/utils/llm.cjs.map +1 -1
- package/dist/cjs/utils/misc.cjs.map +1 -1
- package/dist/cjs/utils/run.cjs.map +1 -1
- package/dist/cjs/utils/title.cjs +7 -7
- package/dist/cjs/utils/title.cjs.map +1 -1
- package/dist/esm/common/enum.mjs +1 -2
- package/dist/esm/common/enum.mjs.map +1 -1
- package/dist/esm/events.mjs +11 -0
- package/dist/esm/events.mjs.map +1 -1
- package/dist/esm/graphs/Graph.mjs +2 -1
- package/dist/esm/graphs/Graph.mjs.map +1 -1
- package/dist/esm/instrumentation.mjs +3 -1
- package/dist/esm/instrumentation.mjs.map +1 -1
- package/dist/esm/llm/anthropic/types.mjs.map +1 -1
- package/dist/esm/llm/anthropic/utils/message_inputs.mjs +79 -2
- package/dist/esm/llm/anthropic/utils/message_inputs.mjs.map +1 -1
- package/dist/esm/llm/anthropic/utils/tools.mjs.map +1 -1
- package/dist/esm/llm/bedrock/index.mjs +97 -0
- package/dist/esm/llm/bedrock/index.mjs.map +1 -0
- package/dist/esm/llm/fake.mjs.map +1 -1
- package/dist/esm/llm/openai/index.mjs +103 -1
- package/dist/esm/llm/openai/index.mjs.map +1 -1
- package/dist/esm/llm/openai/utils/index.mjs +88 -2
- package/dist/esm/llm/openai/utils/index.mjs.map +1 -1
- package/dist/esm/llm/openrouter/index.mjs +175 -1
- package/dist/esm/llm/openrouter/index.mjs.map +1 -1
- package/dist/esm/llm/providers.mjs +2 -5
- package/dist/esm/llm/providers.mjs.map +1 -1
- package/dist/esm/llm/text.mjs.map +1 -1
- package/dist/esm/messages/core.mjs +14 -14
- package/dist/esm/messages/core.mjs.map +1 -1
- package/dist/esm/messages/ids.mjs.map +1 -1
- package/dist/esm/messages/prune.mjs.map +1 -1
- package/dist/esm/run.mjs +18 -1
- package/dist/esm/run.mjs.map +1 -1
- package/dist/esm/splitStream.mjs.map +1 -1
- package/dist/esm/stream.mjs +24 -1
- package/dist/esm/stream.mjs.map +1 -1
- package/dist/esm/tools/ToolNode.mjs +20 -1
- package/dist/esm/tools/ToolNode.mjs.map +1 -1
- package/dist/esm/tools/handlers.mjs +30 -26
- package/dist/esm/tools/handlers.mjs.map +1 -1
- package/dist/esm/tools/search/anthropic.mjs.map +1 -1
- package/dist/esm/tools/search/content.mjs.map +1 -1
- package/dist/esm/tools/search/firecrawl.mjs.map +1 -1
- package/dist/esm/tools/search/format.mjs.map +1 -1
- package/dist/esm/tools/search/highlights.mjs.map +1 -1
- package/dist/esm/tools/search/rerankers.mjs.map +1 -1
- package/dist/esm/tools/search/schema.mjs +27 -25
- package/dist/esm/tools/search/schema.mjs.map +1 -1
- package/dist/esm/tools/search/search.mjs +6 -1
- package/dist/esm/tools/search/search.mjs.map +1 -1
- package/dist/esm/tools/search/serper-scraper.mjs.map +1 -1
- package/dist/esm/tools/search/tool.mjs +182 -35
- package/dist/esm/tools/search/tool.mjs.map +1 -1
- package/dist/esm/tools/search/utils.mjs.map +1 -1
- package/dist/esm/utils/graph.mjs.map +1 -1
- package/dist/esm/utils/llm.mjs +0 -1
- package/dist/esm/utils/llm.mjs.map +1 -1
- package/dist/esm/utils/misc.mjs.map +1 -1
- package/dist/esm/utils/run.mjs.map +1 -1
- package/dist/esm/utils/title.mjs +7 -7
- package/dist/esm/utils/title.mjs.map +1 -1
- package/dist/types/common/enum.d.ts +1 -2
- package/dist/types/llm/bedrock/index.d.ts +36 -0
- package/dist/types/llm/openai/index.d.ts +1 -0
- package/dist/types/llm/openai/utils/index.d.ts +10 -1
- package/dist/types/llm/openrouter/index.d.ts +4 -1
- package/dist/types/tools/search/types.d.ts +2 -0
- package/dist/types/types/llm.d.ts +3 -8
- package/package.json +16 -12
- package/src/common/enum.ts +1 -2
- package/src/common/index.ts +1 -1
- package/src/events.ts +11 -0
- package/src/graphs/Graph.ts +2 -1
- package/src/instrumentation.ts +25 -22
- package/src/llm/anthropic/llm.spec.ts +1442 -1442
- package/src/llm/anthropic/types.ts +140 -140
- package/src/llm/anthropic/utils/message_inputs.ts +757 -660
- package/src/llm/anthropic/utils/output_parsers.ts +133 -133
- package/src/llm/anthropic/utils/tools.ts +29 -29
- package/src/llm/bedrock/index.ts +128 -0
- package/src/llm/fake.ts +133 -133
- package/src/llm/google/llm.spec.ts +3 -1
- package/src/llm/google/utils/tools.ts +160 -160
- package/src/llm/openai/index.ts +126 -0
- package/src/llm/openai/types.ts +24 -24
- package/src/llm/openai/utils/index.ts +116 -1
- package/src/llm/openai/utils/isReasoningModel.test.ts +90 -90
- package/src/llm/openrouter/index.ts +222 -1
- package/src/llm/providers.ts +2 -7
- package/src/llm/text.ts +94 -94
- package/src/messages/core.ts +463 -463
- package/src/messages/formatAgentMessages.tools.test.ts +400 -400
- package/src/messages/formatMessage.test.ts +693 -693
- package/src/messages/ids.ts +26 -26
- package/src/messages/prune.ts +567 -567
- package/src/messages/shiftIndexTokenCountMap.test.ts +81 -81
- package/src/mockStream.ts +98 -98
- package/src/prompts/collab.ts +5 -5
- package/src/prompts/index.ts +1 -1
- package/src/prompts/taskmanager.ts +61 -61
- package/src/run.ts +22 -4
- package/src/scripts/ant_web_search_edge_case.ts +162 -0
- package/src/scripts/ant_web_search_error_edge_case.ts +148 -0
- package/src/scripts/args.ts +48 -48
- package/src/scripts/caching.ts +123 -123
- package/src/scripts/code_exec_files.ts +193 -193
- package/src/scripts/empty_input.ts +137 -137
- package/src/scripts/memory.ts +97 -97
- package/src/scripts/test-tools-before-handoff.ts +1 -5
- package/src/scripts/thinking.ts +149 -149
- package/src/scripts/tools.ts +1 -4
- package/src/specs/anthropic.simple.test.ts +67 -0
- package/src/specs/spec.utils.ts +3 -3
- package/src/specs/token-distribution-edge-case.test.ts +316 -316
- package/src/specs/tool-error.test.ts +193 -193
- package/src/splitStream.test.ts +691 -691
- package/src/splitStream.ts +234 -234
- package/src/stream.test.ts +94 -94
- package/src/stream.ts +30 -1
- package/src/tools/ToolNode.ts +24 -1
- package/src/tools/handlers.ts +32 -28
- package/src/tools/search/anthropic.ts +51 -51
- package/src/tools/search/content.test.ts +173 -173
- package/src/tools/search/content.ts +147 -147
- package/src/tools/search/direct-url.test.ts +530 -0
- package/src/tools/search/firecrawl.ts +210 -210
- package/src/tools/search/format.ts +250 -250
- package/src/tools/search/highlights.ts +320 -320
- package/src/tools/search/index.ts +2 -2
- package/src/tools/search/jina-reranker.test.ts +126 -126
- package/src/tools/search/output.md +2775 -2775
- package/src/tools/search/rerankers.ts +242 -242
- package/src/tools/search/schema.ts +65 -63
- package/src/tools/search/search.ts +766 -759
- package/src/tools/search/serper-scraper.ts +155 -155
- package/src/tools/search/test.html +883 -883
- package/src/tools/search/test.md +642 -642
- package/src/tools/search/test.ts +159 -159
- package/src/tools/search/tool.ts +641 -471
- package/src/tools/search/types.ts +689 -687
- package/src/tools/search/utils.ts +79 -79
- package/src/types/index.ts +6 -6
- package/src/types/llm.ts +2 -8
- package/src/utils/graph.ts +10 -10
- package/src/utils/llm.ts +26 -27
- package/src/utils/llmConfig.ts +13 -5
- package/src/utils/logging.ts +48 -48
- package/src/utils/misc.ts +57 -57
- package/src/utils/run.ts +100 -100
- package/src/utils/title.ts +165 -165
- package/dist/cjs/llm/ollama/index.cjs +0 -70
- package/dist/cjs/llm/ollama/index.cjs.map +0 -1
- package/dist/cjs/llm/ollama/utils.cjs +0 -158
- package/dist/cjs/llm/ollama/utils.cjs.map +0 -1
- package/dist/esm/llm/ollama/index.mjs +0 -68
- package/dist/esm/llm/ollama/index.mjs.map +0 -1
- package/dist/esm/llm/ollama/utils.mjs +0 -155
- package/dist/esm/llm/ollama/utils.mjs.map +0 -1
- package/dist/types/llm/ollama/index.d.ts +0 -8
- package/dist/types/llm/ollama/utils.d.ts +0 -7
- package/src/llm/ollama/index.ts +0 -92
- package/src/llm/ollama/utils.ts +0 -193
- package/src/proto/CollabGraph.ts +0 -269
- package/src/proto/TaskManager.ts +0 -243
- package/src/proto/collab.ts +0 -200
- package/src/proto/collab_design.ts +0 -184
- package/src/proto/collab_design_v2.ts +0 -224
- package/src/proto/collab_design_v3.ts +0 -255
- package/src/proto/collab_design_v4.ts +0 -220
- package/src/proto/collab_design_v5.ts +0 -251
- package/src/proto/collab_graph.ts +0 -181
- package/src/proto/collab_original.ts +0 -123
- package/src/proto/example.ts +0 -93
- package/src/proto/example_new.ts +0 -68
- package/src/proto/example_old.ts +0 -201
- package/src/proto/example_test.ts +0 -152
- package/src/proto/example_test_anthropic.ts +0 -100
- package/src/proto/log_stream.ts +0 -202
- package/src/proto/main_collab_community_event.ts +0 -133
- package/src/proto/main_collab_design_v2.ts +0 -96
- package/src/proto/main_collab_design_v4.ts +0 -100
- package/src/proto/main_collab_design_v5.ts +0 -135
- package/src/proto/main_collab_global_analysis.ts +0 -122
- package/src/proto/main_collab_hackathon_event.ts +0 -153
- package/src/proto/main_collab_space_mission.ts +0 -153
- package/src/proto/main_philosophy.ts +0 -210
- package/src/proto/original_script.ts +0 -126
- package/src/proto/standard.ts +0 -100
- package/src/proto/stream.ts +0 -56
- package/src/proto/tasks.ts +0 -118
- package/src/proto/tools/global_analysis_tools.ts +0 -86
- package/src/proto/tools/space_mission_tools.ts +0 -60
- package/src/proto/vertexai.ts +0 -54
- package/src/scripts/image.ts +0 -178
package/src/tools/search/test.ts
CHANGED
|
@@ -1,159 +1,159 @@
|
|
|
1
|
-
/* eslint-disable no-console */
|
|
2
|
-
// processWikipedia.ts
|
|
3
|
-
import * as fs from 'fs';
|
|
4
|
-
import * as path from 'path';
|
|
5
|
-
import { processContent } from './content';
|
|
6
|
-
|
|
7
|
-
/**
|
|
8
|
-
* Process a Wikipedia article (HTML and Markdown) and create a referenced version
|
|
9
|
-
*/
|
|
10
|
-
async function processWikipediaArticle(): Promise<void> {
|
|
11
|
-
try {
|
|
12
|
-
console.log('Starting Wikipedia article processing...');
|
|
13
|
-
|
|
14
|
-
// Define file paths - adapt these to your specific file locations
|
|
15
|
-
const htmlPath = path.resolve('./test.html');
|
|
16
|
-
const markdownPath = path.resolve('./test.md');
|
|
17
|
-
const outputPath = path.resolve('./output.md');
|
|
18
|
-
|
|
19
|
-
// Check if input files exist
|
|
20
|
-
if (!fs.existsSync(htmlPath)) {
|
|
21
|
-
throw new Error(`Wikipedia HTML file not found at ${htmlPath}`);
|
|
22
|
-
}
|
|
23
|
-
|
|
24
|
-
if (!fs.existsSync(markdownPath)) {
|
|
25
|
-
throw new Error(`Wikipedia Markdown file not found at ${markdownPath}`);
|
|
26
|
-
}
|
|
27
|
-
|
|
28
|
-
console.log('Reading Wikipedia article files...');
|
|
29
|
-
const html = fs.readFileSync(htmlPath, 'utf-8');
|
|
30
|
-
const markdown = fs.readFileSync(markdownPath, 'utf-8');
|
|
31
|
-
|
|
32
|
-
// Extract article title for logging
|
|
33
|
-
const titleMatch = /<h1[^>]*>([^<]+)<\/h1>/i.exec(html);
|
|
34
|
-
const articleTitle = titleMatch
|
|
35
|
-
? titleMatch[1].trim()
|
|
36
|
-
: 'Wikipedia article';
|
|
37
|
-
|
|
38
|
-
console.log(`Processing "${articleTitle}"...`);
|
|
39
|
-
|
|
40
|
-
// Measure processing time
|
|
41
|
-
const startTime = process.hrtime();
|
|
42
|
-
|
|
43
|
-
// Process content
|
|
44
|
-
const result = processContent(html, markdown);
|
|
45
|
-
|
|
46
|
-
// Calculate processing time
|
|
47
|
-
const elapsed = process.hrtime(startTime);
|
|
48
|
-
const timeInMs = elapsed[0] * 1000 + elapsed[1] / 1000000;
|
|
49
|
-
|
|
50
|
-
// Generate reference appendix
|
|
51
|
-
const appendix = generateReferenceAppendix(result);
|
|
52
|
-
|
|
53
|
-
// Create complete output with the processed content and appendix
|
|
54
|
-
const completeOutput = result.markdown + appendix;
|
|
55
|
-
|
|
56
|
-
// Write to output file
|
|
57
|
-
fs.writeFileSync(outputPath, completeOutput);
|
|
58
|
-
|
|
59
|
-
// Print processing statistics
|
|
60
|
-
console.log('\nWikipedia article processing complete! ✓');
|
|
61
|
-
console.log('-'.repeat(60));
|
|
62
|
-
console.log(`Article: ${articleTitle}`);
|
|
63
|
-
console.log(`Processing time: ${timeInMs.toFixed(2)}ms`);
|
|
64
|
-
console.log('Media references replaced:');
|
|
65
|
-
console.log(` - Links: ${result.links.length}`);
|
|
66
|
-
console.log(` - Images: ${result.images.length}`);
|
|
67
|
-
console.log(` - Videos: ${result.videos.length}`);
|
|
68
|
-
console.log(
|
|
69
|
-
` - Total: ${result.links.length + result.images.length + result.videos.length}`
|
|
70
|
-
);
|
|
71
|
-
console.log(`Output saved to: ${outputPath}`);
|
|
72
|
-
console.log('-'.repeat(60));
|
|
73
|
-
|
|
74
|
-
// Print sample of the transformation
|
|
75
|
-
const sampleLines = result.markdown.split('\n').slice(0, 10).join('\n');
|
|
76
|
-
console.log('\nSample of transformed content:');
|
|
77
|
-
console.log('-'.repeat(30));
|
|
78
|
-
console.log(sampleLines);
|
|
79
|
-
console.log('-'.repeat(30));
|
|
80
|
-
console.log('... (continued in output file)');
|
|
81
|
-
} catch (error) {
|
|
82
|
-
console.error('Error processing Wikipedia article:', error);
|
|
83
|
-
process.exit(1);
|
|
84
|
-
}
|
|
85
|
-
}
|
|
86
|
-
|
|
87
|
-
/**
|
|
88
|
-
* Generate a comprehensive reference appendix with all media links
|
|
89
|
-
*/
|
|
90
|
-
function generateReferenceAppendix(result: {
|
|
91
|
-
links: Array<{ originalUrl: string; title?: string; text?: string }>;
|
|
92
|
-
images: Array<{ originalUrl: string; title?: string }>;
|
|
93
|
-
videos: Array<{ originalUrl: string; title?: string }>;
|
|
94
|
-
}): string {
|
|
95
|
-
let appendix = '\n\n' + '---'.repeat(10) + '\n\n';
|
|
96
|
-
appendix += '# References\n\n';
|
|
97
|
-
|
|
98
|
-
if (result.links.length > 0) {
|
|
99
|
-
appendix += '## Links\n\n';
|
|
100
|
-
result.links.forEach((link, index) => {
|
|
101
|
-
// Clean and format text for display
|
|
102
|
-
let displayText = '';
|
|
103
|
-
if (link.text != null && link.text.trim()) {
|
|
104
|
-
// Limit length for very long link text
|
|
105
|
-
let cleanText = link.text.trim();
|
|
106
|
-
if (cleanText.length > 50) {
|
|
107
|
-
cleanText = cleanText.substring(0, 47) + '...';
|
|
108
|
-
}
|
|
109
|
-
displayText = ` - "${cleanText}"`;
|
|
110
|
-
}
|
|
111
|
-
|
|
112
|
-
appendix += `**link#${index + 1}**: ${link.originalUrl}${displayText}\n\n`;
|
|
113
|
-
});
|
|
114
|
-
}
|
|
115
|
-
|
|
116
|
-
if (result.images.length > 0) {
|
|
117
|
-
appendix += '## Images\n\n';
|
|
118
|
-
result.images.forEach((image, index) => {
|
|
119
|
-
const displayTitle =
|
|
120
|
-
image.title != null && image.title.trim()
|
|
121
|
-
? ` - ${image.title.trim()}`
|
|
122
|
-
: '';
|
|
123
|
-
appendix += `**image#${index + 1}**: ${image.originalUrl}${displayTitle}\n\n`;
|
|
124
|
-
});
|
|
125
|
-
}
|
|
126
|
-
|
|
127
|
-
if (result.videos.length > 0) {
|
|
128
|
-
appendix += '## Videos\n\n';
|
|
129
|
-
result.videos.forEach((video, index) => {
|
|
130
|
-
const displayTitle =
|
|
131
|
-
video.title != null && video.title.trim()
|
|
132
|
-
? ` - ${video.title.trim()}`
|
|
133
|
-
: '';
|
|
134
|
-
appendix += `**video#${index + 1}**: ${video.originalUrl}${displayTitle}\n\n`;
|
|
135
|
-
});
|
|
136
|
-
}
|
|
137
|
-
|
|
138
|
-
// Add a category breakdown to show what types of links were found
|
|
139
|
-
const totalRefs =
|
|
140
|
-
result.links.length + result.images.length + result.videos.length;
|
|
141
|
-
|
|
142
|
-
appendix += '## Summary\n\n';
|
|
143
|
-
appendix += `Total references: **${totalRefs}**\n\n`;
|
|
144
|
-
appendix += `- Links: ${result.links.length}\n`;
|
|
145
|
-
appendix += `- Images: ${result.images.length}\n`;
|
|
146
|
-
appendix += `- Videos: ${result.videos.length}\n`;
|
|
147
|
-
|
|
148
|
-
return appendix;
|
|
149
|
-
}
|
|
150
|
-
|
|
151
|
-
// Using async IIFE to allow for better error handling
|
|
152
|
-
(async (): Promise<void> => {
|
|
153
|
-
try {
|
|
154
|
-
await processWikipediaArticle();
|
|
155
|
-
} catch (error) {
|
|
156
|
-
console.error('Unhandled error:', error);
|
|
157
|
-
process.exit(1);
|
|
158
|
-
}
|
|
159
|
-
})();
|
|
1
|
+
/* eslint-disable no-console */
|
|
2
|
+
// processWikipedia.ts
|
|
3
|
+
import * as fs from 'fs';
|
|
4
|
+
import * as path from 'path';
|
|
5
|
+
import { processContent } from './content';
|
|
6
|
+
|
|
7
|
+
/**
|
|
8
|
+
* Process a Wikipedia article (HTML and Markdown) and create a referenced version
|
|
9
|
+
*/
|
|
10
|
+
async function processWikipediaArticle(): Promise<void> {
|
|
11
|
+
try {
|
|
12
|
+
console.log('Starting Wikipedia article processing...');
|
|
13
|
+
|
|
14
|
+
// Define file paths - adapt these to your specific file locations
|
|
15
|
+
const htmlPath = path.resolve('./test.html');
|
|
16
|
+
const markdownPath = path.resolve('./test.md');
|
|
17
|
+
const outputPath = path.resolve('./output.md');
|
|
18
|
+
|
|
19
|
+
// Check if input files exist
|
|
20
|
+
if (!fs.existsSync(htmlPath)) {
|
|
21
|
+
throw new Error(`Wikipedia HTML file not found at ${htmlPath}`);
|
|
22
|
+
}
|
|
23
|
+
|
|
24
|
+
if (!fs.existsSync(markdownPath)) {
|
|
25
|
+
throw new Error(`Wikipedia Markdown file not found at ${markdownPath}`);
|
|
26
|
+
}
|
|
27
|
+
|
|
28
|
+
console.log('Reading Wikipedia article files...');
|
|
29
|
+
const html = fs.readFileSync(htmlPath, 'utf-8');
|
|
30
|
+
const markdown = fs.readFileSync(markdownPath, 'utf-8');
|
|
31
|
+
|
|
32
|
+
// Extract article title for logging
|
|
33
|
+
const titleMatch = /<h1[^>]*>([^<]+)<\/h1>/i.exec(html);
|
|
34
|
+
const articleTitle = titleMatch
|
|
35
|
+
? titleMatch[1].trim()
|
|
36
|
+
: 'Wikipedia article';
|
|
37
|
+
|
|
38
|
+
console.log(`Processing "${articleTitle}"...`);
|
|
39
|
+
|
|
40
|
+
// Measure processing time
|
|
41
|
+
const startTime = process.hrtime();
|
|
42
|
+
|
|
43
|
+
// Process content
|
|
44
|
+
const result = processContent(html, markdown);
|
|
45
|
+
|
|
46
|
+
// Calculate processing time
|
|
47
|
+
const elapsed = process.hrtime(startTime);
|
|
48
|
+
const timeInMs = elapsed[0] * 1000 + elapsed[1] / 1000000;
|
|
49
|
+
|
|
50
|
+
// Generate reference appendix
|
|
51
|
+
const appendix = generateReferenceAppendix(result);
|
|
52
|
+
|
|
53
|
+
// Create complete output with the processed content and appendix
|
|
54
|
+
const completeOutput = result.markdown + appendix;
|
|
55
|
+
|
|
56
|
+
// Write to output file
|
|
57
|
+
fs.writeFileSync(outputPath, completeOutput);
|
|
58
|
+
|
|
59
|
+
// Print processing statistics
|
|
60
|
+
console.log('\nWikipedia article processing complete! ✓');
|
|
61
|
+
console.log('-'.repeat(60));
|
|
62
|
+
console.log(`Article: ${articleTitle}`);
|
|
63
|
+
console.log(`Processing time: ${timeInMs.toFixed(2)}ms`);
|
|
64
|
+
console.log('Media references replaced:');
|
|
65
|
+
console.log(` - Links: ${result.links.length}`);
|
|
66
|
+
console.log(` - Images: ${result.images.length}`);
|
|
67
|
+
console.log(` - Videos: ${result.videos.length}`);
|
|
68
|
+
console.log(
|
|
69
|
+
` - Total: ${result.links.length + result.images.length + result.videos.length}`
|
|
70
|
+
);
|
|
71
|
+
console.log(`Output saved to: ${outputPath}`);
|
|
72
|
+
console.log('-'.repeat(60));
|
|
73
|
+
|
|
74
|
+
// Print sample of the transformation
|
|
75
|
+
const sampleLines = result.markdown.split('\n').slice(0, 10).join('\n');
|
|
76
|
+
console.log('\nSample of transformed content:');
|
|
77
|
+
console.log('-'.repeat(30));
|
|
78
|
+
console.log(sampleLines);
|
|
79
|
+
console.log('-'.repeat(30));
|
|
80
|
+
console.log('... (continued in output file)');
|
|
81
|
+
} catch (error) {
|
|
82
|
+
console.error('Error processing Wikipedia article:', error);
|
|
83
|
+
process.exit(1);
|
|
84
|
+
}
|
|
85
|
+
}
|
|
86
|
+
|
|
87
|
+
/**
|
|
88
|
+
* Generate a comprehensive reference appendix with all media links
|
|
89
|
+
*/
|
|
90
|
+
function generateReferenceAppendix(result: {
|
|
91
|
+
links: Array<{ originalUrl: string; title?: string; text?: string }>;
|
|
92
|
+
images: Array<{ originalUrl: string; title?: string }>;
|
|
93
|
+
videos: Array<{ originalUrl: string; title?: string }>;
|
|
94
|
+
}): string {
|
|
95
|
+
let appendix = '\n\n' + '---'.repeat(10) + '\n\n';
|
|
96
|
+
appendix += '# References\n\n';
|
|
97
|
+
|
|
98
|
+
if (result.links.length > 0) {
|
|
99
|
+
appendix += '## Links\n\n';
|
|
100
|
+
result.links.forEach((link, index) => {
|
|
101
|
+
// Clean and format text for display
|
|
102
|
+
let displayText = '';
|
|
103
|
+
if (link.text != null && link.text.trim()) {
|
|
104
|
+
// Limit length for very long link text
|
|
105
|
+
let cleanText = link.text.trim();
|
|
106
|
+
if (cleanText.length > 50) {
|
|
107
|
+
cleanText = cleanText.substring(0, 47) + '...';
|
|
108
|
+
}
|
|
109
|
+
displayText = ` - "${cleanText}"`;
|
|
110
|
+
}
|
|
111
|
+
|
|
112
|
+
appendix += `**link#${index + 1}**: ${link.originalUrl}${displayText}\n\n`;
|
|
113
|
+
});
|
|
114
|
+
}
|
|
115
|
+
|
|
116
|
+
if (result.images.length > 0) {
|
|
117
|
+
appendix += '## Images\n\n';
|
|
118
|
+
result.images.forEach((image, index) => {
|
|
119
|
+
const displayTitle =
|
|
120
|
+
image.title != null && image.title.trim()
|
|
121
|
+
? ` - ${image.title.trim()}`
|
|
122
|
+
: '';
|
|
123
|
+
appendix += `**image#${index + 1}**: ${image.originalUrl}${displayTitle}\n\n`;
|
|
124
|
+
});
|
|
125
|
+
}
|
|
126
|
+
|
|
127
|
+
if (result.videos.length > 0) {
|
|
128
|
+
appendix += '## Videos\n\n';
|
|
129
|
+
result.videos.forEach((video, index) => {
|
|
130
|
+
const displayTitle =
|
|
131
|
+
video.title != null && video.title.trim()
|
|
132
|
+
? ` - ${video.title.trim()}`
|
|
133
|
+
: '';
|
|
134
|
+
appendix += `**video#${index + 1}**: ${video.originalUrl}${displayTitle}\n\n`;
|
|
135
|
+
});
|
|
136
|
+
}
|
|
137
|
+
|
|
138
|
+
// Add a category breakdown to show what types of links were found
|
|
139
|
+
const totalRefs =
|
|
140
|
+
result.links.length + result.images.length + result.videos.length;
|
|
141
|
+
|
|
142
|
+
appendix += '## Summary\n\n';
|
|
143
|
+
appendix += `Total references: **${totalRefs}**\n\n`;
|
|
144
|
+
appendix += `- Links: ${result.links.length}\n`;
|
|
145
|
+
appendix += `- Images: ${result.images.length}\n`;
|
|
146
|
+
appendix += `- Videos: ${result.videos.length}\n`;
|
|
147
|
+
|
|
148
|
+
return appendix;
|
|
149
|
+
}
|
|
150
|
+
|
|
151
|
+
// Using async IIFE to allow for better error handling
|
|
152
|
+
(async (): Promise<void> => {
|
|
153
|
+
try {
|
|
154
|
+
await processWikipediaArticle();
|
|
155
|
+
} catch (error) {
|
|
156
|
+
console.error('Unhandled error:', error);
|
|
157
|
+
process.exit(1);
|
|
158
|
+
}
|
|
159
|
+
})();
|