@fettstorch/clai 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json ADDED
@@ -0,0 +1,38 @@
1
+ {
2
+ "name": "@fettstorch/clai",
3
+ "version": "0.1.0",
4
+ "main": "dist/index.js",
5
+ "bin": {
6
+ "clai": "dist/cli.js"
7
+ },
8
+ "repository": {
9
+ "type": "git",
10
+ "url": "git+https://github.com/schnullerpip/clai.git"
11
+ },
12
+ "scripts": {
13
+ "start": "bun run src/cli.ts",
14
+ "build": "bun build ./src/index.ts --outdir dist --target node && bun build ./src/cli.ts --outdir dist --target node",
15
+ "dev": "bun --watch src/cli.ts"
16
+ },
17
+ "author": "schnullerpip (https://github.com/schnullerpip)",
18
+ "license": "ISC",
19
+ "description": "AI-powered webpage summarizer",
20
+ "dependencies": {
21
+ "@fettstorch/jule": "^0.5.3",
22
+ "chalk": "^5.3.0",
23
+ "cheerio": "^1.0.0-rc.12",
24
+ "commander": "^12.1.0",
25
+ "inquirer": "^12.1.0",
26
+ "openai": "^4.73.0",
27
+ "ora": "^8.1.1",
28
+ "googleapis": "^126.0.1"
29
+ },
30
+ "devDependencies": {
31
+ "@types/inquirer": "^9.0.7",
32
+ "@types/node": "^20.11.19",
33
+ "bun-types": "latest"
34
+ },
35
+ "publishConfig": {
36
+ "access": "public"
37
+ }
38
+ }
package/src/cli.ts ADDED
@@ -0,0 +1,137 @@
1
+ #!/usr/bin/env bun
2
+ import { Command } from 'commander';
3
+ import inquirer from 'inquirer';
4
+ import chalk from 'chalk';
5
+ import ora from 'ora';
6
+ import { clai } from './index';
7
+
8
const program = new Command();

/**
 * CLI entry point.
 *
 * Registers the `clai` command, reads the OpenAI key from the
 * `OPENAI_API_KEY` environment variable, resolves the input (command-line
 * arguments joined with spaces, or an interactive prompt when none were
 * given) and hands it to `analyzeInput`.
 *
 * Exits with status 1 when the key is missing or an error escapes the command
 * handling, and with status 0 once `analyzeInput` returns.
 */
async function main() {
  try {
    program
      .name('clai')
      .description('AI-powered web scraping tool')
      // Keep in sync with the "version" field of package.json; this release
      // is published as 0.1.0, so `clai --version` must not report 1.0.0.
      .version('0.1.0')
      .argument('[input...]', 'URL or search terms to analyze')
      .action(async (inputs: string[]) => {
        const openAIKey = process.env.OPENAI_API_KEY;

        if (!openAIKey) {
          console.error(chalk.red('❌ OPENAI_API_KEY environment variable is not set'));
          process.exit(1);
        }

        // Several positional words form a single search query.
        let input = inputs?.join(' ');

        if (!input) {
          const answers = await inquirer.prompt([
            {
              type: 'input',
              name: 'input',
              message: 'Enter a URL or search query:',
              validate: (input) => input.length > 0
            }
          ]);
          input = answers.input;
        }

        await analyzeInput(input, openAIKey);
        // NOTE(review): the explicit exit presumably prevents lingering stdin
        // handles from keeping the process alive — confirm before removing.
        process.exit(0);
      });

    await program.parseAsync();
  } catch (error) {
    console.error(chalk.red('Fatal error:'), error);
    process.exit(1);
  }
}
49
+
50
/**
 * Writes `text` to stdout one character at a time, followed by a newline.
 *
 * While stdin is an interactive terminal, pressing Return skips the animation
 * and prints the remaining text at once.
 *
 * @param text - The text to print.
 * @param delay - Pause between characters in milliseconds.
 */
async function animateText(text: string, delay = 25) {
  let shouldComplete = false;

  const keypressHandler = (_str: string, key?: { name?: string }) => {
    if (key?.name === 'return') {
      shouldComplete = true;
    }
  };

  // Raw mode only exists on TTY streams. When stdin is piped or redirected,
  // `setRawMode` is undefined and calling it would throw, so the
  // skip-on-Return feature is simply disabled in that case.
  const interactive = process.stdin.isTTY === true;

  if (interactive) {
    // NOTE(review): 'keypress' events are only emitted once something has
    // called readline.emitKeypressEvents(process.stdin); presumably an earlier
    // inquirer prompt did so — confirm.
    process.stdin.on('keypress', keypressHandler);
    process.stdin.setRawMode(true);
    process.stdin.resume();
  }

  try {
    // Iterate by Unicode code point, not UTF-16 code unit: writing half of a
    // surrogate pair (e.g. an emoji) on its own produces a replacement
    // character in the terminal.
    const glyphs = Array.from(text);

    for (const [index, glyph] of glyphs.entries()) {
      if (shouldComplete) {
        // Show remaining text immediately
        process.stdout.write(glyphs.slice(index).join(''));
        break;
      }

      process.stdout.write(glyph);
      await new Promise(resolve => setTimeout(resolve, delay));
    }
  } finally {
    // Always restore the terminal, even if writing failed half-way through.
    if (interactive) {
      process.stdin.setRawMode(false);
      process.stdin.pause();
      process.stdin.removeListener('keypress', keypressHandler);
    }
  }

  process.stdout.write('\n');
}
89
+
90
/**
 * Runs the interactive analysis loop.
 *
 * Each round analyzes one URL or search query with `clai`, prints the
 * summary, and asks what to do next: start a new search, follow one of the
 * links extracted from the summary, or exit. Errors are reported on stderr
 * and end the loop; they are not rethrown.
 *
 * @param input - URL or search query for the first round.
 * @param openAIKey - OpenAI API key forwarded to `clai`.
 */
async function analyzeInput(input: string, openAIKey: string) {
  // A loop instead of recursion: a long browsing session must not grow the
  // chain of pending calls with every followed link.
  let pending: string | undefined = input;

  while (pending !== undefined) {
    const spinner = ora('Analyzing content...').start();
    let result: Awaited<ReturnType<typeof clai>>;

    // Only the analysis itself may mark the spinner as failed; prompt errors
    // further down happen after the spinner has already succeeded.
    try {
      result = await clai(pending, openAIKey);
      spinner.succeed('Analysis complete');
    } catch (error) {
      spinner.fail('Analysis failed');
      console.error(chalk.red('Error:'), error);
      return;
    }

    try {
      console.log(chalk.green.bold('\n📝 Summary:'));
      await animateText(result.summary);

      // Prompt user to select a link
      const { selectedLink } = await inquirer.prompt([
        {
          type: 'list',
          name: 'selectedLink',
          message: '\n\nWhat now?:',
          choices: [
            { name: chalk.yellow('🔍 New search'), value: 'new' },
            ...result.links.map(link => ({
              name: `${chalk.bold(link.name)}: ${chalk.cyan(link.url)}`,
              value: link.url
            })),
            { name: 'Exit', value: 'exit' }
          ]
        }
      ]);

      if (selectedLink === 'new') {
        const { input: newInput } = await inquirer.prompt([
          {
            type: 'input',
            name: 'input',
            message: 'Enter a URL or search query:',
            validate: (input) => input.length > 0
          }
        ]);
        pending = newInput;
      } else if (selectedLink && selectedLink !== 'exit') {
        // Any other value is the URL of one of the extracted links.
        pending = selectedLink;
      } else {
        pending = undefined;
      }
    } catch (error) {
      console.error(chalk.red('Error:'), error);
      return;
    }
  }
}
136
+
137
+ main();
package/src/index.ts ADDED
@@ -0,0 +1,45 @@
1
+ import { scrape } from './scraper';
2
+ import { summarizeWebPage as summarize } from './summarizer';
3
+
4
/** Result returned by `clai`. */
export interface SummaryOutput {
  /** Summary text produced by the summarizer, with surrounding whitespace trimmed. */
  summary: string;
  /** Links reported by the summarizer, each with a display name and target URL. */
  links: readonly { name: string; url: string }[];
  /** URLs of the pages whose content was fed into the summary. */
  sources: string[];
}
12
+
13
/**
 * Scrapes and analyzes webpages using AI
 * @param input - URL or search query to analyze
 * @param openAIKey - OpenAI API key
 * @returns Promise with summary, extracted links, and source URLs
 * @throws Error when no page could be scraped for `input`, and whatever the
 *   scraper or the summarizer throws
 *
 * @example
 * ```ts
 * const result = await clai('https://example.com', 'your-openai-key')
 * console.log(result.summary) // AI generated summary
 * console.log(result.links)   // Extracted links
 * console.log(result.sources) // Source URLs
 * ```
 */
export async function clai(input: string, openAIKey: string): Promise<SummaryOutput> {
  const scrapedData = await scrape(input);

  // `scrape` drops pages that failed to load. With nothing left there is
  // nothing to summarize, and sending an empty prompt would only spend an API
  // call on a made-up answer.
  if (scrapedData.length === 0) {
    throw new Error(`No content could be scraped for "${input}"`);
  }

  // Combine all content with source attribution
  const combinedContent = scrapedData
    .map(data => `Content from ${data.url}:\n${data.content}`)
    .join('\n\n');

  const result = await summarize(combinedContent, openAIKey);

  return {
    summary: result.textual.trim(),
    links: result.links,
    sources: scrapedData.map(data => data.url)
  };
}

// Default export for easier importing
export default clai;
package/src/openai.ts ADDED
@@ -0,0 +1,92 @@
1
+ import { once } from '@fettstorch/jule';
2
+ import OpenAI from 'openai';
3
+
4
/** Upper bound on prompt size, expressed in (estimated) tokens. */
const MAX_INPUT_TOKENS = 10000;

/**
 * Caps `content` at roughly `MAX_INPUT_TOKENS` tokens, estimated as four
 * characters per token, and appends a marker when anything was cut off.
 *
 * @param content - Text to cap.
 * @returns `content` unchanged when it fits, otherwise its leading part
 *   followed by `'... (content truncated)'`.
 */
function truncateContent(content: string): string {
  const maxChars = MAX_INPUT_TOKENS * 4;
  if (content.length <= maxChars) return content;

  // Do not cut through a surrogate pair: if the last kept code unit is a high
  // surrogate, its partner lies beyond the limit, so drop it as well rather
  // than emit a lone surrogate.
  const lastKept = content.charCodeAt(maxChars - 1);
  const end = lastKept >= 0xd800 && lastKept <= 0xdbff ? maxChars - 1 : maxChars;

  return content.slice(0, end) + '... (content truncated)';
}
11
+
12
/**
 * Shape of a message that carries a function call.
 *
 * NOTE(review): `T` is not referenced by any member; presumably it names the
 * type that `function_call.arguments` deserializes to — confirm.
 */
export interface StructuredResponse<T> {
  function_call: { arguments: string };
}
17
+
18
/**
 * Thin wrapper around the OpenAI chat-completions API.
 *
 * Every prompt is passed through `truncateContent` before it is sent, and
 * responses are capped at 2000 tokens.
 */
class OpenAIWrapper {
  private client: OpenAI;

  /**
   * @param apiKey - OpenAI API key used for every request of this instance.
   */
  constructor(apiKey: string) {
    this.client = new OpenAI({ apiKey });
  }

  /**
   * Sends `prompt` as a single user message and returns the text of the
   * first choice.
   *
   * @param prompt - Prompt text; truncated when too long.
   * @param options - Optional model id and sampling temperature.
   * @returns The message content, or `''` when the response has none.
   */
  async complete(
    prompt: string,
    options: {
      model?: string;
      temperature?: number;
    } = {}
  ): Promise<string> {
    const truncatedPrompt = truncateContent(prompt);
    // The previous default, 'gpt-4o-turbo', is not an existing model id, so
    // every call relying on the default was rejected by the API.
    const { model = 'gpt-4o', temperature = 0.6 } = options;

    const response = await this.client.chat.completions.create({
      model,
      messages: [{ role: 'user', content: truncatedPrompt }],
      temperature,
      max_tokens: 2000
    });

    return response.choices[0]?.message?.content ?? '';
  }

  /**
   * Sends `prompt` and forces the model to answer through a function call
   * whose parameters follow `responseSchema`, then parses the arguments.
   *
   * The parsed value is NOT validated against the schema; `T` is the caller's
   * claim about its shape.
   *
   * @param prompt - Prompt text; truncated when too long.
   * @param options - Model id, temperature, function name, and the JSON-schema
   *   `properties` object describing the response. Every top-level key of
   *   `responseSchema` is declared as required.
   * @returns The parsed function-call arguments.
   * @throws Error when the response carries no function-call arguments or
   *   they are not valid JSON.
   */
  async completeStructured<T>(
    prompt: string,
    options: {
      model?: string;
      temperature?: number;
      functionName?: string;
      responseSchema: Record<string, unknown>;
    }
  ): Promise<T> {
    const truncatedPrompt = truncateContent(prompt);
    const {
      model = 'gpt-3.5-turbo',
      temperature = 0.6,
      functionName = 'generate_response',
      responseSchema
    } = options;

    // NOTE(review): `functions` / `function_call` are the legacy
    // function-calling parameters; consider migrating to `tools` /
    // `tool_choice`.
    const response = await this.client.chat.completions.create({
      model,
      messages: [{ role: 'user', content: truncatedPrompt }],
      temperature,
      max_tokens: 2000,
      functions: [{
        name: functionName,
        parameters: {
          type: 'object',
          properties: responseSchema,
          required: Object.keys(responseSchema)
        }
      }],
      function_call: { name: functionName }
    });

    const functionCall = response.choices[0]?.message?.function_call;
    if (!functionCall?.arguments) {
      throw new Error('No function call arguments received');
    }

    // The arguments are model output and may be malformed (for instance cut
    // off at the max_tokens limit); report that with context instead of
    // letting a bare SyntaxError escape.
    let parsed: unknown;
    try {
      parsed = JSON.parse(functionCall.arguments);
    } catch (cause) {
      const reason = cause instanceof Error ? cause.message : String(cause);
      throw new Error(`Function call arguments are not valid JSON: ${reason}`);
    }

    return parsed as T;
  }
}
86
+
87
/**
 * Builds an `OpenAIWrapper` for the given key.
 *
 * @throws Error when `apiKey` is missing or empty.
 */
function createOpenAIWrapper(apiKey?: string): OpenAIWrapper {
  if (apiKey) {
    return new OpenAIWrapper(apiKey)
  }
  throw new Error('OPENAI_API_KEY is not set')
}

/**
 * Accessor for the OpenAI wrapper, created through `once`.
 *
 * NOTE(review): presumably `once` caches the result of the first call, in
 * which case a different key passed later would be ignored — confirm against
 * @fettstorch/jule.
 */
export const openaiClient: (apiKey?: string) => OpenAIWrapper = once(createOpenAIWrapper);
package/src/scraper.ts ADDED
@@ -0,0 +1,102 @@
1
+ import * as Cheerio from 'cheerio';
2
+
3
/** Text extracted from one web page. */
export interface ScrapedData {
  /** URL associated with the page. */
  url: string;
  /** Text of the page's `<title>`. */
  title: string;
  /** Text content of the page's `<body>`. */
  content: string;
}
8
+
9
/**
 * Fetches and extracts the text of the page(s) behind `input`.
 *
 * Input accepted by `isValidUrl` is scraped directly; anything else is
 * treated as a search query and the URLs returned by `getGoogleResults` are
 * scraped in parallel. Pages that fail are logged and left out of the result.
 *
 * @param input - URL or search query.
 * @returns One entry per successfully scraped page, each tagged with the URL
 *   it was fetched from.
 * @throws Rethrows (after logging) any error raised while resolving the URLs.
 */
export async function scrape(input: string): Promise<ScrapedData[]> {
  // Scrapes a single page; a failure is logged and reported as null so one
  // bad page does not abort the whole batch.
  const scrapePage = async (url: string): Promise<ScrapedData | null> => {
    try {
      const html = await fetchHtml(url);
      return { ...extractDataFromHtml(html), url };
    } catch (error) {
      console.error(`Error scraping ${url}:`, error);
      return null;
    }
  };

  try {
    const urls: string[] = isValidUrl(input)
      ? [normalizeUrl(input)]
      : await getGoogleResults(input);

    const pages = await Promise.all(urls.map(url => scrapePage(url)));

    return pages.filter((page): page is ScrapedData => page !== null);
  } catch (error) {
    console.error('Error during scraping:', error);
    throw error;
  }
}
40
+
41
+ // --- module private
42
+
43
/**
 * Heuristically decides whether `input` is a URL rather than a search query.
 *
 * Input counts as a URL when it contains no whitespace, parses as an http(s)
 * URL (a missing scheme is assumed to be https), and either carries an
 * explicit scheme or names a host that contains a dot or is `localhost`.
 * A bare word such as `typescript` is therefore a search query, not a host.
 *
 * @param input - Raw user input.
 * @returns `true` when the input should be fetched directly.
 */
function isValidUrl(input: string): boolean {
  if (input === '' || /\s/.test(input)) {
    return false;
  }

  const hasScheme = /^https?:\/\//i.test(input);

  try {
    const { hostname } = new URL(hasScheme ? input : `https://${input}`);
    return hasScheme || hostname === 'localhost' || hostname.includes('.');
  } catch {
    // Not parseable as a URL at all.
    return false;
  }
}
46
+
47
/**
 * Ensures `url` carries an http(s) scheme, defaulting to https.
 *
 * @param url - URL that may lack a scheme, e.g. `example.com`.
 * @returns `url` unchanged when it already starts with `http://` or
 *   `https://` (in any letter case), otherwise the same address with `https`
 *   prepended.
 */
function normalizeUrl(url: string): string {
  // URL schemes are case-insensitive, so `HTTPS://…` is already complete.
  if (/^https?:\/\//i.test(url)) {
    return url;
  }
  // Protocol-relative form (`//host/path`): only the scheme name is missing.
  if (url.startsWith('//')) {
    return `https:${url}`;
  }
  return `https://${url}`;
}
53
+
54
/**
 * Looks up `query` on Google and returns up to three result URLs.
 *
 * URLs are pulled out of the result page's HTML with a regular expression.
 * Google-owned hosts are discarded, and a URL is kept only when it contains
 * at least one query word longer than two characters. When the query has no
 * such word, the word filter is skipped.
 *
 * @param query - Search terms.
 * @returns Between one and three distinct URLs, in order of appearance.
 * @throws Error('No search results found') when nothing survives filtering,
 *   and whatever `fetchHtml` throws.
 */
async function getGoogleResults(query: string): Promise<string[]> {
  const searchUrl = `https://www.google.com/search?q=${encodeURIComponent(query)}`;
  const html = await fetchHtml(searchUrl);

  const urlPattern = /https?:\/\/(www\.)?[-a-zA-Z0-9@:%._\+~#=]{1,256}\.[a-zA-Z0-9()]{1,6}\b([-a-zA-Z0-9()@:%_\+.~#?&//=]*)/gi;
  const urls = html.match(urlPattern) || [];

  const queryWords = query
    .toLowerCase()
    .split(/\s+/)
    .filter(word => word.length > 2);

  // Hosts that belong to the search page itself rather than to a result.
  const excludedFragments = ['www.google', 'gstatic.com', 'googleapis.com', 'googleadservices'];

  const filteredUrls = new Set(
    urls.filter(url => {
      const urlLower = url.toLowerCase();
      if (excludedFragments.some(fragment => urlLower.includes(fragment))) {
        return false;
      }
      // A query made only of short words ("AI", "C#", "go") leaves no words
      // to match against; requiring a match would then reject every result.
      return queryWords.length === 0 || queryWords.some(word => urlLower.includes(word));
    })
  );

  const results = [...filteredUrls].slice(0, 3);

  if (results.length === 0) {
    throw new Error('No search results found');
  }

  return results;
}
85
+
86
/**
 * Downloads `url` and returns the response body as text.
 *
 * The request is sent with a desktop-browser User-Agent header.
 *
 * @param url - Absolute URL to fetch.
 * @returns The response body.
 * @throws Error when the server answers with a non-2xx status, and whatever
 *   `fetch` throws on network failure.
 */
async function fetchHtml(url: string): Promise<string> {
  const response = await fetch(url, {
    headers: {
      'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
    }
  });

  // `fetch` resolves for HTTP error responses too; without this check a 404
  // or 500 error page would be handed on as if it were the page's content.
  if (!response.ok) {
    throw new Error(`Request to ${url} failed with status ${response.status} ${response.statusText}`);
  }

  return response.text();
}
94
+
95
/**
 * Extracts the title, readable body text and canonical URL from an HTML
 * document.
 *
 * @param html - Raw HTML markup.
 * @returns The text of the first `<title>`, the body text without script or
 *   style content and with whitespace condensed, and the
 *   `<link rel="canonical">` href (or `''` when the page declares none).
 */
function extractDataFromHtml(html: string): ScrapedData {
  const cheerioDoc = Cheerio.load(html);

  // `.text()` concatenates every match; inline SVGs may carry their own
  // <title>, so only the first (document) title is used.
  const title = cheerioDoc('title').first().text().trim();
  const url = cheerioDoc('link[rel="canonical"]').attr('href') || '';

  // Inline scripts and styles are not readable content; they would only
  // consume the limited prompt budget downstream.
  cheerioDoc('script, style, noscript, template').remove();

  const content = cheerioDoc('body')
    .text()
    .replace(/[ \t\r\f\v]+/g, ' ') // squeeze runs of horizontal whitespace
    .replace(/\s*\n\s*/g, '\n') // one line break between text fragments
    .trim();

  return { title, content, url };
}
package/src/summarizer.ts ADDED
@@ -0,0 +1,77 @@
1
+ import { openaiClient } from './openai';
2
+
3
/** Structured answer expected from the summarization request. */
export type SummaryResult = Readonly<{
  /** The summary text. */
  textual: string;
  /** Links reported for the summarized text, each with a display name and URL. */
  links: readonly { name: string; url: string }[];
}>
10
+
11
/**
 * Summarizes content and extracts relevant links using OpenAI
 * @param content - The text content to analyze and summarize
 * @param openAIApiKey - OpenAI API key used to obtain the client
 * @returns Promise containing the summary text and extracted links; a field
 *   missing from the model's answer is replaced by `''` / `[]`
 * @throws Will throw an error if the API key is missing or the OpenAI API call fails
 *
 * @example
 * ```ts
 * const result = await summarizeWebPage(pageText, 'your-openai-key')
 * console.log(result.textual) // Summary text
 * console.log(result.links) // Array of extracted links
 * ```
 */
export async function summarizeWebPage(content: string, openAIApiKey: string): Promise<SummaryResult> {
  const openai = openaiClient(openAIApiKey);

  const prompt = `Your are an expert educator. Analyze the following text and create a
concise summary with the following guidelines:
1. Always use bullet points, lists and tables over paragraphs.
2. Produce valid markdown output
3. Use the articles titles and headings as a guide
4. Try to present the most relevant information
5. Extract all meaningful links from the text
6. Don't narrate the content e.g. 'The text says that the earth is round' but rather use the content itself e.g. 'The earth is round'
7. Don't use the word 'text' or 'content' in your summary
8. Don't try to emulate emotions or tone of the original content, always be neutral and objective
9. If the content is instructional repeat the instructions step by step e.g.:
- Step 1: Do this
- Step 2: Do that
- Step 3: Done
10. Mark proper nouns as bold e.g. **Harry Potter**

Don't just summarize, cite the key information.

Text to analyze:\n"${content}\n"`;

  // JSON-schema `properties` for the function call the model must answer with.
  const schema = {
    textual: {
      type: 'string',
      description: 'Concise summary of the text'
    },
    links: {
      type: 'array',
      items: {
        type: 'object',
        properties: {
          name: {
            type: 'string',
            description: 'Descriptive name or title of the link'
          },
          url: {
            type: 'string',
            description: 'The URL of the link'
          }
        },
        required: ['name', 'url']
      }
    }
  };

  // The response is parsed JSON that is not validated against the schema, so
  // either field may be absent; fill in defaults so callers can rely on the
  // declared shape.
  const response = await openai.completeStructured<Partial<SummaryResult>>(prompt, {
    temperature: 0.3,
    responseSchema: schema
  });

  return {
    textual: response.textual ?? '',
    links: response.links ?? []
  };
}
package/tsconfig.json ADDED
@@ -0,0 +1,14 @@
1
+ {
2
+ "compilerOptions": {
3
+ "target": "esnext",
4
+ "module": "esnext",
5
+ "moduleResolution": "bundler",
6
+ "types": ["bun-types"],
7
+ "outDir": "./dist",
8
+ "rootDir": "./src",
9
+ "strict": true,
10
+ "skipLibCheck": true,
11
+ "forceConsistentCasingInFileNames": true
12
+ },
13
+ "include": ["src/**/*"]
14
+ }