@fettstorch/clai 0.1.6 → 0.1.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.vscode/settings.json +10 -0
- package/README.md +1 -0
- package/biome.json +33 -0
- package/dist/cli.js +5933 -5602
- package/dist/index.js +1642 -1372
- package/package.json +3 -3
- package/src/cli.ts +80 -68
- package/src/index.ts +40 -14
- package/src/openai.ts +29 -31
- package/src/scraper.ts +351 -43
- package/src/summarizer.ts +83 -19
- package/tsconfig.json +14 -14
package/package.json
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
{
|
2
2
|
"name": "@fettstorch/clai",
|
3
|
-
"version": "0.1.6",
|
3
|
+
"version": "0.1.8",
|
4
4
|
"main": "dist/index.js",
|
5
5
|
"bin": {
|
6
6
|
"clai": "dist/cli.js"
|
@@ -18,14 +18,14 @@
|
|
18
18
|
"license": "ISC",
|
19
19
|
"description": "AI-powered webpage summarizer",
|
20
20
|
"dependencies": {
|
21
|
+
"@biomejs/biome": "^1.9.4",
|
21
22
|
"@fettstorch/jule": "^0.5.3",
|
22
23
|
"chalk": "^5.3.0",
|
23
24
|
"cheerio": "^1.0.0-rc.12",
|
24
25
|
"commander": "^12.1.0",
|
25
26
|
"inquirer": "^12.1.0",
|
26
27
|
"openai": "^4.73.0",
|
27
|
-
"ora": "^8.1.1"
|
28
|
-
"googleapis": "^126.0.1"
|
28
|
+
"ora": "^8.1.1"
|
29
29
|
},
|
30
30
|
"devDependencies": {
|
31
31
|
"@types/inquirer": "^9.0.7",
|
package/src/cli.ts
CHANGED
@@ -1,39 +1,43 @@
|
|
1
1
|
#!/usr/bin/env bun
|
2
|
-
import {
|
3
|
-
import
|
4
|
-
import
|
5
|
-
import
|
6
|
-
import
|
7
|
-
import {
|
8
|
-
import
|
2
|
+
import { when } from "@fettstorch/jule";
|
3
|
+
import chalk from "chalk";
|
4
|
+
import { Command } from "commander";
|
5
|
+
import inquirer from "inquirer";
|
6
|
+
import ora from "ora";
|
7
|
+
import pkg from "../package.json" assert { type: "json" };
|
8
|
+
import { clai } from "./index";
|
9
|
+
import { version } from "../package.json";
|
9
10
|
|
10
11
|
const program = new Command();
|
11
12
|
|
12
13
|
async function main() {
|
14
|
+
console.log(`[clAi]::${chalk.cyan(version)}`);
|
13
15
|
try {
|
14
16
|
program
|
15
|
-
.name(
|
16
|
-
.description(
|
17
|
+
.name("clai")
|
18
|
+
.description("AI-powered web scraping tool")
|
17
19
|
.version(pkg.version)
|
18
|
-
.argument(
|
20
|
+
.argument("[input...]", "URL or search terms to analyze")
|
19
21
|
.action(async (inputs: string[]) => {
|
20
22
|
const openAIKey = process.env.OPENAI_API_KEY;
|
21
|
-
|
23
|
+
|
22
24
|
if (!openAIKey) {
|
23
|
-
console.error(
|
25
|
+
console.error(
|
26
|
+
chalk.red("❌ OPENAI_API_KEY environment variable is not set")
|
27
|
+
);
|
24
28
|
process.exit(1);
|
25
29
|
}
|
26
30
|
|
27
|
-
let input = inputs?.join(
|
28
|
-
|
31
|
+
let input = inputs?.join(" ");
|
32
|
+
|
29
33
|
if (!input) {
|
30
34
|
const answers = await inquirer.prompt([
|
31
35
|
{
|
32
|
-
type:
|
33
|
-
name:
|
34
|
-
message:
|
35
|
-
validate: (input) => input.length > 0
|
36
|
-
}
|
36
|
+
type: "input",
|
37
|
+
name: "input",
|
38
|
+
message: "Enter a URL or search query:",
|
39
|
+
validate: (input) => input.length > 0,
|
40
|
+
},
|
37
41
|
]);
|
38
42
|
input = answers.input;
|
39
43
|
}
|
@@ -44,27 +48,27 @@ async function main() {
|
|
44
48
|
|
45
49
|
await program.parseAsync();
|
46
50
|
} catch (error) {
|
47
|
-
console.error(chalk.red(
|
51
|
+
console.error(chalk.red("Fatal error:"), error);
|
48
52
|
process.exit(1);
|
49
53
|
}
|
50
54
|
}
|
51
55
|
|
52
56
|
async function animateText(text: string, delay = 25) {
|
53
57
|
let shouldComplete = false;
|
54
|
-
|
58
|
+
|
55
59
|
// Setup keypress listener
|
56
60
|
const keypressHandler = (str: string, key: { name: string }) => {
|
57
|
-
if (key.name ===
|
61
|
+
if (key.name === "return") {
|
58
62
|
shouldComplete = true;
|
59
63
|
}
|
60
64
|
};
|
61
|
-
|
62
|
-
process.stdin.on(
|
63
|
-
|
65
|
+
|
66
|
+
process.stdin.on("keypress", keypressHandler);
|
67
|
+
|
64
68
|
// Enable raw mode to get keypress events
|
65
69
|
process.stdin.setRawMode(true);
|
66
70
|
process.stdin.resume();
|
67
|
-
|
71
|
+
|
68
72
|
let currentIndex = 0;
|
69
73
|
while (currentIndex < text.length) {
|
70
74
|
if (shouldComplete) {
|
@@ -72,84 +76,92 @@ async function animateText(text: string, delay = 25) {
|
|
72
76
|
process.stdout.write(text.slice(currentIndex));
|
73
77
|
break;
|
74
78
|
}
|
75
|
-
|
79
|
+
|
76
80
|
process.stdout.write(text[currentIndex]);
|
77
81
|
currentIndex++;
|
78
|
-
|
82
|
+
|
79
83
|
if (!shouldComplete) {
|
80
|
-
await new Promise(resolve => setTimeout(resolve, delay));
|
84
|
+
await new Promise((resolve) => setTimeout(resolve, delay));
|
81
85
|
}
|
82
86
|
}
|
83
|
-
|
87
|
+
|
84
88
|
// Cleanup
|
85
89
|
process.stdin.setRawMode(false);
|
86
90
|
process.stdin.pause();
|
87
|
-
process.stdin.removeListener(
|
88
|
-
|
89
|
-
process.stdout.write(
|
91
|
+
process.stdin.removeListener("keypress", keypressHandler);
|
92
|
+
|
93
|
+
process.stdout.write("\n");
|
90
94
|
}
|
91
95
|
|
92
96
|
function formatMarkdownForTerminal(text: string): string {
|
93
97
|
// Handle headings first
|
94
|
-
const headingHandled = text.replace(
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
|
98
|
+
const headingHandled = text.replace(
|
99
|
+
/^(#{1,3})\s+(.*?)$/gm,
|
100
|
+
(_, hashes, content) =>
|
101
|
+
when(hashes.length)({
|
102
|
+
1: () =>
|
103
|
+
`\n${chalk.yellow.bold("═══ ")}${chalk.yellow.bold(
|
104
|
+
content
|
105
|
+
)}${chalk.yellow.bold(" ═══")}`,
|
106
|
+
2: () => chalk.yellowBright.bold(content),
|
107
|
+
3: () => chalk.yellow(content),
|
108
|
+
else: () => content,
|
109
|
+
})
|
110
|
+
);
|
111
|
+
|
101
112
|
// Handle regular bold text after headings
|
102
|
-
const boldHandled = headingHandled.replace(/\*\*(.*?)\*\*/g, (_, content) =>
|
103
|
-
|
113
|
+
const boldHandled = headingHandled.replace(/\*\*(.*?)\*\*/g, (_, content) =>
|
114
|
+
chalk.bold(content)
|
115
|
+
);
|
116
|
+
|
104
117
|
return boldHandled;
|
105
118
|
}
|
106
119
|
|
107
120
|
async function analyzeInput(input: string, openAIKey: string) {
|
108
|
-
const spinner = ora(
|
109
|
-
|
121
|
+
const spinner = ora("Thinking...").start();
|
122
|
+
|
110
123
|
try {
|
111
124
|
const result = await clai(input, openAIKey);
|
112
|
-
spinner.succeed(
|
113
|
-
|
114
|
-
console.log(chalk.green.bold(
|
125
|
+
spinner.succeed("AHA!");
|
126
|
+
|
127
|
+
console.log(chalk.green.bold("\n📝 ═══ Summary ═══ :"));
|
115
128
|
const formattedContent = formatMarkdownForTerminal(result.summary);
|
116
129
|
await animateText(formattedContent);
|
117
|
-
|
130
|
+
|
118
131
|
// Prompt user to select a link
|
119
132
|
const { selectedLink } = await inquirer.prompt([
|
120
133
|
{
|
121
|
-
type:
|
122
|
-
name:
|
123
|
-
message:
|
134
|
+
type: "list",
|
135
|
+
name: "selectedLink",
|
136
|
+
message: "\n\nWhat now?:",
|
124
137
|
choices: [
|
125
|
-
{ name: chalk.yellow(
|
126
|
-
...result.links.map(link => ({
|
138
|
+
{ name: chalk.yellow("🔍 New search"), value: "new" },
|
139
|
+
...result.links.map((link) => ({
|
127
140
|
name: `${chalk.bold(link.name)}: ${chalk.cyan(link.url)}`,
|
128
|
-
value: link.url
|
141
|
+
value: link.url,
|
129
142
|
})),
|
130
|
-
{ name:
|
131
|
-
]
|
132
|
-
}
|
143
|
+
{ name: "Exit", value: "exit" },
|
144
|
+
],
|
145
|
+
},
|
133
146
|
]);
|
134
147
|
|
135
|
-
if (selectedLink ===
|
148
|
+
if (selectedLink === "new") {
|
136
149
|
const { input: newInput } = await inquirer.prompt([
|
137
150
|
{
|
138
|
-
type:
|
139
|
-
name:
|
140
|
-
message:
|
141
|
-
validate: (input) => input.length > 0
|
142
|
-
}
|
151
|
+
type: "input",
|
152
|
+
name: "input",
|
153
|
+
message: "Enter a URL or search query:",
|
154
|
+
validate: (input) => input.length > 0,
|
155
|
+
},
|
143
156
|
]);
|
144
157
|
await analyzeInput(newInput, openAIKey);
|
145
|
-
} else if (selectedLink && selectedLink !==
|
158
|
+
} else if (selectedLink && selectedLink !== "exit") {
|
146
159
|
await analyzeInput(selectedLink, openAIKey);
|
147
160
|
}
|
148
|
-
|
149
161
|
} catch (error) {
|
150
|
-
spinner?.fail(
|
151
|
-
console.error(chalk.red(
|
162
|
+
spinner?.fail("Analysis failed");
|
163
|
+
console.error(chalk.red("Error:"), error);
|
152
164
|
}
|
153
165
|
}
|
154
166
|
|
155
|
-
main();
|
167
|
+
main();
|
package/src/index.ts
CHANGED
@@ -1,5 +1,5 @@
|
|
1
|
-
import { scrape } from
|
2
|
-
import { summarizeWebPage as summarize } from
|
1
|
+
import { scrape } from "./scraper";
|
2
|
+
import { summarizeWebPage as summarize, summarizeQuery } from "./summarizer";
|
3
3
|
|
4
4
|
export interface SummaryOutput {
|
5
5
|
summary: string;
|
@@ -15,7 +15,7 @@ export interface SummaryOutput {
|
|
15
15
|
* @param input - URL or search query to analyze
|
16
16
|
* @param openAIKey - OpenAI API key
|
17
17
|
* @returns Promise with summary, extracted links, and source URLs
|
18
|
-
*
|
18
|
+
*
|
19
19
|
* @example
|
20
20
|
* ```ts
|
21
21
|
* const result = await clai('https://example.com', 'your-openai-key')
|
@@ -24,22 +24,48 @@ export interface SummaryOutput {
|
|
24
24
|
* console.log(result.sources) // Source URLs
|
25
25
|
* ```
|
26
26
|
*/
|
27
|
-
export async function clai(
|
27
|
+
export async function clai(
|
28
|
+
input: string,
|
29
|
+
openAIKey: string
|
30
|
+
): Promise<SummaryOutput> {
|
28
31
|
const scrapedData = await scrape(input);
|
29
|
-
|
30
|
-
//
|
31
|
-
const
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
32
|
+
|
33
|
+
// Check if we have useful scraped data (not just error pages)
|
34
|
+
const usefulData = scrapedData.filter(
|
35
|
+
(data) =>
|
36
|
+
data.content.length > 200 &&
|
37
|
+
!data.content.includes("Wikipedia does not have an article") &&
|
38
|
+
!data.content.includes("page not found") &&
|
39
|
+
!data.content.includes("404") &&
|
40
|
+
!data.content.includes("error")
|
41
|
+
);
|
42
|
+
|
43
|
+
// If we have useful scraped data, use it
|
44
|
+
if (usefulData.length > 0) {
|
45
|
+
// Combine all useful content with source attribution
|
46
|
+
const combinedContent = usefulData
|
47
|
+
.map((data) => `Content from ${data.url}:\n${data.content}`)
|
48
|
+
.join("\n\n");
|
49
|
+
|
50
|
+
const result = await summarize(combinedContent, openAIKey);
|
51
|
+
|
52
|
+
return {
|
53
|
+
summary: result.textual.trim(),
|
54
|
+
links: result.links,
|
55
|
+
sources: usefulData.map((data) => data.url),
|
56
|
+
};
|
57
|
+
}
|
58
|
+
|
59
|
+
// If no scraped data available, use OpenAI directly with the query
|
60
|
+
console.log("No scraped data available - using OpenAI directly for query...");
|
61
|
+
const result = await summarizeQuery(input, openAIKey);
|
62
|
+
|
37
63
|
return {
|
38
64
|
summary: result.textual.trim(),
|
39
65
|
links: result.links,
|
40
|
-
sources:
|
66
|
+
sources: ["OpenAI Knowledge Base"],
|
41
67
|
};
|
42
68
|
}
|
43
69
|
|
44
70
|
// Default export for easier importing
|
45
|
-
export default clai;
|
71
|
+
export default clai;
|
package/src/openai.ts
CHANGED
@@ -1,5 +1,5 @@
|
|
1
|
-
import { once } from
|
2
|
-
import OpenAI from
|
1
|
+
import { once } from "@fettstorch/jule";
|
2
|
+
import OpenAI from "openai";
|
3
3
|
|
4
4
|
const MAX_INPUT_TOKENS = 10000;
|
5
5
|
|
@@ -9,12 +9,6 @@ function truncateContent(content: string): string {
|
|
9
9
|
return content.slice(0, maxChars);
|
10
10
|
}
|
11
11
|
|
12
|
-
export interface StructuredResponse<T> {
|
13
|
-
function_call: {
|
14
|
-
arguments: string;
|
15
|
-
};
|
16
|
-
}
|
17
|
-
|
18
12
|
class OpenAIWrapper {
|
19
13
|
private client: OpenAI;
|
20
14
|
|
@@ -30,16 +24,16 @@ class OpenAIWrapper {
|
|
30
24
|
} = {}
|
31
25
|
): Promise<string> {
|
32
26
|
const truncatedPrompt = truncateContent(prompt);
|
33
|
-
const { model =
|
27
|
+
const { model = "gpt-4o", temperature = 0.6 } = options;
|
34
28
|
|
35
29
|
const response = await this.client.chat.completions.create({
|
36
30
|
model,
|
37
|
-
messages: [{ role:
|
31
|
+
messages: [{ role: "user", content: truncatedPrompt }],
|
38
32
|
temperature,
|
39
|
-
max_tokens: 2000
|
33
|
+
max_tokens: 2000,
|
40
34
|
});
|
41
35
|
|
42
|
-
return response.choices[0]?.message?.content ??
|
36
|
+
return response.choices[0]?.message?.content ?? "";
|
43
37
|
}
|
44
38
|
|
45
39
|
async completeStructured<T>(
|
@@ -52,41 +46,45 @@ class OpenAIWrapper {
|
|
52
46
|
}
|
53
47
|
): Promise<T> {
|
54
48
|
const truncatedPrompt = truncateContent(prompt);
|
55
|
-
const {
|
56
|
-
model =
|
49
|
+
const {
|
50
|
+
model = "gpt-4o",
|
57
51
|
temperature = 1.6,
|
58
|
-
functionName =
|
59
|
-
responseSchema
|
52
|
+
functionName = "generate_response",
|
53
|
+
responseSchema,
|
60
54
|
} = options;
|
61
55
|
|
62
56
|
const response = await this.client.chat.completions.create({
|
63
57
|
model,
|
64
|
-
messages: [{ role:
|
58
|
+
messages: [{ role: "user", content: truncatedPrompt }],
|
65
59
|
temperature,
|
66
60
|
max_tokens: 2000,
|
67
|
-
functions: [
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
61
|
+
functions: [
|
62
|
+
{
|
63
|
+
name: functionName,
|
64
|
+
parameters: {
|
65
|
+
type: "object",
|
66
|
+
properties: responseSchema,
|
67
|
+
required: Object.keys(responseSchema),
|
68
|
+
},
|
69
|
+
},
|
70
|
+
],
|
71
|
+
function_call: { name: functionName },
|
76
72
|
});
|
77
73
|
|
78
74
|
const functionCall = response.choices[0]?.message?.function_call;
|
79
75
|
if (!functionCall?.arguments) {
|
80
|
-
throw new Error(
|
76
|
+
throw new Error("No function call arguments received");
|
81
77
|
}
|
82
78
|
|
83
79
|
return JSON.parse(functionCall.arguments) as T;
|
84
80
|
}
|
85
81
|
}
|
86
82
|
|
87
|
-
export const openaiClient: (apiKey?: string) => OpenAIWrapper = once(
|
83
|
+
export const openaiClient: (apiKey?: string) => OpenAIWrapper = once(
|
84
|
+
(apiKey?: string) => {
|
88
85
|
if (!apiKey) {
|
89
|
-
|
86
|
+
throw new Error("OPENAI_API_KEY is not set");
|
90
87
|
}
|
91
|
-
return new OpenAIWrapper(apiKey)
|
92
|
-
}
|
88
|
+
return new OpenAIWrapper(apiKey);
|
89
|
+
}
|
90
|
+
);
|