@projectservan8n/cnapse 0.5.5 → 0.5.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -534,18 +534,99 @@ function ProviderSelector({ onClose, onSelect }) {
  // src/hooks/useChat.ts
  import { useState as useState3, useCallback, useRef, useEffect as useEffect2 } from "react";

+ // src/lib/system.ts
+ import os from "os";
+ import { exec as exec2 } from "child_process";
+ import { promisify as promisify2 } from "util";
+ var execAsync2 = promisify2(exec2);
+ var cachedSystemInfo = null;
+ async function getSystemInfo() {
+   if (cachedSystemInfo) return cachedSystemInfo;
+   const platform = os.platform();
+   const cpus = os.cpus();
+   let osName = platform;
+   const osVersion = os.release();
+   if (platform === "win32") {
+     try {
+       const { stdout } = await execAsync2("wmic os get Caption /value", { timeout: 5e3 });
+       const match = stdout.match(/Caption=(.+)/);
+       if (match) osName = match[1].trim();
+     } catch {
+       osName = `Windows ${osVersion}`;
+     }
+   } else if (platform === "darwin") {
+     try {
+       const { stdout } = await execAsync2("sw_vers -productName && sw_vers -productVersion", { timeout: 5e3 });
+       const lines = stdout.trim().split("\n");
+       osName = `${lines[0]} ${lines[1]}`;
+     } catch {
+       osName = `macOS ${osVersion}`;
+     }
+   } else if (platform === "linux") {
+     try {
+       const { stdout } = await execAsync2("cat /etc/os-release | grep PRETTY_NAME", { timeout: 5e3 });
+       const match = stdout.match(/PRETTY_NAME="(.+)"/);
+       if (match) osName = match[1];
+     } catch {
+       osName = `Linux ${osVersion}`;
+     }
+   }
+   cachedSystemInfo = {
+     platform,
+     osName,
+     osVersion,
+     arch: os.arch(),
+     cpuModel: cpus[0]?.model || "Unknown CPU",
+     cpuCores: cpus.length,
+     totalMemoryGB: Math.round(os.totalmem() / 1024 ** 3 * 10) / 10,
+     freeMemoryGB: Math.round(os.freemem() / 1024 ** 3 * 10) / 10,
+     username: os.userInfo().username,
+     hostname: os.hostname(),
+     homeDir: os.homedir(),
+     shell: process.env.SHELL || process.env.COMSPEC || "unknown"
+   };
+   return cachedSystemInfo;
+ }
+ async function getSystemContext() {
+   const info = await getSystemInfo();
+   return `SYSTEM INFO:
+ - OS: ${info.osName} (${info.arch})
+ - CPU: ${info.cpuModel} (${info.cpuCores} cores)
+ - RAM: ${info.totalMemoryGB}GB total, ${info.freeMemoryGB}GB free
+ - User: ${info.username}@${info.hostname}
+ - Home: ${info.homeDir}
+ - Shell: ${info.shell}`;
+ }
+ function getCwd() {
+   return process.cwd();
+ }
+
  // src/lib/api.ts
- var SYSTEM_PROMPT = `You are C-napse, a helpful AI assistant for PC automation running on the user's desktop.
- You can help with coding, file management, shell commands, and more. Be concise and helpful.
+ var BASE_PROMPT = `You are C-napse, an AI assistant for PC automation running on the user's desktop.
+ You have access to their system and can help with coding, file management, shell commands, and more.

  When responding:
  - Be direct and practical
  - Use markdown formatting for code blocks
- - If asked to do something, explain what you'll do first`;
+ - If asked to do something, explain what you'll do first
+ - Give commands specific to the user's OS (use the system info below)
+ - Be aware of the user's current working directory`;
+ var systemContextCache = null;
+ async function getSystemPrompt() {
+   if (!systemContextCache) {
+     systemContextCache = await getSystemContext();
+   }
+   const cwd = getCwd();
+   return `${BASE_PROMPT}
+
+ ${systemContextCache}
+ - Current directory: ${cwd}`;
+ }
  async function chat(messages, systemPrompt) {
    const config = getConfig();
+   const finalPrompt = systemPrompt || await getSystemPrompt();
    const allMessages = [
-     { role: "system", content: systemPrompt || SYSTEM_PROMPT },
+     { role: "system", content: finalPrompt },
      ...messages
    ];
    switch (config.provider) {
@@ -664,134 +745,163 @@ async function chatOpenAI(messages, model) {
    const content = data.choices?.[0]?.message?.content || "";
    return { content, model };
  }
-
- // src/lib/screen.ts
- import { exec as exec2 } from "child_process";
- import { promisify as promisify2 } from "util";
- var execAsync2 = promisify2(exec2);
- async function getScreenDescription() {
-   try {
-     const platform = process.platform;
-     if (platform === "win32") {
-       const { stdout } = await execAsync2(`
- Add-Type -AssemblyName System.Windows.Forms
- $screen = [System.Windows.Forms.Screen]::PrimaryScreen.Bounds
- Write-Output "$($screen.Width)x$($screen.Height)"
- `, { shell: "powershell.exe" });
-       return `Screen ${stdout.trim()} captured`;
-     } else if (platform === "darwin") {
-       const { stdout } = await execAsync2(`system_profiler SPDisplaysDataType | grep Resolution | head -1`);
-       return `Screen ${stdout.trim()}`;
-     } else {
-       const { stdout } = await execAsync2(`xdpyinfo | grep dimensions | awk '{print $2}'`);
-       return `Screen ${stdout.trim()} captured`;
+ async function chatWithVision(messages, screenshotBase64) {
+   const config = getConfig();
+   const systemPrompt = await getSystemPrompt();
+   const visionPrompt = systemPrompt + "\n\nYou can see the user's screen. Describe what you see and help them with their request.";
+   switch (config.provider) {
+     case "openrouter":
+       return chatWithVisionOpenRouter(messages, screenshotBase64, visionPrompt);
+     case "ollama":
+       return chatWithVisionOllama(messages, screenshotBase64, visionPrompt);
+     case "anthropic":
+       return chatWithVisionAnthropic(messages, screenshotBase64, visionPrompt);
+     case "openai":
+       return chatWithVisionOpenAI(messages, screenshotBase64, visionPrompt);
+     default:
+       throw new Error(`Vision not supported for provider: ${config.provider}`);
+   }
+ }
+ async function chatWithVisionOpenRouter(messages, screenshot, systemPrompt) {
+   const apiKey = getApiKey("openrouter");
+   if (!apiKey) throw new Error("OpenRouter API key not configured");
+   const config = getConfig();
+   let model = config.model;
+   if (!model.includes("gpt-5") && !model.includes("claude") && !model.includes("gemini")) {
+     model = "openai/gpt-5-nano";
+   }
+   const lastUserIdx = messages.length - 1;
+   const visionMessages = messages.map((m, i) => {
+     if (i === lastUserIdx && m.role === "user") {
+       return {
+         role: "user",
+         content: [
+           { type: "text", text: m.content },
+           { type: "image_url", image_url: { url: `data:image/png;base64,${screenshot}` } }
+         ]
+       };
      }
-   } catch {
-     return null;
+     return m;
+   });
+   const response = await fetch("https://openrouter.ai/api/v1/chat/completions", {
+     method: "POST",
+     headers: {
+       "Authorization": `Bearer ${apiKey}`,
+       "Content-Type": "application/json",
+       "HTTP-Referer": config.openrouter.siteUrl,
+       "X-Title": config.openrouter.appName
+     },
+     body: JSON.stringify({
+       model,
+       messages: [{ role: "system", content: systemPrompt }, ...visionMessages],
+       max_tokens: 2048
+     })
+   });
+   if (!response.ok) {
+     const error = await response.text();
+     throw new Error(`OpenRouter vision error: ${response.status} - ${error}`);
    }
+   const data = await response.json();
+   return { content: data.choices?.[0]?.message?.content || "", model };
  }
+ async function chatWithVisionOllama(messages, screenshot, systemPrompt) {
+   const config = getConfig();
+   const visionModels = ["llava", "llama3.2-vision", "bakllava"];
+   const model = visionModels.find((m) => config.model.includes(m)) || "llava";
+   const lastUserMsg = messages.filter((m) => m.role === "user").pop();
+   const response = await fetch(`${config.ollamaHost}/api/generate`, {
+     method: "POST",
+     headers: { "Content-Type": "application/json" },
+     body: JSON.stringify({
+       model,
+       prompt: `${systemPrompt}

- // src/hooks/useChat.ts
- var WELCOME_MESSAGE = {
-   id: "0",
-   role: "system",
-   content: "Welcome to C-napse! Type your message and press Enter.\n\nShortcuts: Ctrl+H for help, Ctrl+P for provider",
-   timestamp: /* @__PURE__ */ new Date()
- };
- function useChat(screenWatch = false) {
-   const [messages, setMessages] = useState3([WELCOME_MESSAGE]);
-   const [isProcessing, setIsProcessing] = useState3(false);
-   const [error, setError] = useState3(null);
-   const screenContextRef = useRef(null);
-   useEffect2(() => {
-     if (!screenWatch) {
-       screenContextRef.current = null;
-       return;
+ User: ${lastUserMsg?.content || "What do you see?"}`,
+       images: [screenshot],
+       stream: false
+     })
+   });
+   if (!response.ok) {
+     const error = await response.text();
+     throw new Error(`Ollama vision error: ${error}`);
+   }
+   const data = await response.json();
+   return { content: data.response || "", model };
+ }
+ async function chatWithVisionAnthropic(messages, screenshot, systemPrompt) {
+   const apiKey = getApiKey("anthropic");
+   if (!apiKey) throw new Error("Anthropic API key not configured");
+   const chatMessages = messages.filter((m) => m.role !== "system");
+   const lastUserIdx = chatMessages.length - 1;
+   const visionMessages = chatMessages.map((m, i) => {
+     if (i === lastUserIdx && m.role === "user") {
+       return {
+         role: "user",
+         content: [
+           { type: "image", source: { type: "base64", media_type: "image/png", data: screenshot } },
+           { type: "text", text: m.content }
+         ]
+       };
      }
-     const checkScreen = async () => {
-       const desc = await getScreenDescription();
-       if (desc) {
-         screenContextRef.current = desc;
-       }
-     };
-     checkScreen();
-     const interval = setInterval(checkScreen, 5e3);
-     return () => clearInterval(interval);
-   }, [screenWatch]);
-   const addSystemMessage = useCallback((content) => {
-     setMessages((prev) => [
-       ...prev,
-       {
-         id: Date.now().toString(),
-         role: "system",
-         content,
-         timestamp: /* @__PURE__ */ new Date()
-       }
-     ]);
-   }, []);
-   const sendMessage = useCallback(async (content) => {
-     if (!content.trim() || isProcessing) return;
-     setError(null);
-     const userMsg = {
-       id: Date.now().toString(),
-       role: "user",
-       content,
-       timestamp: /* @__PURE__ */ new Date()
-     };
-     const assistantId = (Date.now() + 1).toString();
-     const assistantMsg = {
-       id: assistantId,
-       role: "assistant",
-       content: "",
-       timestamp: /* @__PURE__ */ new Date(),
-       isStreaming: true
-     };
-     setMessages((prev) => [...prev, userMsg, assistantMsg]);
-     setIsProcessing(true);
-     try {
-       const apiMessages = messages.filter((m) => m.role === "user" || m.role === "assistant").slice(-10).map((m) => ({ role: m.role, content: m.content }));
-       let finalContent = content;
-       if (screenWatch && screenContextRef.current) {
-         finalContent = `[Screen context: ${screenContextRef.current}]
-
- ${content}`;
-       }
-       apiMessages.push({ role: "user", content: finalContent });
-       const response = await chat(apiMessages);
-       setMessages(
-         (prev) => prev.map(
-           (m) => m.id === assistantId ? { ...m, content: response.content || "(no response)", isStreaming: false } : m
-         )
-       );
-     } catch (err2) {
-       const errorMsg = err2 instanceof Error ? err2.message : "Unknown error";
-       setError(errorMsg);
-       setMessages(
-         (prev) => prev.map(
-           (m) => m.id === assistantId ? { ...m, content: `Error: ${errorMsg}`, isStreaming: false } : m
-         )
-       );
-     } finally {
-       setIsProcessing(false);
+     return { role: m.role, content: m.content };
+   });
+   const response = await fetch("https://api.anthropic.com/v1/messages", {
+     method: "POST",
+     headers: {
+       "x-api-key": apiKey,
+       "anthropic-version": "2023-06-01",
+       "Content-Type": "application/json"
+     },
+     body: JSON.stringify({
+       model: "claude-3-5-sonnet-20241022",
+       max_tokens: 2048,
+       system: systemPrompt,
+       messages: visionMessages
+     })
+   });
+   if (!response.ok) {
+     const error = await response.text();
+     throw new Error(`Anthropic vision error: ${error}`);
+   }
+   const data = await response.json();
+   return { content: data.content?.[0]?.text || "", model: "claude-3-5-sonnet-20241022" };
+ }
+ async function chatWithVisionOpenAI(messages, screenshot, systemPrompt) {
+   const apiKey = getApiKey("openai");
+   if (!apiKey) throw new Error("OpenAI API key not configured");
+   const lastUserIdx = messages.length - 1;
+   const visionMessages = messages.map((m, i) => {
+     if (i === lastUserIdx && m.role === "user") {
+       return {
+         role: "user",
+         content: [
+           { type: "text", text: m.content },
+           { type: "image_url", image_url: { url: `data:image/png;base64,${screenshot}` } }
+         ]
+       };
      }
-   }, [messages, isProcessing, screenWatch]);
-   const clearMessages = useCallback(() => {
-     setMessages([WELCOME_MESSAGE]);
-     setError(null);
-   }, []);
-   return {
-     messages,
-     isProcessing,
-     error,
-     sendMessage,
-     addSystemMessage,
-     clearMessages
-   };
+     return m;
+   });
+   const response = await fetch("https://api.openai.com/v1/chat/completions", {
+     method: "POST",
+     headers: {
+       "Authorization": `Bearer ${apiKey}`,
+       "Content-Type": "application/json"
+     },
+     body: JSON.stringify({
+       model: "gpt-4o",
+       messages: [{ role: "system", content: systemPrompt }, ...visionMessages],
+       max_tokens: 2048
+     })
+   });
+   if (!response.ok) {
+     const error = await response.text();
+     throw new Error(`OpenAI vision error: ${error}`);
+   }
+   const data = await response.json();
+   return { content: data.choices?.[0]?.message?.content || "", model: "gpt-4o" };
  }

- // src/hooks/useVision.ts
- import { useState as useState4, useCallback as useCallback2 } from "react";
-
  // src/lib/vision.ts
  async function describeScreen() {
    const screenshot = await captureScreenshot();
@@ -996,7 +1106,98 @@ async function analyzeWithOpenAI(base64Image, prompt) {
    return data.choices?.[0]?.message?.content || "Unable to analyze image";
  }

+ // src/hooks/useChat.ts
+ var WELCOME_MESSAGE = {
+   id: "0",
+   role: "system",
+   content: "Welcome to C-napse! Type your message and press Enter.\n\nShortcuts: Ctrl+H for help, Ctrl+P for provider",
+   timestamp: /* @__PURE__ */ new Date()
+ };
+ function useChat(screenWatch = false) {
+   const [messages, setMessages] = useState3([WELCOME_MESSAGE]);
+   const [isProcessing, setIsProcessing] = useState3(false);
+   const [error, setError] = useState3(null);
+   const screenWatchRef = useRef(screenWatch);
+   useEffect2(() => {
+     screenWatchRef.current = screenWatch;
+   }, [screenWatch]);
+   const addSystemMessage = useCallback((content) => {
+     setMessages((prev) => [
+       ...prev,
+       {
+         id: Date.now().toString(),
+         role: "system",
+         content,
+         timestamp: /* @__PURE__ */ new Date()
+       }
+     ]);
+   }, []);
+   const sendMessage = useCallback(async (content) => {
+     if (!content.trim() || isProcessing) return;
+     setError(null);
+     const userMsg = {
+       id: Date.now().toString(),
+       role: "user",
+       content,
+       timestamp: /* @__PURE__ */ new Date()
+     };
+     const assistantId = (Date.now() + 1).toString();
+     const assistantMsg = {
+       id: assistantId,
+       role: "assistant",
+       content: "",
+       timestamp: /* @__PURE__ */ new Date(),
+       isStreaming: true
+     };
+     setMessages((prev) => [...prev, userMsg, assistantMsg]);
+     setIsProcessing(true);
+     try {
+       const apiMessages = messages.filter((m) => m.role === "user" || m.role === "assistant").slice(-10).map((m) => ({ role: m.role, content: m.content }));
+       apiMessages.push({ role: "user", content });
+       let response;
+       if (screenWatchRef.current) {
+         const screenshot = await captureScreenshot();
+         if (screenshot) {
+           response = await chatWithVision(apiMessages, screenshot);
+         } else {
+           response = await chat(apiMessages);
+         }
+       } else {
+         response = await chat(apiMessages);
+       }
+       setMessages(
+         (prev) => prev.map(
+           (m) => m.id === assistantId ? { ...m, content: response.content || "(no response)", isStreaming: false } : m
+         )
+       );
+     } catch (err2) {
+       const errorMsg = err2 instanceof Error ? err2.message : "Unknown error";
+       setError(errorMsg);
+       setMessages(
+         (prev) => prev.map(
+           (m) => m.id === assistantId ? { ...m, content: `Error: ${errorMsg}`, isStreaming: false } : m
+         )
+       );
+     } finally {
+       setIsProcessing(false);
+     }
+   }, [messages, isProcessing]);
+   const clearMessages = useCallback(() => {
+     setMessages([WELCOME_MESSAGE]);
+     setError(null);
+   }, []);
+   return {
+     messages,
+     isProcessing,
+     error,
+     sendMessage,
+     addSystemMessage,
+     clearMessages
+   };
+ }
+
  // src/hooks/useVision.ts
+ import { useState as useState4, useCallback as useCallback2 } from "react";
  function useVision() {
    const [isAnalyzing, setIsAnalyzing] = useState4(false);
    const [lastDescription, setLastDescription] = useState4(null);
@@ -1539,8 +1740,8 @@ import { useState as useState6, useCallback as useCallback4 } from "react";
  // src/lib/tasks.ts
  import * as fs from "fs";
  import * as path from "path";
- import * as os from "os";
- var TASK_MEMORY_FILE = path.join(os.homedir(), ".cnapse", "task-memory.json");
+ import * as os2 from "os";
+ var TASK_MEMORY_FILE = path.join(os2.homedir(), ".cnapse", "task-memory.json");
  function loadTaskMemory() {
    try {
      if (fs.existsSync(TASK_MEMORY_FILE)) {
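
The net effect of the bundled changes: the 5-second getScreenDescription poller is gone, and screen awareness now flows through captureScreenshot plus the new chatWithVision dispatch, while chat() picks up an OS-aware system prompt by default. A minimal TypeScript sketch of the new call flow, assuming the compiled exports behave as above (the ask helper and its import paths are illustrative, not part of the package):

    import { chat, chatWithVision } from './lib/api.js';
    import { captureScreenshot } from './lib/vision.js';

    // Illustrative helper: mirrors the fallback logic in useChat's sendMessage.
    async function ask(question: string, withScreen: boolean) {
      const messages = [{ role: 'user' as const, content: question }];
      if (withScreen) {
        const screenshot = await captureScreenshot(); // base64 PNG, or null on failure
        if (screenshot) return chatWithVision(messages, screenshot);
      }
      return chat(messages); // text-only chat with the system-aware prompt
    }
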
package/package.json CHANGED
@@ -1,6 +1,6 @@
  {
    "name": "@projectservan8n/cnapse",
-   "version": "0.5.5",
+   "version": "0.5.7",
    "description": "Autonomous PC intelligence - AI assistant for desktop automation",
    "type": "module",
    "main": "dist/index.js",
package/src/hooks/useChat.ts CHANGED
@@ -3,8 +3,8 @@
  */

  import { useState, useCallback, useRef, useEffect } from 'react';
- import { chat, Message } from '../lib/api.js';
- import { getScreenDescription } from '../lib/screen.js';
+ import { chat, chatWithVision, Message } from '../lib/api.js';
+ import { captureScreenshot } from '../lib/vision.js';

  export interface ChatMessage {
    id: string;
@@ -34,25 +34,11 @@ export function useChat(screenWatch: boolean = false): UseChatResult {
    const [messages, setMessages] = useState<ChatMessage[]>([WELCOME_MESSAGE]);
    const [isProcessing, setIsProcessing] = useState(false);
    const [error, setError] = useState<string | null>(null);
-   const screenContextRef = useRef<string | null>(null);
+   const screenWatchRef = useRef(screenWatch);

-   // Screen watching effect
+   // Keep ref in sync with prop
    useEffect(() => {
-     if (!screenWatch) {
-       screenContextRef.current = null;
-       return;
-     }
-
-     const checkScreen = async () => {
-       const desc = await getScreenDescription();
-       if (desc) {
-         screenContextRef.current = desc;
-       }
-     };
-
-     checkScreen();
-     const interval = setInterval(checkScreen, 5000);
-     return () => clearInterval(interval);
+     screenWatchRef.current = screenWatch;
    }, [screenWatch]);

    const addSystemMessage = useCallback((content: string) => {
@@ -100,16 +86,23 @@ export function useChat(screenWatch: boolean = false): UseChatResult {
        .slice(-10)
        .map(m => ({ role: m.role as 'user' | 'assistant', content: m.content }));

-     // Add screen context if watching
-     let finalContent = content;
-     if (screenWatch && screenContextRef.current) {
-       finalContent = `[Screen context: ${screenContextRef.current}]\n\n${content}`;
+     apiMessages.push({ role: 'user', content });
+
+     let response;
+
+     // If screen watching is enabled, capture screenshot and use vision API
+     if (screenWatchRef.current) {
+       const screenshot = await captureScreenshot();
+       if (screenshot) {
+         response = await chatWithVision(apiMessages, screenshot);
+       } else {
+         // Fallback to regular chat if screenshot fails
+         response = await chat(apiMessages);
+       }
+     } else {
+       response = await chat(apiMessages);
      }

-     apiMessages.push({ role: 'user', content: finalContent });
-
-     const response = await chat(apiMessages);
-
      // Update assistant message
      setMessages(prev =>
        prev.map(m =>
@@ -131,7 +124,7 @@ export function useChat(screenWatch: boolean = false): UseChatResult {
    } finally {
      setIsProcessing(false);
    }
-   }, [messages, isProcessing, screenWatch]);
+   }, [messages, isProcessing]);

    const clearMessages = useCallback(() => {
      setMessages([WELCOME_MESSAGE]);
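
With the poller removed, screenWatch is only read at send time through screenWatchRef, so consumers of the hook just pass the flag. A usage sketch under those assumptions (the ChatPane component is hypothetical):

    import { useChat } from './hooks/useChat.js';

    // Hypothetical consumer: toggling screenWatch switches sendMessage between
    // the text-only chat() path and the screenshot-backed chatWithVision() path.
    function ChatPane({ screenWatch }: { screenWatch: boolean }) {
      const { messages, isProcessing, error, sendMessage } = useChat(screenWatch);
      // render `messages`, disable input while `isProcessing`, surface `error`,
      // and call sendMessage(text) on submit
      return null;
    }
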
package/src/lib/api.ts CHANGED
@@ -1,4 +1,5 @@
  import { getConfig, getApiKey } from './config.js';
+ import { getSystemContext, getCwd } from './system.js';

  export interface Message {
    role: 'system' | 'user' | 'assistant';
@@ -10,19 +11,36 @@ export interface ChatResponse {
    model: string;
  }

- const SYSTEM_PROMPT = `You are C-napse, a helpful AI assistant for PC automation running on the user's desktop.
- You can help with coding, file management, shell commands, and more. Be concise and helpful.
+ const BASE_PROMPT = `You are C-napse, an AI assistant for PC automation running on the user's desktop.
+ You have access to their system and can help with coding, file management, shell commands, and more.

  When responding:
  - Be direct and practical
  - Use markdown formatting for code blocks
- - If asked to do something, explain what you'll do first`;
+ - If asked to do something, explain what you'll do first
+ - Give commands specific to the user's OS (use the system info below)
+ - Be aware of the user's current working directory`;
+
+ // Cache system context to avoid repeated calls
+ let systemContextCache: string | null = null;
+
+ async function getSystemPrompt(): Promise<string> {
+   if (!systemContextCache) {
+     systemContextCache = await getSystemContext();
+   }
+   const cwd = getCwd();
+   return `${BASE_PROMPT}
+
+ ${systemContextCache}
+ - Current directory: ${cwd}`;
+ }

  export async function chat(messages: Message[], systemPrompt?: string): Promise<ChatResponse> {
    const config = getConfig();
+   const finalPrompt = systemPrompt || await getSystemPrompt();

    const allMessages: Message[] = [
-     { role: 'system', content: systemPrompt || SYSTEM_PROMPT },
+     { role: 'system', content: finalPrompt },
      ...messages,
    ];

@@ -166,6 +184,194 @@ async function chatOpenAI(messages: Message[], model: string): Promise<ChatRespo
    return { content, model };
  }

+ /**
+  * Chat with vision - sends screenshot along with messages
+  */
+ export async function chatWithVision(messages: Message[], screenshotBase64: string): Promise<ChatResponse> {
+   const config = getConfig();
+   const systemPrompt = await getSystemPrompt();
+
+   // Add vision context to system prompt
+   const visionPrompt = systemPrompt + '\n\nYou can see the user\'s screen. Describe what you see and help them with their request.';
+
+   switch (config.provider) {
+     case 'openrouter':
+       return chatWithVisionOpenRouter(messages, screenshotBase64, visionPrompt);
+     case 'ollama':
+       return chatWithVisionOllama(messages, screenshotBase64, visionPrompt);
+     case 'anthropic':
+       return chatWithVisionAnthropic(messages, screenshotBase64, visionPrompt);
+     case 'openai':
+       return chatWithVisionOpenAI(messages, screenshotBase64, visionPrompt);
+     default:
+       throw new Error(`Vision not supported for provider: ${config.provider}`);
+   }
+ }
+
+ async function chatWithVisionOpenRouter(messages: Message[], screenshot: string, systemPrompt: string): Promise<ChatResponse> {
+   const apiKey = getApiKey('openrouter');
+   if (!apiKey) throw new Error('OpenRouter API key not configured');
+
+   const config = getConfig();
+
+   // Use vision-capable model - prefer GPT-5 Nano or Claude
+   let model = config.model;
+   if (!model.includes('gpt-5') && !model.includes('claude') && !model.includes('gemini')) {
+     model = 'openai/gpt-5-nano'; // Default to GPT-5 Nano for vision
+   }
+
+   // Build messages with image in the last user message
+   const lastUserIdx = messages.length - 1;
+   const visionMessages = messages.map((m, i) => {
+     if (i === lastUserIdx && m.role === 'user') {
+       return {
+         role: 'user',
+         content: [
+           { type: 'text', text: m.content },
+           { type: 'image_url', image_url: { url: `data:image/png;base64,${screenshot}` } },
+         ],
+       };
+     }
+     return m;
+   });
+
+   const response = await fetch('https://openrouter.ai/api/v1/chat/completions', {
+     method: 'POST',
+     headers: {
+       'Authorization': `Bearer ${apiKey}`,
+       'Content-Type': 'application/json',
+       'HTTP-Referer': config.openrouter.siteUrl,
+       'X-Title': config.openrouter.appName,
+     },
+     body: JSON.stringify({
+       model,
+       messages: [{ role: 'system', content: systemPrompt }, ...visionMessages],
+       max_tokens: 2048,
+     }),
+   });
+
+   if (!response.ok) {
+     const error = await response.text();
+     throw new Error(`OpenRouter vision error: ${response.status} - ${error}`);
+   }
+
+   const data = await response.json() as any;
+   return { content: data.choices?.[0]?.message?.content || '', model };
+ }
+
+ async function chatWithVisionOllama(messages: Message[], screenshot: string, systemPrompt: string): Promise<ChatResponse> {
+   const config = getConfig();
+
+   // Use vision model
+   const visionModels = ['llava', 'llama3.2-vision', 'bakllava'];
+   const model = visionModels.find(m => config.model.includes(m)) || 'llava';
+
+   const lastUserMsg = messages.filter(m => m.role === 'user').pop();
+
+   const response = await fetch(`${config.ollamaHost}/api/generate`, {
+     method: 'POST',
+     headers: { 'Content-Type': 'application/json' },
+     body: JSON.stringify({
+       model,
+       prompt: `${systemPrompt}\n\nUser: ${lastUserMsg?.content || 'What do you see?'}`,
+       images: [screenshot],
+       stream: false,
+     }),
+   });
+
+   if (!response.ok) {
+     const error = await response.text();
+     throw new Error(`Ollama vision error: ${error}`);
+   }
+
+   const data = await response.json() as any;
+   return { content: data.response || '', model };
+ }
+
+ async function chatWithVisionAnthropic(messages: Message[], screenshot: string, systemPrompt: string): Promise<ChatResponse> {
+   const apiKey = getApiKey('anthropic');
+   if (!apiKey) throw new Error('Anthropic API key not configured');
+
+   const chatMessages = messages.filter(m => m.role !== 'system');
+   const lastUserIdx = chatMessages.length - 1;
+
+   const visionMessages = chatMessages.map((m, i) => {
+     if (i === lastUserIdx && m.role === 'user') {
+       return {
+         role: 'user',
+         content: [
+           { type: 'image', source: { type: 'base64', media_type: 'image/png', data: screenshot } },
+           { type: 'text', text: m.content },
+         ],
+       };
+     }
+     return { role: m.role, content: m.content };
+   });
+
+   const response = await fetch('https://api.anthropic.com/v1/messages', {
+     method: 'POST',
+     headers: {
+       'x-api-key': apiKey,
+       'anthropic-version': '2023-06-01',
+       'Content-Type': 'application/json',
+     },
+     body: JSON.stringify({
+       model: 'claude-3-5-sonnet-20241022',
+       max_tokens: 2048,
+       system: systemPrompt,
+       messages: visionMessages,
+     }),
+   });
+
+   if (!response.ok) {
+     const error = await response.text();
+     throw new Error(`Anthropic vision error: ${error}`);
+   }
+
+   const data = await response.json() as any;
+   return { content: data.content?.[0]?.text || '', model: 'claude-3-5-sonnet-20241022' };
+ }
+
+ async function chatWithVisionOpenAI(messages: Message[], screenshot: string, systemPrompt: string): Promise<ChatResponse> {
+   const apiKey = getApiKey('openai');
+   if (!apiKey) throw new Error('OpenAI API key not configured');
+
+   const lastUserIdx = messages.length - 1;
+   const visionMessages = messages.map((m, i) => {
+     if (i === lastUserIdx && m.role === 'user') {
+       return {
+         role: 'user',
+         content: [
+           { type: 'text', text: m.content },
+           { type: 'image_url', image_url: { url: `data:image/png;base64,${screenshot}` } },
+         ],
+       };
+     }
+     return m;
+   });
+
+   const response = await fetch('https://api.openai.com/v1/chat/completions', {
+     method: 'POST',
+     headers: {
+       'Authorization': `Bearer ${apiKey}`,
+       'Content-Type': 'application/json',
+     },
+     body: JSON.stringify({
+       model: 'gpt-4o',
+       messages: [{ role: 'system', content: systemPrompt }, ...visionMessages],
+       max_tokens: 2048,
+     }),
+   });
+
+   if (!response.ok) {
+     const error = await response.text();
+     throw new Error(`OpenAI vision error: ${error}`);
+   }
+
+   const data = await response.json() as any;
+   return { content: data.choices?.[0]?.message?.content || '', model: 'gpt-4o' };
+ }
+
  export async function testConnection(): Promise<boolean> {
    try {
      await chat([{ role: 'user', content: 'hi' }]);
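
One behavioral note on chat(): with no explicit systemPrompt argument it now awaits the OS-aware prompt (BASE_PROMPT plus system info and cwd) instead of substituting a static constant, and the module-level systemContextCache means the system probe runs once per process. A quick sketch of both call styles (the import path assumes a caller inside src/lib):

    import { chat } from './api.js';

    // Default: system prompt is BASE_PROMPT + SYSTEM INFO block + current directory.
    await chat([{ role: 'user', content: 'list my home directory' }]);

    // An explicit prompt still overrides the generated one entirely.
    await chat([{ role: 'user', content: 'hi' }], 'You are a terse assistant.');
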
package/src/lib/system.ts ADDED
@@ -0,0 +1,105 @@
+ /**
+  * System information utilities
+  */
+
+ import os from 'os';
+ import { exec } from 'child_process';
+ import { promisify } from 'util';
+
+ const execAsync = promisify(exec);
+
+ export interface SystemInfo {
+   platform: string;
+   osName: string;
+   osVersion: string;
+   arch: string;
+   cpuModel: string;
+   cpuCores: number;
+   totalMemoryGB: number;
+   freeMemoryGB: number;
+   username: string;
+   hostname: string;
+   homeDir: string;
+   shell: string;
+ }
+
+ let cachedSystemInfo: SystemInfo | null = null;
+
+ /**
+  * Get detailed system information
+  */
+ export async function getSystemInfo(): Promise<SystemInfo> {
+   if (cachedSystemInfo) return cachedSystemInfo;
+
+   const platform = os.platform();
+   const cpus = os.cpus();
+
+   let osName: string = platform;
+   const osVersion = os.release();
+
+   // Get friendly OS name
+   if (platform === 'win32') {
+     try {
+       const { stdout } = await execAsync('wmic os get Caption /value', { timeout: 5000 });
+       const match = stdout.match(/Caption=(.+)/);
+       if (match) osName = match[1].trim();
+     } catch {
+       osName = `Windows ${osVersion}`;
+     }
+   } else if (platform === 'darwin') {
+     try {
+       const { stdout } = await execAsync('sw_vers -productName && sw_vers -productVersion', { timeout: 5000 });
+       const lines = stdout.trim().split('\n');
+       osName = `${lines[0]} ${lines[1]}`;
+     } catch {
+       osName = `macOS ${osVersion}`;
+     }
+   } else if (platform === 'linux') {
+     try {
+       const { stdout } = await execAsync('cat /etc/os-release | grep PRETTY_NAME', { timeout: 5000 });
+       const match = stdout.match(/PRETTY_NAME="(.+)"/);
+       if (match) osName = match[1];
+     } catch {
+       osName = `Linux ${osVersion}`;
+     }
+   }
+
+   cachedSystemInfo = {
+     platform,
+     osName,
+     osVersion,
+     arch: os.arch(),
+     cpuModel: cpus[0]?.model || 'Unknown CPU',
+     cpuCores: cpus.length,
+     totalMemoryGB: Math.round(os.totalmem() / (1024 ** 3) * 10) / 10,
+     freeMemoryGB: Math.round(os.freemem() / (1024 ** 3) * 10) / 10,
+     username: os.userInfo().username,
+     hostname: os.hostname(),
+     homeDir: os.homedir(),
+     shell: process.env.SHELL || process.env.COMSPEC || 'unknown',
+   };
+
+   return cachedSystemInfo;
+ }
+
+ /**
+  * Get a formatted system context string for AI prompts
+  */
+ export async function getSystemContext(): Promise<string> {
+   const info = await getSystemInfo();
+
+   return `SYSTEM INFO:
+ - OS: ${info.osName} (${info.arch})
+ - CPU: ${info.cpuModel} (${info.cpuCores} cores)
+ - RAM: ${info.totalMemoryGB}GB total, ${info.freeMemoryGB}GB free
+ - User: ${info.username}@${info.hostname}
+ - Home: ${info.homeDir}
+ - Shell: ${info.shell}`;
+ }
+
+ /**
+  * Get current working directory
+  */
+ export function getCwd(): string {
+   return process.cwd();
+ }
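
For reference, a small sketch of consuming the new module directly; results are cached after the first call, and the example values in the comments are illustrative:

    import { getSystemInfo, getSystemContext, getCwd } from './system.js';

    const info = await getSystemInfo();       // probes the OS once, then caches
    console.log(info.osName, info.cpuCores);  // e.g. "Ubuntu 22.04.4 LTS" 8
    console.log(await getSystemContext());    // the formatted SYSTEM INFO block
    console.log(getCwd());                    // same as process.cwd()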