anveesa 0.5.0 → 0.5.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/Cargo.lock CHANGED
@@ -60,7 +60,7 @@ dependencies = [
60
60
 
61
61
  [[package]]
62
62
  name = "anveesa"
63
- version = "0.5.0"
63
+ version = "0.5.1"
64
64
  dependencies = [
65
65
  "anyhow",
66
66
  "base64",
package/Cargo.toml CHANGED
@@ -1,6 +1,6 @@
1
1
  [package]
2
2
  name = "anveesa"
3
- version = "0.5.0"
3
+ version = "0.5.1"
4
4
  edition = "2024"
5
5
  default-run = "anveesa"
6
6
 
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "anveesa",
3
- "version": "0.5.0",
3
+ "version": "0.5.1",
4
4
  "description": "A terminal CLI that wraps AI providers (OpenAI-compatible APIs and local CLIs) into a single unified command",
5
5
  "main": "bin/anveesa.js",
6
6
  "bin": {
package/src/tools.rs CHANGED
@@ -258,12 +258,14 @@ pub fn definitions(include_write: bool) -> Vec<Value> {
258
258
  "type": "function",
259
259
  "function": {
260
260
  "name": "fetch_url",
261
- "description": "Fetch the content of a URL and return it as plain text. Strips HTML tags automatically.",
261
+ "description": "Fetch a URL. mode=\"text\" (default): returns plain text with HTML tags stripped. mode=\"raw\": returns the full HTML source unchanged. mode=\"deep\": returns HTML source PLUS the full content of every linked CSS file (and JS bundles if include_js=true) in one call use this when you need to inspect design tokens, Tailwind classes, color variables, font imports, or component structure without multiple round-trips.",
262
262
  "parameters": {
263
263
  "type": "object",
264
264
  "properties": {
265
265
  "url": { "type": "string", "description": "URL to fetch." },
266
- "max_chars": { "type": "integer", "description": "Max characters to return (default 40000)." }
266
+ "mode": { "type": "string", "description": "\"text\" (default, strips HTML), \"raw\" (full HTML source), \"deep\" (HTML source + fetch all linked CSS assets, and JS if include_js=true)." },
267
+ "max_chars": { "type": "integer", "description": "Max chars per resource (default 40000 for text, 60000 for raw/deep HTML, 30000 per asset)." },
268
+ "include_js": { "type": "boolean", "description": "deep mode only — also fetch linked JS bundles (default false; bundles can be large)." }
267
269
  },
268
270
  "required": ["url"]
269
271
  }
@@ -987,6 +989,84 @@ fn scrape_ddg_html(html: &str, max: usize) -> Vec<Value> {
987
989
  results
988
990
  }
989
991
 
992
/// Returns the value of attribute `attr` found inside a single HTML tag.
///
/// `tag` is the raw text of one tag (for example `<link rel="stylesheet" href="a.css">`)
/// and `attr` is the attribute name to look up, compared case-sensitively.
///
/// The name only matches at an attribute boundary (start of the string, or right after
/// whitespace, `/`, or a closing quote), so looking up `href` no longer matches inside
/// `data-href`. Double-quoted, single-quoted, and unquoted values are accepted, with
/// optional whitespace around `=`.
///
/// Returns `None` when `attr` is empty or no well-formed occurrence exists.
/// This is a lightweight scan, not a full HTML tokenizer: text that looks like an
/// attribute inside another attribute's quoted value can still match.
fn tag_attr(tag: &str, attr: &str) -> Option<String> {
    if attr.is_empty() {
        return None;
    }
    let bytes = tag.as_bytes();
    for (idx, _) in tag.match_indices(attr) {
        // Reject matches that are merely the tail of a longer name such as `data-href`.
        let at_boundary = idx == 0
            || matches!(
                bytes[idx - 1],
                b' ' | b'\t' | b'\n' | b'\r' | b'\x0c' | b'/' | b'"' | b'\''
            );
        if !at_boundary {
            continue;
        }
        // Anything other than `=` here means we hit a longer name (e.g. `async` for `as`).
        let after_name = tag[idx + attr.len()..].trim_start();
        let Some(after_eq) = after_name.strip_prefix('=') else {
            continue;
        };
        let raw = after_eq.trim_start();
        let value = match raw.chars().next() {
            Some(quote @ ('"' | '\'')) => {
                let inner = &raw[1..];
                let Some(end) = inner.find(quote) else {
                    continue;
                };
                &inner[..end]
            }
            Some(_) => {
                // Unquoted value: runs until whitespace or the end of the tag.
                let end = raw
                    .find(|c: char| c.is_ascii_whitespace() || c == '>')
                    .unwrap_or(raw.len());
                &raw[..end]
            }
            None => continue,
        };
        return Some(value.to_string());
    }
    None
}
1005
+
1006
/// Returns the `scheme://host[:port]` origin of an `http://` or `https://` URL.
///
/// The authority ends at the first `/`, `?`, or `#`, so a URL without a path such as
/// `https://example.com?x=1` still yields `https://example.com`.
/// Any URL that does not start with `http://` or `https://` yields an empty string.
fn url_origin(url: &str) -> String {
    let (scheme, rest) = if let Some(rest) = url.strip_prefix("https://") {
        ("https", rest)
    } else if let Some(rest) = url.strip_prefix("http://") {
        ("http", rest)
    } else {
        return String::new();
    };
    // The authority is terminated by the path, the query, or the fragment.
    let end = rest
        .find(|c: char| matches!(c, '/' | '?' | '#'))
        .unwrap_or(rest.len());
    format!("{scheme}://{}", &rest[..end])
}
1012
+
1013
/// Returns the directory part of a URL's path, always ending in `/`.
///
/// For `https://ex.com/a/b.html` this is `/a/`. The query string and fragment are
/// removed first, so a `/` inside them (e.g. `?next=/c/d`) cannot shift the result.
/// URLs without a path, and URLs that do not start with `http://` or `https://`,
/// yield `/`.
fn url_base_path(url: &str) -> String {
    let Some(rest) = url
        .strip_prefix("https://")
        .or_else(|| url.strip_prefix("http://"))
    else {
        return "/".to_string();
    };
    // Only the path takes part in relative resolution; drop `?query` and `#fragment`.
    let end = rest
        .find(|c: char| matches!(c, '?' | '#'))
        .unwrap_or(rest.len());
    let rest = &rest[..end];
    match rest.find('/') {
        Some(first_slash) => {
            let path = &rest[first_slash..];
            // `path` starts with '/', so `rfind` always succeeds; 0 is a defensive default.
            let last_slash = path.rfind('/').unwrap_or(0);
            path[..=last_slash].to_string()
        }
        None => "/".to_string(),
    }
}
1019
+
1020
/// Resolves an asset reference (`href` / `src`) found in a page into an absolute URL.
///
/// * `href` – the raw attribute value; surrounding whitespace is ignored.
/// * `origin` – `scheme://host` of the page, or an empty string when unknown.
/// * `base_path` – directory of the page's path, expected to start and end with `/`.
///
/// Resolution rules:
/// * `http://…` / `https://…` (scheme compared case-insensitively) is returned unchanged;
/// * any other scheme (`data:`, `blob:`, `javascript:`, …) is not fetchable over HTTP
///   and yields `None` instead of being glued onto the page URL;
/// * `//host/…` inherits the page's scheme (`http` when the origin is not https);
/// * `/path` is joined to `origin`;
/// * anything else is joined to `origin` + `base_path`.
///
/// Returns `None` for an empty reference, or for a relative one when `origin` is empty.
fn resolve_asset_url(href: &str, origin: &str, base_path: &str) -> Option<String> {
    let h = href.trim();
    if h.is_empty() {
        return None;
    }

    // A leading `scheme:` (ALPHA followed by ALPHA / DIGIT / "+" / "-" / ".") marks an
    // absolute reference. Only http(s) ones can be downloaded by the caller.
    let scheme_end = h.find(|c: char| !(c.is_ascii_alphanumeric() || matches!(c, '+' | '-' | '.')));
    if let Some(i) = scheme_end {
        if i > 0 && h.as_bytes()[0].is_ascii_alphabetic() && h[i..].starts_with(':') {
            let scheme = &h[..i];
            let is_http =
                scheme.eq_ignore_ascii_case("http") || scheme.eq_ignore_ascii_case("https");
            return if is_http && h[i..].starts_with("://") {
                Some(h.to_string())
            } else {
                None
            };
        }
    }

    if h.starts_with("//") {
        // Protocol-relative reference: reuse the page's scheme.
        let scheme = if origin.starts_with("https") { "https" } else { "http" };
        return Some(format!("{scheme}:{h}"));
    }
    if origin.is_empty() {
        return None;
    }
    if h.starts_with('/') {
        Some(format!("{origin}{h}"))
    } else {
        Some(format!("{origin}{base_path}{h}"))
    }
}
1036
+
1037
+ fn extract_asset_urls(html: &str, base_url: &str, include_js: bool) -> Vec<String> {
1038
+ let origin = url_origin(base_url);
1039
+ let base_path = url_base_path(base_url);
1040
+ let mut urls: Vec<String> = Vec::new();
1041
+ let mut pos = 0;
1042
+
1043
+ while pos < html.len() {
1044
+ let Some(lt) = html[pos..].find('<') else { break };
1045
+ let abs = pos + lt;
1046
+ let Some(gt) = html[abs..].find('>') else { break };
1047
+ let tag = &html[abs..abs + gt + 1];
1048
+ let tag_lo = tag.to_lowercase();
1049
+ pos = abs + gt + 1;
1050
+
1051
+ let href = if tag_lo.starts_with("<link") {
1052
+ let rel = tag_attr(&tag_lo, "rel").unwrap_or_default();
1053
+ let as_ = tag_attr(&tag_lo, "as").unwrap_or_default();
1054
+ if rel == "stylesheet" || (rel == "preload" && as_ == "style") {
1055
+ tag_attr(tag, "href").or_else(|| tag_attr(&tag_lo, "href"))
1056
+ } else { None }
1057
+ } else if include_js && tag_lo.starts_with("<script") {
1058
+ tag_attr(tag, "src").or_else(|| tag_attr(&tag_lo, "src"))
1059
+ } else { None };
1060
+
1061
+ if let Some(h) = href {
1062
+ if let Some(resolved) = resolve_asset_url(&h, &origin, &base_path) {
1063
+ if !urls.contains(&resolved) { urls.push(resolved); }
1064
+ }
1065
+ }
1066
+ }
1067
+ urls
1068
+ }
1069
+
990
1070
  fn extract_attr<'a>(html: &'a str, attr: &str) -> Option<&'a str> {
991
1071
  let key = format!("{attr}=\"");
992
1072
  let start = html.find(&key)? + key.len();
@@ -1027,14 +1107,18 @@ async fn fetch_url(arguments: &str) -> Result<Value> {
1027
1107
  url: String,
1028
1108
  #[serde(default)]
1029
1109
  max_chars: Option<usize>,
1110
+ #[serde(default)]
1111
+ mode: Option<String>,
1112
+ #[serde(default)]
1113
+ include_js: Option<bool>,
1030
1114
  }
1031
1115
  let args: Args = parse_args(arguments)?;
1032
- let url = args.url.trim();
1116
+ let url = args.url.trim().to_string();
1033
1117
  if url.is_empty() { bail!("url is required"); }
1034
- let max_chars = args.max_chars.unwrap_or(40_000);
1118
+ let mode = args.mode.as_deref().unwrap_or("text").to_string();
1035
1119
 
1036
1120
  let response = http_client()
1037
- .get(url)
1121
+ .get(&url)
1038
1122
  .send()
1039
1123
  .await
1040
1124
  .with_context(|| format!("failed to fetch {url}"))?;
@@ -1050,23 +1134,98 @@ async fn fetch_url(arguments: &str) -> Result<Value> {
1050
1134
  .to_string();
1051
1135
 
1052
1136
  let body = response.text().await.context("failed to read response body")?;
1053
- let text = if content_type.contains("html") || content_type.contains("xml") {
1054
- html_to_text(&body)
1055
- } else {
1056
- body
1057
- };
1058
1137
 
1059
- let char_count = text.chars().count();
1060
- let truncated = char_count > max_chars;
1061
- let text: String = text.chars().take(max_chars).collect();
1138
+ match mode.as_str() {
1139
+ "raw" => {
1140
+ let max = args.max_chars.unwrap_or(80_000);
1141
+ let char_count = body.chars().count();
1142
+ let truncated = char_count > max;
1143
+ let html: String = body.chars().take(max).collect();
1144
+ Ok(json!({
1145
+ "ok": true,
1146
+ "url": url,
1147
+ "content_type": content_type,
1148
+ "html": html,
1149
+ "char_count": char_count,
1150
+ "truncated": truncated,
1151
+ }))
1152
+ }
1153
+ "deep" => {
1154
+ const ASSET_MAX: usize = 30_000;
1155
+ const MAX_ASSETS: usize = 10;
1156
+ let html_max = args.max_chars.unwrap_or(60_000);
1157
+ let include_js = args.include_js.unwrap_or(false);
1158
+
1159
+ let asset_urls: Vec<String> = extract_asset_urls(&body, &url, include_js)
1160
+ .into_iter()
1161
+ .take(MAX_ASSETS)
1162
+ .collect();
1062
1163
 
1063
- Ok(json!({
1064
- "ok": true,
1065
- "url": url,
1066
- "content_type": content_type,
1067
- "text": text,
1068
- "truncated": truncated,
1069
- }))
1164
+ let mut handles = Vec::new();
1165
+ for asset_url in asset_urls {
1166
+ handles.push(tokio::spawn(async move {
1167
+ let Ok(resp) = http_client().get(&asset_url).send().await else { return None; };
1168
+ if !resp.status().is_success() { return None; }
1169
+ let ct = resp.headers()
1170
+ .get("content-type")
1171
+ .and_then(|v| v.to_str().ok())
1172
+ .unwrap_or("")
1173
+ .to_string();
1174
+ let Ok(content) = resp.text().await else { return None; };
1175
+ let kind = if ct.contains("css") || asset_url.ends_with(".css") { "css" }
1176
+ else if ct.contains("javascript") || asset_url.contains(".js") { "js" }
1177
+ else { "other" };
1178
+ let char_count = content.chars().count();
1179
+ let truncated = char_count > ASSET_MAX;
1180
+ let trimmed: String = content.chars().take(ASSET_MAX).collect();
1181
+ Some(json!({
1182
+ "url": asset_url,
1183
+ "type": kind,
1184
+ "char_count": char_count,
1185
+ "truncated": truncated,
1186
+ "content": trimmed,
1187
+ }))
1188
+ }));
1189
+ }
1190
+
1191
+ let mut assets: Vec<Value> = Vec::new();
1192
+ for h in handles {
1193
+ if let Ok(Some(a)) = h.await { assets.push(a); }
1194
+ }
1195
+
1196
+ let html_chars = body.chars().count();
1197
+ let html_truncated = html_chars > html_max;
1198
+ let html: String = body.chars().take(html_max).collect();
1199
+
1200
+ Ok(json!({
1201
+ "ok": true,
1202
+ "url": url,
1203
+ "html": html,
1204
+ "html_chars": html_chars,
1205
+ "html_truncated": html_truncated,
1206
+ "assets": assets,
1207
+ }))
1208
+ }
1209
+ _ => {
1210
+ // "text" mode — current behaviour
1211
+ let max = args.max_chars.unwrap_or(40_000);
1212
+ let text = if content_type.contains("html") || content_type.contains("xml") {
1213
+ html_to_text(&body)
1214
+ } else {
1215
+ body
1216
+ };
1217
+ let char_count = text.chars().count();
1218
+ let truncated = char_count > max;
1219
+ let text: String = text.chars().take(max).collect();
1220
+ Ok(json!({
1221
+ "ok": true,
1222
+ "url": url,
1223
+ "content_type": content_type,
1224
+ "text": text,
1225
+ "truncated": truncated,
1226
+ }))
1227
+ }
1228
+ }
1070
1229
  }
1071
1230
 
1072
1231
  fn html_to_text(html: &str) -> String {