anveesa 0.5.0 → 0.5.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/Cargo.lock +1 -1
- package/Cargo.toml +1 -1
- package/package.json +1 -1
- package/src/tools.rs +179 -20
package/Cargo.lock
CHANGED
package/Cargo.toml
CHANGED
package/package.json
CHANGED
package/src/tools.rs
CHANGED
|
@@ -258,12 +258,14 @@ pub fn definitions(include_write: bool) -> Vec<Value> {
|
|
|
258
258
|
"type": "function",
|
|
259
259
|
"function": {
|
|
260
260
|
"name": "fetch_url",
|
|
261
|
-
"description": "Fetch the content of
|
|
261
|
+
"description": "Fetch a URL. mode=\"text\" (default): returns plain text with HTML tags stripped. mode=\"raw\": returns the full HTML source unchanged. mode=\"deep\": returns HTML source PLUS the full content of every linked CSS file (and JS bundles if include_js=true) in one call — use this when you need to inspect design tokens, Tailwind classes, color variables, font imports, or component structure without multiple round-trips.",
|
|
262
262
|
"parameters": {
|
|
263
263
|
"type": "object",
|
|
264
264
|
"properties": {
|
|
265
265
|
"url": { "type": "string", "description": "URL to fetch." },
|
|
266
|
-
"
|
|
266
|
+
"mode": { "type": "string", "description": "\"text\" (default, strips HTML), \"raw\" (full HTML source), \"deep\" (HTML source + fetch all linked CSS assets, and JS if include_js=true)." },
|
|
267
|
+
"max_chars": { "type": "integer", "description": "Max chars per resource (default 40000 for text, 60000 for raw/deep HTML, 30000 per asset)." },
|
|
268
|
+
"include_js": { "type": "boolean", "description": "deep mode only — also fetch linked JS bundles (default false; bundles can be large)." }
|
|
267
269
|
},
|
|
268
270
|
"required": ["url"]
|
|
269
271
|
}
|
|
@@ -987,6 +989,84 @@ fn scrape_ddg_html(html: &str, max: usize) -> Vec<Value> {
|
|
|
987
989
|
results
|
|
988
990
|
}
|
|
989
991
|
|
|
992
|
+
/// Returns the value of the quoted attribute `attr` inside a single HTML `tag`.
///
/// Both `attr="value"` and `attr='value'` forms are recognised; a
/// double-quoted occurrence anywhere in the tag takes precedence over a
/// single-quoted one. Matching is case-sensitive, so callers that need
/// case-insensitive lookup pass a lowercased copy of the tag.
///
/// A match only counts when it starts at an attribute-name boundary, so asking
/// for `src` does not pick up `data-src="…"`, and `href` does not pick up
/// `data-href="…"`.
///
/// Returns `None` when the attribute is absent, unquoted, or its closing quote
/// is missing.
fn tag_attr(tag: &str, attr: &str) -> Option<String> {
    for quote in ['"', '\''] {
        let needle = format!("{attr}={quote}");
        let mut from = 0;
        while let Some(offset) = tag[from..].find(&needle) {
            let at = from + offset;
            let value_start = at + needle.len();
            // A preceding attribute-name character means we matched the tail
            // of a longer name (e.g. `data-src`), not the attribute itself.
            let inside_longer_name = tag[..at]
                .chars()
                .next_back()
                .map_or(false, |c| c.is_alphanumeric() || matches!(c, '-' | '_' | ':' | '.'));
            if !inside_longer_name {
                return tag[value_start..]
                    .find(quote)
                    .map(|end| tag[value_start..value_start + end].to_string());
            }
            // `value_start` sits right after the needle, so it is always a
            // valid char boundary to resume the search from.
            from = value_start;
        }
    }
    None
}
|
|
1005
|
+
|
|
1006
|
+
/// Returns the `scheme://authority` part of an `http://` or `https://` URL.
///
/// The authority ends at the first `/`, `?` or `#`, so a URL without a path
/// but with a query or fragment (`https://host?x=1`) still yields a clean
/// origin. Any other scheme (or a relative URL) yields an empty string, which
/// callers treat as "no origin available".
fn url_origin(url: &str) -> String {
    let (scheme, rest) = if let Some(rest) = url.strip_prefix("https://") {
        ("https", rest)
    } else if let Some(rest) = url.strip_prefix("http://") {
        ("http", rest)
    } else {
        return String::new();
    };
    // Path, query and fragment are all outside the authority component.
    let authority_end = rest.find(['/', '?', '#']).unwrap_or(rest.len());
    let authority = &rest[..authority_end];
    format!("{scheme}://{authority}")
}
|
|
1012
|
+
|
|
1013
|
+
/// Returns the directory portion of the URL's path, always ending in `/`.
///
/// For `https://host/a/b/page.html` this is `/a/b/`. The query string and
/// fragment are discarded first, so slashes inside them (for example
/// `?next=/c/d` or a `#/route` fragment) cannot be mistaken for path
/// separators. URLs that are not `http://` / `https://`, or that have no
/// path, yield `/`.
fn url_base_path(url: &str) -> String {
    let Some(rest) = url
        .strip_prefix("https://")
        .or_else(|| url.strip_prefix("http://"))
    else {
        return "/".to_string();
    };
    // Only the authority + path matter for locating the directory.
    let path_end = rest.find(['?', '#']).unwrap_or(rest.len());
    let rest = &rest[..path_end];
    // The first slash starts the path; the last one ends the directory part.
    match (rest.find('/'), rest.rfind('/')) {
        (Some(first), Some(last)) => rest[first..=last].to_string(),
        _ => "/".to_string(),
    }
}
|
|
1019
|
+
|
|
1020
|
+
/// Resolves an `href`/`src` value found in a page into an absolute URL.
///
/// * `origin` — `scheme://host` of the page, or empty when unknown.
/// * `base_path` — directory of the page's path, ending in `/`.
///
/// Handles absolute `http(s)` URLs, protocol-relative (`//host/…`),
/// root-relative (`/x.css`) and document-relative (`x.css`) references.
///
/// Returns `None` for references that cannot be fetched over HTTP: empty or
/// fragment-only values, values carrying another scheme (`data:`, `blob:`,
/// `javascript:` …), and relative references when no origin is available.
fn resolve_asset_url(href: &str, origin: &str, base_path: &str) -> Option<String> {
    let h = href.trim();
    // A fragment-only reference points back into the current document.
    if h.is_empty() || h.starts_with('#') {
        return None;
    }
    if h.starts_with("http://") || h.starts_with("https://") {
        return Some(h.to_string());
    }
    if h.starts_with("//") {
        // Protocol-relative: inherit the page's scheme.
        let scheme = if origin.starts_with("https") { "https" } else { "http" };
        return Some(format!("{scheme}:{h}"));
    }
    // A colon before the first `/`, `?` or `#` means the reference has its
    // own scheme (RFC 3986 §4.2); treating it as a relative path would build
    // a bogus URL such as `https://host/data:text/css,…`.
    let first_segment_end = h.find(['/', '?', '#']).unwrap_or(h.len());
    if h[..first_segment_end].contains(':') {
        return None;
    }
    if origin.is_empty() {
        return None;
    }
    if h.starts_with('/') {
        Some(format!("{origin}{h}"))
    } else {
        Some(format!("{origin}{base_path}{h}"))
    }
}
|
|
1036
|
+
|
|
1037
|
+
fn extract_asset_urls(html: &str, base_url: &str, include_js: bool) -> Vec<String> {
|
|
1038
|
+
let origin = url_origin(base_url);
|
|
1039
|
+
let base_path = url_base_path(base_url);
|
|
1040
|
+
let mut urls: Vec<String> = Vec::new();
|
|
1041
|
+
let mut pos = 0;
|
|
1042
|
+
|
|
1043
|
+
while pos < html.len() {
|
|
1044
|
+
let Some(lt) = html[pos..].find('<') else { break };
|
|
1045
|
+
let abs = pos + lt;
|
|
1046
|
+
let Some(gt) = html[abs..].find('>') else { break };
|
|
1047
|
+
let tag = &html[abs..abs + gt + 1];
|
|
1048
|
+
let tag_lo = tag.to_lowercase();
|
|
1049
|
+
pos = abs + gt + 1;
|
|
1050
|
+
|
|
1051
|
+
let href = if tag_lo.starts_with("<link") {
|
|
1052
|
+
let rel = tag_attr(&tag_lo, "rel").unwrap_or_default();
|
|
1053
|
+
let as_ = tag_attr(&tag_lo, "as").unwrap_or_default();
|
|
1054
|
+
if rel == "stylesheet" || (rel == "preload" && as_ == "style") {
|
|
1055
|
+
tag_attr(tag, "href").or_else(|| tag_attr(&tag_lo, "href"))
|
|
1056
|
+
} else { None }
|
|
1057
|
+
} else if include_js && tag_lo.starts_with("<script") {
|
|
1058
|
+
tag_attr(tag, "src").or_else(|| tag_attr(&tag_lo, "src"))
|
|
1059
|
+
} else { None };
|
|
1060
|
+
|
|
1061
|
+
if let Some(h) = href {
|
|
1062
|
+
if let Some(resolved) = resolve_asset_url(&h, &origin, &base_path) {
|
|
1063
|
+
if !urls.contains(&resolved) { urls.push(resolved); }
|
|
1064
|
+
}
|
|
1065
|
+
}
|
|
1066
|
+
}
|
|
1067
|
+
urls
|
|
1068
|
+
}
|
|
1069
|
+
|
|
990
1070
|
fn extract_attr<'a>(html: &'a str, attr: &str) -> Option<&'a str> {
|
|
991
1071
|
let key = format!("{attr}=\"");
|
|
992
1072
|
let start = html.find(&key)? + key.len();
|
|
@@ -1027,14 +1107,18 @@ async fn fetch_url(arguments: &str) -> Result<Value> {
|
|
|
1027
1107
|
url: String,
|
|
1028
1108
|
#[serde(default)]
|
|
1029
1109
|
max_chars: Option<usize>,
|
|
1110
|
+
#[serde(default)]
|
|
1111
|
+
mode: Option<String>,
|
|
1112
|
+
#[serde(default)]
|
|
1113
|
+
include_js: Option<bool>,
|
|
1030
1114
|
}
|
|
1031
1115
|
let args: Args = parse_args(arguments)?;
|
|
1032
|
-
let url = args.url.trim();
|
|
1116
|
+
let url = args.url.trim().to_string();
|
|
1033
1117
|
if url.is_empty() { bail!("url is required"); }
|
|
1034
|
-
let
|
|
1118
|
+
let mode = args.mode.as_deref().unwrap_or("text").to_string();
|
|
1035
1119
|
|
|
1036
1120
|
let response = http_client()
|
|
1037
|
-
.get(url)
|
|
1121
|
+
.get(&url)
|
|
1038
1122
|
.send()
|
|
1039
1123
|
.await
|
|
1040
1124
|
.with_context(|| format!("failed to fetch {url}"))?;
|
|
@@ -1050,23 +1134,98 @@ async fn fetch_url(arguments: &str) -> Result<Value> {
|
|
|
1050
1134
|
.to_string();
|
|
1051
1135
|
|
|
1052
1136
|
let body = response.text().await.context("failed to read response body")?;
|
|
1053
|
-
let text = if content_type.contains("html") || content_type.contains("xml") {
|
|
1054
|
-
html_to_text(&body)
|
|
1055
|
-
} else {
|
|
1056
|
-
body
|
|
1057
|
-
};
|
|
1058
1137
|
|
|
1059
|
-
|
|
1060
|
-
|
|
1061
|
-
|
|
1138
|
+
match mode.as_str() {
|
|
1139
|
+
"raw" => {
|
|
1140
|
+
let max = args.max_chars.unwrap_or(80_000);
|
|
1141
|
+
let char_count = body.chars().count();
|
|
1142
|
+
let truncated = char_count > max;
|
|
1143
|
+
let html: String = body.chars().take(max).collect();
|
|
1144
|
+
Ok(json!({
|
|
1145
|
+
"ok": true,
|
|
1146
|
+
"url": url,
|
|
1147
|
+
"content_type": content_type,
|
|
1148
|
+
"html": html,
|
|
1149
|
+
"char_count": char_count,
|
|
1150
|
+
"truncated": truncated,
|
|
1151
|
+
}))
|
|
1152
|
+
}
|
|
1153
|
+
"deep" => {
|
|
1154
|
+
const ASSET_MAX: usize = 30_000;
|
|
1155
|
+
const MAX_ASSETS: usize = 10;
|
|
1156
|
+
let html_max = args.max_chars.unwrap_or(60_000);
|
|
1157
|
+
let include_js = args.include_js.unwrap_or(false);
|
|
1158
|
+
|
|
1159
|
+
let asset_urls: Vec<String> = extract_asset_urls(&body, &url, include_js)
|
|
1160
|
+
.into_iter()
|
|
1161
|
+
.take(MAX_ASSETS)
|
|
1162
|
+
.collect();
|
|
1062
1163
|
|
|
1063
|
-
|
|
1064
|
-
|
|
1065
|
-
|
|
1066
|
-
|
|
1067
|
-
|
|
1068
|
-
|
|
1069
|
-
|
|
1164
|
+
let mut handles = Vec::new();
|
|
1165
|
+
for asset_url in asset_urls {
|
|
1166
|
+
handles.push(tokio::spawn(async move {
|
|
1167
|
+
let Ok(resp) = http_client().get(&asset_url).send().await else { return None; };
|
|
1168
|
+
if !resp.status().is_success() { return None; }
|
|
1169
|
+
let ct = resp.headers()
|
|
1170
|
+
.get("content-type")
|
|
1171
|
+
.and_then(|v| v.to_str().ok())
|
|
1172
|
+
.unwrap_or("")
|
|
1173
|
+
.to_string();
|
|
1174
|
+
let Ok(content) = resp.text().await else { return None; };
|
|
1175
|
+
let kind = if ct.contains("css") || asset_url.ends_with(".css") { "css" }
|
|
1176
|
+
else if ct.contains("javascript") || asset_url.contains(".js") { "js" }
|
|
1177
|
+
else { "other" };
|
|
1178
|
+
let char_count = content.chars().count();
|
|
1179
|
+
let truncated = char_count > ASSET_MAX;
|
|
1180
|
+
let trimmed: String = content.chars().take(ASSET_MAX).collect();
|
|
1181
|
+
Some(json!({
|
|
1182
|
+
"url": asset_url,
|
|
1183
|
+
"type": kind,
|
|
1184
|
+
"char_count": char_count,
|
|
1185
|
+
"truncated": truncated,
|
|
1186
|
+
"content": trimmed,
|
|
1187
|
+
}))
|
|
1188
|
+
}));
|
|
1189
|
+
}
|
|
1190
|
+
|
|
1191
|
+
let mut assets: Vec<Value> = Vec::new();
|
|
1192
|
+
for h in handles {
|
|
1193
|
+
if let Ok(Some(a)) = h.await { assets.push(a); }
|
|
1194
|
+
}
|
|
1195
|
+
|
|
1196
|
+
let html_chars = body.chars().count();
|
|
1197
|
+
let html_truncated = html_chars > html_max;
|
|
1198
|
+
let html: String = body.chars().take(html_max).collect();
|
|
1199
|
+
|
|
1200
|
+
Ok(json!({
|
|
1201
|
+
"ok": true,
|
|
1202
|
+
"url": url,
|
|
1203
|
+
"html": html,
|
|
1204
|
+
"html_chars": html_chars,
|
|
1205
|
+
"html_truncated": html_truncated,
|
|
1206
|
+
"assets": assets,
|
|
1207
|
+
}))
|
|
1208
|
+
}
|
|
1209
|
+
_ => {
|
|
1210
|
+
// "text" mode — current behaviour
|
|
1211
|
+
let max = args.max_chars.unwrap_or(40_000);
|
|
1212
|
+
let text = if content_type.contains("html") || content_type.contains("xml") {
|
|
1213
|
+
html_to_text(&body)
|
|
1214
|
+
} else {
|
|
1215
|
+
body
|
|
1216
|
+
};
|
|
1217
|
+
let char_count = text.chars().count();
|
|
1218
|
+
let truncated = char_count > max;
|
|
1219
|
+
let text: String = text.chars().take(max).collect();
|
|
1220
|
+
Ok(json!({
|
|
1221
|
+
"ok": true,
|
|
1222
|
+
"url": url,
|
|
1223
|
+
"content_type": content_type,
|
|
1224
|
+
"text": text,
|
|
1225
|
+
"truncated": truncated,
|
|
1226
|
+
}))
|
|
1227
|
+
}
|
|
1228
|
+
}
|
|
1070
1229
|
}
|
|
1071
1230
|
|
|
1072
1231
|
fn html_to_text(html: &str) -> String {
|