cobolx 1.0.3 → 1.0.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/Cargo.lock +1 -1
- package/Cargo.toml +1 -1
- package/package.json +1 -1
- package/src/agent/client.rs +107 -8
- package/src/agent/db_agent.rs +71 -23
- package/src/agent/explain_agent.rs +53 -22
- package/src/agent/fs_agent.rs +211 -83
- package/src/agent/skills.rs +336 -0
- package/src/agent/types.rs +7 -0
- package/src/agent.rs +1 -0
- package/src/cobol/indexer.rs +375 -5
- package/src/cobol/model.rs +78 -0
- package/src/cobol/scanner.rs +2 -0
- package/src/cobol/source_parser.rs +341 -2
- package/src/lib.rs +1 -0
- package/src/main.rs +1 -0
- package/src/memory/memories.rs +208 -0
- package/src/memory/runs.rs +161 -0
- package/src/memory/store.rs +120 -0
- package/src/memory.rs +8 -2
- package/src/path_safety.rs +280 -0
- package/src/ui/draw.rs +1 -0
- package/src/ui/tui.rs +239 -0
- package/tests/indexer_tests.rs +261 -0
- package/tests/project_files_tests.rs +23 -51
- package/src/memory/files.rs +0 -155
package/src/agent/fs_agent.rs
CHANGED
|
@@ -1,11 +1,28 @@
|
|
|
1
1
|
use super::AgentRouter;
|
|
2
|
+
use super::skills::{AgentKind, append_agent_skills};
|
|
2
3
|
use super::types::merge_tool_call_deltas;
|
|
3
4
|
use super::types::{
|
|
4
5
|
ChatMessage, ChatRequest, FunctionDefinition, StreamOptions, Tool, ToolCall, Usage,
|
|
6
|
+
WriteBuffer, WriteBufferEntry,
|
|
5
7
|
};
|
|
6
8
|
use crate::memory::MemoryStore;
|
|
9
|
+
use crate::path_safety::{
|
|
10
|
+
validate_and_resolve_write, validate_sandbox_path as resolve_sandbox_path, write_validated_path,
|
|
11
|
+
};
|
|
7
12
|
use std::path::Path;
|
|
8
13
|
|
|
14
|
+
/// Returns the longest prefix of `content` that is at most `max_bytes` bytes long
/// and ends on a UTF-8 character boundary.
///
/// When `content` already fits within `max_bytes` it is returned unchanged.
/// Otherwise the cut point is moved backwards from `max_bytes` until it lands on
/// a character boundary, so the slice never splits a multi-byte character.
fn truncate_utf8_preview(content: &str, max_bytes: usize) -> &str {
    if content.len() > max_bytes {
        // Offset 0 is always a character boundary, so the search always succeeds;
        // the fallback only mirrors the lower bound of the backwards walk.
        let cut = (0..=max_bytes)
            .rev()
            .find(|&offset| content.is_char_boundary(offset))
            .unwrap_or(0);
        &content[..cut]
    } else {
        content
    }
}
|
|
25
|
+
|
|
9
26
|
impl AgentRouter {
|
|
10
27
|
/// Validates `user_path` resolves inside `sandbox`.
|
|
11
28
|
/// Returns the canonical absolute path or an error string.
|
|
@@ -13,70 +30,7 @@ impl AgentRouter {
|
|
|
13
30
|
sandbox: &Path,
|
|
14
31
|
user_path: &str,
|
|
15
32
|
) -> Result<std::path::PathBuf, String> {
|
|
16
|
-
|
|
17
|
-
user_path.to_string()
|
|
18
|
-
} else {
|
|
19
|
-
user_path.trim_start_matches(['/', '\\']).to_string()
|
|
20
|
-
};
|
|
21
|
-
|
|
22
|
-
let candidate = if std::path::Path::new(&normalized).is_absolute() {
|
|
23
|
-
std::path::PathBuf::from(&normalized)
|
|
24
|
-
} else {
|
|
25
|
-
sandbox.join(&normalized)
|
|
26
|
-
};
|
|
27
|
-
|
|
28
|
-
let sandbox_canon = sandbox
|
|
29
|
-
.canonicalize()
|
|
30
|
-
.map_err(|e| format!("Sandbox path error: {e}"))?;
|
|
31
|
-
|
|
32
|
-
let clean_canon = |p: &Path| -> String {
|
|
33
|
-
let s = p.to_string_lossy().into_owned();
|
|
34
|
-
let s_stripped = if let Some(stripped) = s.strip_prefix(r"\\?\") {
|
|
35
|
-
stripped.to_string()
|
|
36
|
-
} else {
|
|
37
|
-
s
|
|
38
|
-
};
|
|
39
|
-
s_stripped.replace('\\', "/").to_lowercase()
|
|
40
|
-
};
|
|
41
|
-
|
|
42
|
-
let sandbox_canon_str = clean_canon(&sandbox_canon);
|
|
43
|
-
|
|
44
|
-
let mut existing = candidate.clone();
|
|
45
|
-
let mut suffix = std::path::PathBuf::new();
|
|
46
|
-
loop {
|
|
47
|
-
if existing.exists() {
|
|
48
|
-
break;
|
|
49
|
-
}
|
|
50
|
-
if let Some(parent) = existing.parent() {
|
|
51
|
-
if let Some(file_name) = existing.file_name() {
|
|
52
|
-
suffix = std::path::Path::new(file_name).join(&suffix);
|
|
53
|
-
existing = parent.to_path_buf();
|
|
54
|
-
} else {
|
|
55
|
-
break;
|
|
56
|
-
}
|
|
57
|
-
} else {
|
|
58
|
-
break;
|
|
59
|
-
}
|
|
60
|
-
}
|
|
61
|
-
|
|
62
|
-
let canon_existing = existing
|
|
63
|
-
.canonicalize()
|
|
64
|
-
.map_err(|e| format!("Path resolution error: {e}"))?;
|
|
65
|
-
let resolved = canon_existing.join(&suffix);
|
|
66
|
-
let resolved_str = clean_canon(&resolved);
|
|
67
|
-
|
|
68
|
-
let is_sub = resolved_str == sandbox_canon_str
|
|
69
|
-
|| (resolved_str.starts_with(&sandbox_canon_str)
|
|
70
|
-
&& (sandbox_canon_str.ends_with('/')
|
|
71
|
-
|| resolved_str.chars().nth(sandbox_canon_str.chars().count()) == Some('/')));
|
|
72
|
-
|
|
73
|
-
if !is_sub {
|
|
74
|
-
return Err(format!(
|
|
75
|
-
"Access denied: '{}' is outside the sandbox directory",
|
|
76
|
-
user_path
|
|
77
|
-
));
|
|
78
|
-
}
|
|
79
|
-
Ok(resolved)
|
|
33
|
+
resolve_sandbox_path(sandbox, user_path)
|
|
80
34
|
}
|
|
81
35
|
|
|
82
36
|
/// Phase 1 — silent read-only data retrieval (DB + files).
|
|
@@ -110,7 +64,7 @@ impl AgentRouter {
|
|
|
110
64
|
|
|
111
65
|
if let Some(first_msg) = messages.get_mut(0) {
|
|
112
66
|
if first_msg.role == "system" {
|
|
113
|
-
|
|
67
|
+
let mut system_prompt = format!(
|
|
114
68
|
"You are the COBOLX Filesystem Retrieval Agent. Your ONLY job is to collect \
|
|
115
69
|
raw data about COBOL files using the tools below. Do NOT explain or interpret \
|
|
116
70
|
— just gather and output a structured data summary.\n\
|
|
@@ -120,7 +74,9 @@ impl AgentRouter {
|
|
|
120
74
|
\n\
|
|
121
75
|
WORKFLOW:\n\
|
|
122
76
|
1. query_sqlite: SELECT id, path, kind FROM files\n\
|
|
123
|
-
2. query_sqlite: get programs, data_items, call_edges, copybook_uses\
|
|
77
|
+
2. query_sqlite: get programs, data_items, call_edges, copybook_uses, \
|
|
78
|
+
program_features, code_blocks, external_ops, identifiers, literals, \
|
|
79
|
+
copybook_features\n\
|
|
124
80
|
3. read_file: raw source text only when needed\n\
|
|
125
81
|
4. list_directory / search_in_file: locate files if needed\n\
|
|
126
82
|
\n\
|
|
@@ -133,8 +89,20 @@ impl AgentRouter {
|
|
|
133
89
|
2. programs(id, name, file_id)\n\
|
|
134
90
|
3. copybook_uses(id, from_file_id, copybook_name, resolve_status)\n\
|
|
135
91
|
4. call_edges(id, caller_program_id, callee_name, kind)\n\
|
|
136
|
-
5. data_items(id, program_id, name, level, parent_name, pic, usage_clause, section)
|
|
137
|
-
|
|
92
|
+
5. data_items(id, program_id, name, level, parent_name, pic, usage_clause, section)\n\
|
|
93
|
+
6. program_features(program_id, incoming_call_count, outgoing_call_count, is_entrypoint, paragraph_count, external_op_count)\n\
|
|
94
|
+
7. code_blocks(program_id, name, kind, parent_section, sequence_no, statement_count)\n\
|
|
95
|
+
8. external_ops(program_id, kind, verb, target)\n\
|
|
96
|
+
9. identifiers(program_id, kind, value, occurrences)\n\
|
|
97
|
+
10. literals(program_id, kind, value, occurrences)\n\
|
|
98
|
+
11. copybook_features(copybook_file_id, copybook_name, used_by_program_count, contains_header_fields, contains_error_fields)"
|
|
99
|
+
);
|
|
100
|
+
append_agent_skills(
|
|
101
|
+
&mut system_prompt,
|
|
102
|
+
sandbox_path,
|
|
103
|
+
AgentKind::FilesystemRetrieval,
|
|
104
|
+
)?;
|
|
105
|
+
first_msg.content = Some(system_prompt);
|
|
138
106
|
}
|
|
139
107
|
}
|
|
140
108
|
|
|
@@ -253,12 +221,20 @@ impl AgentRouter {
|
|
|
253
221
|
function: FunctionDefinition {
|
|
254
222
|
name: "query_sqlite".to_string(),
|
|
255
223
|
description:
|
|
256
|
-
"Run
|
|
257
|
-
|
|
224
|
+
"Run one read-only SELECT query against the indexed project SQLite database. \
|
|
225
|
+
Use this for project facts from files, programs, data_items, call_edges, \
|
|
226
|
+
copybook_uses, program_features, code_blocks, external_ops, identifiers, \
|
|
227
|
+
literals, or copybook_features. Do not use it for writes, DDL, or guessed \
|
|
228
|
+
values."
|
|
258
229
|
.to_string(),
|
|
259
230
|
parameters: serde_json::json!({
|
|
260
231
|
"type": "object",
|
|
261
|
-
"properties": {
|
|
232
|
+
"properties": {
|
|
233
|
+
"sql": {
|
|
234
|
+
"type": "string",
|
|
235
|
+
"description": "A single SQLite SELECT statement that reads indexed project data."
|
|
236
|
+
}
|
|
237
|
+
},
|
|
262
238
|
"required": ["sql"]
|
|
263
239
|
}),
|
|
264
240
|
},
|
|
@@ -267,10 +243,17 @@ impl AgentRouter {
|
|
|
267
243
|
r#type: "function".to_string(),
|
|
268
244
|
function: FunctionDefinition {
|
|
269
245
|
name: "read_file".to_string(),
|
|
270
|
-
description:
|
|
246
|
+
description:
|
|
247
|
+
"Read the full text of one sandbox file. Use this when exact source content matters, and pass only a path inside the sandbox."
|
|
248
|
+
.to_string(),
|
|
271
249
|
parameters: serde_json::json!({
|
|
272
250
|
"type": "object",
|
|
273
|
-
"properties": {
|
|
251
|
+
"properties": {
|
|
252
|
+
"path": {
|
|
253
|
+
"type": "string",
|
|
254
|
+
"description": "Relative path to one file inside the sandbox."
|
|
255
|
+
}
|
|
256
|
+
},
|
|
274
257
|
"required": ["path"]
|
|
275
258
|
}),
|
|
276
259
|
},
|
|
@@ -279,12 +262,20 @@ impl AgentRouter {
|
|
|
279
262
|
r#type: "function".to_string(),
|
|
280
263
|
function: FunctionDefinition {
|
|
281
264
|
name: "list_directory".to_string(),
|
|
282
|
-
description:
|
|
265
|
+
description:
|
|
266
|
+
"List entries in one sandbox directory, optionally filtered by extension. Use this to discover candidate files before reading them."
|
|
267
|
+
.to_string(),
|
|
283
268
|
parameters: serde_json::json!({
|
|
284
269
|
"type": "object",
|
|
285
270
|
"properties": {
|
|
286
|
-
"path": {
|
|
287
|
-
|
|
271
|
+
"path": {
|
|
272
|
+
"type": "string",
|
|
273
|
+
"description": "Relative path to a directory inside the sandbox."
|
|
274
|
+
},
|
|
275
|
+
"extension": {
|
|
276
|
+
"type": "string",
|
|
277
|
+
"description": "Optional extension filter such as .cbl or .cpy."
|
|
278
|
+
}
|
|
288
279
|
},
|
|
289
280
|
"required": ["path"]
|
|
290
281
|
}),
|
|
@@ -294,13 +285,20 @@ impl AgentRouter {
|
|
|
294
285
|
r#type: "function".to_string(),
|
|
295
286
|
function: FunctionDefinition {
|
|
296
287
|
name: "search_in_file".to_string(),
|
|
297
|
-
description:
|
|
298
|
-
.
|
|
288
|
+
description:
|
|
289
|
+
"Search one sandbox file for a plain-text pattern, case-insensitive, and return matching lines with line numbers."
|
|
290
|
+
.to_string(),
|
|
299
291
|
parameters: serde_json::json!({
|
|
300
292
|
"type": "object",
|
|
301
293
|
"properties": {
|
|
302
|
-
"path": {
|
|
303
|
-
|
|
294
|
+
"path": {
|
|
295
|
+
"type": "string",
|
|
296
|
+
"description": "Relative path to one file inside the sandbox."
|
|
297
|
+
},
|
|
298
|
+
"pattern": {
|
|
299
|
+
"type": "string",
|
|
300
|
+
"description": "Plain-text pattern to search for."
|
|
301
|
+
}
|
|
304
302
|
},
|
|
305
303
|
"required": ["path", "pattern"]
|
|
306
304
|
}),
|
|
@@ -322,8 +320,15 @@ impl AgentRouter {
|
|
|
322
320
|
let _ = tx.send("\x01STATUS:Querying project database...".to_string());
|
|
323
321
|
match MemoryStore::open_or_create(sandbox_path) {
|
|
324
322
|
Err(e) => serde_json::json!({ "error": format!("DB error: {e}") }).to_string(),
|
|
325
|
-
Ok(store) => match store.
|
|
326
|
-
Ok(
|
|
323
|
+
Ok(store) => match store.project_index_is_empty() {
|
|
324
|
+
Ok(true) => serde_json::json!({
|
|
325
|
+
"error": "Project index is empty. Run /init before asking for indexed project data."
|
|
326
|
+
})
|
|
327
|
+
.to_string(),
|
|
328
|
+
Ok(false) => match store.query_readonly(sql) {
|
|
329
|
+
Ok(val) => val.to_string(),
|
|
330
|
+
Err(e) => serde_json::json!({ "error": e.to_string() }).to_string(),
|
|
331
|
+
},
|
|
327
332
|
Err(e) => serde_json::json!({ "error": e.to_string() }).to_string(),
|
|
328
333
|
},
|
|
329
334
|
}
|
|
@@ -338,10 +343,11 @@ impl AgentRouter {
|
|
|
338
343
|
Ok(content) => {
|
|
339
344
|
const MAX: usize = 120_000;
|
|
340
345
|
let body = if content.len() > MAX {
|
|
346
|
+
let preview = truncate_utf8_preview(&content, MAX);
|
|
341
347
|
format!(
|
|
342
348
|
"[truncated: first {MAX} of {} bytes]\n{}",
|
|
343
349
|
content.len(),
|
|
344
|
-
|
|
350
|
+
preview
|
|
345
351
|
)
|
|
346
352
|
} else {
|
|
347
353
|
content
|
|
@@ -423,4 +429,126 @@ impl AgentRouter {
|
|
|
423
429
|
}
|
|
424
430
|
})
|
|
425
431
|
}
|
|
432
|
+
|
|
433
|
+
/// Writes a file to the sandbox. If a buffer is provided, it is pushed to the buffer instead of writing physically.
|
|
434
|
+
/// Returns the resolved path or an error string.
|
|
435
|
+
pub(crate) fn write_file(
|
|
436
|
+
&self,
|
|
437
|
+
sandbox: &Path,
|
|
438
|
+
user_path: &str,
|
|
439
|
+
content: &str,
|
|
440
|
+
buffer: Option<&WriteBuffer>,
|
|
441
|
+
) -> Result<std::path::PathBuf, String> {
|
|
442
|
+
let full_path = validate_and_resolve_write(sandbox, user_path)?;
|
|
443
|
+
if let Some(buf) = buffer {
|
|
444
|
+
if let Ok(mut lock) = buf.lock() {
|
|
445
|
+
lock.push((full_path.clone(), content.to_string()));
|
|
446
|
+
} else {
|
|
447
|
+
return Err("Failed to lock write buffer".to_string());
|
|
448
|
+
}
|
|
449
|
+
} else {
|
|
450
|
+
write_validated_path(&full_path, content)?;
|
|
451
|
+
}
|
|
452
|
+
Ok(full_path)
|
|
453
|
+
}
|
|
454
|
+
|
|
455
|
+
/// Commits a list of buffered writes to disk.
|
|
456
|
+
pub(crate) fn commit_write_buffer(&self, buffer: &[WriteBufferEntry]) -> Result<(), String> {
|
|
457
|
+
for (full_path, content) in buffer {
|
|
458
|
+
write_validated_path(full_path, content)?;
|
|
459
|
+
}
|
|
460
|
+
Ok(())
|
|
461
|
+
}
|
|
462
|
+
}
|
|
463
|
+
|
|
464
|
+
#[cfg(test)]
|
|
465
|
+
mod tests {
|
|
466
|
+
use super::*;
|
|
467
|
+
use crate::agent::types::{FunctionCall, ToolCall};
|
|
468
|
+
use std::io::Write;
|
|
469
|
+
|
|
470
|
+
#[test]
fn readonly_tools_descriptions_include_usage_constraints() {
    let tools = AgentRouter::build_readonly_tools();

    // Finds a tool by name and hands back a copy of its description; panics if
    // the tool is not registered, which fails the test.
    let description_of = |tool_name: &str| {
        tools
            .iter()
            .find(|t| t.function.name == tool_name)
            .unwrap()
            .function
            .description
            .clone()
    };

    let query_sqlite = description_of("query_sqlite");
    assert!(query_sqlite.contains("SELECT"));
    assert!(query_sqlite.contains("read-only"));

    let read_file = description_of("read_file");
    assert!(read_file.contains("sandbox"));
    assert!(read_file.contains("full text"));

    let list_directory = description_of("list_directory");
    assert!(list_directory.contains("directory"));
    assert!(list_directory.contains("extension"));

    let search_in_file = description_of("search_in_file");
    assert!(search_in_file.contains("case-insensitive"));
    assert!(search_in_file.contains("line"));
}
|
|
507
|
+
|
|
508
|
+
#[tokio::test]
|
|
509
|
+
async fn read_file_truncation_handles_utf8_boundaries_without_panicking() {
|
|
510
|
+
let dir = tempfile::tempdir().unwrap();
|
|
511
|
+
let path = dir.path().join("utf8.cbl");
|
|
512
|
+
let mut file = std::fs::File::create(&path).unwrap();
|
|
513
|
+
let content = format!("a{}", "你".repeat(40_100));
|
|
514
|
+
file.write_all(content.as_bytes()).unwrap();
|
|
515
|
+
|
|
516
|
+
let tc = ToolCall {
|
|
517
|
+
id: "1".to_string(),
|
|
518
|
+
r#type: "function".to_string(),
|
|
519
|
+
function: FunctionCall {
|
|
520
|
+
name: "read_file".to_string(),
|
|
521
|
+
arguments: serde_json::json!({ "path": "utf8.cbl" }).to_string(),
|
|
522
|
+
},
|
|
523
|
+
};
|
|
524
|
+
let (tx, _rx) = tokio::sync::mpsc::unbounded_channel();
|
|
525
|
+
|
|
526
|
+
let result = AgentRouter::execute_readonly_tool(&tc, dir.path(), &tx).await;
|
|
527
|
+
assert!(result.is_ok());
|
|
528
|
+
|
|
529
|
+
let result_json = result.unwrap();
|
|
530
|
+
let parsed: serde_json::Value = serde_json::from_str(&result_json).unwrap();
|
|
531
|
+
let body = parsed["content"].as_str().unwrap_or("");
|
|
532
|
+
assert!(body.contains("[truncated:"), "tool result: {}", result_json);
|
|
533
|
+
}
|
|
534
|
+
|
|
535
|
+
#[tokio::test]
|
|
536
|
+
async fn query_sqlite_returns_init_guidance_when_index_is_empty() {
|
|
537
|
+
let dir = tempfile::tempdir().unwrap();
|
|
538
|
+
let tc = ToolCall {
|
|
539
|
+
id: "2".to_string(),
|
|
540
|
+
r#type: "function".to_string(),
|
|
541
|
+
function: FunctionCall {
|
|
542
|
+
name: "query_sqlite".to_string(),
|
|
543
|
+
arguments: serde_json::json!({ "sql": "SELECT * FROM files" }).to_string(),
|
|
544
|
+
},
|
|
545
|
+
};
|
|
546
|
+
let (tx, _rx) = tokio::sync::mpsc::unbounded_channel();
|
|
547
|
+
|
|
548
|
+
let result = AgentRouter::execute_readonly_tool(&tc, dir.path(), &tx)
|
|
549
|
+
.await
|
|
550
|
+
.unwrap();
|
|
551
|
+
assert!(result.contains("/init"));
|
|
552
|
+
assert!(result.contains("index"));
|
|
553
|
+
}
|
|
426
554
|
}
|