cobolx 1.0.3 → 1.0.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,11 +1,28 @@
1
1
  use super::AgentRouter;
2
+ use super::skills::{AgentKind, append_agent_skills};
2
3
  use super::types::merge_tool_call_deltas;
3
4
  use super::types::{
4
5
  ChatMessage, ChatRequest, FunctionDefinition, StreamOptions, Tool, ToolCall, Usage,
6
+ WriteBuffer, WriteBufferEntry,
5
7
  };
6
8
  use crate::memory::MemoryStore;
9
+ use crate::path_safety::{
10
+ validate_and_resolve_write, validate_sandbox_path as resolve_sandbox_path, write_validated_path,
11
+ };
7
12
  use std::path::Path;
8
13
 
14
/// Returns the longest prefix of `content` that is at most `max_bytes` bytes
/// long and ends on a UTF-8 character boundary.
///
/// When `content` already fits within `max_bytes`, it is returned unchanged.
fn truncate_utf8_preview(content: &str, max_bytes: usize) -> &str {
    if max_bytes >= content.len() {
        return content;
    }

    // Walk backwards from the byte limit to the nearest character boundary.
    // Offset 0 is always a boundary, so the fallback is never actually used.
    let cut = (0..=max_bytes)
        .rev()
        .find(|&offset| content.is_char_boundary(offset))
        .unwrap_or(0);
    &content[..cut]
}
25
+
9
26
  impl AgentRouter {
10
27
  /// Validates `user_path` resolves inside `sandbox`.
11
28
  /// Returns the canonical absolute path or an error string.
@@ -13,70 +30,7 @@ impl AgentRouter {
13
30
  sandbox: &Path,
14
31
  user_path: &str,
15
32
  ) -> Result<std::path::PathBuf, String> {
16
- let normalized = if std::path::Path::new(user_path).is_absolute() {
17
- user_path.to_string()
18
- } else {
19
- user_path.trim_start_matches(['/', '\\']).to_string()
20
- };
21
-
22
- let candidate = if std::path::Path::new(&normalized).is_absolute() {
23
- std::path::PathBuf::from(&normalized)
24
- } else {
25
- sandbox.join(&normalized)
26
- };
27
-
28
- let sandbox_canon = sandbox
29
- .canonicalize()
30
- .map_err(|e| format!("Sandbox path error: {e}"))?;
31
-
32
- let clean_canon = |p: &Path| -> String {
33
- let s = p.to_string_lossy().into_owned();
34
- let s_stripped = if let Some(stripped) = s.strip_prefix(r"\\?\") {
35
- stripped.to_string()
36
- } else {
37
- s
38
- };
39
- s_stripped.replace('\\', "/").to_lowercase()
40
- };
41
-
42
- let sandbox_canon_str = clean_canon(&sandbox_canon);
43
-
44
- let mut existing = candidate.clone();
45
- let mut suffix = std::path::PathBuf::new();
46
- loop {
47
- if existing.exists() {
48
- break;
49
- }
50
- if let Some(parent) = existing.parent() {
51
- if let Some(file_name) = existing.file_name() {
52
- suffix = std::path::Path::new(file_name).join(&suffix);
53
- existing = parent.to_path_buf();
54
- } else {
55
- break;
56
- }
57
- } else {
58
- break;
59
- }
60
- }
61
-
62
- let canon_existing = existing
63
- .canonicalize()
64
- .map_err(|e| format!("Path resolution error: {e}"))?;
65
- let resolved = canon_existing.join(&suffix);
66
- let resolved_str = clean_canon(&resolved);
67
-
68
- let is_sub = resolved_str == sandbox_canon_str
69
- || (resolved_str.starts_with(&sandbox_canon_str)
70
- && (sandbox_canon_str.ends_with('/')
71
- || resolved_str.chars().nth(sandbox_canon_str.chars().count()) == Some('/')));
72
-
73
- if !is_sub {
74
- return Err(format!(
75
- "Access denied: '{}' is outside the sandbox directory",
76
- user_path
77
- ));
78
- }
79
- Ok(resolved)
33
+ resolve_sandbox_path(sandbox, user_path)
80
34
  }
81
35
 
82
36
  /// Phase 1 — silent read-only data retrieval (DB + files).
@@ -110,7 +64,7 @@ impl AgentRouter {
110
64
 
111
65
  if let Some(first_msg) = messages.get_mut(0) {
112
66
  if first_msg.role == "system" {
113
- first_msg.content = Some(format!(
67
+ let mut system_prompt = format!(
114
68
  "You are the COBOLX Filesystem Retrieval Agent. Your ONLY job is to collect \
115
69
  raw data about COBOL files using the tools below. Do NOT explain or interpret \
116
70
  — just gather and output a structured data summary.\n\
@@ -120,7 +74,9 @@ impl AgentRouter {
120
74
  \n\
121
75
  WORKFLOW:\n\
122
76
  1. query_sqlite: SELECT id, path, kind FROM files\n\
123
- 2. query_sqlite: get programs, data_items, call_edges, copybook_uses\n\
77
+ 2. query_sqlite: get programs, data_items, call_edges, copybook_uses, \
78
+ program_features, code_blocks, external_ops, identifiers, literals, \
79
+ copybook_features\n\
124
80
  3. read_file: raw source text only when needed\n\
125
81
  4. list_directory / search_in_file: locate files if needed\n\
126
82
  \n\
@@ -133,8 +89,20 @@ impl AgentRouter {
133
89
  2. programs(id, name, file_id)\n\
134
90
  3. copybook_uses(id, from_file_id, copybook_name, resolve_status)\n\
135
91
  4. call_edges(id, caller_program_id, callee_name, kind)\n\
136
- 5. data_items(id, program_id, name, level, parent_name, pic, usage_clause, section)"
137
- ));
92
+ 5. data_items(id, program_id, name, level, parent_name, pic, usage_clause, section)\n\
93
+ 6. program_features(program_id, incoming_call_count, outgoing_call_count, is_entrypoint, paragraph_count, external_op_count)\n\
94
+ 7. code_blocks(program_id, name, kind, parent_section, sequence_no, statement_count)\n\
95
+ 8. external_ops(program_id, kind, verb, target)\n\
96
+ 9. identifiers(program_id, kind, value, occurrences)\n\
97
+ 10. literals(program_id, kind, value, occurrences)\n\
98
+ 11. copybook_features(copybook_file_id, copybook_name, used_by_program_count, contains_header_fields, contains_error_fields)"
99
+ );
100
+ append_agent_skills(
101
+ &mut system_prompt,
102
+ sandbox_path,
103
+ AgentKind::FilesystemRetrieval,
104
+ )?;
105
+ first_msg.content = Some(system_prompt);
138
106
  }
139
107
  }
140
108
 
@@ -253,12 +221,20 @@ impl AgentRouter {
253
221
  function: FunctionDefinition {
254
222
  name: "query_sqlite".to_string(),
255
223
  description:
256
- "Run a read-only SELECT query against the project SQLite database \
257
- (files, programs, data_items, call_edges, copybook_uses)."
224
+ "Run one read-only SELECT query against the indexed project SQLite database. \
225
+ Use this for project facts from files, programs, data_items, call_edges, \
226
+ copybook_uses, program_features, code_blocks, external_ops, identifiers, \
227
+ literals, or copybook_features. Do not use it for writes, DDL, or guessed \
228
+ values."
258
229
  .to_string(),
259
230
  parameters: serde_json::json!({
260
231
  "type": "object",
261
- "properties": { "sql": { "type": "string" } },
232
+ "properties": {
233
+ "sql": {
234
+ "type": "string",
235
+ "description": "A single SQLite SELECT statement that reads indexed project data."
236
+ }
237
+ },
262
238
  "required": ["sql"]
263
239
  }),
264
240
  },
@@ -267,10 +243,17 @@ impl AgentRouter {
267
243
  r#type: "function".to_string(),
268
244
  function: FunctionDefinition {
269
245
  name: "read_file".to_string(),
270
- description: "Read the full text of a sandbox file.".to_string(),
246
+ description:
247
+ "Read the full text of one sandbox file. Use this when exact source content matters, and pass only a path inside the sandbox."
248
+ .to_string(),
271
249
  parameters: serde_json::json!({
272
250
  "type": "object",
273
- "properties": { "path": { "type": "string" } },
251
+ "properties": {
252
+ "path": {
253
+ "type": "string",
254
+ "description": "Relative path to one file inside the sandbox."
255
+ }
256
+ },
274
257
  "required": ["path"]
275
258
  }),
276
259
  },
@@ -279,12 +262,20 @@ impl AgentRouter {
279
262
  r#type: "function".to_string(),
280
263
  function: FunctionDefinition {
281
264
  name: "list_directory".to_string(),
282
- description: "List entries in a sandbox directory.".to_string(),
265
+ description:
266
+ "List entries in one sandbox directory, optionally filtered by extension. Use this to discover candidate files before reading them."
267
+ .to_string(),
283
268
  parameters: serde_json::json!({
284
269
  "type": "object",
285
270
  "properties": {
286
- "path": { "type": "string" },
287
- "extension": { "type": "string" }
271
+ "path": {
272
+ "type": "string",
273
+ "description": "Relative path to a directory inside the sandbox."
274
+ },
275
+ "extension": {
276
+ "type": "string",
277
+ "description": "Optional extension filter such as .cbl or .cpy."
278
+ }
288
279
  },
289
280
  "required": ["path"]
290
281
  }),
@@ -294,13 +285,20 @@ impl AgentRouter {
294
285
  r#type: "function".to_string(),
295
286
  function: FunctionDefinition {
296
287
  name: "search_in_file".to_string(),
297
- description: "Search for a text pattern (case-insensitive) in a file."
298
- .to_string(),
288
+ description:
289
+ "Search one sandbox file for a plain-text pattern, case-insensitive, and return matching lines with line numbers."
290
+ .to_string(),
299
291
  parameters: serde_json::json!({
300
292
  "type": "object",
301
293
  "properties": {
302
- "path": { "type": "string" },
303
- "pattern": { "type": "string" }
294
+ "path": {
295
+ "type": "string",
296
+ "description": "Relative path to one file inside the sandbox."
297
+ },
298
+ "pattern": {
299
+ "type": "string",
300
+ "description": "Plain-text pattern to search for."
301
+ }
304
302
  },
305
303
  "required": ["path", "pattern"]
306
304
  }),
@@ -322,8 +320,15 @@ impl AgentRouter {
322
320
  let _ = tx.send("\x01STATUS:Querying project database...".to_string());
323
321
  match MemoryStore::open_or_create(sandbox_path) {
324
322
  Err(e) => serde_json::json!({ "error": format!("DB error: {e}") }).to_string(),
325
- Ok(store) => match store.query_readonly(sql) {
326
- Ok(val) => val.to_string(),
323
+ Ok(store) => match store.project_index_is_empty() {
324
+ Ok(true) => serde_json::json!({
325
+ "error": "Project index is empty. Run /init before asking for indexed project data."
326
+ })
327
+ .to_string(),
328
+ Ok(false) => match store.query_readonly(sql) {
329
+ Ok(val) => val.to_string(),
330
+ Err(e) => serde_json::json!({ "error": e.to_string() }).to_string(),
331
+ },
327
332
  Err(e) => serde_json::json!({ "error": e.to_string() }).to_string(),
328
333
  },
329
334
  }
@@ -338,10 +343,11 @@ impl AgentRouter {
338
343
  Ok(content) => {
339
344
  const MAX: usize = 120_000;
340
345
  let body = if content.len() > MAX {
346
+ let preview = truncate_utf8_preview(&content, MAX);
341
347
  format!(
342
348
  "[truncated: first {MAX} of {} bytes]\n{}",
343
349
  content.len(),
344
- &content[..MAX]
350
+ preview
345
351
  )
346
352
  } else {
347
353
  content
@@ -423,4 +429,126 @@ impl AgentRouter {
423
429
  }
424
430
  })
425
431
  }
432
+
433
+ /// Writes a file to the sandbox. If a buffer is provided, it is pushed to the buffer instead of writing physically.
434
+ /// Returns the resolved path or an error string.
435
+ pub(crate) fn write_file(
436
+ &self,
437
+ sandbox: &Path,
438
+ user_path: &str,
439
+ content: &str,
440
+ buffer: Option<&WriteBuffer>,
441
+ ) -> Result<std::path::PathBuf, String> {
442
+ let full_path = validate_and_resolve_write(sandbox, user_path)?;
443
+ if let Some(buf) = buffer {
444
+ if let Ok(mut lock) = buf.lock() {
445
+ lock.push((full_path.clone(), content.to_string()));
446
+ } else {
447
+ return Err("Failed to lock write buffer".to_string());
448
+ }
449
+ } else {
450
+ write_validated_path(&full_path, content)?;
451
+ }
452
+ Ok(full_path)
453
+ }
454
+
455
+ /// Commits a list of buffered writes to disk.
456
+ pub(crate) fn commit_write_buffer(&self, buffer: &[WriteBufferEntry]) -> Result<(), String> {
457
+ for (full_path, content) in buffer {
458
+ write_validated_path(full_path, content)?;
459
+ }
460
+ Ok(())
461
+ }
462
+ }
463
+
464
#[cfg(test)]
mod tests {
    use super::*;
    use crate::agent::types::{FunctionCall, ToolCall};
    use std::io::Write;

    /// Builds a function-style tool call with the given id, tool name, and JSON arguments.
    fn tool_call(id: &str, name: &str, arguments: serde_json::Value) -> ToolCall {
        ToolCall {
            id: id.to_string(),
            r#type: "function".to_string(),
            function: FunctionCall {
                name: name.to_string(),
                arguments: arguments.to_string(),
            },
        }
    }

    /// Every read-only tool description must mention its key usage constraints.
    #[test]
    fn readonly_tools_descriptions_include_usage_constraints() {
        let tools = AgentRouter::build_readonly_tools();

        let expectations = [
            ("query_sqlite", ["SELECT", "read-only"]),
            ("read_file", ["sandbox", "full text"]),
            ("list_directory", ["directory", "extension"]),
            ("search_in_file", ["case-insensitive", "line"]),
        ];

        for (tool_name, needles) in expectations {
            let tool = tools
                .iter()
                .find(|t| t.function.name == tool_name)
                .unwrap();
            for needle in needles {
                assert!(tool.function.description.contains(needle));
            }
        }
    }

    /// Reading an oversized file whose byte limit lands inside a multi-byte
    /// character must produce a truncated result rather than panic.
    #[tokio::test]
    async fn read_file_truncation_handles_utf8_boundaries_without_panicking() {
        let dir = tempfile::tempdir().unwrap();
        // One ASCII byte followed by 3-byte characters puts the 120_000-byte
        // cut in the middle of a character.
        let content = format!("a{}", "你".repeat(40_100));
        let mut file = std::fs::File::create(dir.path().join("utf8.cbl")).unwrap();
        file.write_all(content.as_bytes()).unwrap();

        let tc = tool_call("1", "read_file", serde_json::json!({ "path": "utf8.cbl" }));
        let (tx, _rx) = tokio::sync::mpsc::unbounded_channel();

        let outcome = AgentRouter::execute_readonly_tool(&tc, dir.path(), &tx).await;
        assert!(outcome.is_ok());

        let raw = outcome.unwrap();
        let parsed: serde_json::Value = serde_json::from_str(&raw).unwrap();
        let body = parsed["content"].as_str().unwrap_or("");
        assert!(body.contains("[truncated:"), "tool result: {}", raw);
    }

    /// Querying a freshly created (empty) index must point the user at `/init`.
    #[tokio::test]
    async fn query_sqlite_returns_init_guidance_when_index_is_empty() {
        let dir = tempfile::tempdir().unwrap();
        let tc = tool_call(
            "2",
            "query_sqlite",
            serde_json::json!({ "sql": "SELECT * FROM files" }),
        );
        let (tx, _rx) = tokio::sync::mpsc::unbounded_channel();

        let reply = AgentRouter::execute_readonly_tool(&tc, dir.path(), &tx)
            .await
            .unwrap();
        assert!(reply.contains("/init"));
        assert!(reply.contains("index"));
    }
}