cobolx 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/src/agent.rs ADDED
@@ -0,0 +1 @@
1
+ pub mod client;
@@ -0,0 +1,71 @@
1
+ use crate::cobol::scanner::{CobolFileEntry, CobolFileType};
2
+ use std::collections::HashMap;
3
+ use std::path::{Path, PathBuf};
4
+
5
+ pub(crate) fn build_copybook_index(files: &[CobolFileEntry]) -> HashMap<String, Vec<PathBuf>> {
6
+ let mut index = HashMap::<String, Vec<PathBuf>>::new();
7
+ for file in files {
8
+ if file.file_type != CobolFileType::Copybook {
9
+ continue;
10
+ }
11
+ if let Some(stem) = file.path.file_stem().and_then(|s| s.to_str()) {
12
+ index
13
+ .entry(stem.to_ascii_uppercase())
14
+ .or_default()
15
+ .push(file.path.clone());
16
+ }
17
+ }
18
+ for paths in index.values_mut() {
19
+ paths.sort();
20
+ }
21
+ index
22
+ }
23
+
24
+ pub(crate) fn resolve_copybook(
25
+ root: &Path,
26
+ from_file: &Path,
27
+ name: &str,
28
+ copybook_index: &HashMap<String, Vec<PathBuf>>,
29
+ ) -> Option<PathBuf> {
30
+ let mut dirs = Vec::with_capacity(2);
31
+ if let Some(parent) = from_file.parent() {
32
+ dirs.push(parent.to_path_buf());
33
+ }
34
+ dirs.push(root.to_path_buf());
35
+
36
+ for dir in dirs {
37
+ for candidate in candidate_copybook_names(name) {
38
+ let path = dir.join(&candidate);
39
+ if path.is_file() {
40
+ return Some(path);
41
+ }
42
+ if let Some(found) = find_case_insensitive(&dir, &candidate) {
43
+ return Some(found);
44
+ }
45
+ }
46
+ }
47
+
48
+ copybook_index
49
+ .get(&name.to_ascii_uppercase())
50
+ .and_then(|paths| paths.first().cloned())
51
+ }
52
+
53
/// Lists the file names to try for a copybook reference.
///
/// The name itself always comes first. When it has no extension, the same
/// name with `.cpy` appended is added as a second candidate.
fn candidate_copybook_names(name: &str) -> Vec<String> {
    let mut candidates = vec![name.to_owned()];
    if Path::new(name).extension().is_none() {
        candidates.push(format!("{name}.cpy"));
    }
    candidates
}
60
+
61
/// Looks in `dir` for a regular file whose name equals `file_name` when ASCII
/// case is ignored.
///
/// Directories and other non-file entries are skipped, which matches the
/// `is_file()` check used for exact-path lookups. If several files differ
/// only by case, the smallest path is returned so the result does not depend
/// on directory iteration order.
///
/// Returns `None` when `dir` cannot be read or no file matches. Entries that
/// fail to read are ignored.
fn find_case_insensitive(dir: &Path, file_name: &str) -> Option<PathBuf> {
    std::fs::read_dir(dir)
        .ok()?
        .flatten()
        .filter(|entry| {
            entry
                .file_name()
                .to_string_lossy()
                .eq_ignore_ascii_case(file_name)
        })
        .map(|entry| entry.path())
        .filter(|path| path.is_file())
        .min()
}
@@ -0,0 +1,290 @@
1
+ use crate::cobol::copybook::resolve_copybook;
2
+ use crate::cobol::lexer::{clean_name, logical_lines, tokenize};
3
+ use crate::cobol::model::{LogicalLine, ParsedDataItem, Token};
4
+ use std::collections::HashMap;
5
+ use std::path::{Path, PathBuf};
6
+
7
/// Mutable state carried through the data-item parser while it walks a file
/// and the copybooks that file pulls in.
#[derive(Debug, Default)]
struct DataParseState {
    /// Set once a `DATA DIVISION` header line has been seen; nothing in this
    /// module sets it back to `false`.
    in_data_division: bool,
    /// First token of the most recent recognised section header line, if any.
    section: Option<String>,
    /// Stack of `(level, name)` pairs for the items that can act as parent of
    /// the next parsed item; cleared on division and section headers.
    parent_stack: Vec<(u16, String)>,
    /// Data items collected so far, in the order they were parsed.
    items: Vec<ParsedDataItem>,
}
14
+
15
+ pub(crate) fn collect_data_items(
16
+ root: &Path,
17
+ path: &Path,
18
+ copybook_index: &HashMap<String, Vec<PathBuf>>,
19
+ depth: usize,
20
+ ) -> std::io::Result<Vec<ParsedDataItem>> {
21
+ let mut state = DataParseState::default();
22
+ parse_data_file(root, path, copybook_index, depth, false, &mut state)?;
23
+ Ok(state.items)
24
+ }
25
+
26
+ fn parse_data_file(
27
+ root: &Path,
28
+ path: &Path,
29
+ copybook_index: &HashMap<String, Vec<PathBuf>>,
30
+ depth: usize,
31
+ is_copybook: bool,
32
+ state: &mut DataParseState,
33
+ ) -> std::io::Result<()> {
34
+ if depth > 16 {
35
+ return Ok(());
36
+ }
37
+
38
+ let content = std::fs::read_to_string(path)?;
39
+ let mut copybook_local_state;
40
+ let state = if is_copybook && !state.in_data_division {
41
+ copybook_local_state = DataParseState {
42
+ in_data_division: true,
43
+ section: None,
44
+ parent_stack: Vec::new(),
45
+ items: Vec::new(),
46
+ };
47
+ &mut copybook_local_state
48
+ } else {
49
+ state
50
+ };
51
+
52
+ for line in logical_lines(&content) {
53
+ let tokens = tokenize(&line.text);
54
+ if tokens.is_empty() {
55
+ continue;
56
+ }
57
+
58
+ if has_two_tokens(&tokens, "DATA", "DIVISION") {
59
+ state.in_data_division = true;
60
+ state.parent_stack.clear();
61
+ continue;
62
+ }
63
+ if has_two_tokens(&tokens, "PROCEDURE", "DIVISION") {
64
+ if !is_copybook {
65
+ break;
66
+ }
67
+ continue;
68
+ }
69
+ if !state.in_data_division && !is_copybook {
70
+ continue;
71
+ }
72
+ if is_section_line(&tokens) {
73
+ state.section = Some(tokens[0].text.clone());
74
+ state.parent_stack.clear();
75
+ continue;
76
+ }
77
+ if tokens[0].text == "COPY" {
78
+ if let Some(copy_name) = tokens.get(1).map(|t| clean_name(&t.text)) {
79
+ if let Some(copy_path) = resolve_copybook(root, path, &copy_name, copybook_index) {
80
+ parse_data_file(root, &copy_path, copybook_index, depth + 1, true, state)?;
81
+ }
82
+ }
83
+ continue;
84
+ }
85
+
86
+ if let Some(item) = parse_data_item_line(path, &line, &tokens, state) {
87
+ state.items.push(item);
88
+ }
89
+ }
90
+
91
+ Ok(())
92
+ }
93
+
94
+ fn parse_data_item_line(
95
+ path: &Path,
96
+ line: &LogicalLine,
97
+ tokens: &[Token],
98
+ state: &mut DataParseState,
99
+ ) -> Option<ParsedDataItem> {
100
+ let level = tokens.first()?.text.parse::<u16>().ok()?;
101
+ if !is_data_level(level) {
102
+ return None;
103
+ }
104
+ let name = tokens.get(1).map(|t| clean_name(&t.text))?;
105
+ if name.is_empty() {
106
+ return None;
107
+ }
108
+
109
+ while state
110
+ .parent_stack
111
+ .last()
112
+ .is_some_and(|(parent_level, _)| *parent_level >= level)
113
+ {
114
+ state.parent_stack.pop();
115
+ }
116
+ let parent_name = if matches!(level, 1 | 66 | 77) {
117
+ None
118
+ } else {
119
+ state.parent_stack.last().map(|(_, name)| name.clone())
120
+ };
121
+
122
+ let pic = extract_clause_text(
123
+ &line.text,
124
+ tokens,
125
+ &["PIC", "PICTURE"],
126
+ &[
127
+ "USAGE",
128
+ "OCCURS",
129
+ "REDEFINES",
130
+ "VALUE",
131
+ "VALUES",
132
+ "SIGN",
133
+ "SYNC",
134
+ "SYNCHRONIZED",
135
+ "JUST",
136
+ "JUSTIFIED",
137
+ "DISPLAY",
138
+ "BINARY",
139
+ "COMP",
140
+ "COMP-1",
141
+ "COMP-2",
142
+ "COMP-3",
143
+ "COMP-4",
144
+ "COMP-5",
145
+ "COMPUTATIONAL",
146
+ "COMPUTATIONAL-1",
147
+ "COMPUTATIONAL-2",
148
+ "COMPUTATIONAL-3",
149
+ "COMPUTATIONAL-4",
150
+ "COMPUTATIONAL-5",
151
+ "PACKED-DECIMAL",
152
+ "INDEX",
153
+ "POINTER",
154
+ "NATIONAL",
155
+ ],
156
+ );
157
+ let usage_clause = extract_usage_clause(&line.text, tokens);
158
+ let occurs = extract_occurs(tokens);
159
+ let redefines = extract_next_name(tokens, "REDEFINES");
160
+
161
+ if !matches!(level, 66 | 88) {
162
+ state.parent_stack.push((level, name.clone()));
163
+ }
164
+
165
+ Some(ParsedDataItem {
166
+ source_path: path.to_path_buf(),
167
+ name,
168
+ level,
169
+ parent_name,
170
+ pic,
171
+ usage_clause,
172
+ occurs,
173
+ redefines,
174
+ section: state.section.clone(),
175
+ byte_offset: None,
176
+ byte_size: None,
177
+ storage_kind: None,
178
+ layout_status: None,
179
+ start_offset: line.start_offset + tokens[0].start,
180
+ byte_len: line.byte_len,
181
+ })
182
+ }
183
+
184
/// Returns `true` for level numbers accepted as data description entries:
/// 1 through 49, plus 66, 77 and 88.
fn is_data_level(level: u16) -> bool {
    matches!(level, 1..=49 | 66 | 77 | 88)
}
187
+
188
+ fn has_two_tokens(tokens: &[Token], first: &str, second: &str) -> bool {
189
+ tokens.len() >= 2 && tokens[0].text == first && tokens[1].text == second
190
+ }
191
+
192
+ fn is_section_line(tokens: &[Token]) -> bool {
193
+ tokens.len() >= 2
194
+ && tokens[1].text == "SECTION"
195
+ && matches!(
196
+ tokens[0].text.as_str(),
197
+ "FILE" | "WORKING-STORAGE" | "LOCAL-STORAGE" | "LINKAGE"
198
+ )
199
+ }
200
+
201
+ fn extract_clause_text(
202
+ line: &str,
203
+ tokens: &[Token],
204
+ names: &[&str],
205
+ stop_keywords: &[&str],
206
+ ) -> Option<String> {
207
+ let idx = tokens
208
+ .iter()
209
+ .position(|t| names.iter().any(|name| t.text == *name))?;
210
+ let mut start_idx = idx + 1;
211
+ if tokens.get(start_idx).is_some_and(|t| t.text == "IS") {
212
+ start_idx += 1;
213
+ }
214
+ let start = tokens.get(start_idx)?.start;
215
+ let end = tokens[start_idx..]
216
+ .iter()
217
+ .find(|t| stop_keywords.iter().any(|keyword| t.text == *keyword))
218
+ .map(|t| t.start)
219
+ .unwrap_or_else(|| line.len());
220
+ Some(
221
+ line[start..end]
222
+ .trim()
223
+ .trim_end_matches('.')
224
+ .trim()
225
+ .to_ascii_uppercase(),
226
+ )
227
+ .filter(|s| !s.is_empty())
228
+ }
229
+
230
+ fn extract_usage_clause(line: &str, tokens: &[Token]) -> Option<String> {
231
+ let explicit = extract_clause_text(
232
+ line,
233
+ tokens,
234
+ &["USAGE"],
235
+ &[
236
+ "OCCURS",
237
+ "REDEFINES",
238
+ "VALUE",
239
+ "VALUES",
240
+ "SIGN",
241
+ "SYNC",
242
+ "SYNCHRONIZED",
243
+ "JUST",
244
+ "JUSTIFIED",
245
+ ],
246
+ );
247
+ if explicit.is_some() {
248
+ return explicit;
249
+ }
250
+
251
+ tokens
252
+ .iter()
253
+ .find(|t| {
254
+ matches!(
255
+ t.text.as_str(),
256
+ "DISPLAY"
257
+ | "BINARY"
258
+ | "COMP"
259
+ | "COMP-1"
260
+ | "COMP-2"
261
+ | "COMP-3"
262
+ | "COMP-4"
263
+ | "COMP-5"
264
+ | "COMPUTATIONAL"
265
+ | "COMPUTATIONAL-1"
266
+ | "COMPUTATIONAL-2"
267
+ | "COMPUTATIONAL-3"
268
+ | "COMPUTATIONAL-4"
269
+ | "COMPUTATIONAL-5"
270
+ | "PACKED-DECIMAL"
271
+ | "INDEX"
272
+ | "POINTER"
273
+ | "NATIONAL"
274
+ )
275
+ })
276
+ .map(|t| t.text.clone())
277
+ }
278
+
279
+ fn extract_occurs(tokens: &[Token]) -> Option<i64> {
280
+ let idx = tokens.iter().position(|t| t.text == "OCCURS")?;
281
+ tokens
282
+ .iter()
283
+ .skip(idx + 1)
284
+ .find_map(|t| t.text.parse::<i64>().ok())
285
+ }
286
+
287
+ fn extract_next_name(tokens: &[Token], keyword: &str) -> Option<String> {
288
+ let idx = tokens.iter().position(|t| t.text == keyword)?;
289
+ tokens.get(idx + 1).map(|t| clean_name(&t.text))
290
+ }
@@ -0,0 +1,256 @@
1
+ use crate::cobol::copybook::{build_copybook_index, resolve_copybook};
2
+ use crate::cobol::data_parser::collect_data_items;
3
+ use crate::cobol::layout::compute_physical_layout;
4
+ pub use crate::cobol::model::{
5
+ CallKind, CallSummary, CopybookSummary, IndexReport, ProgramSummary,
6
+ };
7
+ use crate::cobol::scanner::{CobolFileType, scan_sandbox};
8
+ use crate::cobol::source_parser::parse_source_file;
9
+ use crate::memory::MemoryStore;
10
+ use rusqlite::params;
11
+ use std::collections::HashMap;
12
+ use std::error::Error;
13
+ use std::path::Path;
14
+ use std::time::UNIX_EPOCH;
15
+
16
/// Result type used by the indexer: any boxed `Send + Sync` error can be
/// returned, so errors from different sources propagate through `?`.
type IndexResult<T> = Result<T, Box<dyn Error + Send + Sync>>;
17
+
18
/// Scans `root`, parses every source file found there and rewrites the index
/// tables in `store` from scratch.
///
/// Steps, in order:
/// 1. scan the tree and parse all source files (before any database work);
/// 2. open a transaction and delete all rows from `call_edges`,
///    `copybook_uses`, `data_items`, `programs` and `files`;
/// 3. insert one `files` row per scanned file and one `programs` row per
///    parsed program;
/// 4. for each parsed file, record its copybook uses, the data items of its
///    first program, and its call edges;
/// 5. commit and return an [`IndexReport`] with the counts and per-program
///    summaries.
///
/// # Errors
///
/// Propagates, via `?`, errors from scanning, source parsing, data-item
/// collection, and every SQL statement including the final commit.
pub fn index_sandbox(root: &Path, store: &mut MemoryStore) -> IndexResult<IndexReport> {
    let files = scan_sandbox(root)?;
    let source_count = files
        .iter()
        .filter(|f| f.file_type == CobolFileType::Source)
        .count();
    // Every scanned file that is not a source is counted as a copybook.
    let copybook_count = files.len() - source_count;

    // Parse all sources up front; the first parse error aborts the whole run
    // before the transaction is opened.
    let parsed = files
        .iter()
        .filter(|f| f.file_type == CobolFileType::Source)
        .map(|f| parse_source_file(&f.path))
        .collect::<Result<Vec<_>, _>>()?;

    // All deletes and inserts below go through `tx` and are committed together
    // by the `tx.commit()` call near the end of the function.
    let tx = store.connection_mut().transaction()?;
    tx.execute_batch(
        r#"
        DELETE FROM call_edges;
        DELETE FROM copybook_uses;
        DELETE FROM data_items;
        DELETE FROM programs;
        DELETE FROM files;
        "#,
    )?;

    // Maps each scanned path to the rowid of its `files` row.
    let mut file_ids = HashMap::with_capacity(files.len());
    for file in &files {
        let rel = relative_path(root, &file.path);
        let kind = match file.file_type {
            CobolFileType::Source => "source",
            CobolFileType::Copybook => "copybook",
        };
        tx.execute(
            "INSERT INTO files(path, kind, size_bytes, mtime_unix, sha256) VALUES (?1, ?2, ?3, ?4, ?5)",
            params![
                rel,
                kind,
                file.size_bytes as i64,
                mtime_unix(&file.path),
                // No hash is computed here; the sha256 column is bound to a `None` value.
                Option::<Vec<u8>>::None
            ],
        )?;
        file_ids.insert(file.path.clone(), tx.last_insert_rowid());
    }

    // Both maps are keyed by program name, so a later program with the same
    // name replaces the earlier entry.
    let mut program_ids = HashMap::new();
    let mut program_file = HashMap::new();
    for file in &parsed {
        let Some(file_id) = file_ids.get(&file.path).copied() else {
            continue;
        };
        for program in &file.programs {
            tx.execute(
                "INSERT INTO programs(name, file_id, start_offset, byte_len) VALUES (?1, ?2, ?3, ?4)",
                params![
                    program.name,
                    file_id,
                    program.start_offset as i64,
                    program.byte_len as i64
                ],
            )?;
            let id = tx.last_insert_rowid();
            program_ids.insert(program.name.clone(), id);
            program_file.insert(program.name.clone(), file.path.clone());
        }
    }

    // Start every program's summary empty; the loop below fills it in.
    let mut report_programs = HashMap::<String, ProgramSummary>::new();
    for (name, path) in &program_file {
        report_programs.insert(
            name.clone(),
            ProgramSummary {
                name: name.clone(),
                path: path.clone(),
                copybooks: Vec::new(),
                calls: Vec::new(),
                data_items: 0,
            },
        );
    }

    let copybook_index = build_copybook_index(&files);
    let mut copybook_uses = 0;
    let mut resolved_copybooks = 0;
    let mut unresolved_copybooks = Vec::new();
    let mut static_calls = 0;
    let mut dynamic_calls = 0;
    let mut data_items = 0usize;

    for file in &parsed {
        let Some(from_file_id) = file_ids.get(&file.path).copied() else {
            continue;
        };
        // Copybook uses and data items are attributed to the first program of the file.
        let default_program = file.programs.first().map(|p| p.name.as_str());

        for copy in &file.copies {
            copybook_uses += 1;
            let resolved = resolve_copybook(root, &file.path, &copy.name, &copybook_index);
            if resolved.is_some() {
                resolved_copybooks += 1;
            } else {
                unresolved_copybooks.push(format!(
                    "{} from {}",
                    copy.name,
                    relative_path(root, &file.path)
                ));
            }

            // The stored status depends on whether the resolved path has a
            // `files` row, not only on whether resolution returned a path; a
            // resolved path missing from `file_ids` is stored as "missing".
            let resolved_file_id = resolved.as_ref().and_then(|p| file_ids.get(p).copied());
            tx.execute(
                "INSERT INTO copybook_uses(from_file_id, copybook_name, start_offset, byte_len, resolved_file_id, resolve_status, replacing_text) VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7)",
                params![
                    from_file_id,
                    copy.name,
                    copy.start_offset as i64,
                    copy.byte_len as i64,
                    resolved_file_id,
                    if resolved_file_id.is_some() { "resolved" } else { "missing" },
                    copy.replacing_text,
                ],
            )?;

            if let Some(program_name) = default_program {
                if let Some(summary) = report_programs.get_mut(program_name) {
                    summary.copybooks.push(CopybookSummary {
                        name: copy.name.clone(),
                        resolved_path: resolved,
                        has_replacing: copy.replacing_text.is_some(),
                    });
                }
            }
        }

        if let Some(program_name) = default_program {
            // NOTE: this `continue` skips the rest of the file, including its
            // call edges, when the first program has no recorded id.
            let Some(program_id) = program_ids.get(program_name).copied() else {
                continue;
            };
            let mut expanded_items = collect_data_items(root, &file.path, &copybook_index, 0)?;
            compute_physical_layout(&mut expanded_items);
            data_items += expanded_items.len();

            for item in expanded_items {
                tx.execute(
                    "INSERT INTO data_items(program_id, source_file_id, name, level, parent_name, pic, usage_clause, occurs, redefines, section, byte_offset, byte_size, storage_kind, layout_status, start_offset, byte_len) VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8, ?9, ?10, ?11, ?12, ?13, ?14, ?15, ?16)",
                    params![
                        program_id,
                        // `None` when the item's source path has no `files` row.
                        file_ids.get(&item.source_path).copied(),
                        item.name,
                        item.level as i64,
                        item.parent_name,
                        item.pic,
                        item.usage_clause,
                        item.occurs,
                        item.redefines,
                        item.section,
                        item.byte_offset,
                        item.byte_size,
                        item.storage_kind,
                        item.layout_status,
                        item.start_offset as i64,
                        item.byte_len as i64,
                    ],
                )?;
                if let Some(summary) = report_programs.get_mut(program_name) {
                    summary.data_items += 1;
                }
            }
        }

        for call in &file.calls {
            // Calls are counted even when no caller can be determined below.
            match call.kind {
                CallKind::Static => static_calls += 1,
                CallKind::Dynamic => dynamic_calls += 1,
            }
            // Fall back to the file's first program when the call has no caller name.
            let caller_name = call.caller_name.as_deref().or(default_program);
            let Some(caller_name) = caller_name else {
                continue;
            };
            let Some(caller_program_id) = program_ids.get(caller_name).copied() else {
                continue;
            };
            tx.execute(
                "INSERT INTO call_edges(caller_program_id, callee_name, start_offset, byte_len, kind, using_count) VALUES (?1, ?2, ?3, ?4, ?5, ?6)",
                params![
                    caller_program_id,
                    call.target,
                    call.start_offset as i64,
                    call.byte_len as i64,
                    call.kind.as_str(),
                    call.using_count as i64,
                ],
            )?;

            if let Some(summary) = report_programs.get_mut(caller_name) {
                summary.calls.push(CallSummary {
                    target: call.target.clone(),
                    kind: call.kind,
                    using_count: call.using_count,
                });
            }
        }
    }

    tx.commit()?;

    // Sort for a stable report: programs by name, unresolved entries
    // alphabetically with duplicates removed.
    let mut programs = report_programs.into_values().collect::<Vec<_>>();
    programs.sort_by(|a, b| a.name.cmp(&b.name));
    unresolved_copybooks.sort();
    unresolved_copybooks.dedup();

    Ok(IndexReport {
        files,
        source_count,
        copybook_count,
        programs,
        copybook_uses,
        resolved_copybooks,
        unresolved_copybooks,
        static_calls,
        dynamic_calls,
        data_items,
    })
}
241
+
242
/// Renders `path` relative to `root` with forward slashes.
///
/// When `path` is not located under `root` it is rendered unchanged. Every
/// backslash in the rendered text is replaced with `/`.
fn relative_path(root: &Path, path: &Path) -> String {
    let shown = match path.strip_prefix(root) {
        Ok(rest) => rest,
        Err(_) => path,
    };
    shown.to_string_lossy().replace('\\', "/")
}
248
+
249
/// Returns the modification time of `path` as whole seconds since the Unix
/// epoch.
///
/// Falls back to `0` when the metadata cannot be read, the modification time
/// is unavailable, or the timestamp lies before the epoch.
fn mtime_unix(path: &Path) -> i64 {
    let Ok(meta) = std::fs::metadata(path) else {
        return 0;
    };
    let Ok(modified) = meta.modified() else {
        return 0;
    };
    match modified.duration_since(UNIX_EPOCH) {
        Ok(elapsed) => elapsed.as_secs() as i64,
        Err(_) => 0,
    }
}