cobolx 1.0.3 → 1.0.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,8 @@
1
1
  use crate::cobol::copybook::{build_copybook_index, resolve_copybook};
2
2
  use crate::cobol::data_parser::collect_data_items;
3
3
  use crate::cobol::layout::compute_physical_layout;
4
+ use crate::cobol::lexer::clean_name;
5
+ use crate::cobol::model::ParsedCodeBlock;
4
6
  pub use crate::cobol::model::{
5
7
  CallKind, CallSummary, CopybookSummary, IndexReport, ProgramSummary,
6
8
  };
@@ -8,8 +10,9 @@ use crate::cobol::scanner::{CobolFileType, scan_sandbox};
8
10
  use crate::cobol::source_parser::parse_source_file;
9
11
  use crate::memory::MemoryStore;
10
12
  use rusqlite::params;
11
- use std::collections::HashMap;
13
+ use std::collections::{HashMap, HashSet};
12
14
  use std::error::Error;
15
+ use std::hash::Hash;
13
16
  use std::path::Path;
14
17
  use std::time::UNIX_EPOCH;
15
18
 
@@ -32,6 +35,12 @@ pub fn index_sandbox(root: &Path, store: &mut MemoryStore) -> IndexResult<IndexR
32
35
  let tx = store.connection_mut().transaction()?;
33
36
  tx.execute_batch(
34
37
  r#"
38
+ DELETE FROM external_ops;
39
+ DELETE FROM code_blocks;
40
+ DELETE FROM literals;
41
+ DELETE FROM identifiers;
42
+ DELETE FROM program_features;
43
+ DELETE FROM copybook_features;
35
44
  DELETE FROM call_edges;
36
45
  DELETE FROM copybook_uses;
37
46
  DELETE FROM data_items;
@@ -104,6 +113,25 @@ pub fn index_sandbox(root: &Path, store: &mut MemoryStore) -> IndexResult<IndexR
104
113
  let mut dynamic_calls = 0;
105
114
  let mut data_items = 0usize;
106
115
 
116
+ let mut incoming_calls = HashMap::<i64, usize>::new();
117
+ let mut outgoing_calls = HashMap::<i64, usize>::new();
118
+ let mut static_calls_by_program = HashMap::<i64, usize>::new();
119
+ let mut dynamic_calls_by_program = HashMap::<i64, usize>::new();
120
+ let mut copybook_use_count_by_program = HashMap::<i64, usize>::new();
121
+ let mut distinct_copybooks_by_program = HashMap::<i64, HashSet<i64>>::new();
122
+ let mut referenced_by_files = HashMap::<i64, HashSet<i64>>::new();
123
+ let mut data_item_count_by_program = HashMap::<i64, usize>::new();
124
+ let mut paragraph_count_by_program = HashMap::<i64, usize>::new();
125
+ let mut external_op_count_by_program = HashMap::<i64, usize>::new();
126
+ let mut identifier_count_by_program = HashMap::<i64, usize>::new();
127
+ let mut literal_count_by_program = HashMap::<i64, usize>::new();
128
+ let mut copybook_programs = HashMap::<i64, HashSet<i64>>::new();
129
+ let mut copybook_files = HashMap::<i64, HashSet<i64>>::new();
130
+ let mut copybook_replacing_counts = HashMap::<i64, usize>::new();
131
+ let mut copybook_item_names = HashMap::<i64, Vec<String>>::new();
132
+ let mut identifier_mentions = HashMap::<MentionKey, MentionAggregate>::new();
133
+ let mut literal_mentions = HashMap::<MentionKey, MentionAggregate>::new();
134
+
107
135
  for file in &parsed {
108
136
  let Some(from_file_id) = file_ids.get(&file.path).copied() else {
109
137
  continue;
@@ -137,14 +165,35 @@ pub fn index_sandbox(root: &Path, store: &mut MemoryStore) -> IndexResult<IndexR
137
165
  ],
138
166
  )?;
139
167
 
140
- if let Some(program_name) = default_program {
141
- if let Some(summary) = report_programs.get_mut(program_name) {
168
+ let caller_name = copy.caller_name.as_deref().or(default_program);
169
+ if let Some(caller_name) = caller_name {
170
+ if let Some(summary) = report_programs.get_mut(caller_name) {
142
171
  summary.copybooks.push(CopybookSummary {
143
172
  name: copy.name.clone(),
144
- resolved_path: resolved,
173
+ resolved_path: resolved.clone(),
145
174
  has_replacing: copy.replacing_text.is_some(),
146
175
  });
147
176
  }
177
+ if let Some(program_id) = program_ids.get(caller_name).copied() {
178
+ increment_count(&mut copybook_use_count_by_program, program_id);
179
+ if let Some(copybook_file_id) = resolved_file_id {
180
+ distinct_copybooks_by_program
181
+ .entry(program_id)
182
+ .or_default()
183
+ .insert(copybook_file_id);
184
+ copybook_programs
185
+ .entry(copybook_file_id)
186
+ .or_default()
187
+ .insert(program_id);
188
+ copybook_files
189
+ .entry(copybook_file_id)
190
+ .or_default()
191
+ .insert(from_file_id);
192
+ if copy.replacing_text.is_some() {
193
+ increment_count(&mut copybook_replacing_counts, copybook_file_id);
194
+ }
195
+ }
196
+ }
148
197
  }
149
198
  }
150
199
 
@@ -157,11 +206,12 @@ pub fn index_sandbox(root: &Path, store: &mut MemoryStore) -> IndexResult<IndexR
157
206
  data_items += expanded_items.len();
158
207
 
159
208
  for item in expanded_items {
209
+ let source_file_id = file_ids.get(&item.source_path).copied();
160
210
  tx.execute(
161
211
  "INSERT INTO data_items(program_id, source_file_id, name, level, parent_name, pic, usage_clause, occurs, redefines, section, byte_offset, byte_size, storage_kind, layout_status, start_offset, byte_len) VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8, ?9, ?10, ?11, ?12, ?13, ?14, ?15, ?16)",
162
212
  params![
163
213
  program_id,
164
- file_ids.get(&item.source_path).copied(),
214
+ source_file_id,
165
215
  item.name,
166
216
  item.level as i64,
167
217
  item.parent_name,
@@ -178,12 +228,122 @@ pub fn index_sandbox(root: &Path, store: &mut MemoryStore) -> IndexResult<IndexR
178
228
  item.byte_len as i64,
179
229
  ],
180
230
  )?;
231
+
232
+ increment_count(&mut data_item_count_by_program, program_id);
233
+ increment_count(&mut identifier_count_by_program, program_id);
234
+ if let Some(source_file_id) = source_file_id {
235
+ aggregate_mention(
236
+ &mut identifier_mentions,
237
+ program_id,
238
+ source_file_id,
239
+ "data_name",
240
+ &item.name,
241
+ item.start_offset,
242
+ );
243
+ if source_file_id != from_file_id {
244
+ copybook_item_names
245
+ .entry(source_file_id)
246
+ .or_default()
247
+ .push(item.name.clone());
248
+ }
249
+ }
181
250
  if let Some(summary) = report_programs.get_mut(program_name) {
182
251
  summary.data_items += 1;
183
252
  }
184
253
  }
185
254
  }
186
255
 
256
+ for code_block in &file.code_blocks {
257
+ let Some(program_id) = resolve_program_id(
258
+ &program_ids,
259
+ code_block.caller_name.as_deref(),
260
+ default_program,
261
+ ) else {
262
+ continue;
263
+ };
264
+ tx.execute(
265
+ "INSERT INTO code_blocks(program_id, source_file_id, name, kind, parent_section, sequence_no, statement_count, start_offset, byte_len) VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8, ?9)",
266
+ params![
267
+ program_id,
268
+ from_file_id,
269
+ code_block.name,
270
+ code_block.kind.as_str(),
271
+ code_block.parent_section,
272
+ code_block_sequence(code_block, &file.code_blocks) as i64,
273
+ code_block.statement_count as i64,
274
+ code_block.start_offset as i64,
275
+ code_block.byte_len as i64,
276
+ ],
277
+ )?;
278
+ if matches!(
279
+ code_block.kind,
280
+ crate::cobol::model::CodeBlockKind::Paragraph
281
+ ) {
282
+ increment_count(&mut paragraph_count_by_program, program_id);
283
+ }
284
+ }
285
+
286
+ for external_op in &file.external_ops {
287
+ let Some(program_id) = resolve_program_id(
288
+ &program_ids,
289
+ external_op.caller_name.as_deref(),
290
+ default_program,
291
+ ) else {
292
+ continue;
293
+ };
294
+ tx.execute(
295
+ "INSERT INTO external_ops(program_id, source_file_id, kind, verb, target, start_offset, byte_len) VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7)",
296
+ params![
297
+ program_id,
298
+ from_file_id,
299
+ external_op.kind.as_str(),
300
+ external_op.verb,
301
+ external_op.target,
302
+ external_op.start_offset as i64,
303
+ external_op.byte_len as i64,
304
+ ],
305
+ )?;
306
+ increment_count(&mut external_op_count_by_program, program_id);
307
+ }
308
+
309
+ for identifier in &file.identifiers {
310
+ let Some(program_id) = resolve_program_id(
311
+ &program_ids,
312
+ identifier.caller_name.as_deref(),
313
+ default_program,
314
+ ) else {
315
+ continue;
316
+ };
317
+ increment_count(&mut identifier_count_by_program, program_id);
318
+ aggregate_mention(
319
+ &mut identifier_mentions,
320
+ program_id,
321
+ from_file_id,
322
+ &identifier.kind,
323
+ &identifier.value,
324
+ identifier.start_offset,
325
+ );
326
+ }
327
+
328
+ for literal in &file.literals {
329
+ let Some(program_id) = resolve_program_id(
330
+ &program_ids,
331
+ literal.caller_name.as_deref(),
332
+ default_program,
333
+ ) else {
334
+ continue;
335
+ };
336
+ increment_count(&mut literal_count_by_program, program_id);
337
+ aggregate_mention(
338
+ &mut literal_mentions,
339
+ program_id,
340
+ from_file_id,
341
+ &literal.kind,
342
+ &literal.value,
343
+ literal.start_offset,
344
+ );
345
+ }
346
+
187
347
  for call in &file.calls {
188
348
  match call.kind {
189
349
  CallKind::Static => static_calls += 1,
@@ -208,6 +368,26 @@ pub fn index_sandbox(root: &Path, store: &mut MemoryStore) -> IndexResult<IndexR
208
368
  ],
209
369
  )?;
210
370
 
371
+ increment_count(&mut outgoing_calls, caller_program_id);
372
+ match call.kind {
373
+ CallKind::Static => {
374
+ increment_count(&mut static_calls_by_program, caller_program_id)
375
+ }
376
+ CallKind::Dynamic => {
377
+ increment_count(&mut dynamic_calls_by_program, caller_program_id)
378
+ }
379
+ }
380
+
381
+ if matches!(call.kind, CallKind::Static) {
382
+ if let Some(callee_program_id) = program_ids.get(&call.target).copied() {
383
+ increment_count(&mut incoming_calls, callee_program_id);
384
+ referenced_by_files
385
+ .entry(callee_program_id)
386
+ .or_default()
387
+ .insert(from_file_id);
388
+ }
389
+ }
390
+
211
391
  if let Some(summary) = report_programs.get_mut(caller_name) {
212
392
  summary.calls.push(CallSummary {
213
393
  target: call.target.clone(),
@@ -218,6 +398,114 @@ pub fn index_sandbox(root: &Path, store: &mut MemoryStore) -> IndexResult<IndexR
218
398
  }
219
399
  }
220
400
 
401
+ for (key, aggregate) in identifier_mentions {
402
+ tx.execute(
403
+ "INSERT INTO identifiers(program_id, source_file_id, kind, value, occurrences, first_offset) VALUES (?1, ?2, ?3, ?4, ?5, ?6)",
404
+ params![
405
+ key.program_id,
406
+ key.source_file_id,
407
+ key.kind,
408
+ key.value,
409
+ aggregate.occurrences as i64,
410
+ aggregate.first_offset as i64,
411
+ ],
412
+ )?;
413
+ }
414
+
415
+ for (key, aggregate) in literal_mentions {
416
+ tx.execute(
417
+ "INSERT INTO literals(program_id, source_file_id, kind, value, occurrences, first_offset) VALUES (?1, ?2, ?3, ?4, ?5, ?6)",
418
+ params![
419
+ key.program_id,
420
+ key.source_file_id,
421
+ key.kind,
422
+ key.value,
423
+ aggregate.occurrences as i64,
424
+ aggregate.first_offset as i64,
425
+ ],
426
+ )?;
427
+ }
428
+
429
+ for file in files
430
+ .iter()
431
+ .filter(|f| f.file_type == CobolFileType::Copybook)
432
+ {
433
+ let Some(copybook_file_id) = file_ids.get(&file.path).copied() else {
434
+ continue;
435
+ };
436
+ let copybook_name = file
437
+ .path
438
+ .file_stem()
439
+ .and_then(|s| s.to_str())
440
+ .map(clean_name)
441
+ .filter(|s| !s.is_empty())
442
+ .unwrap_or_else(|| relative_path(root, &file.path).to_ascii_uppercase());
443
+ let item_names = copybook_item_names
444
+ .remove(&copybook_file_id)
445
+ .unwrap_or_default();
446
+ let contains_header_fields = item_names.iter().any(|name| looks_like_header_field(name));
447
+ let contains_error_fields = item_names.iter().any(|name| looks_like_error_field(name));
448
+ tx.execute(
449
+ "INSERT INTO copybook_features(copybook_file_id, copybook_name, used_by_program_count, used_by_file_count, replacing_use_count, data_item_count, contains_header_fields, contains_error_fields) VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8)",
450
+ params![
451
+ copybook_file_id,
452
+ copybook_name,
453
+ copybook_programs
454
+ .get(&copybook_file_id)
455
+ .map_or(0_i64, |ids| ids.len() as i64),
456
+ copybook_files
457
+ .get(&copybook_file_id)
458
+ .map_or(0_i64, |ids| ids.len() as i64),
459
+ copybook_replacing_counts
460
+ .get(&copybook_file_id)
461
+ .copied()
462
+ .unwrap_or(0) as i64,
463
+ item_names.len() as i64,
464
+ bool_to_int(contains_header_fields),
465
+ bool_to_int(contains_error_fields),
466
+ ],
467
+ )?;
468
+ }
469
+
470
+ for (program_name, path) in &program_file {
471
+ let Some(program_id) = program_ids.get(program_name).copied() else {
472
+ continue;
473
+ };
474
+ let Some(source_file_id) = file_ids.get(path).copied() else {
475
+ continue;
476
+ };
477
+ let incoming = incoming_calls.get(&program_id).copied().unwrap_or(0);
478
+ let copybook_use_count = copybook_use_count_by_program
479
+ .get(&program_id)
480
+ .copied()
481
+ .unwrap_or(0);
482
+ tx.execute(
483
+ "INSERT INTO program_features(program_id, source_file_id, incoming_call_count, outgoing_call_count, static_call_count, dynamic_call_count, copybook_use_count, distinct_copybook_count, referenced_by_file_count, is_entrypoint, has_heavy_copy_usage, data_item_count, paragraph_count, external_op_count, identifier_count, literal_count) VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8, ?9, ?10, ?11, ?12, ?13, ?14, ?15, ?16)",
484
+ params![
485
+ program_id,
486
+ source_file_id,
487
+ incoming as i64,
488
+ outgoing_calls.get(&program_id).copied().unwrap_or(0) as i64,
489
+ static_calls_by_program.get(&program_id).copied().unwrap_or(0) as i64,
490
+ dynamic_calls_by_program.get(&program_id).copied().unwrap_or(0) as i64,
491
+ copybook_use_count as i64,
492
+ distinct_copybooks_by_program
493
+ .get(&program_id)
494
+ .map_or(0_i64, |ids| ids.len() as i64),
495
+ referenced_by_files
496
+ .get(&program_id)
497
+ .map_or(0_i64, |ids| ids.len() as i64),
498
+ bool_to_int(incoming == 0),
499
+ bool_to_int(copybook_use_count >= 3),
500
+ data_item_count_by_program.get(&program_id).copied().unwrap_or(0) as i64,
501
+ paragraph_count_by_program.get(&program_id).copied().unwrap_or(0) as i64,
502
+ external_op_count_by_program.get(&program_id).copied().unwrap_or(0) as i64,
503
+ identifier_count_by_program.get(&program_id).copied().unwrap_or(0) as i64,
504
+ literal_count_by_program.get(&program_id).copied().unwrap_or(0) as i64,
505
+ ],
506
+ )?;
507
+ }
508
+
221
509
  tx.commit()?;
222
510
 
223
511
  let mut programs = report_programs.into_values().collect::<Vec<_>>();
@@ -254,3 +542,85 @@ fn mtime_unix(path: &Path) -> i64 {
254
542
  .map(|d| d.as_secs() as i64)
255
543
  .unwrap_or(0)
256
544
  }
545
+
546
/// Adds one to the counter stored under `key`.
///
/// A key that has not been seen before starts at zero, so its counter is `1`
/// after the first call.
fn increment_count<K>(map: &mut HashMap<K, usize>, key: K)
where
    K: Eq + Hash,
{
    map.entry(key)
        .and_modify(|counter| *counter += 1)
        .or_insert(1);
}
552
+
553
/// Looks up the numeric id of the program an item is attributed to.
///
/// `explicit_name` takes precedence; `default_name` is consulted only when no
/// explicit name was supplied. Returns `None` when neither name is given or
/// when the chosen name has no entry in `program_ids`. An explicit name that
/// is missing from the map does NOT fall back to the default.
fn resolve_program_id(
    program_ids: &HashMap<String, i64>,
    explicit_name: Option<&str>,
    default_name: Option<&str>,
) -> Option<i64> {
    let chosen = match explicit_name {
        Some(name) => name,
        None => default_name?,
    };
    program_ids.get(chosen).copied()
}
562
+
563
+ fn aggregate_mention(
564
+ map: &mut HashMap<MentionKey, MentionAggregate>,
565
+ program_id: i64,
566
+ source_file_id: i64,
567
+ kind: &str,
568
+ value: &str,
569
+ start_offset: usize,
570
+ ) {
571
+ let key = MentionKey {
572
+ program_id,
573
+ source_file_id,
574
+ kind: kind.to_string(),
575
+ value: value.to_string(),
576
+ };
577
+ let entry = map.entry(key).or_insert(MentionAggregate {
578
+ occurrences: 0,
579
+ first_offset: start_offset,
580
+ });
581
+ entry.occurrences += 1;
582
+ entry.first_offset = entry.first_offset.min(start_offset);
583
+ }
584
+
585
+ fn code_block_sequence(code_block: &ParsedCodeBlock, all_blocks: &[ParsedCodeBlock]) -> usize {
586
+ all_blocks
587
+ .iter()
588
+ .position(|candidate| {
589
+ candidate.start_offset == code_block.start_offset
590
+ && candidate.name == code_block.name
591
+ && candidate.kind == code_block.kind
592
+ })
593
+ .map_or(1, |idx| idx + 1)
594
+ }
595
+
596
/// Reports whether a data-item name contains `HEADER` or `HDR`, ignoring
/// ASCII case.
fn looks_like_header_field(name: &str) -> bool {
    const MARKERS: [&str; 2] = ["HEADER", "HDR"];
    let normalized = name.to_ascii_uppercase();
    MARKERS.iter().any(|marker| normalized.contains(*marker))
}
600
+
601
/// Reports whether a data-item name contains one of the error-related
/// markers (`ERROR`, `ERR`, `SQLCODE`, `RETURN-CODE`, `RESP-CODE`), ignoring
/// ASCII case.
///
/// Matching is by plain substring, so any name that merely contains `ERR`
/// (for example `TERRITORY`) is reported as an error field too.
fn looks_like_error_field(name: &str) -> bool {
    const MARKERS: [&str; 5] = ["ERROR", "ERR", "SQLCODE", "RETURN-CODE", "RESP-CODE"];
    let normalized = name.to_ascii_uppercase();
    MARKERS.iter().any(|marker| normalized.contains(*marker))
}
609
+
610
/// Converts a boolean flag to its integer form: `1` for `true`, `0` for
/// `false`.
fn bool_to_int(value: bool) -> i64 {
    // `From<bool> for i64` is the lossless standard conversion and yields
    // exactly 0 or 1.
    i64::from(value)
}
613
+
614
/// Grouping key for aggregated mentions: one map entry exists per distinct
/// `(program_id, source_file_id, kind, value)` combination.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
struct MentionKey {
    /// Id of the program the mention is attributed to.
    program_id: i64,
    /// Id of the file the mention is attributed to.
    source_file_id: i64,
    /// Mention category label (for example `"data_name"`).
    kind: String,
    /// The mentioned text itself.
    value: String,
}
621
+
622
/// Running totals kept for one `MentionKey`.
#[derive(Debug, Clone, Copy)]
struct MentionAggregate {
    /// How many times the mention has been recorded.
    occurrences: usize,
    /// Smallest start offset among the recorded mentions.
    first_offset: usize,
}
@@ -15,6 +15,42 @@ impl CallKind {
15
15
  }
16
16
  }
17
17
 
18
/// Kind of a parsed code block: a section or a paragraph.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum CodeBlockKind {
    Section,
    Paragraph,
}

impl CodeBlockKind {
    /// Returns the lowercase label for this kind (`"section"` or
    /// `"paragraph"`).
    pub(crate) fn as_str(self) -> &'static str {
        match self {
            Self::Section => "section",
            Self::Paragraph => "paragraph",
        }
    }
}
32
+
33
/// Category of an external operation. Each variant has a snake_case label
/// available through `as_str`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ExternalOpKind {
    ExecSql,
    ExecCics,
    FileIo,
    CallLiteral,
    CallIdentifier,
}

impl ExternalOpKind {
    /// Returns the snake_case label for this kind, e.g. `"exec_sql"` for
    /// `ExecSql`.
    pub(crate) fn as_str(self) -> &'static str {
        match self {
            Self::ExecSql => "exec_sql",
            Self::ExecCics => "exec_cics",
            Self::FileIo => "file_io",
            Self::CallLiteral => "call_literal",
            Self::CallIdentifier => "call_identifier",
        }
    }
}
53
+
18
54
  #[derive(Debug, Clone)]
19
55
  pub struct CallSummary {
20
56
  pub target: String,
@@ -138,6 +174,7 @@ pub(crate) struct ParsedProgram {
138
174
 
139
175
  #[derive(Debug)]
140
176
  pub(crate) struct ParsedCopy {
177
+ pub(crate) caller_name: Option<String>,
141
178
  pub(crate) name: String,
142
179
  pub(crate) start_offset: usize,
143
180
  pub(crate) byte_len: usize,
@@ -154,6 +191,43 @@ pub(crate) struct ParsedCall {
154
191
  pub(crate) using_count: usize,
155
192
  }
156
193
 
194
/// One code block (section or paragraph) found while parsing a source file.
#[derive(Debug)]
pub(crate) struct ParsedCodeBlock {
    /// Name of the owning program, if one was recorded for this block.
    pub(crate) caller_name: Option<String>,
    /// Name of the block.
    pub(crate) name: String,
    /// Whether the block is a section or a paragraph.
    pub(crate) kind: CodeBlockKind,
    /// Name of the enclosing section, if any.
    pub(crate) parent_section: Option<String>,
    /// Offset at which the block starts (presumably a byte offset into the
    /// source text — confirm against the parser).
    pub(crate) start_offset: usize,
    /// Length of the block in bytes.
    pub(crate) byte_len: usize,
    /// Number of statements counted in the block.
    pub(crate) statement_count: usize,
}
204
+
205
/// One external operation found while parsing a source file.
#[derive(Debug)]
pub(crate) struct ParsedExternalOp {
    /// Name of the owning program, if one was recorded for this operation.
    pub(crate) caller_name: Option<String>,
    /// Category of the operation.
    pub(crate) kind: ExternalOpKind,
    /// Verb text of the operation.
    pub(crate) verb: String,
    /// Target of the operation, when one is present.
    pub(crate) target: Option<String>,
    /// Offset at which the operation starts (presumably a byte offset into
    /// the source text — confirm against the parser).
    pub(crate) start_offset: usize,
    /// Length of the operation in bytes.
    pub(crate) byte_len: usize,
}
214
+
215
/// One identifier mention found while parsing a source file.
#[derive(Debug)]
pub(crate) struct ParsedIdentifier {
    /// Name of the owning program, if one was recorded for this mention.
    pub(crate) caller_name: Option<String>,
    /// Category label of the identifier.
    pub(crate) kind: String,
    /// The identifier text.
    pub(crate) value: String,
    /// Offset at which the mention starts (presumably a byte offset into the
    /// source text — confirm against the parser).
    pub(crate) start_offset: usize,
}
222
+
223
/// One literal mention found while parsing a source file.
#[derive(Debug)]
pub(crate) struct ParsedLiteral {
    /// Name of the owning program, if one was recorded for this mention.
    pub(crate) caller_name: Option<String>,
    /// Category label of the literal.
    pub(crate) kind: String,
    /// The literal text.
    pub(crate) value: String,
    /// Offset at which the mention starts (presumably a byte offset into the
    /// source text — confirm against the parser).
    pub(crate) start_offset: usize,
}
230
+
157
231
  #[derive(Debug)]
158
232
  pub(crate) struct ParsedDataItem {
159
233
  pub(crate) source_path: PathBuf,
@@ -179,6 +253,10 @@ pub(crate) struct ParsedFile {
179
253
  pub(crate) programs: Vec<ParsedProgram>,
180
254
  pub(crate) copies: Vec<ParsedCopy>,
181
255
  pub(crate) calls: Vec<ParsedCall>,
256
+ pub(crate) code_blocks: Vec<ParsedCodeBlock>,
257
+ pub(crate) external_ops: Vec<ParsedExternalOp>,
258
+ pub(crate) identifiers: Vec<ParsedIdentifier>,
259
+ pub(crate) literals: Vec<ParsedLiteral>,
182
260
  }
183
261
 
184
262
  #[derive(Debug)]
@@ -29,6 +29,8 @@ fn should_exclude_dir(name: &str) -> bool {
29
29
  || name == "node_modules"
30
30
  || name == "vendor"
31
31
  || name == "build"
32
+ || name == "tests"
33
+ || name == "test"
32
34
  }
33
35
 
34
36
  /// Scans entries within `dir`. Does NOT check exclusion on `dir` itself —