sigmap 6.10.7 → 6.10.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,176 @@
1
+ 'use strict';
2
+
3
+ /**
4
+ * Parsers for R package manifest files (DESCRIPTION + NAMESPACE).
5
+ *
6
+ * Zero-dependency, regex/line-based. Both readers are safe on missing files
7
+ * (return null) and on malformed input (return whatever could be parsed).
8
+ *
9
+ * Exports:
10
+ * readDescription(cwd) → { package, version, imports[], depends[], suggests[], linkingTo[] } | null
11
+ * readNamespace(cwd) → { exports: Set, exportPatterns: RegExp[], s3methods: [{generic,class}], importFrom: Map<pkg, Set<name>> } | null
12
+ * collectLocalDefs(rFiles) → Map<defName, absPath>
13
+ */
14
+
15
+ const fs = require('fs');
16
+ const path = require('path');
17
+
18
/**
 * Parse a DESCRIPTION file (Debian control format).
 *
 * Continuation lines start with whitespace and are appended to the previous
 * field, separated by a single space. Comma-separated dependency lists are
 * split and version constraints `pkg (>= 1.0.0)` are stripped to just `pkg`.
 *
 * Both LF and CRLF line endings are accepted and a leading UTF-8 byte-order
 * mark is ignored, so files checked out on Windows parse the same way as
 * their Unix counterparts.
 *
 * @param {string} cwd - project root containing DESCRIPTION
 * @returns {{package: (string|null), version: (string|null), imports: string[],
 *   depends: string[], suggests: string[], linkingTo: string[]}|null}
 *   the parsed manifest, or null when the file cannot be read
 */
function readDescription(cwd) {
  const p = path.join(cwd, 'DESCRIPTION');
  let raw;
  // A missing or unreadable file is an expected condition: report it as null.
  try { raw = fs.readFileSync(p, 'utf8'); } catch (_) { return null; }

  // Null-prototype object: field names come from the file, not from code.
  const fields = Object.create(null);
  let currentKey = null;
  // Split on \r?\n: with a bare '\n' split a CRLF file leaves '\r' at the end
  // of every line, which `(.*)$` below can never match.
  for (const rawLine of raw.replace(/^\uFEFF/, '').split(/\r?\n/)) {
    if (rawLine.trim() === '') {
      // Blank (or whitespace-only) lines end the current field.
      currentKey = null;
      continue;
    }
    if (/^\s/.test(rawLine)) {
      // Continuation: append (with a space) to the current field, if any.
      if (currentKey) fields[currentKey] += ' ' + rawLine.trim();
      continue;
    }
    const m = rawLine.match(/^([A-Za-z][\w.]*)\s*:\s*(.*)$/);
    if (m) {
      currentKey = m[1];
      fields[currentKey] = m[2].trim();
    } else {
      currentKey = null;
    }
  }

  return {
    package: fields.Package || null,
    version: fields.Version || null,
    imports: splitDeps(fields.Imports),
    depends: splitDeps(fields.Depends),
    suggests: splitDeps(fields.Suggests),
    linkingTo: splitDeps(fields.LinkingTo),
  };
}

/**
 * Split a DESCRIPTION dep list like
 *   "dplyr (>= 1.0.0), ggplot2, R (>= 4.0)"
 * into a clean array of package names, dropping `R` itself.
 *
 * @param {string|undefined} value - raw field value; falsy yields []
 * @returns {string[]}
 */
function splitDeps(value) {
  if (!value) return [];
  return value.split(',')
    .map((s) => s.trim().replace(/\s*\([^)]*\)\s*$/, '').trim())
    .filter((s) => s && s !== 'R');
}
72
+
73
/**
 * JavaScript character-range equivalents for the POSIX bracket classes that
 * appear in R `exportPattern()` regexes (e.g. `"^[[:alpha:]]+"`). JavaScript
 * has no `[:name:]` syntax, so these are substituted before compiling.
 */
const POSIX_CLASS_RANGES = new Map([
  ['alpha', 'A-Za-z'],
  ['digit', '0-9'],
  ['alnum', 'A-Za-z0-9'],
  ['upper', 'A-Z'],
  ['lower', 'a-z'],
  ['xdigit', 'A-Fa-f0-9'],
  ['space', '\\s'],
]);

/**
 * Convert the text between the quotes of an R regex string literal into a
 * pattern JavaScript's RegExp understands: decode the doubled backslash of
 * the R string escape and replace known POSIX bracket classes. Unknown
 * classes are left untouched.
 *
 * @param {string} rLiteral - raw characters between the quotes
 * @returns {string}
 */
function toJsPattern(rLiteral) {
  return rLiteral
    .replace(/\\\\/g, '\\')
    .replace(/\[:([a-z]+):\]/g, (whole, cls) => POSIX_CLASS_RANGES.get(cls) || whole);
}

/**
 * Parse a NAMESPACE file.
 *
 * Recognised directives (R writes NAMESPACE in Lisp-y notation):
 *   export(name) / export("name")
 *   exportPattern("^foo")
 *   exportMethods(generic1, generic2)
 *   S3method(generic, class) / S3method("[", class) / S3method(generic, class, fn)
 *   importFrom(pkg, name1, name2, ...) / importFrom("pkg", ...)
 * Other directives such as import(pkg) and useDynLib(pkg) are not collected.
 *
 * Arguments may be bare, or quoted with ", ' or backticks. Export patterns
 * that still fail to compile as a JavaScript RegExp are skipped.
 *
 * @param {string} cwd - project root containing NAMESPACE
 * @returns {{exports: Set<string>, exportPatterns: RegExp[],
 *   s3methods: Array<{generic: string, class: string}>,
 *   importFrom: Map<string, Set<string>>}|null}
 *   the parsed directives, or null when the file cannot be read
 */
function readNamespace(cwd) {
  const p = path.join(cwd, 'NAMESPACE');
  let raw;
  // A missing or unreadable file is an expected condition: report it as null.
  try { raw = fs.readFileSync(p, 'utf8'); } catch (_) { return null; }

  // Strip comments.
  const text = raw.replace(/#.*$/gm, '');

  const exports = new Set();
  const exportPatterns = [];
  const s3methods = [];
  const importFrom = new Map();

  const addExports = (argList) => {
    for (const name of splitArgs(argList)) {
      const clean = stripQuotes(name);
      if (clean) exports.add(clean);
    }
  };

  for (const m of text.matchAll(/\bexport\s*\(\s*([^)]+)\)/g)) addExports(m[1]);
  for (const m of text.matchAll(/\bexportMethods\s*\(\s*([^)]+)\)/g)) addExports(m[1]);

  for (const m of text.matchAll(/\bexportPattern\s*\(\s*["']([^"']+)["']\s*\)/g)) {
    // Best effort: a pattern JavaScript cannot compile is skipped, not fatal.
    try { exportPatterns.push(new RegExp(toJsPattern(m[1]))); } catch (_) {}
  }

  // Parse the argument list rather than matching two bare identifiers, so
  // quoted generics and the optional third (method function) argument work.
  for (const m of text.matchAll(/\bS3method\s*\(\s*([^)]+)\)/g)) {
    const [generic, cls] = splitArgs(m[1]).map(stripQuotes);
    if (!generic || !cls) continue;
    s3methods.push({ generic, class: cls });
    // The generic is also recorded as an exported name.
    exports.add(generic);
  }

  for (const m of text.matchAll(/\bimportFrom\s*\(\s*([^)]+)\)/g)) {
    const [pkg, ...names] = splitArgs(m[1]).map(stripQuotes);
    if (!pkg || names.length === 0) continue;
    if (!importFrom.has(pkg)) importFrom.set(pkg, new Set());
    const bucket = importFrom.get(pkg);
    for (const name of names) {
      if (name) bucket.add(name);
    }
  }

  return { exports, exportPatterns, s3methods, importFrom };
}

/**
 * Split a comma-separated argument list into trimmed, non-empty tokens.
 *
 * @param {string} raw
 * @returns {string[]}
 */
function splitArgs(raw) {
  return raw.split(',').map((s) => s.trim()).filter(Boolean);
}

/**
 * Remove one leading and one trailing quote character (", ' or backtick)
 * from a token. Surrounding whitespace is trimmed first so padded tokens
 * such as ` "name" ` are unquoted too.
 *
 * @param {string} s
 * @returns {string}
 */
function stripQuotes(s) {
  return s.trim().replace(/^["'`]|["'`]$/g, '').trim();
}
141
+
142
/**
 * Index the definitions found in a list of R files by symbol name.
 *
 * Each file is read once, `#` comments are removed, and three patterns are
 * applied in order: assignments of `function`, `R6Class` or `new_class`
 * (using `<-`, `<<-` or `=`), then `setGeneric("name"`, then
 * `setClass("name"`. Assigned names starting with `.` are skipped. The first
 * file to define a symbol keeps it; unreadable files are ignored.
 *
 * @param {string[]} rFiles - absolute paths to .R/.r files
 * @returns {Map<string, string>} symbol name → path of the defining file
 */
function collectLocalDefs(rFiles) {
  const assignPattern = /^(?:[ \t]*)([\w.]+)\s*(?:<<-|<-|=)\s*(?:(?:R6::)?R6Class|(?:S7::)?new_class|function)\b/gm;
  const genericPattern = /^[ \t]*setGeneric\s*\(\s*["']([\w.]+)["']/gm;
  const classPattern = /^[ \t]*setClass\s*\(\s*["']([\w.]+)["']/gm;

  const index = new Map();
  // First-write-wins: never overwrite an existing entry.
  const remember = (symbol, owner) => {
    if (!index.has(symbol)) index.set(symbol, owner);
  };

  for (const filePath of rFiles) {
    let source;
    try {
      source = fs.readFileSync(filePath, 'utf8');
    } catch (_) {
      continue;
    }
    const code = source.replace(/#.*$/gm, '');

    for (const [, symbol] of code.matchAll(assignPattern)) {
      // Dot-prefixed assignments are not indexed.
      if (!symbol.startsWith('.')) remember(symbol, filePath);
    }
    for (const [, symbol] of code.matchAll(genericPattern)) {
      remember(symbol, filePath);
    }
    for (const [, symbol] of code.matchAll(classPattern)) {
      remember(symbol, filePath);
    }
  }
  return index;
}
175
+
176
+ module.exports = { readDescription, readNamespace, collectLocalDefs };
@@ -60,6 +60,35 @@ function extractTSDeps(src) {
60
60
  return [...deps].slice(0, 5);
61
61
  }
62
62
 
63
// R base packages — present in every install, not informative as deps.
const R_BASE_PKGS = new Set([
  'base', 'stats', 'utils', 'graphics', 'grDevices', 'methods', 'datasets',
  'parallel', 'splines', 'stats4', 'tools', 'tcltk', 'grid', 'compiler',
]);

/**
 * Extract project-level import dependencies from R source.
 *
 * Captures, in this order:
 *   - `library(pkg)` / `require(pkg)`, bare or quoted, with or without extra
 *     arguments such as `quietly = TRUE`;
 *   - `requireNamespace("pkg")`;
 *   - namespace-qualified references `pkg::name` and `pkg:::name`, including
 *     dot-prefixed and backtick-quoted names.
 * Base packages are skipped. A bare first argument combined with
 * `character.only = TRUE` is skipped because it is a variable, not a package
 * name. At most five distinct names are returned, in first-seen order.
 *
 * @param {string} src - raw R source; falsy input yields []
 * @returns {string[]}
 */
function extractRDeps(src) {
  const deps = new Set();
  const add = (name) => {
    if (name && !R_BASE_PKGS.has(name)) deps.add(name);
  };

  // Strip line comments so commented-out library() calls don't match.
  const stripped = (src || '').replace(/#.*$/gm, '');

  // Group 1: opening quote (if any); 2: package; 3: trailing arguments (if any).
  const attachRe = /\b(?:library|require)\s*\(\s*(["']?)([\w.]+)["']?\s*(?:,([^)]*))?\)/g;
  for (const m of stripped.matchAll(attachRe)) {
    const isBareName = m[1] === '';
    if (isBareName && /\bcharacter\.only\s*=\s*T(?:RUE)?\b/.test(m[3] || '')) continue;
    add(m[2]);
  }
  for (const m of stripped.matchAll(/\brequireNamespace\s*\(\s*["']([\w.]+)["']/g)) {
    add(m[1]);
  }
  // `:::?` accepts both exported (::) and internal (:::) access.
  for (const m of stripped.matchAll(/\b([A-Za-z][\w.]*):::?[A-Za-z.`]/g)) {
    add(m[1]);
  }
  return [...deps].slice(0, 5);
}
91
+
63
92
  /**
64
93
  * Build reverse dependency map from forward map.
65
94
  * @param {Map<string, string[]>} forwardMap
@@ -78,4 +107,4 @@ function buildReverseDepMap(forwardMap) {
78
107
  return reverse;
79
108
  }
80
109
 
81
- module.exports = { extractPythonDeps, extractTSDeps, buildReverseDepMap };
110
+ module.exports = { extractPythonDeps, extractTSDeps, extractRDeps, buildReverseDepMap };
@@ -2,6 +2,18 @@
2
2
 
3
3
  /**
4
4
  * Extract signatures from R source code.
5
+ *
6
+ * Recognised constructs:
7
+ * - Function definitions: `name <- function(args)`, `name = function(args)`,
8
+ * `name <<- function(args)`
9
+ * - S4: setClass / setGeneric / setMethod
10
+ * - R6: `Name <- R6Class("Name", public = list(method = function(...)))`
11
+ * - S7: `Name <- new_class("Name", ...)` and `method(generic, Name) <- function(...)`
12
+ * - roxygen2 docstring hint (first non-tag `#'` line) appended as ` # hint`
13
+ *
14
+ * The extractor stays regex-only and zero-dependency. Output uses two-space
15
+ * indentation for class members (matching python/typescript/scala fixtures).
16
+ *
5
17
  * @param {string} src - Raw file content
6
18
  * @returns {string[]} Array of signature strings
7
19
  */
@@ -9,58 +21,202 @@ function extract(src) {
9
21
  if (!src || typeof src !== 'string') return [];
10
22
  const sigs = [];
11
23
 
12
- // Strip line comments. R uses # comments. Roxygen2 (#') comments are
13
- // stripped along with regular ones; Phase 2 may parse them.
24
+ // Collect roxygen2 hints from the original source (before stripping `#`).
25
+ const docHints = collectRoxygenHints(src);
26
+
27
+ // Strip line comments for the rest of the parsing. R uses `#` comments and
28
+ // roxygen2 `#'` is consumed alongside them — its content already lives in
29
+ // docHints.
14
30
  const stripped = src.replace(/#.*$/gm, '');
15
31
 
16
- // Function definitions:
17
- // name <- function(args) { ... }
18
- // name = function(args) { ... }
19
- // name <<- function(args) { ... }
20
- // Args may span multiple lines and contain default values, so we need to
21
- // match a balanced parenthesis group rather than a single line.
22
- const funcRe = /^(?:[ \t]*)([\w.]+)\s*(?:<<-|<-|=)\s*function\s*\(/gm;
32
+ // Track byte ranges already accounted for by R6 / setClass blocks so the
33
+ // top-level function regex doesn't re-emit their methods as bare functions.
34
+ const consumedRanges = [];
35
+
36
+ // ── R6 classes ────────────────────────────────────────────────────────────
37
+ // ClassName <- R6Class("ClassName", public = list(method = function(...)))
38
+ // ClassName <- R6::R6Class(...)
39
+ const r6Re = /([\w.]+)\s*(?:<<-|<-|=)\s*(?:R6::)?R6Class\s*\(/g;
23
40
  let m;
24
- while ((m = funcRe.exec(stripped)) !== null) {
41
+ while ((m = r6Re.exec(stripped)) !== null && sigs.length < 30) {
42
+ const name = m[1];
43
+ if (name.startsWith('.')) continue;
44
+ const openIdx = r6Re.lastIndex - 1;
45
+ const body = readBalancedParens(stripped, openIdx);
46
+ if (body === null) continue;
47
+ const closeIdx = openIdx + body.length + 1;
48
+ const classNameLit = readFirstStringArg(body) || name;
49
+ sigs.push(`${name} <- R6Class("${classNameLit}")` + applyHint(docHints, name));
50
+ for (const memberSig of extractListMethods(body, 8)) {
51
+ sigs.push(' ' + memberSig);
52
+ if (sigs.length >= 30) break;
53
+ }
54
+ consumedRanges.push([m.index, closeIdx]);
55
+ r6Re.lastIndex = closeIdx;
56
+ }
57
+
58
+ // ── S7 classes ────────────────────────────────────────────────────────────
59
+ // ClassName <- new_class("ClassName", properties = list(...))
60
+ const s7Classes = new Set();
61
+ const s7Re = /([\w.]+)\s*(?:<<-|<-|=)\s*(?:S7::)?new_class\s*\(/g;
62
+ while ((m = s7Re.exec(stripped)) !== null && sigs.length < 30) {
63
+ const name = m[1];
64
+ if (name.startsWith('.')) continue;
65
+ const openIdx = s7Re.lastIndex - 1;
66
+ const body = readBalancedParens(stripped, openIdx);
67
+ if (body === null) continue;
68
+ const closeIdx = openIdx + body.length + 1;
69
+ const classNameLit = readFirstStringArg(body) || name;
70
+ s7Classes.add(classNameLit);
71
+ s7Classes.add(name);
72
+ sigs.push(`${name} <- new_class("${classNameLit}")` + applyHint(docHints, name));
73
+ consumedRanges.push([m.index, closeIdx]);
74
+ s7Re.lastIndex = closeIdx;
75
+ }
76
+
77
+ // S7 method dispatch: `method(generic, ClassName) <- function(args)`
78
+ const s7MethodRe = /^[ \t]*method\s*\(\s*([\w.]+)\s*,\s*([\w.]+)\s*\)\s*(?:<<-|<-|=)\s*function\s*\(/gm;
79
+ while ((m = s7MethodRe.exec(stripped)) !== null && sigs.length < 30) {
80
+ if (!s7Classes.has(m[2])) continue;
81
+ const argsStart = s7MethodRe.lastIndex - 1;
82
+ const args = readBalancedParens(stripped, argsStart);
83
+ if (args === null) continue;
84
+ sigs.push(` method(${m[1]}, ${m[2]}) <- function(${normalizeParams(args)})`);
85
+ }
86
+
87
+ // ── Top-level function definitions ────────────────────────────────────────
88
+ // name <- function(args), name = function(args), name <<- function(args)
89
+ // Skip matches whose position falls inside an R6/S7 class body — those have
90
+ // already been emitted as indented members.
91
+ const funcRe = /^(?:[ \t]*)([\w.]+)\s*(?:<<-|<-|=)\s*function\s*\(/gm;
92
+ while ((m = funcRe.exec(stripped)) !== null && sigs.length < 30) {
25
93
  const name = m[1];
26
- if (name.startsWith('.')) continue; // private convention
27
- const argsStart = funcRe.lastIndex;
28
- const args = readBalancedParens(stripped, argsStart - 1);
94
+ if (name.startsWith('.')) continue;
95
+ if (inAnyRange(m.index, consumedRanges)) continue;
96
+ const argsStart = funcRe.lastIndex - 1;
97
+ const args = readBalancedParens(stripped, argsStart);
29
98
  if (args === null) continue;
30
- sigs.push(`${name} <- function(${normalizeParams(args)})`);
99
+ sigs.push(`${name} <- function(${normalizeParams(args)})` + applyHint(docHints, name));
31
100
  }
32
101
 
33
- // S4 setMethod / setGeneric:
34
- // setGeneric("name", function(args) standardGeneric("name"))
35
- // setMethod("name", "ClassName", function(args) { ... })
102
+ // ── S4 ────────────────────────────────────────────────────────────────────
36
103
  for (const sm of stripped.matchAll(/^[ \t]*setGeneric\s*\(\s*["']([\w.]+)["']/gm)) {
104
+ if (sigs.length >= 30) break;
37
105
  sigs.push(`setGeneric("${sm[1]}")`);
38
106
  }
39
107
  for (const sm of stripped.matchAll(/^[ \t]*setMethod\s*\(\s*["']([\w.]+)["']\s*,\s*["']([\w.]+)["']/gm)) {
108
+ if (sigs.length >= 30) break;
40
109
  sigs.push(`setMethod("${sm[1]}", "${sm[2]}")`);
41
110
  }
42
-
43
- // S4 class definitions:
44
- // setClass("Name", representation(...), ...)
45
111
  for (const sm of stripped.matchAll(/^[ \t]*setClass\s*\(\s*["']([\w.]+)["']/gm)) {
112
+ if (sigs.length >= 30) break;
46
113
  sigs.push(`setClass("${sm[1]}")`);
47
114
  }
48
115
 
49
116
  return sigs.slice(0, 30);
50
117
  }
51
118
 
119
/**
 * Gather one-line roxygen2 hints from source that still contains its
 * comments.
 *
 * Consecutive `#'` lines are accumulated as a block. The first line after
 * the block is tested for an `R6Class`, `new_class` or `function` assignment;
 * if it matches, the hint is taken from the block's `@title` line, else its
 * `@description` line, else its first non-tag line. The hint has whitespace
 * collapsed, is cut to 60 characters and has trailing punctuation removed.
 * The block is discarded after that line whether or not it matched.
 *
 * @param {string} src - raw file content
 * @returns {Map<string, string>} symbol name → hint
 */
function collectRoxygenHints(src) {
  // Tried in order; each captures the assigned name in group 1.
  const definitionPatterns = [
    /^[ \t]*([\w.]+)\s*(?:<<-|<-|=)\s*(?:R6::)?R6Class\b/,
    /^[ \t]*([\w.]+)\s*(?:<<-|<-|=)\s*(?:S7::)?new_class\b/,
    /^[ \t]*([\w.]+)\s*(?:<<-|<-|=)\s*function\b/,
  ];

  const hints = new Map();
  let pending = [];
  for (const line of src.split('\n')) {
    if (/^\s*#'/.test(line)) {
      pending.push(line.replace(/^\s*#'\s?/, ''));
      continue;
    }
    if (pending.length === 0) continue;

    // The block only documents the line directly below it.
    const docLines = pending;
    pending = [];

    let definition = null;
    for (const pattern of definitionPatterns) {
      definition = line.match(pattern);
      if (definition) break;
    }
    if (!definition) continue;

    const picked = pickRoxygenLine(docLines, '@title')
      || pickRoxygenLine(docLines, '@description')
      || pickRoxygenLine(docLines, null);
    if (!picked) continue;

    const hint = picked.replace(/\s+/g, ' ').trim().slice(0, 60).replace(/[.,;:!?]+$/, '').trim();
    if (hint) hints.set(definition[1], hint);
  }
  return hints;
}
154
+
155
/**
 * Choose one line from a roxygen block.
 *
 * With a `tag`, returns the trimmed text following the first line that
 * starts with that tag and has non-empty text after it. Without a tag,
 * returns the first non-blank line that does not start with `@`.
 *
 * @param {string[]} block - roxygen lines with the `#'` prefix removed
 * @param {string|null} tag - e.g. '@title', or null for the first plain line
 * @returns {string|null} the selected text, or null when nothing qualifies
 */
function pickRoxygenLine(block, tag) {
  const entries = block
    .map((raw) => raw.trim())
    .filter((text) => text !== '');

  if (!tag) {
    return entries.find((text) => !text.startsWith('@')) || null;
  }

  const tagged = entries
    .filter((text) => text.startsWith(tag))
    .map((text) => text.slice(tag.length).trim());
  return tagged.find((rest) => rest !== '') || null;
}
170
+
171
/**
 * Format the hint stored for `name` as a trailing ` # hint` suffix.
 *
 * @param {Map<string, string>} hints - symbol name → hint text
 * @param {string} name - symbol to look up
 * @returns {string} the suffix, or '' when there is no (non-empty) hint
 */
function applyHint(hints, name) {
  const hint = hints.get(name);
  if (!hint) return '';
  return ' # ' + hint;
}
175
+
176
/**
 * List the `name = function(args)` entries found in an argument body.
 *
 * An entry must begin at the start of the body, after a newline, or after a
 * comma. Names starting with `.` are skipped, as are entries whose
 * parameter list cannot be read by `readBalancedParens`. Collection stops
 * once `cap` signatures have been gathered.
 *
 * @param {string} body - text between the parentheses of the class call
 * @param {number} cap - maximum number of signatures to return
 * @returns {string[]} signatures formatted as `name <- function(params)`
 */
function extractListMethods(body, cap) {
  const memberPattern = /(?:^|[\n,])\s*([\w.]+)\s*=\s*function\s*\(/g;
  const found = [];
  for (const hit of body.matchAll(memberPattern)) {
    if (!(found.length < cap)) break;
    const member = hit[1];
    if (member.startsWith('.')) continue;
    // The match ends on the opening parenthesis of the parameter list.
    const parenAt = hit.index + hit[0].length - 1;
    const params = readBalancedParens(body, parenAt);
    if (params === null) continue;
    found.push(`${member} <- function(${normalizeParams(params)})`);
  }
  return found;
}
194
+
195
/**
 * Report whether `pos` lies inside any half-open `[start, end)` range.
 *
 * @param {number} pos - position to test
 * @param {Array<[number, number]>} ranges - `[start, end]` pairs, end exclusive
 * @returns {boolean}
 */
function inAnyRange(pos, ranges) {
  return ranges.some(([start, end]) => start <= pos && pos < end);
}
201
+
202
/**
 * Return the contents of a quoted string that opens an argument body.
 *
 * Only leading whitespace may precede the string, and its contents must
 * consist of word characters and dots.
 *
 * @param {string} body - argument text
 * @returns {string|null} the unquoted value, or null when the body does not
 *   start with such a string
 */
function readFirstStringArg(body) {
  const found = /^\s*["']([\w.]+)["']/.exec(body);
  return found === null ? null : found[1];
}
207
+
52
208
  /**
53
209
  * Read a parenthesis-balanced substring starting at the position of the
54
210
  * opening '(' character, returning the inner content (without the outer
55
211
  * parens). Returns null if no matching close paren is found within `cap`
56
212
  * characters, which guards against runaway scans on malformed input.
57
213
  */
58
- function readBalancedParens(src, openIdx, cap = 4096) {
214
+ function readBalancedParens(src, openIdx, cap = 16384) {
59
215
  if (src[openIdx] !== '(') return null;
60
216
  let depth = 1;
61
217
  let i = openIdx + 1;
62
218
  const end = Math.min(src.length, openIdx + cap);
63
- let inString = null; // null | '"' | "'"
219
+ let inString = null;
64
220
  while (i < end) {
65
221
  const ch = src[i];
66
222
  if (inString) {
@@ -82,36 +238,17 @@ function readBalancedParens(src, openIdx, cap = 4096) {
82
238
 
83
239
  /**
84
240
  * Compress whitespace inside a parameter list, collapse multi-line default
85
- * expressions onto a single line, and trim. The goal is one-line readable
86
- * signatures, not a faithful AST.
87
- *
88
- * String literals are protected so that commas/equals inside default values
89
- * like sep = "," don't get respaced.
241
+ * expressions onto a single line, and trim. String literals are protected so
242
+ * that commas/equals inside default values like `sep = ","` don't get respaced.
90
243
  */
91
244
  function normalizeParams(raw) {
92
- const tokens = [];
93
- let buf = '';
245
+ let out = '';
94
246
  let inString = null;
95
247
  for (let i = 0; i < raw.length; i++) {
96
248
  const ch = raw[i];
97
- if (inString) {
98
- buf += ch;
99
- if (ch === '\\' && i + 1 < raw.length) { buf += raw[i + 1]; i++; continue; }
100
- if (ch === inString) inString = null;
101
- continue;
102
- }
103
- if (ch === '"' || ch === "'") { inString = ch; buf += ch; continue; }
104
- buf += ch;
105
- }
106
- // Now buf === raw with strings preserved character-for-character.
107
- // Walk again: collapse non-string runs of whitespace, normalize ', ' and ' = '.
108
- let out = '';
109
- inString = null;
110
- for (let i = 0; i < buf.length; i++) {
111
- const ch = buf[i];
112
249
  if (inString) {
113
250
  out += ch;
114
- if (ch === '\\' && i + 1 < buf.length) { out += buf[i + 1]; i++; continue; }
251
+ if (ch === '\\' && i + 1 < raw.length) { out += raw[i + 1]; i++; continue; }
115
252
  if (ch === inString) inString = null;
116
253
  continue;
117
254
  }