moult 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/CHANGELOG.md +44 -0
- data/LICENSE.txt +201 -0
- data/NOTICE +4 -0
- data/README.md +331 -0
- data/exe/moult +6 -0
- data/lib/moult/abc.rb +133 -0
- data/lib/moult/boundaries/packwerk.rb +114 -0
- data/lib/moult/boundaries/severity.rb +87 -0
- data/lib/moult/boundaries.rb +77 -0
- data/lib/moult/boundaries_report.rb +106 -0
- data/lib/moult/churn.rb +52 -0
- data/lib/moult/cli/boundaries_command.rb +83 -0
- data/lib/moult/cli/coverage_command.rb +101 -0
- data/lib/moult/cli/dead_code_command.rb +112 -0
- data/lib/moult/cli/duplication_command.rb +92 -0
- data/lib/moult/cli/flags_command.rb +95 -0
- data/lib/moult/cli/gate_command.rb +113 -0
- data/lib/moult/cli/health_command.rb +117 -0
- data/lib/moult/cli/hotspots_command.rb +104 -0
- data/lib/moult/cli.rb +102 -0
- data/lib/moult/clones.rb +91 -0
- data/lib/moult/cloud_upload.rb +29 -0
- data/lib/moult/confidence/rules.rb +128 -0
- data/lib/moult/confidence.rb +106 -0
- data/lib/moult/coverage/resolver.rb +56 -0
- data/lib/moult/coverage.rb +176 -0
- data/lib/moult/coverage_report.rb +98 -0
- data/lib/moult/dead_code.rb +119 -0
- data/lib/moult/dead_code_report.rb +65 -0
- data/lib/moult/diff.rb +177 -0
- data/lib/moult/discovery.rb +38 -0
- data/lib/moult/duplication/confidence.rb +92 -0
- data/lib/moult/duplication.rb +112 -0
- data/lib/moult/duplication_report.rb +89 -0
- data/lib/moult/flag_scanner.rb +150 -0
- data/lib/moult/flags/classification.rb +79 -0
- data/lib/moult/flags/snapshot.rb +162 -0
- data/lib/moult/flags/staleness.rb +145 -0
- data/lib/moult/flags.rb +131 -0
- data/lib/moult/flags_report.rb +136 -0
- data/lib/moult/formatters/boundaries_json.rb +20 -0
- data/lib/moult/formatters/boundaries_table.rb +53 -0
- data/lib/moult/formatters/coverage_json.rb +19 -0
- data/lib/moult/formatters/coverage_table.rb +60 -0
- data/lib/moult/formatters/dead_code_json.rb +20 -0
- data/lib/moult/formatters/dead_code_table.rb +66 -0
- data/lib/moult/formatters/duplication_json.rb +20 -0
- data/lib/moult/formatters/duplication_table.rb +55 -0
- data/lib/moult/formatters/flags_json.rb +20 -0
- data/lib/moult/formatters/flags_table.rb +76 -0
- data/lib/moult/formatters/gate_github.rb +52 -0
- data/lib/moult/formatters/gate_json.rb +20 -0
- data/lib/moult/formatters/gate_message.rb +19 -0
- data/lib/moult/formatters/gate_sarif.rb +78 -0
- data/lib/moult/formatters/gate_table.rb +71 -0
- data/lib/moult/formatters/health_json.rb +20 -0
- data/lib/moult/formatters/health_table.rb +80 -0
- data/lib/moult/formatters/json.rb +23 -0
- data/lib/moult/formatters/table.rb +70 -0
- data/lib/moult/formatters/text_table.rb +39 -0
- data/lib/moult/gate/config.rb +55 -0
- data/lib/moult/gate/evaluation.rb +172 -0
- data/lib/moult/gate/policy.rb +103 -0
- data/lib/moult/gate.rb +199 -0
- data/lib/moult/gate_report.rb +97 -0
- data/lib/moult/git.rb +83 -0
- data/lib/moult/health/score.rb +291 -0
- data/lib/moult/health.rb +320 -0
- data/lib/moult/health_report.rb +97 -0
- data/lib/moult/index.rb +228 -0
- data/lib/moult/parser.rb +101 -0
- data/lib/moult/rails_conventions.rb +124 -0
- data/lib/moult/report.rb +114 -0
- data/lib/moult/scoring.rb +82 -0
- data/lib/moult/span.rb +17 -0
- data/lib/moult/symbol_id.rb +30 -0
- data/lib/moult/symbol_scanner.rb +100 -0
- data/lib/moult/version.rb +5 -0
- data/lib/moult.rb +84 -0
- data/schema/boundaries.schema.json +125 -0
- data/schema/common.schema.json +76 -0
- data/schema/coverage.schema.json +83 -0
- data/schema/deadcode.schema.json +106 -0
- data/schema/duplication.schema.json +128 -0
- data/schema/flags.schema.json +157 -0
- data/schema/gate.schema.json +165 -0
- data/schema/health.schema.json +157 -0
- data/schema/hotspots.schema.json +106 -0
- metadata +185 -0
|
@@ -0,0 +1,104 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "optparse"
|
|
4
|
+
require "time"
|
|
5
|
+
|
|
6
|
+
module Moult
  class CLI
    # `moult hotspots [PATH]` — rank files by complexity x churn. Thin layer:
    # parse options, drive the library, hand the {Report} to a formatter.
    # Report-only: exit 0 on success, non-zero only on error.
    class HotspotsCommand
      DEFAULT_LIMIT = 20

      # Run the command end to end.
      #
      # @param argv [Array<String>] arguments following the `hotspots` subcommand.
      #   Options are parsed from a copy, so the caller's array is left untouched
      #   (the same courtesy {CLI#run} extends to its own caller).
      # @return [Integer] process exit status: 0 on success or --help, 1 on error
      def run(argv)
        options = parse(argv.dup)
        return puts_help(options) if options[:help]

        root = File.expand_path(options[:path])
        unless File.exist?(root)
          warn "moult: no such file or directory: #{options[:path]}"
          return 1
        end

        report = analyze(root, options)
        puts render(report, options)
        0
      rescue => e
        # OptionParser::ParseError is a StandardError, so a single clause reports
        # bad flags and analysis failures identically.
        warn "moult: #{e.message}"
        1
      end

      private

      # Parse flags and the optional PATH out of +argv+ (which is consumed).
      #
      # @param argv [Array<String>]
      # @return [Hash] :format, :limit, :since, :quiet, :path and, when asked, :help
      # @raise [OptionParser::ParseError] on an unknown flag or a bad value
      def parse(argv)
        options = {format: :table, limit: DEFAULT_LIMIT, since: Churn::DEFAULT_SINCE, quiet: false}
        @parser = OptionParser.new do |o|
          o.banner = "Usage: moult hotspots [PATH] [options]"
          o.separator ""
          o.separator "Options:"
          o.on("--format FORMAT", [:table, :json], "Output format: table (default) or json") { |v| options[:format] = v }
          o.on("--limit N", Integer, "Show top N hotspots (default #{DEFAULT_LIMIT}; 0 for all)") { |v| options[:limit] = v }
          o.on("--since DATE", "Churn window start, any git --since value (default '#{Churn::DEFAULT_SINCE}')") { |v| options[:since] = v }
          o.on("--quiet", "Suppress informational notes on stderr") { options[:quiet] = true }
          o.on("-h", "--help", "Show this message") { options[:help] = true }
        end
        # permute! processes options regardless of position, so `PATH` may come
        # before or after flags; remaining non-options are left in argv.
        @parser.permute!(argv)
        options[:path] = argv.shift || "."
        options
      end

      # Print the option summary built by {#parse}.
      #
      # @return [Integer] always 0
      def puts_help(_options)
        puts @parser
        0
      end

      # Build the hotspots report for +root+ (a directory or a single file).
      #
      # @param root [String] absolute, existing path
      # @param options [Hash] parsed options; reads :since and :quiet
      # @return [Object] whatever {Scoring.build_report} returns
      def analyze(root, options)
        directory = File.directory?(root)
        root_dir = directory ? root : File.dirname(root)
        files = directory ? Discovery.ruby_files(root) : [root]

        unless Git.repo?(root_dir)
          note(options, "#{root_dir} is not a git repository; churn is 0 for all files.")
        end

        Scoring.build_report(
          root: root_dir,
          files: files,
          churn: Churn.collect(root: root_dir, since: options[:since]),
          git_ref: Git.head_ref(root_dir),
          generated_at: Time.now.utc.iso8601,
          churn_window: window_label(options[:since]),
          churn_since: explicit_since(options[:since])
        )
      end

      # Hand the report to the formatter chosen by --format. A missing or
      # non-positive --limit is passed on as nil.
      #
      # @return [Object] the formatter's rendering, printed by {#run}
      def render(report, options)
        limit = (options[:limit] && options[:limit] > 0) ? options[:limit] : nil
        case options[:format]
        when :json then Formatters::Json.render(report, limit: limit)
        else Formatters::Table.render(report, limit: limit)
        end
      end

      # Human-readable label for the churn window.
      def window_label(since)
        (since == Churn::DEFAULT_SINCE) ? "last 12 months" : "since #{since}"
      end

      # Only surface a concrete --since boundary when the user gave a fixed one;
      # the relative default ("12 months ago") has no stable date.
      def explicit_since(since)
        (since == Churn::DEFAULT_SINCE) ? nil : since
      end

      # Emit an informational note on stderr unless --quiet was given.
      def note(options, message)
        warn "moult: #{message}" unless options[:quiet]
      end
    end
  end
end
|
data/lib/moult/cli.rb
ADDED
|
@@ -0,0 +1,102 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "optparse"
|
|
4
|
+
|
|
5
|
+
module Moult
  # Command-line front door. It owns no analysis logic: it recognises the
  # top-level flags, picks the subcommand, and returns that command's process
  # exit status (0 success, non-zero on error).
  class CLI
    # Subcommand name => [feature to require, command class name]. Each command
    # file is required only when its subcommand is dispatched.
    COMMANDS = {
      "hotspots" => ["moult/cli/hotspots_command", :HotspotsCommand],
      "deadcode" => ["moult/cli/dead_code_command", :DeadCodeCommand],
      "coverage" => ["moult/cli/coverage_command", :CoverageCommand],
      "duplication" => ["moult/cli/duplication_command", :DuplicationCommand],
      "health" => ["moult/cli/health_command", :HealthCommand],
      "boundaries" => ["moult/cli/boundaries_command", :BoundariesCommand],
      "flags" => ["moult/cli/flags_command", :FlagsCommand],
      "gate" => ["moult/cli/gate_command", :GateCommand]
    }.freeze

    # Small helpers shared by the command classes so the same option plumbing is
    # not repeated in each of them.
    module Support
      module_function

      # Resolve a PATH argument to an analysis root plus the files to analyse.
      # A directory yields itself and the Ruby files {Discovery} finds under it;
      # anything else yields its parent directory and just that one path.
      #
      # @param path [String]
      # @return [Array(String, Array<String>)] [root_dir, files]
      def discover(path)
        return [File.dirname(path), [path]] unless File.directory?(path)

        [path, Discovery.ruby_files(path)]
      end

      # Build the Rails conventions object, honouring a command's --[no-]rails
      # option.
      #
      # @param enabled [Boolean] false builds one with `rails: false`
      def build_rails(root_dir, files, enabled:)
        if enabled
          RailsConventions.build(root: root_dir, files: files)
        else
          RailsConventions.new(rails: false)
        end
      end
    end

    # Convenience entry point: build a CLI and run it.
    #
    # @return [Integer] process exit status
    def self.start(argv)
      new.run(argv)
    end

    # Handle the top-level flags, otherwise dispatch to the named subcommand.
    # Works on a copy of +argv+.
    #
    # @param argv [Array<String>]
    # @return [Integer] process exit status
    def run(argv)
      args = argv.dup
      first = args.first

      # Top-level flags short-circuit before any subcommand is looked up.
      if first == "--version" || first == "-v"
        puts Moult::VERSION
        return 0
      end

      if first.nil? || first == "--help" || first == "-h"
        puts usage
        return 0
      end

      command = args.shift
      dispatch(command, args)
    end

    # The top-level help text.
    #
    # @return [String]
    def usage
      <<~USAGE
        moult #{Moult::VERSION} — codebase intelligence for Ruby

        Usage:
        moult hotspots [PATH] [options] Rank files by complexity x churn
        moult deadcode [PATH] [options] List confidence-graded dead-code candidates
        moult coverage [PATH] [options] Map symbols hot/cold/untracked from coverage
        moult duplication [PATH] [options] List confidence-graded structural-clone groups
        moult health [PATH] [options] Aggregate the analyses into a composite health score
        moult boundaries [PATH] [options] List recorded architecture-boundary violations (packwerk)
        moult flags [PATH] [options] Catalogue OpenFeature feature-flag references (usage)
        moult gate [PATH] [options] Diff-aware PR risk gate: verdict over the changed code
        moult --version Print version
        moult --help Show this message
      USAGE
    end

    private

    # Require the command's file and run it; an unknown command prints the
    # usage text on stderr and yields exit status 1.
    def dispatch(command, argv)
      feature, const_name = COMMANDS[command]
      if feature.nil?
        warn "moult: unknown command #{command.inspect}"
        warn usage
        return 1
      end

      require feature
      CLI.const_get(const_name).new.run(argv)
    end
  end
end
|
data/lib/moult/clones.rb
ADDED
|
@@ -0,0 +1,91 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "flay"
|
|
4
|
+
require_relative "symbol_id"
|
|
5
|
+
|
|
6
|
+
module Moult
  # The structural-clone detector — Moult's adapter over the +flay+ gem and the
  # *only* file that names +Flay+. Downstream code consumes the Moult-owned
  # {Clones::Result} value object, never a flay type, so the backend stays
  # swappable. This is the duplication-slice analogue of {Index} (rubydex) and
  # {Coverage} (SimpleCov/stdlib).
  #
  # flay reports the *largest* duplicated S-expression node, grouping
  # structurally equivalent code (literal values, variable/method/class names and
  # whitespace are all ignored when hashing). Two distinctions it draws map onto
  # our confidence grade:
  #
  # * +bonus+ truthy => the nodes are IDENTICAL (names and all) — the clearest
  #   copy-paste signal. Surfaced as +kind: :identical+.
  # * +bonus+ nil => structurally SIMILAR (same shape, differing names/literals) —
  #   real duplication but weaker (could be parallel-by-design). +kind: :similar+.
  #
  # As of flay 2.14 the default parser is +Flay::NotRubyParser+, which parses with
  # Prism (the same parser Moult uses); no parallel parser stack is pulled in.
  module Clones
    module_function

    # flay's own default mass threshold; small enough to catch a duplicated
    # method, large enough to skip incidental structural rhymes.
    DEFAULT_MIN_MASS = 16

    # One structurally-equivalent clone group. +node_type+ is flay's sexp type
    # rendered as a String (e.g. "defn", "call", "class"). +occurrences+ are the
    # sites in the order flay reports them.
    CloneSet = Struct.new(:structural_hash, :node_type, :kind, :mass, :occurrences)

    # A single site of a clone group. +path+ is root-relative; +line+ is flay's
    # reported start line. +fuzzy+ is true when flay flagged the location.
    Occurrence = Struct.new(:path, :line, :fuzzy)

    # The Moult-owned result of a detection run. +backend+/+backend_version+
    # originate here so "flay" stays isolated to this file.
    Result = Struct.new(:sets, :backend, :backend_version, :min_mass, :fuzzy)

    # Scan +files+ for structural clones.
    #
    # @param root [String] absolute analysis root (occurrence paths are relative to it)
    # @param files [Array<String>] absolute Ruby file paths to scan
    # @param min_mass [Integer] flay's mass threshold; smaller fragments are ignored
    # @param fuzzy [Boolean] also report near-matches (off by default)
    # @return [Result]
    # @raise [Moult::Error] when the flay run fails
    def detect(root:, files:, min_mass: DEFAULT_MIN_MASS, fuzzy: false)
      sets =
        if files.empty?
          # Nothing to scan: flay is not invoked at all.
          []
        else
          run_flay(files, min_mass, fuzzy).filter_map { |item| clone_set(item, root) }
        end

      # Positional, in member order: sets, backend, backend_version, min_mass, fuzzy.
      Result.new(sets, "flay", backend_version, min_mass, fuzzy)
    end

    # Drive flay over +files+ and return its analysis items. Any failure is
    # re-raised as a {Moult::Error} carrying the original class and message.
    def run_flay(files, min_mass, fuzzy)
      settings = Flay.default_options.merge(mass: min_mass, fuzzy: fuzzy)
      detector = Flay.new(settings)
      detector.process(*files)
      detector.analyze
    rescue => e
      raise Moult::Error, "flay duplication scan failed: #{e.class}: #{e.message}"
    end

    # Translate one flay item (and its locations) into Moult value objects.
    #
    # @return [CloneSet]
    def clone_set(item, root)
      sites = item.locations.map do |loc|
        relative = SymbolId.relative_path(loc.file, root)
        Occurrence.new(relative, loc.line, !loc.fuzzy.nil?)
      end
      kind = item.bonus ? :identical : :similar

      CloneSet.new(item.structural_hash, item.name.to_s, kind, item.mass, sites)
    end

    # @return [String, nil] flay's version, or nil when the constant is absent
    def backend_version
      Flay::VERSION if defined?(Flay::VERSION)
    end
  end
end
|
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Moult
  # Builds the payload uploaded from CI to Moult Cloud out of a parsed
  # `moult gate --format json` report.
  #
  # The gate report is already SOURCE-FREE by contract (a finding is
  # category/path/symbol_id/line/value -- no code text), so "no source leaves the
  # repo" is not enforced here; that is structural. This projection does two
  # narrower jobs:
  #   1. Allow-list the top-level keys -- defence-in-depth so a future formatter
  #      addition cannot silently exfiltrate a new field.
  #   2. Normalise analysis.root to "." -- the raw value is the absolute local
  #      path, which leaks the developer's filesystem layout and is meaningless
  #      to the cloud (it derives the repo from the CI OIDC token).
  # The result stays valid against schema/gate.schema.json (root remains a string).
  module CloudUpload
    # The only top-level report keys that are ever uploaded.
    TOP_LEVEL_KEYS = %w[
      schema_version tool analysis policy verdict reasons summary rules
    ].freeze

    # Project a parsed gate report down to the upload payload.
    #
    # @param report [Hash] string-keyed report; it is not modified
    # @return [Hash] a new hash holding only the allow-listed keys, with
    #   analysis["root"] replaced by "." when "analysis" is a Hash
    def self.projection(report)
      payload = report.slice(*TOP_LEVEL_KEYS)
      analysis = payload["analysis"]
      # A missing or non-Hash analysis section is passed through untouched.
      return payload unless analysis.is_a?(Hash)

      # merge builds a fresh hash, so the caller's nested analysis is not altered.
      payload["analysis"] = analysis.merge("root" => ".")
      payload
    end
  end
end
|
|
@@ -0,0 +1,128 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Moult
  module Confidence
    # The named, ordered adjusters {Confidence.score} applies on top of the base
    # score. Each is a small value object so a single rule can be tested in
    # isolation and the set can be extended without touching the scorer.
    #
    # Direction is encoded in +delta+: positive raises confidence-of-death,
    # negative lowers it. A rule may also impose a +cap+ — an upper bound on the
    # final confidence — used when a factor means "we genuinely cannot be sure",
    # e.g. an unresolved index. No rule ever removes a finding: uncertainty
    # *lowers* confidence and records a reason, it never hides the candidate.
    module Rules
      # @!attribute name [Symbol] rule identifier recorded in the reason
      # @!attribute applies [Proc] ctx -> truthy when the rule fires
      # @!attribute delta [Float] signed adjustment when it applies
      # @!attribute cap [Float, nil] optional upper bound on final confidence
      # @!attribute detail [String, Proc] human-readable reason (Proc gets ctx)
      Rule = Struct.new(:name, :applies, :delta, :cap, :detail) do
        # @return [Object] whatever the +applies+ callable returns for +ctx+
        def applies?(ctx)
          applies.call(ctx)
        end

        # @return [Object] the static detail, or the callable's result for +ctx+
        def detail_for(ctx)
          return detail unless detail.respond_to?(:call)

          detail.call(ctx)
        end
      end

      # Entries are written positionally, in member order:
      #   name, applies, delta, cap, detail
      DEFAULT_RULES = [
        Rule.new(
          :no_references,
          ->(ctx) { ctx.reference_count.to_i.zero? },
          0.0,
          nil,
          "no resolvable references found"
        ),
        Rule.new(
          :has_test_only_references,
          ->(ctx) { ctx.test_only },
          -0.2,
          nil,
          "only referenced from test/spec files"
        ),
        Rule.new(
          :rails_entrypoint,
          ->(ctx) { !Array(ctx.rails_signals).empty? },
          -0.5,
          nil,
          ->(ctx) { "Rails framework entrypoint: #{Array(ctx.rails_signals).map(&:detail).join("; ")}" }
        ),
        Rule.new(
          :dynamic_dispatch_present,
          ->(ctx) { ctx.dynamic_dispatch },
          -0.35,
          nil,
          "dynamic dispatch (send/define_method/method_missing/const_get/eval) present in file"
        ),
        # Constructors are invoked implicitly by `.new`, not by a call to
        # `initialize`, so the index never records a reference. Universal Ruby
        # (not Rails); kept narrow to this one near-certain implicit entrypoint.
        Rule.new(
          :implicit_constructor,
          ->(ctx) { ctx.kind == :method && ctx.name.to_s.end_with?("#initialize") },
          -0.4,
          nil,
          "constructor invoked implicitly via .new"
        ),
        # A method that overrides/implements an ancestor's method is reachable
        # through that ancestor's interface (polymorphic dispatch) even with no
        # by-name call site. Covers framework hooks (visitor #visit_*, job
        # #perform) when the ancestor's source is indexed.
        Rule.new(
          :overrides_ancestor,
          ->(ctx) { ctx.override_of },
          -0.4,
          nil,
          ->(ctx) { "overrides #{ctx.override_of} (reachable via that interface)" }
        ),
        Rule.new(
          :private_unused,
          ->(ctx) { ctx.kind == :method && ctx.visibility == :private && ctx.reference_count.to_i.zero? },
          0.1,
          nil,
          "private method with no caller in the codebase"
        ),
        Rule.new(
          :public_api,
          ->(ctx) { ctx.kind == :method && ctx.visibility == :public },
          -0.1,
          nil,
          "public method may be an external API entrypoint"
        ),
        Rule.new(
          :deprecated_marked,
          ->(ctx) { ctx.deprecated },
          0.1,
          nil,
          "marked deprecated"
        ),
        Rule.new(
          :index_unresolved,
          ->(ctx) { ctx.index_resolved == false },
          0.0,
          0.5,
          "index did not fully resolve; confidence capped"
        ),
        # Phase 3 runtime evidence. Listed last so it is the headline reason and,
        # for the rescue case, caps over every static signal. Methods only — a
        # constant's line runs at load regardless of use, so the resolver returns
        # :untracked for constants and neither rule fires.
        #
        # runtime-cold corroborates a static candidate: the body never executed
        # in the supplied run. Additive (not a cap) — coverage can be incomplete
        # or stale, so it raises confidence, never asserts.
        Rule.new(
          :runtime_cold,
          ->(ctx) { ctx.runtime == :cold },
          0.2,
          nil,
          "never executed in the supplied coverage run (runtime-cold corroborates)"
        ),
        # runtime-hot overrides: the symbol executed despite no resolvable static
        # reference — the false positive static analysis missed (send / dynamic
        # dispatch / metaprogramming). The cap drives it below default confidence
        # gates while leaving a sliver, since coverage may be stale/incomplete.
        Rule.new(
          :runtime_hot,
          ->(ctx) { ctx.runtime == :hot },
          -0.6,
          0.1,
          "executed at runtime (coverage) despite no static reference; rescued"
        )
      ].freeze
    end
  end
end
|
|
@@ -0,0 +1,106 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative "span"
|
|
4
|
+
|
|
5
|
+
module Moult
  # The per-finding confidence model — one of Moult's two protected APIs (the
  # other being the JSON output contract). It answers a single, deliberately
  # humble question: *how likely is this definition to actually be dead?* It
  # never asserts certain death (Moult's core principle); the highest a finding
  # can score is still a confidence, and every contributing factor is recorded
  # as a {Reason} so the judgement is auditable.
  #
  # {score} is a pure function of a {Context} of already-gathered facts: no IO,
  # no rubydex, no Rails detection happens here. That keeps it trivially
  # unit-testable and lets each {Rules::Rule} be exercised in isolation. The fact
  # gathering lives in {DeadCode}; the conventions live in {RailsConventions}.
  module Confidence
    CATEGORY = "dead_code"

    # Base likelihood before any rule fires, keyed by [kind, visibility]. A
    # private method with no caller is the strongest candidate (nothing outside
    # its class can reach it); public symbols are weakest because they are the
    # natural API surface and the place metaprogramming/Rails reach in.
    BASE = {
      [:method, :private] => 0.75,
      [:method, :protected] => 0.6,
      [:method, :public] => 0.4,
      [:constant, :private] => 0.6,
      [:constant, :public] => 0.5
    }.freeze

    # Base used for any [kind, visibility] pair missing from {BASE}.
    DEFAULT_BASE = 0.45

    # The facts a finding is scored from. Assembled by {DeadCode#gather_context}.
    Context = Struct.new(
      :symbol_id, :kind, :name, :span, :path,
      :visibility, :reference_count, :test_only,
      :rails_signals, # Array<RailsConventions::Signal>
      :dynamic_dispatch, # Boolean: metaprogramming present in the owning file
      :override_of, # String, nil: ancestor whose method this overrides
      :deprecated, # Boolean
      :index_resolved,
      :runtime # Symbol, nil: :hot/:cold/:untracked from coverage (Phase 3)
    )

    # One auditable contribution to a finding's confidence.
    Reason = Struct.new(:rule, :delta, :detail) do
      # @return [Hash] serialisable form; the rule name is stringified
      def to_h
        {rule: rule.to_s, delta: delta, detail: detail}
      end
    end

    # A confidence-graded dead-code candidate. Carries its reasons so no claim
    # is ever made without a recorded justification.
    Finding = Struct.new(
      :symbol_id, :kind, :name, :span, :path, :confidence, :category, :reasons, :runtime
    ) do
      # @return [Hash] serialisable form (+path+ is not included)
      def to_h
        {
          symbol_id: symbol_id,
          kind: kind.to_s,
          name: name,
          span: span.to_h,
          confidence: confidence,
          category: category,
          runtime: runtime&.to_s,
          reasons: reasons.map(&:to_h)
        }
      end
    end

    module_function

    # Score one candidate: start from the {BASE} value for its kind/visibility,
    # add the delta of every rule that applies, bound the total by the lowest cap
    # any applying rule imposes, then clamp to 0.0..1.0 and round to 2 places.
    #
    # @param ctx [Context]
    # @param rules [Array<Rules::Rule>] injectable for isolated testing
    # @return [Finding]
    def score(ctx, rules: Rules::DEFAULT_RULES)
      base = BASE.fetch([ctx.kind, ctx.visibility], DEFAULT_BASE)
      # Reason members, positionally: rule, delta, detail.
      reasons = [Reason.new(:base_score, base, "base for #{ctx.kind}/#{ctx.visibility}")]
      caps = []

      rules.each do |rule|
        next unless rule.applies?(ctx)

        # A cap-only rule may leave +delta+ unset. Record it as a zero adjustment
        # so the sum below does not fail on nil.
        reasons << Reason.new(rule.name, rule.delta || 0.0, rule.detail_for(ctx))
        caps << rule.cap if rule.cap
      end

      raw = reasons.sum(&:delta)
      # The strictest (lowest) cap wins when several rules impose one.
      bounded = caps.empty? ? raw : [raw, caps.min].min
      confidence = bounded.clamp(0.0, 1.0).round(2)

      # Finding members, positionally: symbol_id, kind, name, span, path,
      # confidence, category, reasons, runtime.
      Finding.new(
        ctx.symbol_id,
        ctx.kind,
        ctx.name,
        ctx.span,
        ctx.path,
        confidence,
        CATEGORY,
        reasons,
        ctx.runtime
      )
    end
  end
end
|
|
105
|
+
|
|
106
|
+
require_relative "confidence/rules"
|
|
@@ -0,0 +1,56 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Moult
  module Coverage
    # The line->symbol resolver: turns line-keyed coverage into a per-symbol
    # runtime classification. Its rules are meant to be precise and
    # fixture-pinned — drift is a bug, exactly like the ABC metric.
    #
    # For a method definition spanning +span.start_line..span.end_line+ in a
    # tracked file, it inspects the *body* lines and returns:
    #
    # * +:hot+       — at least one executable body line was executed
    # * +:cold+      — the file is tracked, body has executable lines, none ran
    # * +:untracked+ — no usable signal (see {classify})
    #
    # The defining rule is that the +def+ signature line is EXCLUDED: stdlib
    # +Coverage+ counts it at definition (load) time, not per call, so counting
    # it would mark every loaded method hot. Only the body reflects real calls.
    module Resolver
      module_function

      # Classify one definition against the coverage dataset.
      #
      # @param dataset [Dataset] responds to +entries+, keyed by root-relative path
      # @param path [String] root-relative path (a symbol_id component)
      # @param span [Span] 1-based definition span
      # @param kind [Symbol] :method or :constant
      # @return [Symbol] :hot, :cold, or :untracked
      def classify(dataset, path:, span:, kind:)
        # A constant's only line is its assignment, executed at load regardless
        # of whether the constant is ever read — so it carries no runtime signal.
        return :untracked unless kind == :method

        lines = dataset.entries[path]
        return :untracked unless lines

        hits = body_values(lines, span)
        if hits.empty?
          # No executable body line to judge: one-line methods (def f = x), empty
          # methods, abstract stubs. Their only line is the def line (load-time
          # coverage), so they are unclassifiable from line coverage.
          :untracked
        elsif hits.any?(&:positive?)
          :hot
        else
          :cold
        end
      end

      # Coverage values for the executable body lines, excluding the +def+
      # signature line at +span.start_line+. Lines with no coverage value (nil),
      # such as the +end+ line and blanks, are left out.
      #
      # @return [Array<Integer>]
      def body_values(lines, span)
        body_start = span.start_line + 1
        body_end = span.end_line
        return [] if body_start > body_end

        (body_start..body_end).each_with_object([]) do |line_no, values|
          # Spans are 1-based; the coverage list is indexed from 0.
          value = lines[line_no - 1]
          values << value if value
        end
      end
    end
  end
end
|