moult 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/CHANGELOG.md +44 -0
- data/LICENSE.txt +201 -0
- data/NOTICE +4 -0
- data/README.md +331 -0
- data/exe/moult +6 -0
- data/lib/moult/abc.rb +133 -0
- data/lib/moult/boundaries/packwerk.rb +114 -0
- data/lib/moult/boundaries/severity.rb +87 -0
- data/lib/moult/boundaries.rb +77 -0
- data/lib/moult/boundaries_report.rb +106 -0
- data/lib/moult/churn.rb +52 -0
- data/lib/moult/cli/boundaries_command.rb +83 -0
- data/lib/moult/cli/coverage_command.rb +101 -0
- data/lib/moult/cli/dead_code_command.rb +112 -0
- data/lib/moult/cli/duplication_command.rb +92 -0
- data/lib/moult/cli/flags_command.rb +95 -0
- data/lib/moult/cli/gate_command.rb +113 -0
- data/lib/moult/cli/health_command.rb +117 -0
- data/lib/moult/cli/hotspots_command.rb +104 -0
- data/lib/moult/cli.rb +102 -0
- data/lib/moult/clones.rb +91 -0
- data/lib/moult/cloud_upload.rb +29 -0
- data/lib/moult/confidence/rules.rb +128 -0
- data/lib/moult/confidence.rb +106 -0
- data/lib/moult/coverage/resolver.rb +56 -0
- data/lib/moult/coverage.rb +176 -0
- data/lib/moult/coverage_report.rb +98 -0
- data/lib/moult/dead_code.rb +119 -0
- data/lib/moult/dead_code_report.rb +65 -0
- data/lib/moult/diff.rb +177 -0
- data/lib/moult/discovery.rb +38 -0
- data/lib/moult/duplication/confidence.rb +92 -0
- data/lib/moult/duplication.rb +112 -0
- data/lib/moult/duplication_report.rb +89 -0
- data/lib/moult/flag_scanner.rb +150 -0
- data/lib/moult/flags/classification.rb +79 -0
- data/lib/moult/flags/snapshot.rb +162 -0
- data/lib/moult/flags/staleness.rb +145 -0
- data/lib/moult/flags.rb +131 -0
- data/lib/moult/flags_report.rb +136 -0
- data/lib/moult/formatters/boundaries_json.rb +20 -0
- data/lib/moult/formatters/boundaries_table.rb +53 -0
- data/lib/moult/formatters/coverage_json.rb +19 -0
- data/lib/moult/formatters/coverage_table.rb +60 -0
- data/lib/moult/formatters/dead_code_json.rb +20 -0
- data/lib/moult/formatters/dead_code_table.rb +66 -0
- data/lib/moult/formatters/duplication_json.rb +20 -0
- data/lib/moult/formatters/duplication_table.rb +55 -0
- data/lib/moult/formatters/flags_json.rb +20 -0
- data/lib/moult/formatters/flags_table.rb +76 -0
- data/lib/moult/formatters/gate_github.rb +52 -0
- data/lib/moult/formatters/gate_json.rb +20 -0
- data/lib/moult/formatters/gate_message.rb +19 -0
- data/lib/moult/formatters/gate_sarif.rb +78 -0
- data/lib/moult/formatters/gate_table.rb +71 -0
- data/lib/moult/formatters/health_json.rb +20 -0
- data/lib/moult/formatters/health_table.rb +80 -0
- data/lib/moult/formatters/json.rb +23 -0
- data/lib/moult/formatters/table.rb +70 -0
- data/lib/moult/formatters/text_table.rb +39 -0
- data/lib/moult/gate/config.rb +55 -0
- data/lib/moult/gate/evaluation.rb +172 -0
- data/lib/moult/gate/policy.rb +103 -0
- data/lib/moult/gate.rb +199 -0
- data/lib/moult/gate_report.rb +97 -0
- data/lib/moult/git.rb +83 -0
- data/lib/moult/health/score.rb +291 -0
- data/lib/moult/health.rb +320 -0
- data/lib/moult/health_report.rb +97 -0
- data/lib/moult/index.rb +228 -0
- data/lib/moult/parser.rb +101 -0
- data/lib/moult/rails_conventions.rb +124 -0
- data/lib/moult/report.rb +114 -0
- data/lib/moult/scoring.rb +82 -0
- data/lib/moult/span.rb +17 -0
- data/lib/moult/symbol_id.rb +30 -0
- data/lib/moult/symbol_scanner.rb +100 -0
- data/lib/moult/version.rb +5 -0
- data/lib/moult.rb +84 -0
- data/schema/boundaries.schema.json +125 -0
- data/schema/common.schema.json +76 -0
- data/schema/coverage.schema.json +83 -0
- data/schema/deadcode.schema.json +106 -0
- data/schema/duplication.schema.json +128 -0
- data/schema/flags.schema.json +157 -0
- data/schema/gate.schema.json +165 -0
- data/schema/health.schema.json +157 -0
- data/schema/hotspots.schema.json +106 -0
- metadata +185 -0
|
@@ -0,0 +1,92 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Moult
|
|
4
|
+
module Duplication
|
|
5
|
+
# The per-finding confidence model for duplication — the duplication slice's
|
|
6
|
+
# realisation of Moult's protected confidence API. It answers a deliberately
|
|
7
|
+
# humble question: *how confident are we that this clone group is genuine,
|
|
8
|
+
# consolidatable duplication* rather than an incidental structural rhyme? It
|
|
9
|
+
# never asserts certainty; every contributing factor is recorded as a {Reason}
|
|
10
|
+
# so the judgement is auditable.
|
|
11
|
+
#
|
|
12
|
+
# {assess} is a pure function of the signals flay hands us (already extracted
|
|
13
|
+
# by {Clones}): no IO, no flay objects. That keeps it trivially unit-testable
|
|
14
|
+
# and lets the scoring be pinned against hand-built inputs — drift is a bug,
|
|
15
|
+
# the same treatment {ABC} and the coverage {Resolver} get.
|
|
16
|
+
module Confidence
  # Category label for duplication findings.
  CATEGORY = "duplication"

  # Base likelihood before any adjustment, keyed by kind. An *identical*
  # (byte-for-byte) match is near-certain duplication; a merely *similar*
  # match (names/literals differ) is weaker and could be parallel-by-design.
  BASE = {identical: 0.6, similar: 0.45}.freeze

  # A structurally-similar (not identical) match never reaches high confidence:
  # shared shape is not proof of shared intent.
  SIMILAR_CAP = 0.75

  # Larger duplicated structures are far less likely to be coincidental.
  MASS_LARGE = 100
  MASS_MEDIUM = 40

  # sexp node types that are whole, cleanly-extractable definitions. A
  # duplicated whole method/class is the least ambiguous "consolidate me".
  WHOLE_DEFINITION = %w[defn defs class module sclass].freeze

  # One auditable contribution to a finding's confidence. Mirrors the shared
  # rule/delta/detail reason shape used across Moult's contracts; kept local so
  # the duplication slice does not couple to the dead-code Confidence module.
  Reason = Struct.new(:rule, :delta, :detail) do
    # @return [Hash] serializable form; the rule is rendered as a String
    def to_h
      {rule: rule.to_s, delta: delta, detail: detail}
    end
  end

  # The graded result: a confidence in [0, 1] and the reasons behind it.
  Assessment = Struct.new(:confidence, :reasons)

  module_function

  # Grades one clone group from its extracted signals.
  #
  # The score is the sum of the recorded reason deltas: the base score for the
  # kind, an optional mass bucket, a bonus for three or more occurrences and a
  # bonus for a duplicated whole definition. The sum is clamped to [0, 1] and
  # rounded to two decimals.
  #
  # Any kind other than :identical is scored from the :similar base and
  # described as a similar match, so it is also held to {SIMILAR_CAP}; only an
  # exact match may exceed the cap.
  #
  # @param kind [Symbol] :identical or :similar
  # @param mass [Integer] flay's mass for the duplicated node
  # @param occurrence_count [Integer] number of sites (>= 2)
  # @param node_type [String] flay sexp type, e.g. "defn", "call"
  # @return [Assessment]
  def assess(kind:, mass:, occurrence_count:, node_type:)
    base = BASE.fetch(kind, BASE[:similar])
    reasons = [Reason.new(rule: :base_score, delta: base, detail: base_detail(kind))]

    mass_contribution = mass_reason(mass)
    reasons << mass_contribution if mass_contribution
    reasons << Reason.new(rule: :many_occurrences, delta: 0.07, detail: "duplicated across #{occurrence_count} locations") if occurrence_count >= 3
    reasons << Reason.new(rule: :whole_definition, delta: 0.08, detail: "duplicates a whole #{node_type}") if WHOLE_DEFINITION.include?(node_type)

    raw = reasons.sum(&:delta)
    # Mirror the fallbacks above: whatever was not an exact match was treated
    # as "similar" for its base score and detail, so it must obey the cap too.
    if kind != :identical && raw > SIMILAR_CAP
      # The cap is recorded as a zero-delta reason so the override stays auditable.
      reasons << Reason.new(rule: :similar_cap, delta: 0.0, detail: "structural similarity is not proof of duplication; capped at #{SIMILAR_CAP}")
      raw = SIMILAR_CAP
    end

    Assessment.new(confidence: raw.clamp(0.0, 1.0).round(2), reasons: reasons)
  end

  # Human-readable detail for the base-score reason.
  #
  # @param kind [Symbol] :identical, or anything else (described as similar)
  # @return [String]
  def base_detail(kind)
    if kind == :identical
      "identical structural match (byte-for-byte)"
    else
      "structurally-similar match (names/literals differ)"
    end
  end

  # Bucketed so the contribution is stable and pinnable regardless of the
  # run's configurable --min-mass.
  #
  # @param mass [Integer] flay's mass for the duplicated node
  # @return [Reason, nil] nil when the mass is below {MASS_MEDIUM}
  def mass_reason(mass)
    if mass >= MASS_LARGE
      Reason.new(rule: :large_mass, delta: 0.2, detail: "large duplicated mass (#{mass})")
    elsif mass >= MASS_MEDIUM
      Reason.new(rule: :medium_mass, delta: 0.1, detail: "moderate duplicated mass (#{mass})")
    end
  end
end
|
|
91
|
+
end
|
|
92
|
+
end
|
|
@@ -0,0 +1,112 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Moult
|
|
4
|
+
# Orchestrates the duplication analysis: it asks the {Clones} adapter (flay) for
|
|
5
|
+
# every structural clone group, attributes each occurrence to its enclosing
|
|
6
|
+
# method (best-effort, for the cross-analysis join), and grades each group
|
|
7
|
+
# through the pure {Duplication::Confidence} model. The result is a ranked
|
|
8
|
+
# {DuplicationReport} of confidence-graded clone groups — never an assertion
|
|
9
|
+
# that duplication is certainly removable.
|
|
10
|
+
#
|
|
11
|
+
# This is the only layer that knows where the facts come from; {Confidence}
|
|
12
|
+
# stays a pure function of the extracted signals so it can be pinned in
|
|
13
|
+
# isolation.
|
|
14
|
+
module Duplication
|
|
15
|
+
module_function
|
|
16
|
+
|
|
17
|
+
# Runs clone detection over +files+, grades every clone group and returns the
# surviving findings ranked inside a {DuplicationReport}.
#
# @param root [String] absolute analysis root
# @param files [Array<String>] absolute Ruby file paths to scan
# @param min_mass [Integer] flay mass threshold; smaller fragments are ignored
# @param fuzzy [Boolean] include near-matches (off by default)
# @param min_confidence [Float] drop findings below this confidence
# @param git_ref [String, nil] passed through to the report unchanged
# @param generated_at [String, nil] passed through to the report unchanged
# @return [DuplicationReport]
def build_report(root:, files:, min_mass: Clones::DEFAULT_MIN_MASS, fuzzy: false,
  min_confidence: 0.0, git_ref: nil, generated_at: nil)
  detected = Clones.detect(root: root, files: files, min_mass: min_mass, fuzzy: fuzzy)
  index = MethodIndex.new(root: root, files: files)

  # Ranking: highest confidence first, then heaviest, with the node type as
  # the final tie-break.
  ranked = detected.sets
    .map { |clone_set| finding_for(clone_set, index) }
    .select { |finding| finding.confidence >= min_confidence }
    .sort_by { |finding| [-finding.confidence, -finding.mass, finding.node_type] }

  # The detector settings are echoed from the detection result rather than
  # from this method's arguments.
  DuplicationReport.new(
    root: root,
    findings: ranked,
    git_ref: git_ref,
    generated_at: generated_at,
    backend: detected.backend,
    backend_version: detected.backend_version,
    min_mass: detected.min_mass,
    fuzzy: detected.fuzzy
  )
end
|
|
45
|
+
|
|
46
|
+
# Turns one detected clone set into a report finding: the set is graded by
# {Confidence.assess} and each occurrence is attributed to a symbol id through
# the method index.
#
# @param set [#kind, #mass, #node_type, #occurrences] one detected clone set
# @param methods [#symbol_id_at] resolves (path, line) to a symbol id or nil
# @return [DuplicationReport::Finding]
def finding_for(set, methods)
  graded = Confidence.assess(
    kind: set.kind,
    mass: set.mass,
    occurrence_count: set.occurrences.size,
    node_type: set.node_type
  )

  sites = set.occurrences.map do |site|
    owner_id = methods.symbol_id_at(site.path, site.line)
    DuplicationReport::Occurrence.new(symbol_id: owner_id, path: site.path, line: site.line, fuzzy: site.fuzzy)
  end

  DuplicationReport::Finding.new(
    confidence: graded.confidence,
    kind: set.kind,
    node_type: set.node_type,
    mass: set.mass,
    reasons: graded.reasons,
    occurrences: sites
  )
end
|
|
70
|
+
|
|
71
|
+
# Best-effort line -> enclosing-method resolution, reusing the Prism {Parser}
|
|
72
|
+
# so the minted ids are byte-identical to the hotspots/deadcode join keys.
|
|
73
|
+
# flay reports a clone's start line only; we attribute it to the innermost
|
|
74
|
+
# method whose span contains that line. Files are parsed lazily and memoised;
|
|
75
|
+
# a fragment outside any method (top-level code, a whole class) resolves to nil.
|
|
76
|
+
class MethodIndex
  # @param root [String] analysis root the relative paths are computed against
  # @param files [Array<String>] absolute paths of the files that may be looked up
  def initialize(root:, files:)
    # Lookups arrive with root-relative paths, so key the absolute paths by them.
    @abs_by_rel = files.to_h { |abs| [SymbolId.relative_path(abs, root), abs] }
    @cache = {}
  end

  # @param rel_path [String] root-relative path of the file
  # @param line [Integer] line to attribute
  # @return [String, nil] symbol_id of the innermost containing method, or nil
  def symbol_id_at(rel_path, line)
    owner = enclosing_method(rel_path, line)
    return unless owner

    SymbolId.for(path: rel_path, start_line: owner.span.start_line, fqname: owner.name)
  end

  private

  # The method with the shortest span among those whose span contains +line+.
  def enclosing_method(rel_path, line)
    containing = methods_for(rel_path).select do |candidate|
      span = candidate.span
      line.between?(span.start_line, span.end_line)
    end
    containing.min_by { |candidate| candidate.span.end_line - candidate.span.start_line }
  end

  # Parsed methods of a file, memoised per relative path.
  def methods_for(rel_path)
    @cache[rel_path] ||= parse(rel_path)
  end

  # Best-effort parse: a path that was not registered, or any StandardError
  # raised while parsing, yields an empty list.
  def parse(rel_path)
    absolute = @abs_by_rel[rel_path]
    absolute ? Parser.parse_file(absolute) : []
  rescue
    []
  end
end
|
|
109
|
+
end
|
|
110
|
+
end
|
|
111
|
+
|
|
112
|
+
require_relative "duplication/confidence"
|
|
@@ -0,0 +1,89 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Moult
|
|
4
|
+
# The serialized result model for `moult duplication` (schema/duplication.schema.json),
|
|
5
|
+
# sibling to {DeadCodeReport} and {CoverageReport}. It owns the JSON envelope and
|
|
6
|
+
# leaves the other protected contracts untouched. Each {Finding} is a confidence-
|
|
7
|
+
# graded clone group carrying its {Reason}s and {Occurrence}s; nothing here asserts
|
|
8
|
+
# that duplication is certainly removable.
|
|
9
|
+
class DuplicationReport
  # Version of the serialized shape; bump only on a breaking change to it.
  SCHEMA_VERSION = 1

  # One site of a clone group: the symbol id it was attributed to (may be
  # nil), the file path, the reported line and the fuzzy flag.
  Occurrence = Struct.new(:symbol_id, :path, :line, :fuzzy) do
    # @return [Hash] the four fields keyed by name, in declaration order
    def to_h
      members.to_h { |field| [field, self[field]] }
    end
  end

  # A graded clone group together with the reasons behind its confidence and
  # the sites where it occurs.
  Finding = Struct.new(:confidence, :kind, :node_type, :mass, :reasons, :occurrences) do
    # @return [Hash] serializable form; kind is rendered as a String and the
    #   reasons/occurrences are serialized through their own to_h
    def to_h
      serialized_reasons = reasons.map(&:to_h)
      serialized_sites = occurrences.map(&:to_h)
      {
        category: Duplication::Confidence::CATEGORY,
        confidence: confidence,
        kind: kind.to_s,
        node_type: node_type,
        mass: mass,
        reasons: serialized_reasons,
        occurrences: serialized_sites
      }
    end
  end

  attr_reader :root, :findings, :git_ref, :generated_at,
    :backend, :backend_version, :min_mass, :fuzzy

  # @param root [String] absolute analysis root
  # @param findings [Array<Finding>] ranked, highest-confidence first
  # @param git_ref [String, nil] stored and serialized as given
  # @param generated_at [String, nil] stored and serialized as given
  # @param backend [String] detector backend name (e.g. "flay")
  # @param backend_version [String, nil] backend gem version
  # @param min_mass [Integer] the mass threshold used
  # @param fuzzy [Boolean] whether near-matches were included
  def initialize(root:, findings:, git_ref: nil, generated_at: nil,
    backend: "flay", backend_version: nil, min_mass: nil, fuzzy: false)
    @root, @findings = root, findings
    @git_ref, @generated_at = git_ref, generated_at
    @backend, @backend_version = backend, backend_version
    @min_mass, @fuzzy = min_mass, fuzzy
  end

  # @return [Hash] aggregate counts across all clone groups
  def summary
    occurrence_total = findings.sum { |finding| finding.occurrences.size }
    mass_total = findings.sum(&:mass)
    {sets: findings.size, occurrences: occurrence_total, total_mass: mass_total}
  end

  # @return [Hash] the full envelope: schema version, tool, analysis
  #   provenance (including the detector settings), summary and findings
  def to_h
    detector = {
      backend: backend,
      backend_version: backend_version,
      min_mass: min_mass,
      fuzzy: fuzzy
    }
    analysis = {root: root, git_ref: git_ref, generated_at: generated_at, detector: detector}

    {
      schema_version: SCHEMA_VERSION,
      tool: {name: "moult", version: Moult::VERSION},
      analysis: analysis,
      summary: summary,
      findings: findings.map(&:to_h)
    }
  end
end
|
|
89
|
+
end
|
|
@@ -0,0 +1,150 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "prism"
|
|
4
|
+
|
|
5
|
+
module Moult
|
|
6
|
+
# The OpenFeature flag-evaluation SCANNER — the *only* file that knows the
|
|
7
|
+
# OpenFeature client call shape, so a future SDK or provider shift is a swap,
|
|
8
|
+
# not a rewrite (the same isolation {Clones} gives flay and {Boundaries::Packwerk}
|
|
9
|
+
# gives packwerk). It is a pure Prism scan over source, shaped like
|
|
10
|
+
# {SymbolScanner}: there is no external-tool output to ingest here.
|
|
11
|
+
#
|
|
12
|
+
# OpenFeature (github.com/open-feature/ruby-sdk, gem +openfeature-sdk+) is the
|
|
13
|
+
# provider-agnostic feature-flag *standard*: a client is built via
|
|
14
|
+
# +OpenFeature::SDK.build_client+ and flags are evaluated with
|
|
15
|
+
# +client.fetch_<type>_value(flag_key:, default_value:, evaluation_context:)+
|
|
16
|
+
# (and the +fetch_<type>_details+ variants). Scanning that client surface catches
|
|
17
|
+
# flag usage behind *any* provider (flagd, LaunchDarkly, GO Feature Flag, ...).
|
|
18
|
+
#
|
|
19
|
+
# We detect by AST only and take NO dependency on the openfeature-sdk gem — we
|
|
20
|
+
# read the call shape, we never call the SDK. A call is an OpenFeature evaluation
|
|
21
|
+
# when its method name is one of the known +fetch_*+ names AND it passes a
|
|
22
|
+
# +flag_key:+ keyword argument (the keyword uniquely disambiguates it from any
|
|
23
|
+
# unrelated same-named method, since the receiver is a runtime value).
|
|
24
|
+
module FlagScanner
  # Provenance recorded in the report's `analysis.scanner` block. The swap point:
  # retarget these (and {METHOD_VALUE_TYPES}) for a different SDK/standard.
  TARGET = "openfeature"
  SDK_GEM = "openfeature-sdk"
  CLIENT_BUILDER = "OpenFeature::SDK.build_client"

  # The fetch_<type>_(value|details) method names mapped to the contract's
  # value_type. integer/float collapse to "number" (the value_type enum is
  # coarser than the SDK's fetch types); the precise method name is kept on each
  # call site so nothing is lost.
  FETCH_TYPES = {
    "boolean" => "boolean",
    "string" => "string",
    "number" => "number",
    "integer" => "number",
    "float" => "number",
    "object" => "object"
  }.freeze

  # Every recognised method name ("fetch_<type>_value" and
  # "fetch_<type>_details") mapped to its value_type.
  METHOD_VALUE_TYPES = FETCH_TYPES.each_with_object({}) do |(fetch_type, value_type), acc|
    acc["fetch_#{fetch_type}_value"] = value_type
    acc["fetch_#{fetch_type}_details"] = value_type
  end.freeze

  # The literal default_value node types we render. A non-literal default (a
  # variable, method call, array/hash) renders to nil — recorded as "no observed
  # literal default" rather than guessed.
  LITERAL_NODES = [
    Prism::StringNode, Prism::SymbolNode, Prism::IntegerNode,
    Prism::FloatNode, Prism::TrueNode, Prism::FalseNode, Prism::NilNode
  ].freeze

  # One detected OpenFeature flag-evaluation call site. +flag_key+ is nil when the
  # key is not a string/symbol literal (a *dynamic* reference: counted by the
  # report, never catalogued, since a static scan cannot resolve it).
  CallSite = Struct.new(:flag_key, :value_type, :default_value, :method_name, :path, :line)

  module_function

  # Reads a file and scans its contents.
  #
  # @param path [String] file to read
  # @param rel_path [String] root-relative path stamped onto each call site
  # @return [Array<CallSite>]
  def scan_file(path, rel_path)
    scan_source(File.read(path), rel_path)
  end

  # Parses +source+ with Prism and collects every flag-evaluation call in it.
  #
  # @param source [String] Ruby source
  # @param path [String] path stamped onto each call site
  # @return [Array<CallSite>]
  def scan_source(source, path)
    result = Prism.parse(source)
    visitor = Visitor.new(path)
    result.value.accept(visitor)
    visitor.call_sites
  end

  # Walks the AST collecting OpenFeature flag-evaluation calls. No namespace
  # tracking is needed: line→enclosing-method attribution is the orchestration's
  # job (a {Flags::MethodIndex}, reusing the Prism {Parser}), keyed on the line
  # recorded here.
  class Visitor < Prism::Visitor
    # @return [Array<CallSite>] call sites collected so far, in visit order
    attr_reader :call_sites

    # @param path [String] path stamped onto each collected call site
    def initialize(path)
      @path = path
      @call_sites = []
      super()
    end

    # Records the call when it is a flag evaluation, then keeps descending so
    # calls nested in the receiver, arguments or block are visited as well.
    def visit_call_node(node)
      capture(node)
      super
    end

    private

    # Appends a {CallSite} when the call uses a known fetch_* method name AND
    # passes a +flag_key:+ keyword; any other call is ignored.
    def capture(node)
      value_type = METHOD_VALUE_TYPES[node.name.to_s]
      return unless value_type

      kwargs = keyword_arguments(node)
      return unless kwargs.key?("flag_key")

      @call_sites << CallSite.new(
        literal_key(kwargs["flag_key"]),
        value_type,
        literal_default(kwargs["default_value"]),
        node.name.to_s,
        @path,
        node.location.start_line
      )
    end

    # Map of keyword name (String) => value node, for the trailing keyword hash.
    #
    # Keywords are always the final argument, so the search runs from the end.
    # Taking the first hash-like argument instead would let a positional hash
    # literal placed before the keywords hide +flag_key:+ and the evaluation
    # would be missed. Entries that are not plain symbol-keyed pairs (e.g. a
    # +**splat+) are skipped.
    def keyword_arguments(node)
      args = node.arguments&.arguments || []
      hash = args.reverse_each.find { |a| a.is_a?(Prism::KeywordHashNode) || a.is_a?(Prism::HashNode) }
      return {} unless hash

      hash.elements.each_with_object({}) do |assoc, acc|
        next unless assoc.is_a?(Prism::AssocNode)
        key = assoc.key
        acc[key.unescaped] = assoc.value if key.is_a?(Prism::SymbolNode)
      end
    end

    # The flag key when it is a string/symbol literal; nil otherwise (dynamic).
    def literal_key(node)
      case node
      when Prism::StringNode, Prism::SymbolNode then node.unescaped
      end
    end

    # A string rendering of a literal default value, or nil when not a literal.
    # Strings render as their unescaped content, symbols with a leading colon,
    # and every other literal as its source text.
    def literal_default(node)
      return nil unless node && LITERAL_NODES.any? { |k| node.is_a?(k) }

      case node
      when Prism::StringNode then node.unescaped
      when Prism::SymbolNode then ":#{node.unescaped}"
      else node.slice
      end
    end
  end
end
|
|
150
|
+
end
|
|
@@ -0,0 +1,79 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Moult
|
|
4
|
+
module Flags
|
|
5
|
+
# The per-finding model for feature flags — this slice's realisation of Moult's
|
|
6
|
+
# protected per-finding API. Like a packwerk boundary violation (see
|
|
7
|
+
# {Boundaries::Severity}), a flag *reference* is a recorded FACT, not a
|
|
8
|
+
# probabilistic candidate: the scanner saw the call site. So we never manufacture
|
|
9
|
+
# a fake confidence (the finding's +confidence+ is null); the per-finding signal
|
|
10
|
+
# is a categorical CLASSIFICATION instead — the flag's value_type, how many times
|
|
11
|
+
# it is referenced, and the literal default value(s) observed.
|
|
12
|
+
#
|
|
13
|
+
# The genuinely confidence-graded judgement — *staleness* (is this flag dead /
|
|
14
|
+
# obsolete?) — needs a live OpenFeature provider to know which keys still exist,
|
|
15
|
+
# and is deferred (like the Coverband/Flipper live stores). So the humility
|
|
16
|
+
# invariant holds in this register too: a static scan can never prove a flag is
|
|
17
|
+
# unused (it may be referenced dynamically, via provider config, or from outside
|
|
18
|
+
# the codebase), and nothing here says it is.
|
|
19
|
+
#
|
|
20
|
+
# {classify} is a pure function of the observed signals — no IO, no Prism nodes —
|
|
21
|
+
# so it is pinned against hand-built inputs exactly like {ABC}, the coverage
|
|
22
|
+
# {Resolver}, the duplication {Confidence} model, and {Boundaries::Severity}.
|
|
23
|
+
# Drift is a bug.
|
|
24
|
+
module Classification
  # Category label for flag findings.
  CATEGORY = "feature_flag"

  # Every value_type a classification can carry. The first four are read from
  # the fetch_<type>_* method name; "unknown" is what {classify} reports when
  # the observed types do not agree on exactly one type.
  VALUE_TYPES = %w[boolean string number object unknown].freeze
  MIXED = "unknown"

  # One auditable note behind a classification: a rule name plus a human-
  # readable detail. A classification is categorical, so there is no delta.
  Reason = Struct.new(:rule, :detail) do
    # @return [Hash] serializable form; the rule is rendered as a String
    def to_h
      {rule: rule.to_s, detail: detail}
    end
  end

  # The classified result: the resolved value_type, the reference count, the
  # distinct literal defaults, and the reasons behind them.
  Assessment = Struct.new(:value_type, :reference_count, :default_values, :reasons)

  module_function

  # Classifies one flag from the signals observed at its call sites.
  #
  # @param value_types [Array<String>] one observed value_type per call site
  # @param default_values [Array<String, nil>] one observed literal default per
  #   call site (nil where the default was not a literal)
  # @return [Assessment] default_values holds the distinct non-nil defaults, sorted
  def classify(value_types:, default_values:)
    distinct_types = value_types.uniq.sort
    resolved =
      if distinct_types.size == 1
        distinct_types.first
      else
        MIXED
      end
    total = value_types.size
    literals = default_values.compact.uniq.sort

    notes = [
      type_reason(resolved, distinct_types, total),
      Reason.new(rule: :reference_count, detail: "referenced at #{pluralize(total, "call site")}")
    ]
    # The defaults note is only recorded when at least one literal was seen.
    unless literals.empty?
      notes << Reason.new(rule: :default_values, detail: "observed default value(s): #{literals.join(", ")}")
    end

    Assessment.new(value_type: resolved, reference_count: total, default_values: literals, reasons: notes)
  end

  # The reason describing how the value type was resolved.
  #
  # @param value_type [String] the resolved type, or {MIXED}
  # @param observed [Array<String>] the distinct observed types, sorted
  # @param reference_count [Integer] number of call sites
  # @return [Reason]
  def type_reason(value_type, observed, reference_count)
    unless value_type == MIXED
      return Reason.new(
        rule: :"#{value_type}_flag",
        detail: "evaluated as a #{value_type} flag across #{pluralize(reference_count, "reference")}"
      )
    end

    Reason.new(
      rule: :mixed_value_types,
      detail: "referenced with differing value types (#{observed.join(", ")}); the flag type is ambiguous"
    )
  end

  # "<count> <noun>", with an "s" appended to the noun unless count is 1.
  def pluralize(count, noun)
    suffix = (count == 1) ? "" : "s"
    "#{count} #{noun}#{suffix}"
  end
end
|
|
78
|
+
end
|
|
79
|
+
end
|
|
@@ -0,0 +1,162 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "json"
|
|
4
|
+
require "time"
|
|
5
|
+
|
|
6
|
+
module Moult
|
|
7
|
+
module Flags
|
|
8
|
+
# Ingests a LOCAL OpenFeature provider flag-state export and normalises it into
|
|
9
|
+
# one Moult-owned value object ({FlagSet}) the {Staleness} model can read. This is
|
|
10
|
+
# the flags analogue of {Coverage} (which ingests a SimpleCov/stdlib dump) and of
|
|
11
|
+
# {Boundaries::Packwerk} (which ingests packwerk's committed artifacts): an
|
|
12
|
+
# external format comes in, only Moult types go out, so the provider is swappable
|
|
13
|
+
# and nothing downstream depends on its on-disk shape.
|
|
14
|
+
#
|
|
15
|
+
# One on-disk format is understood today (auto-detected, or forced via +format:+):
|
|
16
|
+
#
|
|
17
|
+
# * +:flagd+ — a flagd flag-definition JSON, the OpenFeature-native provider-
|
|
18
|
+
# agnostic representation of flag state:
|
|
19
|
+
# <tt>{"flags" => {key => {"state" => "ENABLED"|"DISABLED", "variants" => {...},
|
|
20
|
+
# "defaultVariant" => "...", "targeting" => {...}, "metadata" => {...}}},
|
|
21
|
+
# "metadata" => {...}}</tt>.
|
|
22
|
+
#
|
|
23
|
+
# flagd quirks normalised HERE and nowhere else (the swap point for a future
|
|
24
|
+
# provider/standard):
|
|
25
|
+
#
|
|
26
|
+
# * +state+ "ENABLED"/"DISABLED" maps to +enabled+ true/false.
|
|
27
|
+
# * A non-empty +targeting+ object means the flag serves more than the default
|
|
28
|
+
# variant; an empty/absent one means it is fully rolled out to one variant.
|
|
29
|
+
# * flagd has no native archival/lifecycle state, so it is read from the standard
|
|
30
|
+
# per-flag +metadata+ extension point: +metadata.archived == true+, or
|
|
31
|
+
# +metadata.lifecycle+ in {"archived", "deprecated"}.
|
|
32
|
+
# * Timestamps (+metadata.updatedAt+/+lastModified+ per flag; the flag-set
|
|
33
|
+
# +metadata+ export stamp) are captured as evidence only; the live, streaming
|
|
34
|
+
# provider connection that would make them authoritative is deferred, exactly
|
|
35
|
+
# like the live Coverband store.
|
|
36
|
+
#
|
|
37
|
+
# This adapter takes NO dependency on the +openfeature-sdk+ or any vendor SDK; it
|
|
38
|
+
# reads the export with stdlib JSON, mirroring how {Coverage} needs no simplecov
|
|
39
|
+
# and {Boundaries::Packwerk} no packwerk gem.
|
|
40
|
+
module Snapshot
  module_function

  # Provenance of a merged provider snapshot. Captured into the protected
  # contract (analysis.provider) so a consumer can see where the staleness
  # evidence came from. +exported_at+ also seeds the deferred time-decay slice.
  Source = Struct.new(:backend, :version, :exported_at) do
    # @return [Hash] serializable form
    def to_h
      {backend: backend, version: version, exported_at: exported_at}
    end
  end

  # The provider's normalised state for one flag key. +enabled+/+archived+/
  # +has_targeting+ are the facts {Staleness} judges; +default_variant+ and
  # +updated_at+ are captured for context and the deferred time-decay seed.
  FlagState = Struct.new(:key, :enabled, :archived, :has_targeting, :default_variant, :updated_at)

  # A normalised snapshot: the provider's state per flag key, plus provenance.
  FlagSet = Struct.new(:states, :source) do
    # @return [Boolean] whether the provider knows this key
    def key?(flag_key)
      states.key?(flag_key)
    end

    # @return [FlagState, nil] the provider's state for the key, or nil if absent
    def state_for(flag_key)
      states[flag_key]
    end
  end

  # +metadata.lifecycle+ values (compared case-insensitively) that mark a flag
  # as archived.
  ARCHIVED_LIFECYCLES = %w[archived deprecated].freeze

  # Reads and normalises a provider snapshot file.
  #
  # The top-level JSON value is validated here, before any format handling, so
  # a forced +format:+ gets the same "not a JSON object" error as
  # auto-detection instead of a TypeError from indexing an array or number.
  #
  # @param path [String] path to the provider snapshot file
  # @param format [Symbol] :auto (default) or :flagd
  # @return [FlagSet]
  # @raise [Moult::Error] when the file is missing, is not valid JSON, is not a
  #   JSON object, or its format is unknown / cannot be detected
  def load(path, format: :auto)
    raw = JSON.parse(File.read(path))
    raise Moult::Error, "provider snapshot is not a JSON object" unless raw.is_a?(Hash)

    fmt = (format == :auto) ? detect_format(raw) : format
    case fmt
    when :flagd then from_flagd(raw, path)
    else raise Moult::Error, "unknown provider snapshot format: #{fmt}"
    end
  rescue JSON::ParserError => e
    raise Moult::Error, "could not parse provider snapshot #{path}: #{e.message}"
  rescue Errno::ENOENT
    raise Moult::Error, "no such provider snapshot: #{path}"
  end

  # A flagd export is a JSON object with a top-level "flags" map whose every
  # entry carries a "state" — the unambiguous discriminator.
  #
  # @param raw [Object] the parsed JSON document
  # @return [Symbol] :flagd
  # @raise [Moult::Error] when +raw+ is not a Hash or does not look like flagd
  def detect_format(raw)
    raise Moult::Error, "provider snapshot is not a JSON object" unless raw.is_a?(Hash)
    flags = raw["flags"]
    if flags.is_a?(Hash) && flags.values.all? { |f| f.is_a?(Hash) && f.key?("state") }
      :flagd
    else
      raise Moult::Error, "could not auto-detect provider snapshot format; pass --provider-format flagd"
    end
  end

  # Builds a {FlagSet} from a parsed flagd document. A missing or non-object
  # "flags"/"metadata" entry is treated as empty.
  #
  # @param raw [Hash] the parsed flagd document
  # @param path [String] the snapshot file, used only for the mtime fallback
  # @return [FlagSet]
  def from_flagd(raw, path)
    flags = raw["flags"].is_a?(Hash) ? raw["flags"] : {}
    meta = raw["metadata"].is_a?(Hash) ? raw["metadata"] : {}
    states = flags.each_with_object({}) do |(key, defn), acc|
      acc[key] = flag_state(key, defn)
    end
    source = Source.new(
      backend: "flagd",
      version: stringify(meta["version"]),
      exported_at: snapshot_timestamp(meta, path)
    )
    FlagSet.new(states: states, source: source)
  end

  # Normalises one flagd flag definition. A non-object definition or metadata
  # is treated as empty.
  #
  # @param key [String] the flag key
  # @param defn [Object] the flag definition as parsed from JSON
  # @return [FlagState]
  def flag_state(key, defn)
    defn = {} unless defn.is_a?(Hash)
    meta = defn["metadata"].is_a?(Hash) ? defn["metadata"] : {}
    FlagState.new(
      key: key,
      enabled: enabled?(defn["state"]),
      archived: archived?(meta),
      has_targeting: targeting?(defn["targeting"]),
      default_variant: defn["defaultVariant"],
      updated_at: stringify(meta["updatedAt"] || meta["lastModified"])
    )
  end

  # ENABLED/DISABLED -> true/false; any other (or missing) state -> nil, which
  # {Staleness} treats as neither rolled-out nor disabled (it falls through to
  # active rather than inventing a verdict). The comparison ignores case.
  def enabled?(state)
    case state.to_s.upcase
    when "ENABLED" then true
    when "DISABLED" then false
    end
  end

  # True when the metadata carries +archived+ as the literal boolean true, or
  # a +lifecycle+ listed in {ARCHIVED_LIFECYCLES}.
  def archived?(meta)
    # Deliberately strict: only JSON true counts, not a truthy string.
    return true if meta["archived"] == true
    ARCHIVED_LIFECYCLES.include?(meta["lifecycle"].to_s.downcase)
  end

  # True only for a non-empty targeting object.
  def targeting?(targeting)
    targeting.is_a?(Hash) && !targeting.empty?
  end

  # Best-effort export stamp: an explicit flag-set metadata timestamp if present,
  # else the file mtime (noted only as a fallback; it seeds deferred time-decay).
  # Any StandardError while reading the mtime yields nil.
  def snapshot_timestamp(meta, path)
    stamp = meta["exportedAt"] || meta["exported_at"] || meta["updatedAt"]
    return stringify(stamp) if stamp
    File.mtime(path).utc.iso8601
  rescue
    nil
  end

  # nil stays nil; anything else is converted with to_s.
  def stringify(value)
    value&.to_s
  end
end
|
|
161
|
+
end
|
|
162
|
+
end
|