ecoportal-api-graphql 1.3.10 → 1.3.11
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.ai-assistance/code/diff_pairing_engine.md +243 -0
- data/.ai-assistance/code/graphql_domain_knowledge.md +20 -10
- data/.ai-assistance/code/template_diff_pairing_domain.md +175 -0
- data/.ai-assistance/code/workflow-command-guide.md +28 -0
- data/.ai-assistance/projects/ooze-graphql-native-migration/INVENTORY.md +136 -0
- data/.ai-assistance/projects/ooze-graphql-native-migration/TODO.md +6 -1
- data/.ai-assistance/projects/qa-services-delivery/DECISIONS.md +93 -0
- data/.ai-assistance/projects/qa-services-delivery/INTENT.md +76 -0
- data/.ai-assistance/projects/qa-services-delivery/PHASE3-SCOPE.md +115 -0
- data/.ai-assistance/projects/qa-services-delivery/ROADMAP.md +99 -0
- data/.ai-assistance/projects/qa-services-delivery/TODO.md +81 -0
- data/.ai-assistance/projects/template-automatic-build-maintenance/INTENT.md +77 -0
- data/.ai-assistance/projects/template-automatic-build-maintenance/TODO.md +97 -0
- data/.ai-assistance/projects/template-diff-deploy/INTENT.md +12 -0
- data/.ai-assistance/projects/template-diff-deploy/TODO.md +9 -0
- data/.ai-assistance/projects/template-maintenance/PHASE0-FINDINGS.md +93 -0
- data/.ai-assistance/projects/template-maintenance/README.md +14 -0
- data/CHANGELOG.md +87 -0
- data/docs/worklog.md +279 -0
- data/ecoportal-api-graphql.gemspec +1 -1
- data/lib/ecoportal/api/graphql/base/page/data_field.rb +1 -1
- data/lib/ecoportal/api/graphql/builder/template_builder.rb +174 -0
- data/lib/ecoportal/api/graphql/builder.rb +17 -16
- data/lib/ecoportal/api/graphql/diff/change.rb +59 -0
- data/lib/ecoportal/api/graphql/diff/command_synthesizer.rb +329 -0
- data/lib/ecoportal/api/graphql/diff/cross_object_diff.rb +165 -0
- data/lib/ecoportal/api/graphql/diff/deploy.rb +121 -0
- data/lib/ecoportal/api/graphql/diff/id_resolver.rb +64 -0
- data/lib/ecoportal/api/graphql/diff/pairing/candidate.rb +32 -0
- data/lib/ecoportal/api/graphql/diff/pairing/engine.rb +173 -0
- data/lib/ecoportal/api/graphql/diff/pairing/ledger.rb +119 -0
- data/lib/ecoportal/api/graphql/diff/pairing/signals.rb +104 -0
- data/lib/ecoportal/api/graphql/diff/strategy.rb +113 -0
- data/lib/ecoportal/api/graphql/diff/version_diff.rb +332 -0
- data/lib/ecoportal/api/graphql/diff.rb +34 -0
- data/lib/ecoportal/api/graphql/fragment/pages/common_page_union.rb +1 -0
- data/lib/ecoportal/api/graphql/input/workflow_command/add_field.rb +27 -18
- data/lib/ecoportal/api/graphql/mutation/action/archive.rb +1 -1
- data/lib/ecoportal/api/graphql/mutation/action/create.rb +1 -1
- data/lib/ecoportal/api/graphql/mutation/action/update.rb +1 -1
- data/lib/ecoportal/api/graphql/mutation/contractor_entity/create.rb +1 -1
- data/lib/ecoportal/api/graphql/mutation/contractor_entity/destroy.rb +1 -1
- data/lib/ecoportal/api/graphql/mutation/contractor_entity/update.rb +1 -1
- data/lib/ecoportal/api/graphql/mutation/kickstand/fail_workflow.rb +1 -1
- data/lib/ecoportal/api/graphql/mutation/kickstand/start_workflow.rb +1 -1
- data/lib/ecoportal/api/graphql/mutation/kickstand/stop_workflow.rb +1 -1
- data/lib/ecoportal/api/graphql.rb +1 -0
- data/lib/ecoportal/api/graphql_version.rb +1 -1
- data/tests/dump_template_model.rb +90 -0
- data/tests/validate_queries.rb +31 -9
- metadata +31 -3
|
@@ -0,0 +1,165 @@
|
|
|
1
|
+
module Ecoportal
|
|
2
|
+
module API
|
|
3
|
+
class GraphQL
|
|
4
|
+
module Diff
|
|
5
|
+
# Cross-object diff: two templates/pages that DO NOT share Mongo ids (UAT<->PROD,
|
|
6
|
+
# page<->template). Because nothing has the same id as its counterpart, we cannot match by id
|
|
7
|
+
# (that is `VersionDiff`). Instead we PAIR the fields as an equivalence problem
|
|
8
|
+
# (`Pairing::Engine` — genome + type + label + options, human-assisted, ledger-backed), build
|
|
9
|
+
# an id-correspondence map from the accepted pairs, then emit the SAME `Change` output against
|
|
10
|
+
# that map so the existing `CommandSynthesizer` / `Deploy` layer can consume it unchanged.
|
|
11
|
+
#
|
|
12
|
+
# This deliberately operates at the FIELD level — the load-bearing simplifying principle
|
|
13
|
+
# (domain ref §6): sections/stages are scaffolding, customer data lives in the data-fields, so
|
|
14
|
+
# a cross-object diff pairs FIELDS precisely and treats structure as context. Structure-level
|
|
15
|
+
# (stage/section) reconciliation across id-spaces is not attempted here (no reliable pairing
|
|
16
|
+
# signal for scaffolding) — it is left to the human review the engine already routes to.
|
|
17
|
+
#
|
|
18
|
+
# engine = Pairing::Engine.new(ledger: ledger) # optional
|
|
19
|
+
# diff = CrossObjectDiff.new(uat_doc, prod_doc, engine: engine,
|
|
20
|
+
# strategy: Strategy.new(pairing: :assisted, scope: :data_migration))
|
|
21
|
+
# diff.changes # => [Change, ...] (field label/type changes on paired fields; +added/-removed)
|
|
22
|
+
# diff.pairing # => Pairing::Engine::Result (accepted / ambiguous / unmatched — for review)
|
|
23
|
+
# diff.unresolved # => [field-doc, ...] sources the human must adjudicate (ambiguous+unmatched)
|
|
24
|
+
#
|
|
25
|
+
# SAFETY — only ACCEPTED pairs are treated as equivalences; ambiguous/unmatched sources are
|
|
26
|
+
# surfaced in `unresolved` and NEVER auto-paired or auto-removed. A same-label field whose
|
|
27
|
+
# genome contradicts is escalated, not silently matched (the engine guarantees this).
|
|
28
|
+
class CrossObjectDiff
  attr_reader :strategy

  # @param source_doc [Hash, nil] the "before" page/template doc; nil is treated as `{}`.
  # @param target_doc [Hash, nil] the "after" page/template doc; nil is treated as `{}`.
  # @param engine [#pair, nil] field matcher; a `Pairing::Engine.new` is built when nil.
  # @param strategy [#filter, nil] post-filter applied to the raw change list; falls back to
  #   `Strategy.new(pairing: :assisted)` when nil.
  #
  # NOTE(review): changes are emitted source -> target (`before` is the source value, `after`
  # the target value, target-only fields become :added). Confirm callers pass the two docs in
  # the direction they intend to replay.
  def initialize(source_doc, target_doc, engine: nil, strategy: Strategy.new(pairing: :assisted))
    @source   = source_doc || {}
    @target   = target_doc || {}
    @engine   = engine || Pairing::Engine.new
    @strategy = strategy || Strategy.new(pairing: :assisted)
  end

  # Memoised result of pairing the flattened source fields against the target fields.
  def pairing
    @pairing ||= @engine.pair(source_fields, target_fields)
  end

  # Source field docs awaiting a human decision: the source side of every ambiguous
  # candidate followed by the unmatched sources. None of them is reported as a removal.
  def unresolved
    ambiguous_sources = pairing.ambiguous.map(&:source)
    ambiguous_sources + pairing.unmatched
  end

  # Memoised change list after the strategy's filter has been applied.
  def changes
    @changes ||= @strategy.filter(all_changes)
  end

  # One description per change.
  def changelog
    changes.map(&:description)
  end

  # Counts per operation plus totals for changes, accepted pairs and unresolved sources.
  def summary
    grouped = changes.group_by(&:op)
    counts = %i[added removed changed moved].each_with_object({}) do |op, acc|
      acc[op] = grouped.fetch(op, []).size
    end
    counts.merge(total: changes.size, paired: pairing.accepted.size, unresolved: unresolved.size)
  end

  def to_h
    { summary: summary, changes: changes.map(&:to_h), pairing: pairing.to_h }
  end

  private

  # Raw (unfiltered) change list, in this order:
  #   1. label/type differences on accepted pairs (keyed by the SOURCE field id);
  #   2. removals for sources that are neither paired nor held for review;
  #   3. additions for targets that are neither paired nor part of an ambiguous candidate.
  def all_changes
    pairs = pairing.accepted
    differences = pairs.flat_map { |pair| compare_pair(pair) }
    differences + removed_changes(pairs.map(&:source_id)) + added_changes(pairs.map(&:target_id))
  end

  # :changed entries for one accepted pair — 'label' first, then 'type' — only where the
  # source and target values differ.
  def compare_pair(candidate)
    src = candidate.source
    tgt = candidate.target
    id  = candidate.source_id
    probes = {
      'label' => [src['label'], tgt['label']],
      'type'  => [field_type(src), field_type(tgt)]
    }
    probes.reject { |_, (before, after)| before == after }
          .map { |attribute, (before, after)| field_change(id, src, tgt, attribute, before, after) }
  end

  # Removals: source fields whose id is neither in `paired_src` nor in the unresolved set.
  #
  # NOTE(review): `Pairing::Engine#pair` classifies every source as accepted, ambiguous or
  # unmatched, so with that engine this list looks like it is always empty — confirm intended.
  def removed_changes(paired_src)
    excluded = paired_src + unresolved.map { |f| f['id'] }
    source_fields.each_with_object([]) do |field, out|
      next if excluded.include?(field['id'])

      out << Change.new(op: :removed, kind: :field, id: field['id'], label: field['label'])
    end
  end

  # Additions: target fields whose id is neither in `paired_tgt` nor the target of an
  # ambiguous candidate (those stay out until a human has decided).
  def added_changes(paired_tgt)
    excluded = paired_tgt + pairing.ambiguous.map(&:target_id)
    target_fields.each_with_object([]) do |field, out|
      next if excluded.include?(field['id'])

      out << Change.new(op: :added, kind: :field, id: field['id'], label: field['label'])
    end
  end

  # Builds one :changed Change; the label shown is the target's label.
  def field_change(id, _src, tgt, attribute, before, after) # rubocop:disable Metrics/ParameterLists
    Change.new(
      op: :changed, kind: :field, id: id, label: tgt['label'],
      attribute: attribute, before: before, after: after
    )
  end

  # `__typename` when present, otherwise `type`.
  def field_type(field)
    field['__typename'] || field['type']
  end

  def source_fields
    @source_fields ||= fields_of(@source)
  end

  def target_fields
    @target_fields ||= fields_of(@target)
  end

  # Collects, per section and in this order, `dataFields`, `leftDataFields` and
  # `rightDataFields` from every stage, keeping only Hash entries that carry an 'id'.
  def fields_of(doc)
    sections = Array(doc['stages']).flat_map { |stage| Array(stage['sections']) }
    collected = sections.flat_map do |section|
      %w[dataFields leftDataFields rightDataFields].flat_map { |key| Array(section[key]) }
    end
    collected.select { |f| f.is_a?(Hash) && f['id'] }
  end
end
|
|
162
|
+
end
|
|
163
|
+
end
|
|
164
|
+
end
|
|
165
|
+
end
|
|
@@ -0,0 +1,121 @@
|
|
|
1
|
+
module Ecoportal
|
|
2
|
+
module API
|
|
3
|
+
class GraphQL
|
|
4
|
+
module Diff
|
|
5
|
+
# Deploy orchestration: turns a diff into the replayable, ordered WorkflowCommand batch that
|
|
6
|
+
# `executeWorkflowCommands` consumes — plus the honest list of changes that could NOT be
|
|
7
|
+
# synthesised (needs-human), so a review step can gate the apply.
|
|
8
|
+
#
|
|
9
|
+
# Two entry points:
|
|
10
|
+
#
|
|
11
|
+
# # self-version replay (same object, real ids) — the "commit" replayed onto itself/a clone
|
|
12
|
+
# plan = Deploy.from_versions(before_doc, after_doc)
|
|
13
|
+
#
|
|
14
|
+
# # cross-object deploy (UAT delta -> a real PROD target): the diff gives the delta, but the
|
|
15
|
+
# # move targets (section/stage) must resolve against the PROD doc's ids.
|
|
16
|
+
# plan = Deploy.from_versions(uat_v1, uat_v2, target_doc: prod_doc)
|
|
17
|
+
#
|
|
18
|
+
# plan.commands # => ordered Array of built command hashes, ready to send
|
|
19
|
+
# plan.unsupported # => [Change, ...] needing human handling (never guessed)
|
|
20
|
+
# plan.changelog # => human one-liners for a ticket / review checklist
|
|
21
|
+
# plan.execute!(page) # => sends via the given executor (page.execute_workflow_commands)
|
|
22
|
+
#
|
|
23
|
+
# SAFETY — `Deploy` is inert until `execute!` is called with an explicit executor. It never
|
|
24
|
+
# applies anything on its own, and it surfaces `unsupported` so a human gates the deploy.
|
|
25
|
+
class Deploy
  attr_reader :diff, :synthesizer

  class << self
    # Plan from two snapshots of the same object. When no `resolver:` is given and a
    # `target_doc` is, the resolver is built with `IdResolver.from_doc(target_doc)`.
    # `strategy:` is handed to `VersionDiff`; `thread_placeholders:` is passed through to the
    # synthesizer unchanged (this method does not inspect the strategy to decide it).
    def from_versions(before_doc, after_doc, target_doc: nil, resolver: nil,
                      thread_placeholders: true, strategy: Strategy.default)
      version_diff = VersionDiff.new(before_doc, after_doc, strategy: strategy)
      new(version_diff, resolver: resolver_for(resolver, target_doc),
                        thread_placeholders: thread_placeholders)
    end

    # Plan for two docs that do not share ids: the delta comes from a `CrossObjectDiff`,
    # whose pairing result stays reachable through `#pairing` for review before applying.
    # The resolver defaults to `IdResolver.from_doc(target_doc)` as in `from_versions`.
    #
    #   plan = Deploy.from_cross_object(uat_doc, prod_doc, engine: engine)
    #   plan.pairing.ambiguous   # review before deploy
    #   plan.commands            # the paired delta as WorkflowCommands
    def from_cross_object(source_doc, target_doc, engine: nil, resolver: nil,
                          thread_placeholders: true,
                          strategy: Strategy.new(pairing: :assisted, scope: :data_migration, intent: :deploy))
      cross_diff = CrossObjectDiff.new(source_doc, target_doc, engine: engine, strategy: strategy)
      new(cross_diff, resolver: resolver_for(resolver, target_doc),
                      thread_placeholders: thread_placeholders)
    end

    private

    # An explicit resolver wins; otherwise one is derived from `target_doc` when present.
    def resolver_for(explicit, target_doc)
      return explicit if explicit

      target_doc ? IdResolver.from_doc(target_doc) : nil
    end
  end

  # @param diff [#changes, #changelog, #summary] a `VersionDiff` or `CrossObjectDiff`.
  # @param resolver [#resolve, nil] handed to the `CommandSynthesizer` as-is.
  # @param thread_placeholders [Boolean] handed to the `CommandSynthesizer` as-is.
  def initialize(diff, resolver: nil, thread_placeholders: true)
    @diff = diff
    @synthesizer = CommandSynthesizer.new(
      diff.changes,
      resolver: resolver,
      thread_placeholders: thread_placeholders
    )
  end

  # The diff's pairing result, or nil when the diff does not expose one.
  def pairing
    return unless @diff.respond_to?(:pairing)

    @diff.pairing
  end

  # Commands produced by the synthesizer.
  def commands
    @synthesizer.commands
  end

  # Changes the synthesizer reports as unsupported.
  def unsupported
    @synthesizer.unsupported
  end

  def changelog
    @diff.changelog
  end

  def summary
    { changes: @diff.summary, commands: commands.size, unsupported: unsupported.size }
  end

  # True when the synthesizer reports no unsupported change.
  def fully_supported?
    unsupported.empty?
  end

  def to_h
    { summary: summary, commands: commands, unsupported: unsupported.map(&:to_h) }
  end

  # Sends the commands through `executor.execute_workflow_commands`.
  #
  # @raise [ArgumentError] when unsupported changes remain and `allow_partial` is false.
  #
  # NOTE(review): only `unsupported` gates the call; unresolved pairings of a cross-object
  # plan are not checked here — confirm callers review `#pairing` themselves.
  def execute!(executor, allow_partial: false)
    blocked = !fully_supported? && !allow_partial
    raise ArgumentError, "#{unsupported.size} unsupported change(s); review before deploy" if blocked

    executor.execute_workflow_commands(commands)
  end
end
|
|
118
|
+
end
|
|
119
|
+
end
|
|
120
|
+
end
|
|
121
|
+
end
|
|
@@ -0,0 +1,64 @@
|
|
|
1
|
+
module Ecoportal
|
|
2
|
+
module API
|
|
3
|
+
class GraphQL
|
|
4
|
+
module Diff
|
|
5
|
+
# Resolves the human keys a `VersionDiff` records (stage NAME, section HEADING) into the
|
|
6
|
+
# target-doc ids that move commands need (moveField -> section id, add/removeStageSection ->
|
|
7
|
+
# stage id). A structural diff only knows those human keys; this fills the gap WITHOUT
|
|
8
|
+
# guessing — it looks them up in a real target doc (the deploy destination), so
|
|
9
|
+
# `CommandSynthesizer` can emit faithful move commands.
|
|
10
|
+
#
|
|
11
|
+
# Answers `resolve(kind, key) => id | nil`. A nil (ambiguous / not found) keeps the move
|
|
12
|
+
# UNSUPPORTED rather than picking a wrong id.
|
|
13
|
+
#
|
|
14
|
+
# resolver = IdResolver.from_doc(prod_template_doc)
|
|
15
|
+
# CommandSynthesizer.new(changes, resolver: resolver)
|
|
16
|
+
#
|
|
17
|
+
# AMBIGUITY — if two stages share a name (or two sections a heading), the key is ambiguous
|
|
18
|
+
# and resolves to nil (the caller must disambiguate). Uniqueness is the safe default.
|
|
19
|
+
class IdResolver
  # @param index [Hash] explicit lookup table, e.g.
  #   { stage: { 'Report' => 'stg1' }, section: { 'Location' => 'sec1' } }
  def initialize(index = {})
    @index = index
  end

  # Indexes a page/template doc: stages by 'name', sections (of every stage) by 'heading'.
  # A key seen with more than one distinct id is left out, so it resolves to nil.
  def self.from_doc(doc)
    stage_docs = Array((doc || {})['stages'])
    stage_pairs = stage_docs.map { |stage| [stage['name'], stage['id']] }
    section_pairs = stage_docs.flat_map do |stage|
      Array(stage['sections']).map { |section| [section['heading'], section['id']] }
    end
    new(stage: unique_index(stage_pairs), section: unique_index(section_pairs))
  end

  # @return [String, nil] the id stored for (kind, key); nil when the kind or key is unknown.
  def resolve(kind, key)
    @index.dig(kind.to_sym, key)
  end

  class << self
    private

    # Turns [key, id] pairs into { key => id }, skipping pairs with a nil key or nil id and
    # dropping every key that maps to more than one distinct id.
    def unique_index(pairs)
      usable = pairs.reject { |key, id| key.nil? || id.nil? }
      usable.group_by(&:first).each_with_object({}) do |(key, group), index|
        ids = group.map(&:last).uniq
        index[key] = ids.first if ids.size == 1
      end
    end
  end
end
|
|
61
|
+
end
|
|
62
|
+
end
|
|
63
|
+
end
|
|
64
|
+
end
|
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
module Ecoportal
|
|
2
|
+
module API
|
|
3
|
+
class GraphQL
|
|
4
|
+
module Diff
|
|
5
|
+
module Pairing
|
|
6
|
+
# A scored pairing proposal: source object (id-space A) <-> target object (id-space B),
|
|
7
|
+
# with the aggregate confidence and the per-signal breakdown that produced it.
|
|
8
|
+
#
|
|
9
|
+
# `source`/`target` are the raw field docs (Hashes) being paired. `score` is 0.0..1.0.
|
|
10
|
+
# `signals` maps signal-name => contribution (for transparency + the ledger + Product's
|
|
11
|
+
# Field-ID data). `matched_by` names the dominant signal (e.g. :genome, :label, :ledger).
|
|
12
|
+
Candidate = Struct.new(:source, :target, :score, :signals, :matched_by, keyword_init: true) do
  # 'id' of the source doc; a missing source is returned as-is.
  def source_id
    return source unless source

    source['id']
  end

  # 'id' of the target doc; a missing target is returned as-is.
  def target_id
    return target unless target

    target['id']
  end

  # Id-level view of the proposal (the raw docs are not included); nil values are omitted.
  def to_h
    view = {
      source_id: source_id,
      target_id: target_id,
      score: score,
      matched_by: matched_by,
      signals: signals
    }
    view.reject { |_, value| value.nil? }
  end
end
|
|
28
|
+
end
|
|
29
|
+
end
|
|
30
|
+
end
|
|
31
|
+
end
|
|
32
|
+
end
|
|
@@ -0,0 +1,173 @@
|
|
|
1
|
+
module Ecoportal
|
|
2
|
+
module API
|
|
3
|
+
class GraphQL
|
|
4
|
+
module Diff
|
|
5
|
+
module Pairing
|
|
6
|
+
# The equivalence-matching engine for CROSS-OBJECT pairing (UAT<->PROD, page<->template).
|
|
7
|
+
#
|
|
8
|
+
# Given two lists of field docs living in different id-spaces, it proposes pairings by:
|
|
9
|
+
# 1. consulting the LEDGER first — a previously-confirmed pair auto-resolves (method
|
|
10
|
+
# :ledger, confidence 1.0), so pairing improves over time and only novelty is scored;
|
|
11
|
+
# 2. otherwise scoring every remaining source×target candidate with multi-signal
|
|
12
|
+
# confidence (genome + type + label + options — see Signals), picking the best target
|
|
13
|
+
# per source in a stable, greedy, one-to-one assignment (highest scores first).
|
|
14
|
+
#
|
|
15
|
+
# It then CLASSIFIES each best candidate:
|
|
16
|
+
# - `accepted` — score >= accept_threshold: high confidence, auto-paired;
|
|
17
|
+
# - `ambiguous` — accept > score >= review_threshold, OR the top two candidates are within
|
|
18
|
+
# `tie_margin` (genuinely close): route to a HUMAN to adjudicate;
|
|
19
|
+
# - `unmatched` — no target scored >= review_threshold: escalate as a novelty.
|
|
20
|
+
#
|
|
21
|
+
# NEVER guesses: only `accepted` pairs are safe to auto-apply / auto-record; ambiguous and
|
|
22
|
+
# unmatched are surfaced for human resolution. Confirmed decisions are written back to the
|
|
23
|
+
# ledger by the caller (or via #confirm!) so the next run needs no human on them.
|
|
24
|
+
class Engine
  DEFAULTS = { accept_threshold: 0.85, review_threshold: 0.5, tie_margin: 0.1 }.freeze

  # Outcome of a pairing run:
  #   accepted  — candidates that were auto-paired (ledger hits included);
  #   ambiguous — candidates held for a human decision;
  #   unmatched — source field docs for which no candidate was taken.
  Result = Struct.new(:accepted, :ambiguous, :unmatched, keyword_init: true) do
    # True when nothing is left for a human (no ambiguous candidate, no unmatched source).
    def resolved?
      ambiguous.empty? && unmatched.empty?
    end

    def to_h
      {
        accepted: accepted.map(&:to_h),
        ambiguous: ambiguous.map(&:to_h),
        unmatched: unmatched.map { |f| { source_id: f['id'], label: f['label'] } }
      }
    end
  end

  # @param ledger [#target_for, #record, nil] consulted first by #pair, written by #confirm!.
  # @param kind [Symbol] entity kind used for ledger lookups/records (default :field).
  # @param thresholds [Hash] overrides for DEFAULTS (:accept_threshold, :review_threshold,
  #   :tie_margin).
  def initialize(ledger: nil, kind: :field, **thresholds)
    @ledger = ledger
    @kind   = kind
    @cfg    = DEFAULTS.merge(thresholds)
  end

  # Pair `sources` (id-space A) to `targets` (id-space B).
  #
  # @return [Result]
  def pair(sources, targets)
    accepted, remaining_sources, remaining_targets = apply_ledger(Array(sources), Array(targets))
    scored = score_all(remaining_sources, remaining_targets)
    assign(scored, remaining_sources, accepted)
  end

  # Record a confirmed pairing in the ledger. `matched_by` overrides the candidate's own
  # `matched_by` (e.g. :human when a person adjudicated).
  #
  # @return [Object, nil] whatever the ledger's #record returns; nil without a ledger.
  def confirm!(candidate, matched_by: nil)
    return nil unless @ledger

    @ledger.record(
      kind: @kind, source_id: candidate.source_id, target_id: candidate.target_id,
      matched_by: matched_by || candidate.matched_by, confidence: candidate.score,
      signals: candidate.signals
    )
  end

  private

  # Resolve sources the ledger already knows. Each target is CONSUMED the moment it is
  # paired (`Hash#delete`), so two ledger entries pointing at the same target cannot both be
  # accepted: the first source (in input order) keeps the pair, the later one falls through
  # to scoring against the remaining targets.
  #
  # @return [Array] [accepted candidates, sources left, targets left]
  def apply_ledger(sources, targets)
    return [[], sources, targets] unless @ledger

    free = targets.each_with_object({}) { |t, h| h[t['id']] = t if t['id'] }
    accepted = []
    used = []

    left = sources.reject do |src|
      tid = @ledger.target_for(@kind, src['id'])
      tgt = tid && free.delete(tid)
      next false unless tgt

      accepted << ledger_candidate(src, tgt)
      used << tid
      true
    end

    [accepted, left, targets.reject { |t| used.include?(t['id']) }]
  end

  def ledger_candidate(source, target)
    Candidate.new(source: source, target: target, score: 1.0, matched_by: :ledger,
                  signals: { ledger: 1.0 })
  end

  # Score every source×target pair and keep those at/above the review threshold, best
  # first. `sort_by` alone is not guaranteed to be stable, so the position in the
  # (source-major) candidate list is used as a tie-breaker: equal scores keep input order
  # and the greedy assignment is reproducible.
  def score_all(sources, targets)
    floor = @cfg[:review_threshold]
    candidates = sources.flat_map { |src| targets.map { |tgt| score_pair(src, tgt) } }
    candidates.select { |c| c.score >= floor }
              .each_with_index
              .sort_by { |c, position| [-c.score, position] }
              .map(&:first)
  end

  # One candidate for (source, target): the four signals are evaluated, nil signals are
  # dropped, and the rest are combined into the score.
  def score_pair(source, target)
    parts = {
      genome: Signals.genome(source, target),
      type: Signals.type(source, target),
      label: Signals.label(source, target),
      options: Signals.options(source, target)
    }
    applied = parts.compact
    Candidate.new(source: source, target: target, score: weighted_average(applied),
                  signals: applied, matched_by: dominant(applied))
  end

  # Weighted mean over the signals that applied (weights from `Signals::WEIGHTS`; a signal
  # without a weight counts as 0). `fdiv` keeps the division in floating point even if
  # weights and signal values happen to be Integers.
  def weighted_average(applied)
    return 0.0 if applied.empty?

    num = applied.sum { |name, val| Signals::WEIGHTS.fetch(name, 0) * val }
    den = applied.sum { |name, _| Signals::WEIGHTS.fetch(name, 0) }
    den.zero? ? 0.0 : num.fdiv(den).round(4)
  end

  # Name of the signal with the highest value (ties broken by the higher weight).
  def dominant(applied)
    return nil if applied.empty?

    applied.max_by { |name, val| [val, Signals::WEIGHTS.fetch(name, 0)] }.first
  end

  # Greedy one-to-one assignment: walk the candidates best-first and take one whenever both
  # of its ends are still free, filing it under accepted or ambiguous. Sources that end up
  # with no taken candidate are returned as unmatched. `accepted` (the ledger hits) is
  # extended in place.
  def assign(scored, sources, accepted)
    ambiguous = []
    taken_src = accepted.map(&:source_id)
    taken_tgt = accepted.map(&:target_id)
    by_source = scored.group_by(&:source_id)

    scored.each do |cand|
      next if taken_src.include?(cand.source_id) || taken_tgt.include?(cand.target_id)

      (accept?(cand, by_source[cand.source_id]) ? accepted : ambiguous) << cand
      taken_src << cand.source_id
      taken_tgt << cand.target_id
    end

    unmatched = sources.reject { |s| taken_src.include?(s['id']) }
    Result.new(accepted: accepted, ambiguous: ambiguous, unmatched: unmatched)
  end

  # Accept only when the score reaches the accept threshold AND the best other candidate of
  # the SAME source trails by at least `tie_margin`.
  #
  # NOTE(review): only the source side is checked — two sources scoring equally for one
  # target are not treated as a tie here; confirm whether that case should also be held.
  def accept?(cand, siblings)
    return false if cand.score < @cfg[:accept_threshold]

    runner_up = Array(siblings).reject { |c| c.target_id == cand.target_id }.map(&:score).max
    runner_up.nil? || (cand.score - runner_up) >= @cfg[:tie_margin]
  end
end
|
|
169
|
+
end
|
|
170
|
+
end
|
|
171
|
+
end
|
|
172
|
+
end
|
|
173
|
+
end
|
|
@@ -0,0 +1,119 @@
|
|
|
1
|
+
require 'json'
|
|
2
|
+
require 'time'
|
|
3
|
+
|
|
4
|
+
module Ecoportal
|
|
5
|
+
module API
|
|
6
|
+
class GraphQL
|
|
7
|
+
module Diff
|
|
8
|
+
module Pairing
|
|
9
|
+
# The LEARNING LEDGER — a first-class artifact that records CONFIRMED equivalences between
|
|
10
|
+
# objects that live in different id-spaces (UAT<->PROD, page<->template). Because MongoDB
|
|
11
|
+
# gives every object a distinct id and nothing is shared across counterparts, pairing is an
|
|
12
|
+
# equivalence / entity-resolution problem, not a lookup. Every decision the engine (or a
|
|
13
|
+
# human) makes is persisted here so pairing IMPROVES over time: on the next run the ledger
|
|
14
|
+
# is consulted FIRST and previously-resolved pairs auto-resolve, leaving only genuine
|
|
15
|
+
# novelty for the human to adjudicate.
|
|
16
|
+
#
|
|
17
|
+
# An entry is keyed by (kind, source_id) and records the paired target_id plus HOW it was
|
|
18
|
+
# resolved (the method, the confidence, the signals, a timestamp). This log of *how* is the
|
|
19
|
+
# bridge data Product's Field-ID / template-entity-id effort needs.
|
|
20
|
+
#
|
|
21
|
+
# ledger = Ledger.load('pairings.json') # or Ledger.new (in-memory)
|
|
22
|
+
# ledger.record(kind: :field, source_id: 'a', target_id: 'b',
|
|
23
|
+
# matched_by: :genome, confidence: 0.98, signals: {...})
|
|
24
|
+
# ledger.lookup(:field, 'a') # => Entry or nil
|
|
25
|
+
# ledger.save # persist back to the same path
|
|
26
|
+
#
|
|
27
|
+
# SAFETY — the ledger stores only decisions that were CONFIRMED (auto-accepted at high
|
|
28
|
+
# confidence, or human-adjudicated). Ambiguous/low-confidence candidates are never written;
|
|
29
|
+
# they are escalated. The ledger is thus a growing store of ground truth, never guesses.
|
|
30
|
+
class Ledger
  # One confirmed equivalence. `to_h` omits nil members.
  Entry = Struct.new(:kind, :source_id, :target_id, :matched_by, :confidence, :signals,
                     :recorded_at, keyword_init: true) do
    def to_h
      {
        kind: kind, source_id: source_id, target_id: target_id, matched_by: matched_by,
        confidence: confidence, signals: signals, recorded_at: recorded_at
      }.compact
    end
  end

  # @param entries [Array<Entry,Hash>] existing entries (e.g. from a loaded file); Hashes may
  #   use String or Symbol keys.
  # @param path [String,nil] default destination for #save.
  def initialize(entries: [], path: nil)
    @path = path
    @entries = {}
    Array(entries).each { |e| add_entry(coerce(e)) }
  end

  # Load a ledger from a JSON file shaped like `{ "entries": [...] }`. A missing file yields
  # an empty ledger that still remembers `path` for #save.
  def self.load(path)
    data = File.exist?(path) ? JSON.parse(File.read(path)) : {}
    new(entries: data['entries'] || [], path: path)
  end

  # Record a confirmed equivalence, stamped with the current UTC time (ISO 8601). A later
  # record for the same (kind, source_id) replaces the earlier one.
  #
  # @return [Entry] the stored entry.
  def record(kind:, source_id:, target_id:, matched_by:, confidence: nil, signals: nil)
    entry = Entry.new(
      kind: kind.to_sym, source_id: source_id, target_id: target_id,
      matched_by: matched_by&.to_sym, confidence: confidence, signals: signals,
      recorded_at: Time.now.utc.iso8601
    )
    add_entry(entry)
    entry
  end

  # @return [Entry, nil] the entry stored for (kind, source_id).
  def lookup(kind, source_id)
    @entries[[kind.to_sym, source_id]]
  end

  # @return [Object, nil] the target id stored for (kind, source_id).
  def target_for(kind, source_id)
    lookup(kind, source_id)&.target_id
  end

  def entries
    @entries.values
  end

  def size
    @entries.size
  end

  def to_h
    { entries: entries.map(&:to_h) }
  end

  # Write the ledger as pretty-printed JSON to `path` (default: the path given at
  # construction / load).
  #
  # @return [String] the path written.
  # @raise [ArgumentError] when no path is available.
  def save(path = @path)
    raise ArgumentError, 'no path to save the ledger to' if path.nil?

    File.write(path, JSON.pretty_generate(to_h))
    path
  end

  private

  # Entries are keyed by (kind, source_id); the last one added wins.
  def add_entry(entry)
    @entries[[entry.kind, entry.source_id]] = entry
  end

  # Normalise a Hash (String or Symbol keys, e.g. parsed JSON) into an Entry. `kind`,
  # `matched_by` and the signal names are turned back into Symbols so an entry reloaded from
  # disk looks the same as one freshly recorded in memory.
  def coerce(entry)
    return entry if entry.is_a?(Entry)

    h = entry.transform_keys(&:to_sym)
    Entry.new(
      kind: h[:kind]&.to_sym, source_id: h[:source_id], target_id: h[:target_id],
      matched_by: h[:matched_by]&.to_sym, confidence: h[:confidence],
      signals: symbolize_signals(h[:signals]), recorded_at: h[:recorded_at]
    )
  end

  # JSON turns the signal names into Strings; restore Symbols. Non-Hash values pass through.
  def symbolize_signals(signals)
    signals.is_a?(Hash) ? signals.transform_keys(&:to_sym) : signals
  end
end
|
|
115
|
+
end
|
|
116
|
+
end
|
|
117
|
+
end
|
|
118
|
+
end
|
|
119
|
+
end
|