ecoportal-api-graphql 1.3.10 → 1.3.11

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (52) hide show
  1. checksums.yaml +4 -4
  2. data/.ai-assistance/code/diff_pairing_engine.md +243 -0
  3. data/.ai-assistance/code/graphql_domain_knowledge.md +20 -10
  4. data/.ai-assistance/code/template_diff_pairing_domain.md +175 -0
  5. data/.ai-assistance/code/workflow-command-guide.md +28 -0
  6. data/.ai-assistance/projects/ooze-graphql-native-migration/INVENTORY.md +136 -0
  7. data/.ai-assistance/projects/ooze-graphql-native-migration/TODO.md +6 -1
  8. data/.ai-assistance/projects/qa-services-delivery/DECISIONS.md +93 -0
  9. data/.ai-assistance/projects/qa-services-delivery/INTENT.md +76 -0
  10. data/.ai-assistance/projects/qa-services-delivery/PHASE3-SCOPE.md +115 -0
  11. data/.ai-assistance/projects/qa-services-delivery/ROADMAP.md +99 -0
  12. data/.ai-assistance/projects/qa-services-delivery/TODO.md +81 -0
  13. data/.ai-assistance/projects/template-automatic-build-maintenance/INTENT.md +77 -0
  14. data/.ai-assistance/projects/template-automatic-build-maintenance/TODO.md +97 -0
  15. data/.ai-assistance/projects/template-diff-deploy/INTENT.md +12 -0
  16. data/.ai-assistance/projects/template-diff-deploy/TODO.md +9 -0
  17. data/.ai-assistance/projects/template-maintenance/PHASE0-FINDINGS.md +93 -0
  18. data/.ai-assistance/projects/template-maintenance/README.md +14 -0
  19. data/CHANGELOG.md +87 -0
  20. data/docs/worklog.md +279 -0
  21. data/ecoportal-api-graphql.gemspec +1 -1
  22. data/lib/ecoportal/api/graphql/base/page/data_field.rb +1 -1
  23. data/lib/ecoportal/api/graphql/builder/template_builder.rb +174 -0
  24. data/lib/ecoportal/api/graphql/builder.rb +17 -16
  25. data/lib/ecoportal/api/graphql/diff/change.rb +59 -0
  26. data/lib/ecoportal/api/graphql/diff/command_synthesizer.rb +329 -0
  27. data/lib/ecoportal/api/graphql/diff/cross_object_diff.rb +165 -0
  28. data/lib/ecoportal/api/graphql/diff/deploy.rb +121 -0
  29. data/lib/ecoportal/api/graphql/diff/id_resolver.rb +64 -0
  30. data/lib/ecoportal/api/graphql/diff/pairing/candidate.rb +32 -0
  31. data/lib/ecoportal/api/graphql/diff/pairing/engine.rb +173 -0
  32. data/lib/ecoportal/api/graphql/diff/pairing/ledger.rb +119 -0
  33. data/lib/ecoportal/api/graphql/diff/pairing/signals.rb +104 -0
  34. data/lib/ecoportal/api/graphql/diff/strategy.rb +113 -0
  35. data/lib/ecoportal/api/graphql/diff/version_diff.rb +332 -0
  36. data/lib/ecoportal/api/graphql/diff.rb +34 -0
  37. data/lib/ecoportal/api/graphql/fragment/pages/common_page_union.rb +1 -0
  38. data/lib/ecoportal/api/graphql/input/workflow_command/add_field.rb +27 -18
  39. data/lib/ecoportal/api/graphql/mutation/action/archive.rb +1 -1
  40. data/lib/ecoportal/api/graphql/mutation/action/create.rb +1 -1
  41. data/lib/ecoportal/api/graphql/mutation/action/update.rb +1 -1
  42. data/lib/ecoportal/api/graphql/mutation/contractor_entity/create.rb +1 -1
  43. data/lib/ecoportal/api/graphql/mutation/contractor_entity/destroy.rb +1 -1
  44. data/lib/ecoportal/api/graphql/mutation/contractor_entity/update.rb +1 -1
  45. data/lib/ecoportal/api/graphql/mutation/kickstand/fail_workflow.rb +1 -1
  46. data/lib/ecoportal/api/graphql/mutation/kickstand/start_workflow.rb +1 -1
  47. data/lib/ecoportal/api/graphql/mutation/kickstand/stop_workflow.rb +1 -1
  48. data/lib/ecoportal/api/graphql.rb +1 -0
  49. data/lib/ecoportal/api/graphql_version.rb +1 -1
  50. data/tests/dump_template_model.rb +90 -0
  51. data/tests/validate_queries.rb +31 -9
  52. metadata +31 -3
@@ -0,0 +1,165 @@
1
+ module Ecoportal
2
+ module API
3
+ class GraphQL
4
+ module Diff
5
+ # Cross-object diff: two templates/pages that DO NOT share Mongo ids (UAT<->PROD,
6
+ # page<->template). Because nothing has the same id as its counterpart, we cannot match by id
7
+ # (that is `VersionDiff`). Instead we PAIR the fields as an equivalence problem
8
+ # (`Pairing::Engine` — genome + type + label + options, human-assisted, ledger-backed), build
9
+ # an id-correspondence map from the accepted pairs, then emit the SAME `Change` output against
10
+ # that map so the existing `CommandSynthesizer` / `Deploy` layer can consume it unchanged.
11
+ #
12
+ # This deliberately operates at the FIELD level — the load-bearing simplifying principle
13
+ # (domain ref §6): sections/stages are scaffolding, customer data lives in the data-fields, so
14
+ # a cross-object diff pairs FIELDS precisely and treats structure as context. Structure-level
15
+ # (stage/section) reconciliation across id-spaces is not attempted here (no reliable pairing
16
+ # signal for scaffolding) — it is left to the human review the engine already routes to.
17
+ #
18
+ # engine = Pairing::Engine.new(ledger: ledger) # optional
19
+ # diff = CrossObjectDiff.new(uat_doc, prod_doc, engine: engine,
20
+ # strategy: Strategy.new(pairing: :assisted, scope: :data_migration))
21
+ # diff.changes # => [Change, ...] (field label/type changes on paired fields; +added/-removed)
22
+ # diff.pairing # => Pairing::Engine::Result (accepted / ambiguous / unmatched — for review)
23
+ # diff.unresolved # => [field-doc, ...] sources the human must adjudicate (ambiguous+unmatched)
24
+ #
25
+ # SAFETY — only ACCEPTED pairs are treated as equivalences; ambiguous/unmatched sources are
26
+ # surfaced in `unresolved` and NEVER auto-paired or auto-removed. A same-label field whose
27
+ # genome contradicts is escalated, not silently matched (the engine guarantees this).
28
class CrossObjectDiff
  # Section keys that carry data-field docs, in the order they are flattened.
  FIELD_KEYS = %w[dataFields leftDataFields rightDataFields].freeze
  private_constant :FIELD_KEYS

  attr_reader :strategy

  # @param source_doc [Hash,nil] the "before" template/page doc; nil is treated as an empty doc.
  # @param target_doc [Hash,nil] the "after"/destination doc; nil is treated as an empty doc.
  # @param engine [#pair,nil] the equivalence matcher; `Pairing::Engine.new` is built when nil.
  # @param strategy [#filter,nil] applied to the raw change list by `#changes`; falls back to
  #   `Strategy.new(pairing: :assisted)` when nil.
  def initialize(source_doc, target_doc, engine: nil, strategy: Strategy.new(pairing: :assisted))
    @source   = source_doc || {}
    @target   = target_doc || {}
    @engine   = engine || Pairing::Engine.new
    @strategy = strategy || Strategy.new(pairing: :assisted)
  end

  # The engine's pairing result for the two flattened field lists (memoised, so the engine is
  # asked only once per diff).
  def pairing
    @pairing ||= @engine.pair(source_fields, target_fields)
  end

  # Source field docs that need a human decision: the source side of every ambiguous candidate
  # followed by every unmatched source. These are never emitted as removals.
  def unresolved
    pairing.ambiguous.map(&:source) + pairing.unmatched
  end

  # The derived changes after the strategy filter has been applied (memoised).
  def changes
    @changes ||= @strategy.filter(all_changes)
  end

  # One description per change, in change order.
  def changelog
    changes.map(&:description)
  end

  # Counts per operation, plus the number of accepted pairs and of unresolved sources.
  def summary
    by_op = changes.group_by(&:op)
    {
      added:      by_op.fetch(:added, []).size,
      removed:    by_op.fetch(:removed, []).size,
      changed:    by_op.fetch(:changed, []).size,
      moved:      by_op.fetch(:moved, []).size,
      total:      changes.size,
      paired:     pairing.accepted.size,
      unresolved: unresolved.size
    }
  end

  def to_h
    { summary: summary, changes: changes.map(&:to_h), pairing: pairing.to_h }
  end

  private

  # Derive the raw (unfiltered) changes from the pairing, in this order:
  #   1. accepted pairs: label and/or type differences, keyed by the SOURCE field id;
  #   2. sources that are neither accepted nor in the review set: :removed;
  #   3. targets that are neither accepted nor referenced by an ambiguous candidate: :added.
  def all_changes
    accepted   = pairing.accepted
    paired_src = accepted.map(&:source_id)
    paired_tgt = accepted.map(&:target_id)

    accepted.flat_map { |candidate| compare_pair(candidate) } +
      removed_changes(paired_src) +
      added_changes(paired_tgt)
  end

  # Changes for one accepted pair. Only the label and the type are compared.
  def compare_pair(candidate)
    src      = candidate.source
    tgt      = candidate.target
    id       = candidate.source_id
    src_type = field_type(src)
    tgt_type = field_type(tgt)

    found = []
    found << field_change(id, src, tgt, 'label', src['label'], tgt['label']) if src['label'] != tgt['label']
    found << field_change(id, src, tgt, 'type', src_type, tgt_type) if src_type != tgt_type
    found
  end

  # :removed changes for source fields that are not accepted AND not held for review.
  def removed_changes(paired_src)
    skip = id_set(paired_src + unresolved.map { |field| field['id'] })
    source_fields.reject { |field| skip.key?(field['id']) }.map do |field|
      Change.new(op: :removed, kind: :field, id: field['id'], label: field['label'])
    end
  end

  # :added changes for target fields that are not accepted AND not the target side of an
  # ambiguous candidate (those stay with the reviewer instead of being auto-added).
  def added_changes(paired_tgt)
    skip = id_set(paired_tgt + pairing.ambiguous.map(&:target_id))
    target_fields.reject { |field| skip.key?(field['id']) }.map do |field|
      Change.new(op: :added, kind: :field, id: field['id'], label: field['label'])
    end
  end

  def field_change(id, _src, tgt, attribute, before, after) # rubocop:disable Metrics/ParameterLists
    Change.new(op: :changed, kind: :field, id: id, label: tgt['label'],
               attribute: attribute, before: before, after: after)
  end

  # A field's type: its `__typename`, falling back to `type`.
  def field_type(field)
    field['__typename'] || field['type']
  end

  def source_fields
    @source_fields ||= fields_of(@source)
  end

  def target_fields
    @target_fields ||= fields_of(@target)
  end

  # Flatten every data-field doc out of a page/template doc (stages -> sections -> fields),
  # keeping only Hash entries that carry an 'id'. Non-Hash stage/section entries (e.g. nil)
  # are skipped instead of raising.
  def fields_of(doc)
    fields = hashes(doc['stages']).flat_map do |stage|
      hashes(stage['sections']).flat_map do |section|
        FIELD_KEYS.flat_map { |key| hashes(section[key]) }
      end
    end
    fields.select { |field| field['id'] }
  end

  # The Hash members of `value` coerced to an Array; anything else is dropped.
  def hashes(value)
    Array(value).select { |item| item.is_a?(Hash) }
  end

  # Hash used as a set so membership checks are constant-time lookups.
  def id_set(ids)
    ids.each_with_object({}) { |id, set| set[id] = true }
  end
end
162
+ end
163
+ end
164
+ end
165
+ end
@@ -0,0 +1,121 @@
1
+ module Ecoportal
2
+ module API
3
+ class GraphQL
4
+ module Diff
5
+ # Deploy orchestration: turns a diff into the replayable, ordered WorkflowCommand batch that
6
+ # `executeWorkflowCommands` consumes — plus the honest list of changes that could NOT be
7
+ # synthesised (needs-human), so a review step can gate the apply.
8
+ #
9
+ # Two entry points:
10
+ #
11
+ # # self-version replay (same object, real ids) — the "commit" replayed onto itself/a clone
12
+ # plan = Deploy.from_versions(before_doc, after_doc)
13
+ #
14
+ # # cross-object deploy (UAT delta -> a real PROD target): the diff gives the delta, but the
15
+ # # move targets (section/stage) must resolve against the PROD doc's ids.
16
+ # plan = Deploy.from_versions(uat_v1, uat_v2, target_doc: prod_doc)
17
+ #
18
+ # plan.commands # => ordered Array of built command hashes, ready to send
19
+ # plan.unsupported # => [Change, ...] needing human handling (never guessed)
20
+ # plan.changelog # => human one-liners for a ticket / review checklist
21
+ # plan.execute!(page) # => sends via the given executor (page.execute_workflow_commands)
22
+ #
23
+ # SAFETY — `Deploy` is inert until `execute!` is called with an explicit executor. It never
24
+ # applies anything on its own, and it surfaces `unsupported` so a human gates the deploy.
25
class Deploy
  attr_reader :diff, :synthesizer

  class << self
    # Plan from two snapshots diffed with `VersionDiff`.
    #
    # @param before_doc [Hash] the earlier snapshot.
    # @param after_doc [Hash] the later snapshot.
    # @param target_doc [Hash,nil] when given and no `resolver:` is passed, a resolver is built
    #   from it with `IdResolver.from_doc`.
    # @param resolver [#resolve,nil] explicit resolver; wins over the derived one.
    # @param thread_placeholders [Boolean] forwarded to the synthesizer.
    # @param strategy [Strategy] forwarded to `VersionDiff`.
    def from_versions(before_doc, after_doc, target_doc: nil, resolver: nil,
                      thread_placeholders: true, strategy: Strategy.default)
      resolver ||= target_doc ? IdResolver.from_doc(target_doc) : nil
      version_diff = VersionDiff.new(before_doc, after_doc, strategy: strategy)
      new(version_diff, resolver: resolver, thread_placeholders: thread_placeholders)
    end

    # Plan from two docs that do not share ids, diffed with `CrossObjectDiff`. The pairing the
    # diff computed stays reachable through `#pairing`.
    #
    # @param source_doc [Hash] the source-side doc.
    # @param target_doc [Hash] the destination doc; also used to derive the resolver when no
    #   `resolver:` is passed.
    # @param engine [Pairing::Engine,nil] forwarded to `CrossObjectDiff`.
    def from_cross_object(source_doc, target_doc, engine: nil, resolver: nil,
                          thread_placeholders: true,
                          strategy: Strategy.new(pairing: :assisted, scope: :data_migration, intent: :deploy))
      resolver ||= target_doc ? IdResolver.from_doc(target_doc) : nil
      cross_diff = CrossObjectDiff.new(source_doc, target_doc, engine: engine, strategy: strategy)
      new(cross_diff, resolver: resolver, thread_placeholders: thread_placeholders)
    end
  end

  # @param diff [#changes, #changelog, #summary] a `VersionDiff` or `CrossObjectDiff`.
  # @param resolver [#resolve, nil] handed to the `CommandSynthesizer` as-is.
  # @param thread_placeholders [Boolean] handed to the `CommandSynthesizer` as-is.
  def initialize(diff, resolver: nil, thread_placeholders: true)
    @diff = diff
    @synthesizer = CommandSynthesizer.new(
      diff.changes, resolver: resolver, thread_placeholders: thread_placeholders
    )
  end

  # The diff's pairing result, or nil when the diff does not expose one.
  def pairing
    return nil unless @diff.respond_to?(:pairing)

    @diff.pairing
  end

  # The synthesizer's command list.
  def commands
    @synthesizer.commands
  end

  # The changes the synthesizer could not turn into commands.
  def unsupported
    @synthesizer.unsupported
  end

  def changelog
    @diff.changelog
  end

  # The diff's own summary plus command and unsupported counts.
  def summary
    counts = { changes: @diff.summary }
    counts[:commands] = commands.size
    counts[:unsupported] = unsupported.size
    counts
  end

  # True when no change was left unsupported.
  def fully_supported?
    unsupported.empty?
  end

  def to_h
    { summary: summary, commands: commands, unsupported: unsupported.map(&:to_h) }
  end

  # Send the batch through `executor.execute_workflow_commands(commands)`.
  #
  # @param executor [#execute_workflow_commands]
  # @param allow_partial [Boolean] send even though some changes are unsupported.
  # @raise [ArgumentError] when unsupported changes exist and `allow_partial` is false.
  def execute!(executor, allow_partial: false)
    unless fully_supported? || allow_partial
      raise ArgumentError, "#{unsupported.size} unsupported change(s); review before deploy"
    end

    executor.execute_workflow_commands(commands)
  end
end
118
+ end
119
+ end
120
+ end
121
+ end
@@ -0,0 +1,64 @@
1
+ module Ecoportal
2
+ module API
3
+ class GraphQL
4
+ module Diff
5
+ # Resolves the human keys a `VersionDiff` records (stage NAME, section HEADING) into the
6
+ # target-doc ids that move commands need (moveField -> section id, add/removeStageSection ->
7
+ # stage id). A structural diff only knows those human keys; this fills the gap WITHOUT
8
+ # guessing — it looks them up in a real target doc (the deploy destination), so
9
+ # `CommandSynthesizer` can emit faithful move commands.
10
+ #
11
+ # Answers `resolve(kind, key) => id | nil`. A nil (ambiguous / not found) keeps the move
12
+ # UNSUPPORTED rather than picking a wrong id.
13
+ #
14
+ # resolver = IdResolver.from_doc(prod_template_doc)
15
+ # CommandSynthesizer.new(changes, resolver: resolver)
16
+ #
17
+ # AMBIGUITY — if two stages share a name (or two sections a heading), the key is ambiguous
18
+ # and resolves to nil (the caller must disambiguate). Uniqueness is the safe default.
19
class IdResolver
  # Build from an explicit map: { stage: { 'Report' => 'stg1' }, section: { 'Location' => 'sec1' } }
  #
  # The top-level kind keys are normalised to symbols, because `#resolve` always looks the kind
  # up as a symbol; a map keyed by 'stage' / 'section' strings therefore resolves as well.
  #
  # @param index [Hash,nil] kind => { key => id }; nil is treated as an empty map.
  def initialize(index = {})
    @index = (index || {}).each_with_object({}) do |(kind, ids_by_key), normalised|
      normalised[kind.respond_to?(:to_sym) ? kind.to_sym : kind] = ids_by_key
    end
  end

  class << self
    # Build a resolver by indexing a page/template doc's stages (by 'name') and sections
    # (by 'heading'). Keys that map to more than one distinct id are dropped, so they resolve
    # to nil. Stage/section entries that are not Hashes (e.g. nil) are skipped.
    #
    # @param doc [Hash,nil] nil is treated as an empty doc.
    # @return [IdResolver]
    def from_doc(doc)
      doc ||= {}
      stages = {}
      sections = {}
      hashes(doc['stages']).each do |stage|
        mark(stages, stage['name'], stage['id'])
        hashes(stage['sections']).each { |section| mark(sections, section['heading'], section['id']) }
      end
      new(stage: strip_ambiguous(stages), section: strip_ambiguous(sections))
    end

    private

    # The Hash members of `value` coerced to an Array; anything else is dropped.
    def hashes(value)
      Array(value).select { |item| item.is_a?(Hash) }
    end

    # Collect `id` under `key`; entries missing either part are ignored.
    def mark(bucket, key, id)
      return if key.nil? || id.nil?

      (bucket[key] ||= []) << id
    end

    # Keep only keys that map to exactly one distinct id; ambiguous keys are omitted.
    def strip_ambiguous(bucket)
      bucket.each_with_object({}) do |(key, ids), out|
        uniq = ids.uniq
        out[key] = uniq.first if uniq.size == 1
      end
    end
  end

  # @param kind [Symbol,String] :stage or :section (looked up as a symbol).
  # @param key [String] the stage name / section heading.
  # @return [String,nil] the id for (kind, key), or nil when the kind or key is unknown, the key
  #   was ambiguous, or `kind` is nil.
  def resolve(kind, key)
    return nil if kind.nil?

    bucket = @index[kind.to_sym]
    bucket.is_a?(Hash) ? bucket[key] : nil
  end
end
61
+ end
62
+ end
63
+ end
64
+ end
@@ -0,0 +1,32 @@
1
+ module Ecoportal
2
+ module API
3
+ class GraphQL
4
+ module Diff
5
+ module Pairing
6
+ # A scored pairing proposal: source object (id-space A) <-> target object (id-space B),
7
+ # with the aggregate confidence and the per-signal breakdown that produced it.
8
+ #
9
+ # `source`/`target` are the raw field docs (Hashes) being paired. `score` is 0.0..1.0.
10
+ # `signals` maps signal-name => contribution (for transparency + the ledger + Product's
11
+ # Field-ID data). `matched_by` names the dominant signal (e.g. :genome, :label, :ledger).
12
Candidate = Struct.new(:source, :target, :score, :signals, :matched_by, keyword_init: true) do
  # 'id' of the source field doc; falsy when no source doc is set.
  def source_id
    doc_id(source)
  end

  # 'id' of the target field doc; falsy when no target doc is set.
  def target_id
    doc_id(target)
  end

  # Compact summary: the two ids instead of the full docs, plus score, dominant signal and the
  # per-signal breakdown. Entries whose value is nil are left out.
  def to_h
    summary = {
      source_id:  source_id,
      target_id:  target_id,
      score:      score,
      matched_by: matched_by,
      signals:    signals
    }
    summary.reject { |_name, value| value.nil? }
  end

  private

  def doc_id(doc)
    doc && doc['id']
  end
end
28
+ end
29
+ end
30
+ end
31
+ end
32
+ end
@@ -0,0 +1,173 @@
1
+ module Ecoportal
2
+ module API
3
+ class GraphQL
4
+ module Diff
5
+ module Pairing
6
+ # The equivalence-matching engine for CROSS-OBJECT pairing (UAT<->PROD, page<->template).
7
+ #
8
+ # Given two lists of field docs living in different id-spaces, it proposes pairings by:
9
+ # 1. consulting the LEDGER first — a previously-confirmed pair auto-resolves (method
10
+ # :ledger, confidence 1.0), so pairing improves over time and only novelty is scored;
11
+ # 2. otherwise scoring every remaining source×target candidate with multi-signal
12
+ # confidence (genome + type + label + options — see Signals), picking the best target
13
+ # per source in a stable, greedy, one-to-one assignment (highest scores first).
14
+ #
15
+ # It then CLASSIFIES each best candidate:
16
+ # - `accepted` — score >= accept_threshold: high confidence, auto-paired;
17
+ # - `ambiguous` — accept > score >= review_threshold, OR the top two candidates are within
18
+ # `tie_margin` (genuinely close): route to a HUMAN to adjudicate;
19
+ # - `unmatched` — no target scored >= review_threshold: escalate as a novelty.
20
+ #
21
+ # NEVER guesses: only `accepted` pairs are safe to auto-apply / auto-record; ambiguous and
22
+ # unmatched are surfaced for human resolution. Confirmed decisions are written back to the
23
+ # ledger by the caller (or via #confirm!) so the next run needs no human on them.
24
class Engine
  DEFAULTS = { accept_threshold: 0.85, review_threshold: 0.5, tie_margin: 0.1 }.freeze

  # Outcome of one pairing run:
  #   accepted  - candidates that were auto-paired;
  #   ambiguous - candidates held for human adjudication;
  #   unmatched - source field docs for which no candidate was taken.
  Result = Struct.new(:accepted, :ambiguous, :unmatched, keyword_init: true) do
    # True when nothing is left for a human: no ambiguous candidates and no unmatched sources.
    def resolved?
      ambiguous.empty? && unmatched.empty?
    end

    def to_h
      {
        accepted:  accepted.map(&:to_h),
        ambiguous: ambiguous.map(&:to_h),
        unmatched: unmatched.map { |f| { source_id: f['id'], label: f['label'] } }
      }
    end
  end

  # @param ledger [Ledger,nil] consulted first + written to on #confirm!.
  # @param kind [Symbol] entity kind recorded in the ledger (default :field).
  # @param thresholds [Hash] overrides for the DEFAULTS keys (:accept_threshold,
  #   :review_threshold, :tie_margin).
  def initialize(ledger: nil, kind: :field, **thresholds)
    @ledger = ledger
    @kind   = kind
    @cfg    = DEFAULTS.merge(thresholds)
  end

  # Pair `sources` (id-space A) to `targets` (id-space B).
  #
  # @param sources [Array<Hash>,nil] source field docs.
  # @param targets [Array<Hash>,nil] target field docs.
  # @return [Result]
  def pair(sources, targets)
    sources = Array(sources)
    targets = Array(targets)

    accepted, remaining_sources, remaining_targets = apply_ledger(sources, targets)
    scored = score_all(remaining_sources, remaining_targets)
    assign(scored, remaining_sources, accepted)
  end

  # Persist a confirmed pairing to the ledger. No-op (returns nil) without a ledger.
  # `matched_by` overrides the candidate's own `matched_by` (e.g. :human when a person
  # adjudicated). Returns whatever the ledger's `record` returns.
  def confirm!(candidate, matched_by: nil)
    return nil unless @ledger

    @ledger.record(
      kind: @kind, source_id: candidate.source_id, target_id: candidate.target_id,
      matched_by: matched_by || candidate.matched_by, confidence: candidate.score,
      signals: candidate.signals
    )
  end

  private

  # Pull out sources the ledger already maps onto a target present in `targets`.
  # Returns [accepted, sources_left, targets_left].
  #
  # Each target is consumed at most once: when a second source's ledger entry points at a target
  # that an earlier source already claimed, that source is NOT auto-accepted - it stays in
  # `sources_left` and goes through normal scoring - so the result remains one-to-one.
  def apply_ledger(sources, targets)
    return [[], sources, targets] unless @ledger

    by_id    = targets.each_with_object({}) { |t, h| h[t['id']] = t if t['id'] }
    accepted = []
    used     = {}

    left = sources.reject do |src|
      tid = @ledger.target_for(@kind, src['id'])
      tgt = tid && by_id[tid]
      next false if tgt.nil? || used.key?(tid)

      accepted << ledger_candidate(src, tgt)
      used[tid] = true
      true
    end

    [accepted, left, targets.reject { |t| used.key?(t['id']) }]
  end

  def ledger_candidate(source, target)
    Candidate.new(source: source, target: target, score: 1.0, matched_by: :ledger,
                  signals: { ledger: 1.0 })
  end

  # Score every source x target pair and keep those at/above the review threshold, best first.
  # The generation index is the secondary sort key because `sort_by` alone is not stable:
  # equal-scored candidates keep their source-then-target input order on every run.
  def score_all(sources, targets)
    candidates = sources.flat_map do |src|
      targets.map { |tgt| score_pair(src, tgt) }
    end

    candidates.select { |c| c.score >= @cfg[:review_threshold] }
              .each_with_index
              .sort_by { |c, index| [-c.score, index] }
              .map(&:first)
  end

  def score_pair(source, target)
    parts = {
      genome:  Signals.genome(source, target),
      type:    Signals.type(source, target),
      label:   Signals.label(source, target),
      options: Signals.options(source, target)
    }
    applied = parts.compact
    Candidate.new(source: source, target: target, score: weighted_average(applied),
                  signals: applied, matched_by: dominant(applied))
  end

  # Weighted mean over the signals that applied (nil signals were already dropped, so their
  # weight does not count). Weights come from Signals::WEIGHTS; unknown names weigh 0.
  def weighted_average(applied)
    return 0.0 if applied.empty?

    num = applied.sum { |name, val| Signals::WEIGHTS.fetch(name, 0) * val }
    den = applied.sum { |name, _| Signals::WEIGHTS.fetch(name, 0) }
    den.zero? ? 0.0 : (num / den).round(4)
  end

  # Name of the signal with the highest value; the weight breaks ties between equal values.
  def dominant(applied)
    return nil if applied.empty?

    applied.max_by { |name, val| [val, Signals::WEIGHTS.fetch(name, 0)] }.first
  end

  # Greedy one-to-one assignment: walk the best-scored candidates first and take a pair when
  # both ends are still free, classifying it as accepted or ambiguous. Sources never taken end
  # up in `unmatched`. `accepted` (the ledger pairs) is appended to in place. Hashes are used
  # as sets so each "is this end free?" check is a constant-time lookup.
  def assign(scored, sources, accepted)
    ambiguous = []
    taken_src = accepted.each_with_object({}) { |c, set| set[c.source_id] = true }
    taken_tgt = accepted.each_with_object({}) { |c, set| set[c.target_id] = true }
    by_source = scored.group_by(&:source_id)

    scored.each do |cand|
      next if taken_src.key?(cand.source_id) || taken_tgt.key?(cand.target_id)

      bucket = accept?(cand, by_source[cand.source_id]) ? accepted : ambiguous
      bucket << cand
      taken_src[cand.source_id] = true
      taken_tgt[cand.target_id] = true
    end

    unmatched = sources.reject { |s| taken_src.key?(s['id']) }
    Result.new(accepted: accepted, ambiguous: ambiguous, unmatched: unmatched)
  end

  # Accept only when the score reaches the accept threshold AND the best other candidate for
  # the same source trails by at least `tie_margin` (a close second goes to a human).
  def accept?(cand, siblings)
    return false if cand.score < @cfg[:accept_threshold]

    runner_up = Array(siblings).reject { |c| c.target_id == cand.target_id }.map(&:score).max
    runner_up.nil? || (cand.score - runner_up) >= @cfg[:tie_margin]
  end
end
169
+ end
170
+ end
171
+ end
172
+ end
173
+ end
@@ -0,0 +1,119 @@
1
+ require 'json'
2
+ require 'time'
3
+
4
+ module Ecoportal
5
+ module API
6
+ class GraphQL
7
+ module Diff
8
+ module Pairing
9
+ # The LEARNING LEDGER — a first-class artifact that records CONFIRMED equivalences between
10
+ # objects that live in different id-spaces (UAT<->PROD, page<->template). Because MongoDB
11
+ # gives every object a distinct id and nothing is shared across counterparts, pairing is an
12
+ # equivalence / entity-resolution problem, not a lookup. Every decision the engine (or a
13
+ # human) makes is persisted here so pairing IMPROVES over time: on the next run the ledger
14
+ # is consulted FIRST and previously-resolved pairs auto-resolve, leaving only genuine
15
+ # novelty for the human to adjudicate.
16
+ #
17
+ # An entry is keyed by (kind, source_id) and records the paired target_id plus HOW it was
18
+ # resolved (the method, the confidence, the signals, a timestamp). This log of *how* is the
19
+ # bridge data Product's Field-ID / template-entity-id effort needs.
20
+ #
21
+ # ledger = Ledger.load('pairings.json') # or Ledger.new (in-memory)
22
+ # ledger.record(kind: :field, source_id: 'a', target_id: 'b',
23
+ # matched_by: :genome, confidence: 0.98, signals: {...})
24
+ # ledger.lookup(:field, 'a') # => Entry or nil
25
+ # ledger.save # persist back to the same path
26
+ #
27
+ # SAFETY — the ledger stores only decisions that were CONFIRMED (auto-accepted at high
28
+ # confidence, or human-adjudicated). Ambiguous/low-confidence candidates are never written;
29
+ # they are escalated. The ledger is thus a growing store of ground truth, never guesses.
30
class Ledger
  # One confirmed equivalence. `to_h` drops members that are nil.
  Entry = Struct.new(:kind, :source_id, :target_id, :matched_by, :confidence, :signals,
                     :recorded_at, keyword_init: true) do
    def to_h
      {
        kind: kind, source_id: source_id, target_id: target_id, matched_by: matched_by,
        confidence: confidence, signals: signals, recorded_at: recorded_at
      }.compact
    end
  end

  # @param entries [Array<Entry,Hash>] existing entries (e.g. from a loaded file). Hashes may
  #   use string or symbol keys.
  # @param path [String,nil] default destination for #save.
  def initialize(entries: [], path: nil)
    @path = path
    @entries = {}
    Array(entries).each { |e| add_entry(coerce(e)) }
  end

  # Load a ledger from a JSON file and remember `path` for #save.
  #
  # A missing file and an existing but blank file (zero bytes / whitespace only) both yield an
  # empty, still writable ledger. Content that is not valid JSON still raises
  # JSON::ParserError, so a damaged ledger is never silently replaced by an empty one.
  #
  # @param path [String]
  # @return [Ledger]
  def self.load(path)
    raw  = File.exist?(path) ? File.read(path) : ''
    data = raw.strip.empty? ? {} : JSON.parse(raw)
    new(entries: data['entries'] || [], path: path)
  end

  # Record a CONFIRMED equivalence. `matched_by` names how it was resolved (:genome,
  # :label, :ledger, :human, ...). A later record for the same (kind, source_id) replaces the
  # earlier one. The entry is stamped with the current UTC time in ISO 8601 form.
  #
  # @return [Entry] the stored entry.
  def record(kind:, source_id:, target_id:, matched_by:, confidence: nil, signals: nil)
    entry = Entry.new(
      kind: kind.to_sym, source_id: source_id, target_id: target_id,
      matched_by: matched_by&.to_sym, confidence: confidence, signals: signals,
      recorded_at: Time.now.utc.iso8601
    )
    add_entry(entry)
    entry
  end

  # The Entry stored for (kind, source_id), or nil if there is none.
  def lookup(kind, source_id)
    @entries[[kind.to_sym, source_id]]
  end

  # The stored target id for (kind, source_id), or nil.
  def target_for(kind, source_id)
    lookup(kind, source_id)&.target_id
  end

  def entries
    @entries.values
  end

  def size
    @entries.size
  end

  def to_h
    { entries: entries.map(&:to_h) }
  end

  # Write the ledger as pretty-printed JSON to `path` (default: the path given at
  # construction / load time).
  #
  # @return [String] the path written.
  # @raise [ArgumentError] when no path is available.
  def save(path = @path)
    raise ArgumentError, 'no path to save the ledger to' if path.nil?

    File.write(path, JSON.pretty_generate(to_h))
    path
  end

  private

  # Entries are keyed by (kind, source_id), so re-adding the same key overwrites.
  def add_entry(entry)
    @entries[[entry.kind, entry.source_id]] = entry
  end

  # Turn a Hash (string or symbol keys) into an Entry; `kind` and `matched_by` are converted
  # back to symbols. Entry instances pass through untouched.
  def coerce(entry)
    return entry if entry.is_a?(Entry)

    h = entry.transform_keys(&:to_sym)
    Entry.new(
      kind: h[:kind]&.to_sym, source_id: h[:source_id], target_id: h[:target_id],
      matched_by: h[:matched_by]&.to_sym, confidence: h[:confidence], signals: h[:signals],
      recorded_at: h[:recorded_at]
    )
  end
end
115
+ end
116
+ end
117
+ end
118
+ end
119
+ end