moderate 0.1.0 → 1.0.0.beta1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (65) hide show
  1. checksums.yaml +4 -4
  2. data/.rubocop.yml +8 -0
  3. data/.simplecov +62 -0
  4. data/AGENTS.md +7 -0
  5. data/Appraisals +16 -0
  6. data/CHANGELOG.md +71 -1
  7. data/CLAUDE.md +7 -0
  8. data/README.md +376 -29
  9. data/Rakefile +28 -2
  10. data/app/controllers/concerns/moderate/moderation.rb +161 -0
  11. data/app/controllers/moderate/appeals_controller.rb +190 -0
  12. data/app/controllers/moderate/application_controller.rb +45 -0
  13. data/app/controllers/moderate/notices_controller.rb +382 -0
  14. data/app/controllers/moderate/transparency_reports_controller.rb +30 -0
  15. data/app/helpers/moderate/engine_helper.rb +151 -0
  16. data/app/views/moderate/appeals/new.html.erb +78 -0
  17. data/app/views/moderate/notices/new.html.erb +255 -0
  18. data/app/views/moderate/transparency_reports/_summary_card.html.erb +20 -0
  19. data/app/views/moderate/transparency_reports/show.html.erb +52 -0
  20. data/config/moderate/blocklists/en.yml +81 -0
  21. data/config/moderate/blocklists/es.yml +40 -0
  22. data/config/routes.rb +36 -0
  23. data/docs/compliance.md +178 -0
  24. data/docs/configuration.md +326 -0
  25. data/docs/dsa-notice-form.md +371 -0
  26. data/docs/madmin.md +490 -0
  27. data/docs/notifications.md +363 -0
  28. data/examples/aws_rekognition_adapter.rb +140 -0
  29. data/examples/openai_moderation_adapter.rb +111 -0
  30. data/gemfiles/rails_7.1.gemfile +36 -0
  31. data/gemfiles/rails_7.2.gemfile +36 -0
  32. data/gemfiles/rails_8.1.gemfile +36 -0
  33. data/lib/generators/moderate/install_generator.rb +56 -0
  34. data/lib/generators/moderate/templates/create_moderate_tables.rb.erb +237 -0
  35. data/lib/generators/moderate/templates/initializer.rb +198 -0
  36. data/lib/generators/moderate/views_generator.rb +63 -0
  37. data/lib/moderate/configuration.rb +341 -0
  38. data/lib/moderate/engine.rb +138 -0
  39. data/lib/moderate/errors.rb +26 -0
  40. data/lib/moderate/event.rb +75 -0
  41. data/lib/moderate/filters/base.rb +126 -0
  42. data/lib/moderate/filters/wordlist.rb +255 -0
  43. data/lib/moderate/jobs/classify_job.rb +158 -0
  44. data/lib/moderate/label.rb +111 -0
  45. data/lib/moderate/macros.rb +90 -0
  46. data/lib/moderate/models/appeal.rb +154 -0
  47. data/lib/moderate/models/application_record.rb +31 -0
  48. data/lib/moderate/models/block.rb +203 -0
  49. data/lib/moderate/models/concerns/actor.rb +174 -0
  50. data/lib/moderate/models/concerns/content_filterable.rb +155 -0
  51. data/lib/moderate/models/concerns/reportable.rb +282 -0
  52. data/lib/moderate/models/flag.rb +136 -0
  53. data/lib/moderate/models/report.rb +620 -0
  54. data/lib/moderate/result.rb +176 -0
  55. data/lib/moderate/services/intake_appeal.rb +89 -0
  56. data/lib/moderate/services/intake_notice.rb +132 -0
  57. data/lib/moderate/services/intake_report.rb +132 -0
  58. data/lib/moderate/services/resolve_appeal.rb +134 -0
  59. data/lib/moderate/services/resolve_flag.rb +101 -0
  60. data/lib/moderate/services/resolve_report.rb +291 -0
  61. data/lib/moderate/version.rb +1 -1
  62. data/lib/moderate.rb +365 -18
  63. data/log/development.log +0 -0
  64. data/log/test.log +0 -0
  65. metadata +154 -15
@@ -0,0 +1,620 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Moderate
4
+ # The core notice/report record of the Trust & Safety system.
5
+ #
6
+ # A `Moderate::Report` is BOTH an in-app community report ("this comment is
7
+ # harassment") AND a public EU DSA legal notice ("this URL hosts illegal
8
+ # content"), distinguished by `intake_kind`. Keeping them in one table — one
9
+ # queue, one decision workflow, one evidence snapshot — is deliberate: the DSA
10
+ # wants notices "decided in a timely, diligent, non-arbitrary and objective
11
+ # manner" (Art. 16(6)), and the simplest way to guarantee that is to make a
12
+ # legal notice land in the exact same `pending` queue a moderator already works.
13
+ # See: https://eur-lex.europa.eu/eli/reg/2022/2065/oj
14
+ #
15
+ # The model owns: validations, the immutable evidence snapshot, the
16
+ # automated-processing disclosure (DSA Art. 16(6)/17(3)(c)), the appeal window
17
+ # (DSA Art. 20), and a signed-GlobalID locator for the public flows. It stays
18
+ # host-agnostic: who "owns" reported content, how content is snapshotted, and
19
+ # what fields may be reported are all answered by the polymorphic `has_reportable_content`
20
+ # (through the `Moderate::Reportable` interface), never by anything domain-specific.
21
+ class Report < ApplicationRecord
22
+ self.table_name = "moderate_reports"
23
+
24
+ STATUSES = %w[open actioned dismissed].freeze
25
+ INTAKE_KINDS = %w[community dsa].freeze
26
+
27
+ # The default in-app COMMUNITY report categories. Two taxonomies on purpose
28
+ # (README): a friendly community set the user picks from a "Report" sheet, plus
29
+ # the regulator-aligned DSA legal-reason set below for public notices. These two
30
+ # vocabularies serve different audiences and must NOT be collapsed.
31
+ #
32
+ # This list is HOST-CUSTOMIZABLE: a host that needs its own community labels sets
33
+ # `config.report_categories = %w[...]` and validation tracks that instead (see
34
+ # `.report_categories` below). The taxonomy lives in the MODEL — there is no DB
35
+ # check constraint on `category` — precisely so adding a label never requires a
36
+ # migration. (The DSA legal-reason/country lists below are regulator-defined and
37
+ # therefore fixed, NOT host-overridable.)
38
+ DEFAULT_CATEGORIES = %w[
39
+ harassment hate threats sexual_content spam fraud unsafe_behavior
40
+ illegal_content privacy child_safety other hate_abuse_harassment
41
+ violent_speech graphic_violent_media illegal_regulated_behaviors
42
+ impersonation adult_sexual_content private_non_consensual_content
43
+ suicide_self_harm terrorism_violent_extremism scam_fraud
44
+ ].freeze
45
+
46
+ # The DSA "statement of reasons" legal-reason taxonomy used on public notices.
47
+ # These are the categories the EU Transparency Database expects, so the Art. 24
48
+ # transparency counters and the Art. 16 intake speak one regulator-aligned
49
+ # vocabulary. Regulator-defined, so this is a FIXED constant (not host-overridable):
50
+ # widening it is a gem change, not a host config. Validated by the model, not the DB.
51
+ DSA_LEGAL_REASONS = %w[
52
+ animal_welfare consumer_information cyber_violence data_protection_privacy
53
+ illegal_or_harmful_speech civic_elections non_consensual_behavior
54
+ pornography_sexualized_content protection_of_minors public_security
55
+ scams_fraud scope_of_platform_service self_harm unsafe_illegal_products
56
+ violence intellectual_property other
57
+ ].freeze
58
+
59
+ # The 27 EU member-state codes plus "EU" (whole-union). The DSA notice form
60
+ # requires the member state whose law is allegedly broken. Regulator-defined and
61
+ # therefore fixed (not host-overridable). Validated by the model, not the DB.
62
+ EU_COUNTRY_CODES = %w[
63
+ AT BE BG CY CZ DE DK EE ES FI FR GR HR HU IE IT LT LU LV MT NL PL PT
64
+ RO SE SI SK EU
65
+ ].freeze
66
+
67
+ # Generic, host-agnostic content-type buckets. A host's reportable supplies its
68
+ # own via `moderation_content_type`, but the stored value is constrained to this
69
+ # vocabulary (model-level inclusion, NULL allowed) so the queue and transparency
70
+ # counts stay tidy.
71
+ CONTENT_TYPES = %w[
72
+ user_profile profile_avatar listing message conversation group other
73
+ ].freeze
74
+
75
+ # The legal/contractual ground a moderator records when closing a report —
76
+ # the DSA Art. 17(1) "legal or contractual ground" of the statement of reasons.
77
+ # Model-level inclusion (NULL allowed); no DB constraint.
78
+ RESOLUTION_BASES = %w[terms law law_and_terms insufficient_information no_violation].freeze
79
+
80
+ # Matches the `moderate_reports_message_length_check` DB constraint — the one
81
+ # value guard kept at the DB level (a cheap runaway-free-text backstop).
82
+ MESSAGE_MAX_LENGTH = 4000
83
+
84
+ # Signed-GlobalID purposes. A purpose is a namespace tag baked into the signed
85
+ # token so a token minted to locate the *reported content* can't be replayed to
86
+ # locate the *report itself* (and vice versa) — GlobalID verifies the purpose on
87
+ # the way back out. See https://github.com/rails/globalid#signed-global-ids.
88
+ # The strings are gem-stable ("moderate_*") and host-agnostic.
89
+ SIGNED_GLOBAL_ID_PURPOSE = "moderate_report"
90
+ APPEAL_SIGNED_GLOBAL_ID_PURPOSE = "moderate_appeal"
91
+
92
+ # DSA Art. 20(1): the internal complaint (appeal) mechanism must stay open for
93
+ # "at least six months following the decision". We default to exactly six months
94
+ # and refuse appeals filed after it (enforced on Moderate::Appeal).
95
+ APPEAL_WINDOW = 6.months
96
+
97
+ # Transient flags a caller may set on the intake side (e.g. "don't email a
98
+ # receipt for this seeded record", "also block the reported user"). They never
99
+ # persist; the services read them. Kept here so both the model and its services
100
+ # share one definition.
101
+ attr_accessor :skip_received_notice, :block_reported_user
102
+
103
+ # All actor associations resolve to the HOST's configured user class, read
104
+ # lazily from the configuration as a String so this file loads before the host
105
+ # has declared its User model (the initializer sets `config.user_class` first,
106
+ # models autoload on demand). `optional: true` everywhere a user may be absent:
107
+ # a DSA notice can come from a non-user, and reported content does not always
108
+ # resolve to a single owning account.
109
+ belongs_to :reporter, class_name: Moderate.config.user_class, optional: true
110
+ belongs_to :reported_user, class_name: Moderate.config.user_class, optional: true
111
+ belongs_to :resolved_by, class_name: Moderate.config.user_class, optional: true
112
+ belongs_to :reportable, polymorphic: true, optional: true
113
+
114
+ has_many :appeals, class_name: "Moderate::Appeal", dependent: :destroy
115
+
116
+ # The auto-filter flags that touched the SAME (record, field) this report is
117
+ # about. Used to disclose automated means at intake (Art. 17(3)(c)) — if the
118
+ # wordlist/image adapter already flagged this exact content, the decision email
119
+ # must not later claim "no automated means were used". The scope re-keys the
120
+ # association onto the polymorphic flaggable columns, since a Flag points at the
121
+ # content polymorphically, not at the Report.
122
+ has_many :flags,
123
+ ->(report) { where(flaggable_type: report.reportable_type, field: report.reported_field) },
124
+ class_name: "Moderate::Flag",
125
+ primary_key: :reportable_id,
126
+ foreign_key: :flaggable_id
127
+
128
+ before_validation :normalize_strings
129
+ before_validation :hydrate_reporter_contact, if: :reporter
130
+ before_validation :infer_reported_user, on: :create
131
+ before_validation :capture_snapshot, on: :create
132
+ before_validation :capture_automated_processing, on: :create
133
+ # Default the JSON columns to their empty shape before save. WHY: the migration
134
+ # makes these NOT NULL, but MySQL 8+ forbids a DEFAULT on a JSON column, so on
135
+ # MySQL the column has NO database default — inserting a row that never touched
136
+ # `automated_processing` / `resolution_actions` would write NULL and trip a
137
+ # NotNullViolation. (SQLite/PostgreSQL get a `{}`/`[]` default from the migration
138
+ # and don't need this, but coalescing nil→empty is harmless there.) The migration
139
+ # explicitly delegates this to the model ("Models handle nil metadata gracefully
140
+ # by defaulting to {} in their accessors"); this is that handling.
141
+ before_save :default_json_columns
142
+
143
+ scope :open, -> { where(status: "open") }
144
+ scope :actioned, -> { where(status: "actioned") }
145
+ scope :dismissed, -> { where(status: "dismissed") }
146
+ # `pending` == awaiting a decision. The README/admin queue is `Report.pending`;
147
+ # it's the same set as `open`, named for the queue's vocabulary (a "pending"
148
+ # decision) rather than the record's status word. Both exist so each call site
149
+ # reads naturally.
150
+ scope :pending, -> { where(status: "open") }
151
+ scope :recent_first, -> { order(created_at: :desc) }
152
+
153
+ validates :status, inclusion: { in: STATUSES }
154
+ validates :intake_kind, inclusion: { in: INTAKE_KINDS }
155
+ # `category` is validated against the HOST-CUSTOMIZABLE list (config override or
156
+ # DEFAULT_CATEGORIES) — resolved at validation time, NOT class-load time, so a host
157
+ # that sets `config.report_categories` in its initializer is honored even though
158
+ # this model loads first. Passed as a lambda because `inclusion: { in: [...] }`
159
+ # snapshots a plain array once at load; a lambda is re-evaluated per record. The
160
+ # lambda's argument IS the record, so we reach the class method through its class
161
+ # (inside the proc, `self` is the record instance, which has no `report_categories`).
162
+ validates :category, inclusion: { in: ->(report) { report.class.report_categories } }
163
+ validates :message, presence: true, length: { maximum: MESSAGE_MAX_LENGTH }
164
+ # DSA Art. 16(2)(c): notices must carry the notifier's name and email — UNLESS
165
+ # the notice alleges an offence against minors, where the regulation waives the
166
+ # identity requirement (the CSAM/child-safety anonymity carve-out). Both name and
167
+ # email are required ONLY for DSA public notices: a notifier the provider has no
168
+ # account for must be reachable to receive the Art. 16(4) confirmation of receipt
169
+ # and the Art. 17 decision. An in-app COMMUNITY report comes from a logged-in user
170
+ # the app can already reach (or doesn't need to email at all — they get the in-app
171
+ # acknowledgement), and that user may legitimately have no email column, so we do
172
+ # NOT force a notifier_email there. The format check still applies to any email
173
+ # that IS present, on either path.
174
+ validates :notifier_name, presence: true, if: -> { dsa? && !anonymous_notice? }
175
+ validates :notifier_email, presence: true, if: -> { dsa? && !anonymous_notice? }
176
+ validates :notifier_email, format: { with: URI::MailTo::EMAIL_REGEXP }, allow_blank: true
177
+ validates :subject_url, length: { maximum: 2048 }, allow_blank: true
178
+ validates :reported_field, length: { maximum: 64 }, allow_blank: true
179
+ validates :resolution_note, presence: true, if: :closed?
180
+ # DSA Art. 16(2)(d): a notice/report must include a good-faith statement that its
181
+ # contents are accurate and complete. `acceptance: true` rejects the save unless
182
+ # the flag is truthy. This applies to BOTH intake kinds: a public DSA notifier
183
+ # ticks the box on the form, and an in-app reporter attests implicitly by tapping
184
+ # "Report" — so the `report!` actor helper sets `good_faith_confirmed: true` for
185
+ # the community path (see Moderate::Actor#report!). Keeping ONE always-on
186
+ # validation (rather than scoping it to DSA) means a Report built directly with a
187
+ # falsy good-faith flag is rejected regardless of kind, which is the safe default.
188
+ validates :good_faith_confirmed, acceptance: true
189
+ validates :legal_reason, inclusion: { in: DSA_LEGAL_REASONS }, allow_blank: true
190
+ validates :legal_country_code, inclusion: { in: EU_COUNTRY_CODES }, allow_blank: true
191
+ validates :content_type, inclusion: { in: CONTENT_TYPES }, allow_blank: true
192
+ validates :resolution_basis, inclusion: { in: RESOLUTION_BASES }, allow_blank: true
193
+ validate :reporter_cannot_report_self
194
+ validate :reportable_field_must_be_allowed
195
+ validate :subject_url_must_be_http_url
196
+ validate :public_notice_requires_subject_url
197
+ validate :dsa_notice_must_be_substantiated
198
+ validate :anonymous_notice_only_for_child_safety
199
+
200
+ # --- Taxonomy (host-customizable) ----------------------------------------
201
+
202
# Returns the community `category` values that are accepted right now.
#
# `Moderate.config.report_categories` is read on every call (nothing is cached
# here) and each entry is stringified. When the host configured nothing (nil
# or an empty list) the gem's DEFAULT_CATEGORIES are returned instead.
#
# @return [Array<String>]
def self.report_categories
  configured = Array(Moderate.config.report_categories).map(&:to_s)
  return DEFAULT_CATEGORIES if configured.blank?

  configured
end
209
+
210
+ # --- Signed-GlobalID locators --------------------------------------------
211
+
212
# Turns a signed token back into the reported content.
#
# Two strategies are tried in order: first the registry allow-list lookup, then
# the contract-based lookup. The second one exists because, in a lazily loaded
# app, resolving the token may be the first thing that loads the reportable
# class, so the registry can still be stale at that point.
#
# @param token [String, nil] signed GlobalID parameter
# @return [Object, nil] the located record, or nil for a blank/unresolvable token
def self.locate_signed_reportable(token)
  return nil unless token.present?

  from_registry = locate_signed_reportable_from_registry(token)
  return from_registry if from_registry

  locate_signed_reportable_by_contract(token)
end
224
+
225
# Turns a signed appeal token back into the Report it was minted for (the
# public appeal flow arrives here through an emailed signed link).
#
# The lookup is restricted to this class via `only: [self]` and to the appeal
# purpose, so a token minted for anything else does not resolve.
#
# @param token [String, nil] signed GlobalID parameter
# @return [Moderate::Report, nil]
def self.locate_signed_appeal_report(token)
  return nil unless token.present?

  GlobalID::Locator.locate_signed(
    token,
    for: APPEAL_SIGNED_GLOBAL_ID_PURPOSE,
    only: [self]
  )
end
233
+
234
+ # --- Status predicates ----------------------------------------------------
235
+
236
# True while the stored status is exactly "open".
def open?
  status.eql?("open")
end
239
+
240
# True when the stored status is exactly "actioned".
def actioned?
  status.eql?("actioned")
end
243
+
244
# True when the stored status is exactly "dismissed".
def dismissed?
  status.eql?("dismissed")
end
247
+
248
# True once a decision has been recorded, i.e. the report is either actioned
# or dismissed.
def closed?
  return true if actioned?

  dismissed?
end
251
+
252
# True when this record came in as a DSA notice (`intake_kind` is "dsa").
def dsa?
  intake_kind.eql?("dsa")
end
255
+
256
# True when this record came in as an in-app report (`intake_kind` is
# "community").
def community?
  intake_kind.eql?("community")
end
259
+
260
# Whether this notice qualifies for the anonymity carve-out of DSA
# Art. 16(2)(c): it must be flagged anonymous AND cite the
# `protection_of_minors` legal reason. Any other legal reason does not qualify.
def anonymous_notice?
  minors_ground = legal_reason == "protection_of_minors"
  anonymous? && minors_ground
end
267
+
268
+ # --- Human-readable labels (delegated to the reportable, host-agnostic) ----
269
+
270
# How to address the notifier in copy: the name when one is present,
# otherwise whatever `notifier_email` holds (possibly nil).
def notifier_label
  name = notifier_name
  name.present? ? name : notifier_email
end
274
+
275
# A display label for the reported thing, chosen in this order:
#   1. the reportable's own `moderation_label`, when it responds to it;
#   2. the notice's `subject_url`, when present;
#   3. the localized "Legal notice" fallback string.
# The last two keep a notice about an external URL (no in-app record)
# renderable in the queue.
def reportable_label
  target = reportable
  return target.moderation_label if target.respond_to?(:moderation_label)

  url = subject_url
  return url if url.present?

  I18n.t("moderate.reports.legal_notice_label", default: "Legal notice")
end
284
+
285
# Asks the reportable for the snapshot text of the reported field.
#
# @return [Object, nil] whatever `moderation_snapshot(reported_field)` returns,
#   or nil when the reportable (possibly absent) does not respond to it
def reported_content_text
  target = reportable
  target.moderation_snapshot(reported_field) if target.respond_to?(:moderation_snapshot)
end
292
+
293
+ # --- Signed GIDs for emailed links ---------------------------------------
294
+
295
# Signed GlobalID parameter for the reported content, minted with the
# reportable purpose. Returns nil when there is no reportable.
def signed_reportable_gid
  target = reportable
  target.to_sgid_param(for: SIGNED_GLOBAL_ID_PURPOSE) unless target.nil?
end
298
+
299
# Signed GlobalID parameter for this report itself, minted with the appeal
# purpose (the counterpart of `locate_signed_appeal_report`).
def signed_appeal_gid
  purpose = APPEAL_SIGNED_GLOBAL_ID_PURPOSE
  to_sgid_param(for: purpose)
end
302
+
303
+ # --- URL helpers (defensive parsing) -------------------------------------
304
+
305
# The record's `subject_url` as a string, but only when it parses as an
# acceptable subject URI; nil otherwise.
def safe_subject_url
  uri = parsed_subject_uri(subject_url)
  uri.to_s unless uri.nil?
end
308
+
309
# Same check as `safe_subject_url`, applied to an arbitrary URL.
#
# @param url [String, nil]
# @return [String, nil] the URL as a string, or nil when it is not acceptable
def safe_subject_url_for(url)
  uri = parsed_subject_uri(url)
  uri.to_s unless uri.nil?
end
312
+
313
# Every entry of `subject_url_list` that parses as an acceptable subject URI,
# as strings; unacceptable entries are dropped.
def safe_subject_urls
  subject_url_list
    .map { |candidate| parsed_subject_uri(candidate) }
    .compact
    .map(&:to_s)
end
316
+
317
# The notice URLs as an array (a notice may cite several). Uses `subject_urls`
# when it holds anything, otherwise falls back to the single `subject_url`.
# No de-duplication happens here; the list is returned as stored.
def subject_url_list
  listed = Array(subject_urls)
  listed.present? ? listed : Array(subject_url)
end
322
+
323
+ # --- Lifecycle helpers ----------------------------------------------------
324
+
325
# DSA Art. 16(4): stamp when the confirmation of receipt was acknowledged.
#
# Only the first acknowledgement is recorded: once `acknowledged_at` holds a
# value the call does nothing. The write goes through `update_column`, so
# validations and callbacks are not run for this timestamp.
#
# @param at [Time] the acknowledgement time (defaults to now)
def acknowledge!(at: Time.current)
  return unless acknowledged_at.blank?

  update_column(:acknowledged_at, at)
end
332
+
333
# DSA Art. 20(1): stamp the appeal deadline, APPEAL_WINDOW after the decision.
#
# The deadline is written once; if `appeal_deadline_at` already holds a value
# the call does nothing, so re-running a decision flow cannot move it. Written
# with `update_column` (no validations, no callbacks).
#
# @param at [Time] the decision time (defaults to `resolved_at`, else now)
def close_redress_window!(at: resolved_at || Time.current)
  return if appeal_deadline_at.present?

  update_column(:appeal_deadline_at, at + APPEAL_WINDOW)
end
339
+
340
# Model-level shortcut for Moderate::Services::ResolveReport#resolve!, so a
# host can write `report.resolve!(by: moderator, note: "…")` without building
# the service itself. This method only forwards; the work happens in the
# service.
#
# @param by [Object] the moderator making the decision
# @param options [Hash] passed through to the service's `resolve!` unchanged
def resolve!(by:, **options)
  service = Moderate::Services::ResolveReport.new(self, by: by)
  service.resolve!(**options)
end
350
+
351
# Model-level shortcut for Moderate::Services::ResolveReport#dismiss!
# (close the report without taking action). Forwards only.
#
# @param by [Object] the moderator making the decision
# @param note [String] the resolution note handed to the service
def dismiss!(by:, note:)
  service = Moderate::Services::ResolveReport.new(self, by: by)
  service.dismiss!(note: note)
end
356
+
357
# DSA Art. 17(3)(c): did automated means take part in surfacing or deciding
# this report? The decision email reads this to state truthfully whether
# automation was used.
#
# The answer is "yes" when the explicit `used` entry casts to true, OR when
# any other top-level evidence value is `true` / "true", which is defensive
# against partially populated evidence hashes.
#
# Top-level keys are stringified before the lookup. A hash assigned in memory
# with symbol keys (`{ used: "1" }`) would otherwise miss the "used" lookup
# and under-report automation, even though the capture path stores string
# keys.
#
# @return [Boolean]
def automated_processing_used?
  automation = automated_processing.to_h.transform_keys(&:to_s)

  return true if ActiveModel::Type::Boolean.new.cast(automation["used"])

  automation.except("used").values.any? { |value| value == true || value == "true" }
end
370
+
371
+ private
372
+
373
# Registry strategy: resolve the token while restricting GlobalID to the
# classes currently listed in `Moderate.reportable_classes`. Skipped entirely
# (nil) while that list is empty. A located record is returned only if it
# also satisfies the reportable contract.
def self.locate_signed_reportable_from_registry(token)
  allow_list = Moderate.reportable_classes
  return nil if allow_list.empty?

  located = GlobalID::Locator.locate_signed(token, for: SIGNED_GLOBAL_ID_PURPOSE, only: allow_list)
  located if reportable_contract?(located)
end
380
+
381
# Contract strategy: resolve the token with only the purpose check, then
# accept the record only if it both satisfies the reportable contract and is
# an allowed reportable.
def self.locate_signed_reportable_by_contract(token)
  located = GlobalID::Locator.locate_signed(token, for: SIGNED_GLOBAL_ID_PURPOSE)
  return nil unless reportable_contract?(located)
  return nil unless reportable_allowed?(located)

  located
end
385
+
386
# Duck-type check: the record must respond to both methods this model relies
# on, `reportable_field_allowed?` and `reported_owner`.
def self.reportable_contract?(record)
  %i[reportable_field_allowed? reported_owner].all? { |message| record.respond_to?(message) }
end
390
+
391
# Whether a located record may be treated as reportable: its class is listed
# in `Moderate.reportable_classes`, or it is a `Moderate::Reportable`.
# A blank record is never allowed.
def self.reportable_allowed?(record)
  return false unless record.present?
  return true if Moderate.reportable_classes.include?(record.class)

  record.is_a?(Moderate::Reportable)
end
397
+ private_class_method :locate_signed_reportable_from_registry,
398
+ :locate_signed_reportable_by_contract,
399
+ :reportable_contract?,
400
+ :reportable_allowed?
401
+
402
# Fills any unset JSON column with its empty shape so a NULL never reaches a
# NOT NULL JSON column. `snapshot`, `automated_processing` and
# `resolution_actions` fall back to an empty hash, `subject_urls` to an empty
# array. Columns that already hold a value are left untouched.
def default_json_columns
  self.snapshot = {} unless snapshot
  self.automated_processing = {} unless automated_processing
  self.resolution_actions = {} unless resolution_actions
  self.subject_urls = [] unless subject_urls
  subject_urls
end
411
+
412
# Cleans up every free-text attribute before validation so that validations
# and the evidence snapshot see the same values:
#   * short fields are squished (surrounding/internal whitespace collapsed) and
#     blanks become nil;
#   * `status` and `intake_kind` fall back to "open" / "community";
#   * the notifier email is downcased, the country code upcased;
#   * `subject_url` is cleaned first, then folded into `subject_urls` by
#     `normalize_subject_urls` (that order matters);
#   * `message` gets CRLF/CR converted to LF and is stripped, and
#     `resolution_note` is stripped. Neither is squished, so inner line breaks
#     survive.
def normalize_strings
  tidy = ->(value) { value.to_s.squish.presence }

  %i[
    reported_field category notifier_name legal_reason content_type
    reported_account_identifier resolution_basis decision_visibility
  ].each do |attribute|
    public_send("#{attribute}=", tidy.call(public_send(attribute)))
  end

  self.status = tidy.call(status) || "open"
  self.intake_kind = tidy.call(intake_kind) || "community"
  self.notifier_email = tidy.call(notifier_email)&.downcase
  self.legal_country_code = legal_country_code.to_s.squish.upcase.presence

  self.subject_url = tidy.call(subject_url)
  self.subject_urls = normalize_subject_urls

  self.message = message.to_s.gsub(/\r\n?/, "\n").strip.presence
  self.resolution_note = resolution_note.to_s.strip.presence
end
434
+
435
# When an authenticated user files the report, copy their contact details
# onto the notifier fields (only where the notifier field is still empty), so
# both intake kinds carry the same identity shape downstream. `email` and
# `display_name` are read via `try`, so the host's user class only needs
# whichever of the two it actually has.
#
# This callback runs AFTER `normalize_strings`, so the copied values are
# normalized here the same way: squished, blank turned into nil, email
# downcased. Without that, a reporter email such as "Ada@Example.COM" or a
# whitespace-only display name would be stored un-normalized.
def hydrate_reporter_contact
  self.notifier_email ||= reporter.try(:email).to_s.squish.presence&.downcase
  self.notifier_name ||= reporter.try(:display_name).to_s.squish.presence
end
443
+
444
# Derives the reported user from the reported content: when none was given
# and the reportable responds to `reported_owner`, that owner is assigned.
# This is how a report against a piece of content becomes a report about the
# account behind it, without this model knowing the host's ownership rules.
def infer_reported_user
  return unless reported_user.blank? && reportable.respond_to?(:reported_owner)

  self.reported_user = reportable.reported_owner
end
454
+
455
# Freezes the evidence as it stands at create time into the `snapshot` hash,
# so it outlives later edits or deletion of the original content (DSA Art. 17
# "facts and circumstances relied on").
#
# The snapshot holds the identifying attributes of the report, the URL list,
# the reported text as supplied by `reported_content_text`, and the capture
# time in ISO 8601. Entries whose value is nil are left out, so only what was
# actually present is stored.
def capture_snapshot
  evidence = {}

  %i[
    intake_kind reportable_type reportable_id reported_field
    reported_user_id reporter_id subject_url
  ].each { |attribute| evidence[attribute] = public_send(attribute) }

  evidence[:subject_urls] = subject_url_list

  %i[
    legal_reason legal_country_code content_type reported_account_identifier
  ].each { |attribute| evidence[attribute] = public_send(attribute) }

  evidence[:content_text] = reported_content_text
  evidence[:captured_at] = Time.current.iso8601

  self.snapshot = evidence.reject { |_key, value| value.nil? }
end
479
+
480
# Records, at intake, the factual evidence of automated processing for this
# report. DSA Art. 16(6)/17(3)(c) require disclosing whether automated means
# were used in detection or decision; storing the evidence now means a later
# decision notice cannot claim "no automated means" after a wordlist or image
# adapter already flagged this exact content.
# Source: https://eur-lex.europa.eu/eli/reg/2022/2065/oj
#
# Nothing is written when no evidence was gathered. Otherwise the evidence is
# merged over the existing (stringified) hash: entries already present under
# other keys survive, and gathered keys win on collision.
def capture_automated_processing
  gathered = automated_processing_evidence
  return unless gathered.present?

  existing = automated_processing.to_h.deep_stringify_keys
  self.automated_processing = existing.merge(gathered)
end
492
+
493
+ # --- Custom validations ---------------------------------------------------
494
+
495
# Validation: rejects a report whose reporter and reported user are the same
# account. Only applies when both ids are present.
def reporter_cannot_report_self
  same_account = reporter_id.present? && reported_user_id.present? && reporter_id == reported_user_id
  return unless same_account

  message = I18n.t("moderate.errors.reporter_cannot_report_self", default: "You can't report yourself")
  errors.add(:reported_user, message)
end
500
+
501
# Validation: the reported field must be one the reportable accepts. The
# reportable is asked through `reportable_field_allowed?`; when there is no
# reportable, or it does not respond to that predicate, the field is not
# constrained.
def reportable_field_must_be_allowed
  return unless reportable.present? && reportable.respond_to?(:reportable_field_allowed?)
  return if reportable.reportable_field_allowed?(reported_field)

  message = I18n.t("moderate.errors.invalid_reported_field", default: "is not a reportable field")
  errors.add(:reported_field, message)
end
512
+
513
# Validation: every URL in `subject_url_list` must pass `parsed_subject_uri`.
# A single error is added on `subject_url` no matter how many entries fail.
def subject_url_must_be_http_url
  return if subject_url_list.all? { |candidate| parsed_subject_uri(candidate).present? }

  message = I18n.t("moderate.errors.invalid_subject_url", default: "must be a valid http(s) URL")
  errors.add(:subject_url, message)
end
519
+
520
# Validation: a record with no in-app reportable must cite at least one
# subject URL. DSA Art. 16(2)(b) requires the "exact electronic location" of
# the content. Records that do have a reportable are not checked.
def public_notice_requires_subject_url
  return unless reportable.blank? && subject_url_list.none?

  message = I18n.t("moderate.errors.missing_subject_url", default: "is required")
  errors.add(:subject_url, message)
end
529
+
530
# Validation for DSA notices only (DSA Art. 16(2), "sufficiently
# substantiated"): the legal reason, the member-state code and the content
# type must each be present. A `:blank` error is added per missing attribute.
def dsa_notice_must_be_substantiated
  return unless dsa?

  %i[legal_reason legal_country_code content_type].each do |attribute|
    errors.add(attribute, :blank) if public_send(attribute).blank?
  end
end
540
+
541
# Validation: a notice flagged anonymous is accepted only when its legal
# reason is `protection_of_minors`; any other anonymous notice gets an error
# on `anonymous`, so the identity requirement of DSA Art. 16(2)(c) cannot be
# bypassed for ordinary notices.
def anonymous_notice_only_for_child_safety
  return unless anonymous? && legal_reason != "protection_of_minors"

  message = I18n.t(
    "moderate.errors.anonymous_notice_child_safety_only",
    default: "anonymous notices are only allowed for offences against minors"
  )
  errors.add(:anonymous, message)
end
550
+
551
# Builds the canonical URL list. Input may be an array or newline-separated
# text in `subject_urls`; the single `subject_url` is appended, every entry is
# squished, blanks are dropped and duplicates removed. As a side effect
# `subject_url` is set to the first entry, so the single-URL and multi-URL
# representations stay in step.
#
# @return [Array<String>] the cleaned list
def normalize_subject_urls
  candidates = Array(subject_urls).flat_map { |entry| entry.to_s.split(/\R/) }
  candidates.push(subject_url)

  cleaned = candidates.filter_map { |entry| entry.to_s.squish.presence }.uniq
  self.subject_url = cleaned.first
  cleaned
end
561
+
562
# Parses a string into a URI, accepting only http(s) URLs that have a host.
# Other schemes (`javascript:`, `file:`), host-less input and unparseable
# text all yield nil. A parse failure is swallowed rather than raised, which
# makes the method usable from validations and from the `safe_*` readers.
#
# @param url [String, nil]
# @return [URI::HTTP, nil]
def parsed_subject_uri(url)
  return nil unless url.present?

  begin
    candidate = URI.parse(url)
  rescue URI::InvalidURIError
    return nil
  end

  candidate.is_a?(URI::HTTP) && candidate.host.present? ? candidate : nil
end
574
+
575
# Gathers the automated-means evidence for this report's (record, field).
#
# Two sources are combined: auto-flags already stored against the same
# content, and a fresh classifier run (see `automated_classifier_result`).
# Keys are strings so the hash merges cleanly with the stringified
# `automated_processing` column.
#
# @return [Hash, nil] the evidence, or nil when nothing automated touched the
#   content (so the caller can skip storing an empty disclosure)
def automated_processing_evidence
  evidence = {}

  matching_flags = matching_automated_flags
  if matching_flags.any?
    evidence["used"] = true
    evidence["flag_ids"] = matching_flags.map(&:id)
    evidence["sources"] = matching_flags.map(&:source).compact.uniq
    evidence["categories"] = matching_flags.flat_map { |flag| Array(flag.categories) }.compact.uniq
  end

  result = automated_classifier_result
  if result&.flagged?
    evidence["used"] = true
    evidence["field"] = reported_field
    evidence["sources"] = (Array(evidence["sources"]) + [result.source]).compact.uniq
    # Array() tolerates a result that reports no categories at all (nil),
    # which would otherwise raise a TypeError when concatenated.
    evidence["categories"] = (Array(evidence["categories"]) + Array(result.categories)).compact.uniq
    evidence["scores"] = result.scores
    # `raw` is the untouched provider payload, kept for audit so a regulator
    # query can be answered with the exact classifier output.
    evidence["metadata"] = result.raw
  end

  evidence.presence
end

# Flags stored against the same (type, id, field) as this report.
#
# The evidence is gathered from a create-time before_validation callback, i.e.
# while the report is still a new record. Active Record does not load a
# has_many from the database for an unsaved owner (only in-memory targets are
# returned), so reading `flags` at that point would always look empty. For an
# unsaved report the equivalent query is therefore issued directly; a
# persisted report keeps using the association.
def matching_automated_flags
  return flags.to_a unless new_record?
  # No content key means nothing to match (and avoids matching NULL ids).
  return [] if reportable_type.blank? || reportable_id.blank?

  Moderate::Flag.where(
    flaggable_type: reportable_type,
    flaggable_id: reportable_id,
    field: reported_field
  ).to_a
end
604
+
605
# Runs the classifier for this (record, field), but only when the policy
# looked up for it reports `block?`. An async adapter cannot have contributed
# to a synchronous intake decision, so running it here would be both
# misleading and slow.
#
# @return [Object, nil] the result of `Moderate.classify`, or nil when there
#   is no reportable/field, the policy is not a blocking one, or there is no
#   text to classify
def automated_classifier_result
  return nil unless reportable.present? && reported_field.present?

  policy = Moderate.filter_policy_for(reportable, reported_field)
  return nil unless policy.block?

  text = reported_content_text
  Moderate.classify(text, policy: policy) unless text.blank?
end
619
+ end
620
+ end