civics-cdf-validator 1.60.dev6__tar.gz → 1.61.dev1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (22) hide show
  1. {civics_cdf_validator-1.60.dev6/civics_cdf_validator.egg-info → civics_cdf_validator-1.61.dev1}/PKG-INFO +1 -1
  2. {civics_cdf_validator-1.60.dev6 → civics_cdf_validator-1.61.dev1}/base.py +1 -1
  3. {civics_cdf_validator-1.60.dev6 → civics_cdf_validator-1.61.dev1/civics_cdf_validator.egg-info}/PKG-INFO +1 -1
  4. {civics_cdf_validator-1.60.dev6 → civics_cdf_validator-1.61.dev1}/rules.py +620 -13
  5. {civics_cdf_validator-1.60.dev6 → civics_cdf_validator-1.61.dev1}/version.py +1 -1
  6. {civics_cdf_validator-1.60.dev6 → civics_cdf_validator-1.61.dev1}/CONTRIBUTING.md +0 -0
  7. {civics_cdf_validator-1.60.dev6 → civics_cdf_validator-1.61.dev1}/LICENSE-2.0.txt +0 -0
  8. {civics_cdf_validator-1.60.dev6 → civics_cdf_validator-1.61.dev1}/MANIFEST.in +0 -0
  9. {civics_cdf_validator-1.60.dev6 → civics_cdf_validator-1.61.dev1}/README.md +0 -0
  10. {civics_cdf_validator-1.60.dev6 → civics_cdf_validator-1.61.dev1}/__init__.py +0 -0
  11. {civics_cdf_validator-1.60.dev6 → civics_cdf_validator-1.61.dev1}/civics_cdf_validator.egg-info/SOURCES.txt +0 -0
  12. {civics_cdf_validator-1.60.dev6 → civics_cdf_validator-1.61.dev1}/civics_cdf_validator.egg-info/dependency_links.txt +0 -0
  13. {civics_cdf_validator-1.60.dev6 → civics_cdf_validator-1.61.dev1}/civics_cdf_validator.egg-info/entry_points.txt +0 -0
  14. {civics_cdf_validator-1.60.dev6 → civics_cdf_validator-1.61.dev1}/civics_cdf_validator.egg-info/requires.txt +0 -0
  15. {civics_cdf_validator-1.60.dev6 → civics_cdf_validator-1.61.dev1}/civics_cdf_validator.egg-info/top_level.txt +0 -0
  16. {civics_cdf_validator-1.60.dev6 → civics_cdf_validator-1.61.dev1}/gpunit_rules.py +0 -0
  17. {civics_cdf_validator-1.60.dev6 → civics_cdf_validator-1.61.dev1}/loggers.py +0 -0
  18. {civics_cdf_validator-1.60.dev6 → civics_cdf_validator-1.61.dev1}/office_utils.py +0 -0
  19. {civics_cdf_validator-1.60.dev6 → civics_cdf_validator-1.61.dev1}/setup.cfg +0 -0
  20. {civics_cdf_validator-1.60.dev6 → civics_cdf_validator-1.61.dev1}/setup.py +0 -0
  21. {civics_cdf_validator-1.60.dev6 → civics_cdf_validator-1.61.dev1}/stats.py +0 -0
  22. {civics_cdf_validator-1.60.dev6 → civics_cdf_validator-1.61.dev1}/validator.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: civics_cdf_validator
3
- Version: 1.60.dev6
3
+ Version: 1.61.dev1
4
4
  Summary: Checks if an election feed follows best practices
5
5
  Home-page: https://github.com/google/civics_cdf_validator
6
6
  Author: Google Civics
@@ -225,7 +225,7 @@ class DateRule(BaseRule):
225
225
 
226
226
  def is_date_in_past(self, date):
227
227
  """Check if a date is in the past."""
228
- today = datetime.datetime.utcnow()
228
+ today = datetime.datetime.now(datetime.timezone.utc)
229
229
  today_partial_date = PartialDate(today.year, today.month, today.day)
230
230
  delta = date.is_older_than(today_partial_date)
231
231
  return delta > 0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: civics_cdf_validator
3
- Version: 1.60.dev6
3
+ Version: 1.61.dev1
4
4
  Summary: Checks if an election feed follows best practices
5
5
  Home-page: https://github.com/google/civics_cdf_validator
6
6
  Author: Google Civics
@@ -32,6 +32,8 @@ import networkx
32
32
  import pycountry
33
33
  from six.moves.urllib.parse import urlparse
34
34
 
35
+
36
+ _XML_TRUE_VALUES = frozenset(["true", "1"])
35
37
  _PARTY_LEADERSHIP_TYPES = ["party-leader-id", "party-chair-id"]
36
38
  _INDEPENDENT_PARTY_NAMES = frozenset(["independent", "nonpartisan"])
37
39
  _IDREF_TYPES = frozenset(["xs:IDREF", "xs:IDREFS"])
@@ -58,6 +60,7 @@ _INTERNATIONALIZED_TEXT_ELEMENTS_WITH_ONLY_ONE_TEXT_PER_LANGUAGE = [
58
60
  "BallotTitle",
59
61
  "ConStatement",
60
62
  "Directions",
63
+ "DisplayName",
61
64
  "EffectOfAbstain",
62
65
  "FullName",
63
66
  "FullText",
@@ -285,6 +288,20 @@ def country_code_is_valid(country_code):
285
288
  )
286
289
 
287
290
 
291
def _get_type_or_other_type(element):
  """Returns the effective type text of an element with Type/OtherType.

  Args:
    element: An XML element that may have Type and OtherType children.

  Returns:
    The stripped OtherType text when the stripped Type text is exactly
    "other"; otherwise the stripped Type text. A child for which
    element_has_text is false contributes an empty string.
  """
  texts = {}
  for tag in ("Type", "OtherType"):
    child = element.find(tag)
    texts[tag] = child.text.strip() if element_has_text(child) else ""
  if texts["Type"] == "other":
    return texts["OtherType"]
  return texts["Type"]
303
+
304
+
288
305
  class Schema(base.TreeRule):
289
306
  """Checks if election file validates against the provided schema."""
290
307
 
@@ -2139,7 +2156,7 @@ class VoteCountTypesCoherency(base.BaseRule):
2139
2156
  "seats-total",
2140
2157
  "seats-delta",
2141
2158
  "seats-delta-mandate",
2142
- "seats-delta-institutional"
2159
+ "seats-delta-institutional",
2143
2160
  }
2144
2161
  # Ibid.
2145
2162
  CAND_VC_TYPES = {"candidate-votes"}
@@ -2526,7 +2543,8 @@ class OfficeHasjurisdictionSameAsElectoralDistrict(base.BaseRule):
2526
2543
 
2527
2544
  def check(self, element):
2528
2545
  jurisdiction_values = get_entity_info_for_value_type(
2529
- element, "jurisdiction-id")
2546
+ element, "jurisdiction-id"
2547
+ )
2530
2548
  jurisdiction_values = [
2531
2549
  j_id.strip() for j_id in jurisdiction_values if j_id.strip()
2532
2550
  ]
@@ -3657,8 +3675,7 @@ class MissingFieldsInfo(base.MissingFieldRule):
3657
3675
  return 0
3658
3676
 
3659
3677
  def element_field_mapping(self):
3660
- return {
3661
- }
3678
+ return {}
3662
3679
 
3663
3680
 
3664
3681
  class PartySpanMultipleCountries(base.BaseRule):
@@ -3719,9 +3736,7 @@ class NonExecutiveOfficeShouldHaveGovernmentBody(base.BaseRule):
3719
3736
  officeholder_tenure_collection_element = self.get_elements_by_class(
3720
3737
  election_tree, "OfficeHolderTenureCollection"
3721
3738
  )
3722
- role_element = self.get_elements_by_class(
3723
- election_tree, "Role"
3724
- )
3739
+ role_element = self.get_elements_by_class(election_tree, "Role")
3725
3740
  if officeholder_tenure_collection_element or role_element:
3726
3741
  self.is_post_office_split_feed = True
3727
3742
 
@@ -3747,9 +3762,7 @@ class ExecutiveOfficeShouldNotHaveGovernmentBody(base.BaseRule):
3747
3762
  officeholder_tenure_collection_element = self.get_elements_by_class(
3748
3763
  election_tree, "OfficeHolderTenureCollection"
3749
3764
  )
3750
- role_element = self.get_elements_by_class(
3751
- election_tree, "Role"
3752
- )
3765
+ role_element = self.get_elements_by_class(election_tree, "Role")
3753
3766
  if officeholder_tenure_collection_element or role_element:
3754
3767
  self.is_post_office_split_feed = True
3755
3768
 
@@ -4197,6 +4210,204 @@ class ContestStartDateContainsCorrespondingEndDate(base.DateRule):
4197
4210
  )
4198
4211
 
4199
4212
 
4213
class ValidatePollsCloseDatetimes(base.BaseRule):
  """Checks that LatestPollsClose is not before EarliestPollsClose.

  The check is skipped for a Contest unless both elements have text. Values
  are parsed with datetime.datetime.fromisoformat.
  """

  def elements(self):
    return ["Contest"]

  def check(self, element):
    """Validates the polls-close datetimes of a single Contest.

    Args:
      element: The Contest element to inspect.

    Raises:
      loggers.ElectionError: If either value cannot be parsed, if the two
        values cannot be ordered (datetime raises TypeError when only one of
        them carries a UTC offset), or if LatestPollsClose is earlier than
        EarliestPollsClose.
    """
    earliest_polls_close_element = element.find("EarliestPollsClose")
    latest_polls_close_element = element.find("LatestPollsClose")

    if not element_has_text(
        earliest_polls_close_element
    ) or not element_has_text(latest_polls_close_element):
      return

    earliest_polls_close_text = earliest_polls_close_element.text.strip()
    latest_polls_close_text = latest_polls_close_element.text.strip()

    # Only the parsing calls can raise ValueError, so keep the try minimal.
    try:
      earliest_polls_close = datetime.datetime.fromisoformat(
          earliest_polls_close_text
      )
      latest_polls_close = datetime.datetime.fromisoformat(
          latest_polls_close_text
      )
    except ValueError as e:
      raise loggers.ElectionError.from_message(
          "Invalid PollsClose datetime format in Contest"
          f" {element.get('objectId')}: {e}",
          [element],
      ) from e

    # Ordering an offset-naive datetime against an offset-aware one raises
    # TypeError; surface it as a feed error instead of crashing the rule.
    try:
      latest_is_earlier = latest_polls_close < earliest_polls_close
    except TypeError as e:
      raise loggers.ElectionError.from_message(
          f"LatestPollsClose ({latest_polls_close_text}) and"
          f" EarliestPollsClose ({earliest_polls_close_text}) cannot be"
          f" compared for Contest {element.get('objectId')}: {e}",
          [element],
      ) from e

    if latest_is_earlier:
      raise loggers.ElectionError.from_message(
          f"LatestPollsClose ({latest_polls_close_text}) must not be before"
          f" EarliestPollsClose ({earliest_polls_close_text}) for Contest"
          f" {element.get('objectId')}.",
          [element],
      )
4252
+
4253
+
4254
class ValidateResultsExpected(base.BaseRule):
  """Checks that ResultsExpected is not before the first ResultsReportingStage.

  The ResultsExpected datetime must not be before the ExpectedStartDateTime
  of the earliest ResultsReportingStage excluding the no-results stage.
  """

  def elements(self):
    return ["Contest"]

  def _is_earlier(self, first, second, element):
    """Returns whether first < second, reporting incomparable values.

    Args:
      first: A datetime.datetime.
      second: A datetime.datetime.
      element: The Contest element, attached to any reported error.

    Returns:
      True if first is strictly earlier than second.

    Raises:
      loggers.ElectionError: If the comparison raises TypeError, which
        datetime does when only one operand carries a UTC offset.
    """
    try:
      return first < second
    except TypeError as e:
      raise loggers.ElectionError.from_message(
          f"Cannot compare datetimes {first.isoformat()} and"
          f" {second.isoformat()} in Contest {element.get('objectId')}: {e}",
          [element],
      ) from e

  def check(self, element):
    """Validates ResultsExpected against the reporting stages of a Contest.

    Args:
      element: The Contest element to inspect.

    Raises:
      loggers.ElectionError: If ResultsExpected or a considered
        ExpectedStartDateTime cannot be parsed, if two datetimes cannot be
        ordered, or if ResultsExpected is earlier than the earliest
        considered ExpectedStartDateTime.
    """
    results_expected_element = element.find("ResultsExpected")
    if not element_has_text(results_expected_element):
      return

    results_expected_text = results_expected_element.text.strip()

    try:
      results_expected = datetime.datetime.fromisoformat(results_expected_text)
    except ValueError as e:
      raise loggers.ElectionError.from_message(
          "Invalid ResultsExpected datetime format in Contest"
          f" {element.get('objectId')}: {e}",
          [element],
      ) from e

    stage_collection = element.find("ResultsReportingStageCollection")
    if stage_collection is None:
      return

    earliest_start = None
    earliest_start_text = None
    for stage in stage_collection.findall("ResultsReportingStage"):
      stage_type_element = stage.find("StageType")
      # Resolve the stage type once. A stage without StageType text is still
      # considered, so the error message below must not dereference it.
      stage_type = (
          stage_type_element.text.strip()
          if element_has_text(stage_type_element)
          else ""
      )
      if stage_type == "no-results":
        continue

      start_element = stage.find("ExpectedStartDateTime")
      if not element_has_text(start_element):
        continue

      start_text = start_element.text.strip()
      try:
        start = datetime.datetime.fromisoformat(start_text)
      except ValueError as e:
        raise loggers.ElectionError.from_message(
            "Invalid ExpectedStartDateTime datetime format for the"
            f" '{stage_type}' ResultsReportingStage in"
            f" Contest {element.get('objectId')}: {e}",
            [element],
        ) from e

      if earliest_start is None or self._is_earlier(
          start, earliest_start, element
      ):
        earliest_start = start
        earliest_start_text = start_text

    if earliest_start is None:
      return

    if self._is_earlier(results_expected, earliest_start, element):
      raise loggers.ElectionError.from_message(
          f"ResultsExpected ({results_expected_text}) must not be before the"
          f" ExpectedStartDateTime ({earliest_start_text}) of the earliest"
          f" ResultsReportingStage for Contest {element.get('objectId')}.",
          [element],
      )
4319
+
4320
+
4321
class ValidateResultsEmbargoEnd(base.BaseRule):
  """Checks that ResultsEmbargoEnd is not after the official ResultsReportingStage start."""

  def elements(self):
    return ["Contest"]

  def check(self, element):
    """Validates ResultsEmbargoEnd against the 'official' reporting stage.

    Only the first 'official' ResultsReportingStage that has
    ExpectedStartDateTime text is considered.

    Args:
      element: The Contest element to inspect.

    Raises:
      loggers.ElectionError: If ResultsEmbargoEnd or the official
        ExpectedStartDateTime cannot be parsed, if the two values cannot be
        ordered (datetime raises TypeError when only one of them carries a
        UTC offset), or if the official start is earlier than
        ResultsEmbargoEnd.
    """
    results_embargo_end_element = element.find("ResultsEmbargoEnd")
    if not element_has_text(results_embargo_end_element):
      return

    results_embargo_end_text = results_embargo_end_element.text.strip()

    try:
      results_embargo_end = datetime.datetime.fromisoformat(
          results_embargo_end_text
      )
    except ValueError as e:
      raise loggers.ElectionError.from_message(
          "Invalid ResultsEmbargoEnd datetime format in Contest"
          f" {element.get('objectId')}: {e}",
          [element],
      ) from e

    stage_collection = element.find("ResultsReportingStageCollection")
    if stage_collection is None:
      return

    official_start = None
    official_start_text = None
    for stage in stage_collection.findall("ResultsReportingStage"):
      stage_type_element = stage.find("StageType")
      if (
          not element_has_text(stage_type_element)
          or stage_type_element.text.strip() != "official"
      ):
        continue

      start_element = stage.find("ExpectedStartDateTime")
      if not element_has_text(start_element):
        continue

      start_text = start_element.text.strip()
      try:
        official_start = datetime.datetime.fromisoformat(start_text)
      except ValueError as e:
        raise loggers.ElectionError.from_message(
            "Invalid ExpectedStartDateTime datetime format for the"
            " 'official' ResultsReportingStage in Contest"
            f" {element.get('objectId')}: {e}",
            [element],
        ) from e
      official_start_text = start_text
      break

    if official_start is None:
      return

    # Ordering an offset-naive datetime against an offset-aware one raises
    # TypeError; surface it as a feed error instead of crashing the rule.
    try:
      embargo_ends_late = official_start < results_embargo_end
    except TypeError as e:
      raise loggers.ElectionError.from_message(
          f"ResultsEmbargoEnd ({results_embargo_end_text}) and the"
          f" ExpectedStartDateTime ({official_start_text}) of the official"
          " ResultsReportingStage cannot be compared for Contest"
          f" {element.get('objectId')}: {e}",
          [element],
      ) from e

    if embargo_ends_late:
      raise loggers.ElectionError.from_message(
          f"ResultsEmbargoEnd ({results_embargo_end_text}) must not be after"
          f" the ExpectedStartDateTime ({official_start_text}) of the official"
          f" ResultsReportingStage for Contest {element.get('objectId')}.",
          [element],
      )
4381
+
4382
+
4383
class ResultsReportingStagesMustHaveUniqueType(base.BaseRule):
  """Checks that no StageType repeats within a ResultsReportingStageCollection."""

  def elements(self):
    return ["ResultsReportingStageCollection"]

  def check(self, element):
    """Reports every StageType used by more than one ResultsReportingStage.

    Stages whose StageType has no text are ignored.

    Args:
      element: The ResultsReportingStageCollection element to inspect.

    Raises:
      loggers.ElectionError: With one entry per duplicated StageType, each
        carrying all stages that share that type.
    """
    # A plain dict keeps first-seen order of the stage types.
    stages_by_type = {}
    for stage in element.findall("ResultsReportingStage"):
      stage_type_element = stage.find("StageType")
      if not element_has_text(stage_type_element):
        continue
      stages_by_type.setdefault(stage_type_element.text.strip(), []).append(
          stage
      )

    errors = [
        loggers.LogEntry(
            f"Duplicate ResultsReportingStage StageType '{stage_type}'"
            " found in the same ResultsReportingStageCollection.",
            stages,
        )
        for stage_type, stages in stages_by_type.items()
        if len(stages) > 1
    ]
    if errors:
      raise loggers.ElectionError(errors)
4409
+
4410
+
4200
4411
  class CandidateContestTypesAreCompatible(base.BaseRule):
4201
4412
  """CandidateContest Type values cannot have both a general and primary type."""
4202
4413
 
@@ -4389,6 +4600,27 @@ class SourceDirPathsAreUnique(base.BaseRule):
4389
4600
  raise loggers.ElectionError(error_log)
4390
4601
 
4391
4602
 
4603
class SqsQueueNameRequiresS3SourceDirPath(base.BaseRule):
  """If SqsQueueName is set, SourceDirPath must also be set and must be an s3 path."""

  def elements(self):
    return ["Feed"]

  def check(self, element):
    """Validates the SourceDirPath of a Feed that declares SqsQueueName.

    Args:
      element: The Feed element to inspect.

    Raises:
      loggers.ElectionError: If SqsQueueName has text but SourceDirPath is
        missing, empty, or does not start with "s3://" (case-insensitive).
    """
    if not element_has_text(element.find("SqsQueueName")):
      return

    source_dir_path = element.find("SourceDirPath")
    if element_has_text(
        source_dir_path
    ) and source_dir_path.text.strip().lower().startswith("s3://"):
      return

    # FeedId is only used for the message; a Feed without one must still get
    # the real error rather than an AttributeError.
    feed_id_element = element.find("FeedId")
    feed_id = feed_id_element.text if feed_id_element is not None else None
    raise loggers.ElectionError.from_message(
        "If SqsQueueName is set, SourceDirPath must also be set and must be"
        " an s3 path for feed {}.".format(feed_id),
        [element],
    )
4622
+
4623
+
4392
4624
  class ElectionEventDatesAreSequential(base.DateRule):
4393
4625
  """Dates in an ElectionEvent element should be sequential."""
4394
4626
 
@@ -4867,15 +5099,240 @@ class FeedElementsShouldHaveSubElementsBasedOnType(base.BaseRule):
4867
5099
  "ElectionEventCollection should exist for %s feed %s."
4868
5100
  % (feed_type, feed_id)
4869
5101
  )
4870
- if not element.find("ElectionEventCollection").findall(
4871
- "ElectionEvent"
4872
- ):
5102
+ if not element.find("ElectionEventCollection").findall("ElectionEvent"):
4873
5103
  raise loggers.ElectionError.from_message(
4874
5104
  "ElectionEventCollection should have at least one ElectionEvent"
4875
5105
  " for %s feed %s." % (feed_type, feed_id)
4876
5106
  )
4877
5107
 
4878
5108
 
5109
class NotEmptyUniqueDataSourceUris(base.BaseRule):
  """Checks that DataSource Uri values are non-empty and not shared."""

  def elements(self):
    return ["DataSourceCollection"]

  def check(self, element):
    """Reports empty Uri elements and Uri values used by several DataSources.

    Args:
      element: The DataSourceCollection element to inspect.

    Raises:
      loggers.ElectionError: With one entry per empty Uri, followed by one
        entry per Uri value shared by more than one DataSource.
    """
    problems = []
    # Maps each stripped Uri text to the set of DataSource elements using it.
    sources_by_uri = collections.defaultdict(set)

    for source in element.findall("DataSource"):
      for uri_node in source.findall("Uri"):
        if element_has_text(uri_node):
          sources_by_uri[uri_node.text.strip()].add(source)
        else:
          problems.append(
              loggers.LogEntry(
                  "DataSource {} has an empty Uri.".format(
                      source.get("objectId")
                  ),
                  [source],
              )
          )

    for uri, sources in sources_by_uri.items():
      if len(sources) > 1:
        # Sort by objectId so the reported order is stable.
        ordered_sources = sorted(sources, key=lambda ds: ds.get("objectId"))
        joined_ids = ", ".join(ds.get("objectId") for ds in ordered_sources)
        problems.append(
            loggers.LogEntry(
                "DataSource entities {} have duplicate Uri '{}'.".format(
                    joined_ids, uri
                ),
                ordered_sources,
            )
        )

    if problems:
      raise loggers.ElectionError(problems)
5153
+
5154
+
5155
class UniqueDataSourceLanguages(base.BaseRule):
  """Checks that Uri elements have unique languages within a DataSource."""

  def elements(self):
    return ["DataSourceCollection"]

  def check(self, element):
    """Reports Uri elements with a missing or repeated language attribute.

    Args:
      element: The DataSourceCollection element to inspect.

    Raises:
      loggers.ElectionError: With one entry per Uri whose language attribute
        is absent or blank, and one entry per Uri repeating a language
        already seen in the same DataSource.
    """
    error_log = []

    for data_source in element.findall("DataSource"):
      data_source_id = data_source.get("objectId")
      seen_uri_languages = set()
      for uri_element in data_source.findall("Uri"):
        # Strip before testing for emptiness so a whitespace-only attribute
        # is reported as missing instead of being tracked as language ''.
        language = (uri_element.get("language") or "").strip()
        if not language:
          error_log.append(
              loggers.LogEntry(
                  "DataSource {} has a Uri element without a language.".format(
                      data_source_id
                  ),
                  [uri_element],
              )
          )
          continue
        if language in seen_uri_languages:
          error_log.append(
              loggers.LogEntry(
                  "DataSource {} has multiple Uri elements with the same"
                  " language '{}'.".format(data_source_id, language),
                  [element],
              )
          )
        else:
          seen_uri_languages.add(language)

    if error_log:
      raise loggers.ElectionError(error_log)
5193
+
5194
+
5195
class UniqueDataSourceDisplayNames(base.BaseRule):
  """Checks that DataSource entities have globally unique DisplayNames."""

  def elements(self):
    return ["DataSourceCollection"]

  def check(self, element):
    """Reports empty DisplayName texts and names shared by DataSources.

    A DataSource without a DisplayName child is skipped by this rule.

    Args:
      element: The DataSourceCollection element to inspect.

    Raises:
      loggers.ElectionError: With one entry per DisplayName Text element
        lacking text, followed by one entry per name text shared by more
        than one DataSource.
    """
    # Maps each stripped name text to the set of DataSource elements using it.
    data_sources_by_name = collections.defaultdict(set)
    error_log = []

    for data_source in element.findall("DataSource"):
      data_source_id = data_source.get("objectId")
      display_name_element = data_source.find("DisplayName")
      if display_name_element is None:
        # Nothing to compare; avoids AttributeError on the findall below.
        continue
      for text_element in display_name_element.findall("Text"):
        if not element_has_text(text_element):
          error_log.append(
              loggers.LogEntry(
                  "DataSource {} has a DisplayName element without"
                  " text.".format(data_source_id),
                  [data_source],
              )
          )
          continue
        name_text = text_element.text.strip()
        data_sources_by_name[name_text].add(data_source)

    for name_text, data_sources in data_sources_by_name.items():
      if len(data_sources) <= 1:
        continue
      # Sort by objectId so the reported order is stable.
      sorted_data_sources = sorted(
          data_sources, key=lambda ds: ds.get("objectId")
      )
      datasource_ids = [ds.get("objectId") for ds in sorted_data_sources]
      error_log.append(
          loggers.LogEntry(
              "DataSource entities {} have duplicate DisplayName '{}'.".format(
                  ", ".join(datasource_ids), name_text
              ),
              sorted_data_sources,
          )
      )

    if error_log:
      raise loggers.ElectionError(error_log)
5239
+
5240
+
5241
def _get_attribution_depth(element):
  """Returns the number of levels in the Attribution tree rooted at element.

  The element itself counts as one level; every further level of nested
  direct Attribution children adds one.
  """
  nested_depths = [
      _get_attribution_depth(nested)
      for nested in element.findall("Attribution")
  ]
  # With no nested Attribution the default of 0 yields a depth of 1.
  return 1 + max(nested_depths, default=0)
5247
+
5248
+
5249
def _get_datasource_id(element):
  """Returns the stripped DataSourceId child text, or "" when unavailable."""
  data_source_id_element = element.find("DataSourceId")
  if not element_has_text(data_source_id_element):
    return ""
  return data_source_id_element.text.strip()
5253
+
5254
+
5255
class AttributionDepthLimit(base.BaseRule):
  """Checks that no top-level Attribution of a ResultsReportingStage nests deeper than three levels."""

  def elements(self):
    return ["ResultsReportingStage"]

  def check(self, element):
    """Reports each direct Attribution child whose tree exceeds depth 3.

    Args:
      element: The ResultsReportingStage element to inspect.

    Raises:
      loggers.ElectionError: With one entry per offending top-level
        Attribution.
    """
    error_log = []
    # findall with a bare tag only matches direct children, so each match is
    # the root of one Attribution tree.
    for top_level in element.findall("Attribution"):
      depth = _get_attribution_depth(top_level)
      if depth <= 3:
        continue
      data_source_id = _get_datasource_id(top_level)
      error_log.append(
          loggers.LogEntry(
              f"Attribution starting with DataSourceId '{data_source_id}'"
              f" has a depth of {depth}, exceeding the limit of 3.",
              [top_level],
          )
      )
    if error_log:
      raise loggers.ElectionError(error_log)
5277
+
5278
+
5279
def _canonicalize_cycle(cycle):
  """Rotates a cycle so that it begins at its smallest element.

  An empty cycle yields an empty list. The relative order of the elements
  is preserved; only the starting point changes.
  """
  if not cycle:
    return []
  pivot = cycle.index(min(cycle))
  return cycle[pivot:] + cycle[:pivot]
5286
+
5287
+
5288
class AttributionContainsNoCycles(base.TreeRule):
  """Checks that DataSourceIds of nested Attribution elements form no cycle."""

  def check(self):
    """Builds a parent -> child DataSourceId graph and reports its cycles.

    Raises:
      loggers.ElectionError: With one entry per simple cycle found.
    """
    graph = networkx.DiGraph()

    for parent in self.get_elements_by_class(self.election_tree, "Attribution"):
      parent_id = _get_datasource_id(parent)
      if not parent_id:
        continue
      graph.add_node(parent_id)
      for nested in parent.findall("Attribution"):
        nested_id = _get_datasource_id(nested)
        if nested_id:
          graph.add_edge(parent_id, nested_id)

    # Collect every node that lies on some directed cycle.
    nodes_in_a_cycle = set()
    for component in networkx.strongly_connected_components(graph):
      if len(component) == 1:
        # A lone node is cyclic only when it points at itself.
        (only_node,) = component
        if not graph.has_edge(only_node, only_node):
          continue
      nodes_in_a_cycle.update(component)

    if not nodes_in_a_cycle:
      return

    # Enumerate cycles on the cyclic nodes only, to keep the search small.
    cyclic_subgraph = graph.subgraph(sorted(nodes_in_a_cycle))
    error_log = []
    for cycle in networkx.simple_cycles(cyclic_subgraph):
      canonical = _canonicalize_cycle(cycle)
      # The first node is repeated at the end to show the loop closing.
      error_log.append(
          loggers.LogEntry(
              f"Cycle detected in Attribution: {' -> '.join(canonical)} ->"
              f" {canonical[0]}"
          )
      )
    raise loggers.ElectionError(error_log)
5334
+
5335
+
4879
5336
  class RuleSet(enum.Enum):
4880
5337
  """Names for sets of rules used to validate a particular feed type."""
4881
5338
 
@@ -4888,6 +5345,144 @@ class RuleSet(enum.Enum):
4888
5345
  VOTER_INFORMATION = 7
4889
5346
 
4890
5347
 
5348
class ValidateSpecialBallotSelectionCountedInTotal(base.BaseRule):
  """Enforces constraints on CountedInTotal for special ballot selections.

  BlankBallotSelection, NullBallotSelection and NoneOfTheAboveBallotSelection
  must carry CountedInTotal with text, whereas AggregateBallotSelection must
  not contain a CountedInTotal child at all.
  """

  def elements(self):
    return [
        "BlankBallotSelection",
        "NullBallotSelection",
        "NoneOfTheAboveBallotSelection",
        "AggregateBallotSelection",
    ]

  def check(self, element):
    """Validates the CountedInTotal child of one special ballot selection.

    Args:
      element: One of the selection elements listed by elements().

    Raises:
      loggers.ElectionError: If the constraint for the element's tag is
        violated.
    """
    tag = element.tag
    counted_in_total = element.find("CountedInTotal")

    if tag == "AggregateBallotSelection":
      if counted_in_total is not None:
        raise loggers.ElectionError.from_message(
            "AggregateBallotSelection must not have CountedInTotal set.",
            [element],
        )
      return

    requires_explicit_value = tag in (
        "BlankBallotSelection",
        "NullBallotSelection",
        "NoneOfTheAboveBallotSelection",
    )
    if requires_explicit_value and not element_has_text(counted_in_total):
      raise loggers.ElectionError.from_message(
          f"{tag} must have an explicit value for CountedInTotal.",
          [element],
      )
5382
+
5383
+
5384
class ValidateIncludeInAggregationBallotSelections(base.BaseRule):
  """Validates BallotSelections with IncludedInAggregation.

  Checks that the sum of all vote counts for the selections marked with
  IncludedInAggregation is not greater than the vote count of the
  AggregateBallotSelection on that same Contest, per vote count type and
  GpUnit. Also requires that if IncludedInAggregation is set on any selection
  then the AggregateBallotSelection is present on that Contest.
  """

  def elements(self):
    return ["CandidateContest", "PartyContest"]

  def _gather_vote_counts(self, element):
    """Gathers vote counts from a selection element.

    Args:
      element: A selection element that may have a VoteCountsCollection.

    Returns:
      A defaultdict(float) mapping (vote count type, GpUnitId) to the summed
      Count values. VoteCounts entries without Count text are ignored and a
      missing GpUnitId is recorded as "".
    """
    count_by_type_and_gp_unit = collections.defaultdict(float)
    vote_counts_collection = element.find("VoteCountsCollection")
    if vote_counts_collection is None:
      return count_by_type_and_gp_unit

    for vote_counts in vote_counts_collection.findall("VoteCounts"):
      count_element = vote_counts.find("Count")
      if not element_has_text(count_element):
        continue
      count = float(count_element.text)

      vote_count_type = _get_type_or_other_type(vote_counts)
      gp_unit_id_element = vote_counts.find("GpUnitId")
      gp_unit_id = (
          gp_unit_id_element.text.strip()
          if element_has_text(gp_unit_id_element)
          else ""
      )

      type_and_gp_unit = (vote_count_type, gp_unit_id)
      count_by_type_and_gp_unit[type_and_gp_unit] += count

    return count_by_type_and_gp_unit

  def check(self, element):
    """Validates the aggregated vote counts of a single contest.

    Args:
      element: The CandidateContest or PartyContest element to inspect.

    Raises:
      loggers.ElectionError: If selections are marked IncludedInAggregation
        but the contest has no AggregateBallotSelection, or if for some
        (vote count type, GpUnit) the marked selections sum to more than the
        aggregate count.
    """
    contest_id = element.get("objectId")

    candidate_selections = self.get_elements_by_class(
        element, "CandidateSelection"
    )
    party_selections = self.get_elements_by_class(element, "PartySelection")
    all_selections = candidate_selections + party_selections

    included_selections = []
    for selection in all_selections:
      included_in_aggregation_element = selection.find("IncludedInAggregation")
      # Strip before comparing: surrounding whitespace is insignificant for
      # xs:boolean, so " true " must count as true.
      if (
          element_has_text(included_in_aggregation_element)
          and included_in_aggregation_element.text.strip() in _XML_TRUE_VALUES
      ):
        included_selections.append(selection)
    if not included_selections:
      return

    aggregate_selection = element.find("AggregateBallotSelection")
    if aggregate_selection is None:
      raise loggers.ElectionError.from_message(
          f"Contest {contest_id} has selections marked as IncludedInAggregation"
          " but is missing AggregateBallotSelection.",
          [element],
      )

    aggregate_count_by_type_and_gp_unit = self._gather_vote_counts(
        aggregate_selection
    )

    selections_count_sum_by_type_and_gp_unit = collections.defaultdict(float)
    for selection in included_selections:
      for (
          type_and_gp_unit,
          count,
      ) in self._gather_vote_counts(selection).items():
        selections_count_sum_by_type_and_gp_unit[type_and_gp_unit] += count

    error_log = []
    for (
        type_and_gp_unit,
        total_count,
    ) in selections_count_sum_by_type_and_gp_unit.items():
      # A key absent from the aggregate counts as 0.0; .get avoids inserting
      # it into the defaultdict as a side effect of the lookup.
      aggregate_count = aggregate_count_by_type_and_gp_unit.get(
          type_and_gp_unit, 0.0
      )
      if total_count > aggregate_count:
        resolved_type, gp_unit_id = type_and_gp_unit
        error_log.append(
            loggers.LogEntry(
                f"In Contest {contest_id}, the sum of vote counts"
                f" ({total_count}) for selections marked as"
                " IncludedInAggregation exceeds the vote count"
                f" ({aggregate_count}) for the AggregateBallotSelection for"
                f" vote count type='{resolved_type}' (GpUnit: '{gp_unit_id}').",
                [element],
            )
        )

    if error_log:
      raise loggers.ElectionError(error_log)
5484
+
5485
+
4891
5486
  # To add new rules, create a new class, inherit the base rule,
4892
5487
  # and add it to the correct rule list.
4893
5488
  COMMON_RULES = (
@@ -4948,6 +5543,8 @@ COMMON_RULES = (
4948
5543
 
4949
5544
  ELECTION_RULES = COMMON_RULES + (
4950
5545
  # go/keep-sorted start
5546
+ AttributionContainsNoCycles,
5547
+ AttributionDepthLimit,
4951
5548
  BallotTitle,
4952
5549
  CandidateContestTypesAreCompatible,
4953
5550
  CandidatesReferencedInRelatedContests,
@@ -4980,16 +5577,25 @@ ELECTION_RULES = COMMON_RULES + (
4980
5577
  MissingPartyNameTranslation,
4981
5578
  MultipleCandidatesPointToTheSamePersonInTheSameContest,
4982
5579
  MultipleInternationalizedTextWithSameLanguageCode,
5580
+ NotEmptyUniqueDataSourceUris,
4983
5581
  OfficeHasjurisdictionSameAsElectoralDistrict,
4984
5582
  PartisanPrimary,
4985
5583
  PartisanPrimaryHeuristic,
4986
5584
  PercentSum,
4987
5585
  ProperBallotSelection,
5586
+ ResultsReportingStagesMustHaveUniqueType,
4988
5587
  SelfDeclaredCandidateMethod,
4989
5588
  SingularPartySelection,
4990
5589
  SubsequentContestIdIsValidRelatedContest,
5590
+ UniqueDataSourceDisplayNames,
5591
+ UniqueDataSourceLanguages,
4991
5592
  ValidateDuplicateColors,
5593
+ ValidateIncludeInAggregationBallotSelections,
4992
5594
  ValidateInfoUriAnnotation,
5595
+ ValidatePollsCloseDatetimes,
5596
+ ValidateResultsEmbargoEnd,
5597
+ ValidateResultsExpected,
5598
+ ValidateSpecialBallotSelectionCountedInTotal,
4993
5599
  VoteCountTypesCoherency,
4994
5600
  VoteCountValidSeatsDeltaTypes,
4995
5601
  WinnerCountLimit,
@@ -5040,6 +5646,7 @@ METADATA_RULES = (
5040
5646
  Schema,
5041
5647
  SourceDirPathMustBeSetAfterInitialDeliveryDate,
5042
5648
  SourceDirPathsAreUnique,
5649
+ SqsQueueNameRequiresS3SourceDirPath,
5043
5650
  UniqueLabel,
5044
5651
  # go/keep-sorted end
5045
5652
  )
@@ -5,4 +5,4 @@ No dependencies should be added to this module.
5
5
  See https://packaging.python.org/guides/single-sourcing-package-version/
6
6
  """
7
7
 
8
- __version__ = '1.60.dev6'
8
+ __version__ = '1.61.dev1'