civics-cdf-validator 1.60.dev5__tar.gz → 1.61.dev1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (22) hide show
  1. {civics_cdf_validator-1.60.dev5/civics_cdf_validator.egg-info → civics_cdf_validator-1.61.dev1}/PKG-INFO +1 -1
  2. {civics_cdf_validator-1.60.dev5 → civics_cdf_validator-1.61.dev1}/base.py +1 -1
  3. {civics_cdf_validator-1.60.dev5 → civics_cdf_validator-1.61.dev1/civics_cdf_validator.egg-info}/PKG-INFO +1 -1
  4. {civics_cdf_validator-1.60.dev5 → civics_cdf_validator-1.61.dev1}/rules.py +620 -50
  5. {civics_cdf_validator-1.60.dev5 → civics_cdf_validator-1.61.dev1}/version.py +1 -1
  6. {civics_cdf_validator-1.60.dev5 → civics_cdf_validator-1.61.dev1}/CONTRIBUTING.md +0 -0
  7. {civics_cdf_validator-1.60.dev5 → civics_cdf_validator-1.61.dev1}/LICENSE-2.0.txt +0 -0
  8. {civics_cdf_validator-1.60.dev5 → civics_cdf_validator-1.61.dev1}/MANIFEST.in +0 -0
  9. {civics_cdf_validator-1.60.dev5 → civics_cdf_validator-1.61.dev1}/README.md +0 -0
  10. {civics_cdf_validator-1.60.dev5 → civics_cdf_validator-1.61.dev1}/__init__.py +0 -0
  11. {civics_cdf_validator-1.60.dev5 → civics_cdf_validator-1.61.dev1}/civics_cdf_validator.egg-info/SOURCES.txt +0 -0
  12. {civics_cdf_validator-1.60.dev5 → civics_cdf_validator-1.61.dev1}/civics_cdf_validator.egg-info/dependency_links.txt +0 -0
  13. {civics_cdf_validator-1.60.dev5 → civics_cdf_validator-1.61.dev1}/civics_cdf_validator.egg-info/entry_points.txt +0 -0
  14. {civics_cdf_validator-1.60.dev5 → civics_cdf_validator-1.61.dev1}/civics_cdf_validator.egg-info/requires.txt +0 -0
  15. {civics_cdf_validator-1.60.dev5 → civics_cdf_validator-1.61.dev1}/civics_cdf_validator.egg-info/top_level.txt +0 -0
  16. {civics_cdf_validator-1.60.dev5 → civics_cdf_validator-1.61.dev1}/gpunit_rules.py +0 -0
  17. {civics_cdf_validator-1.60.dev5 → civics_cdf_validator-1.61.dev1}/loggers.py +0 -0
  18. {civics_cdf_validator-1.60.dev5 → civics_cdf_validator-1.61.dev1}/office_utils.py +0 -0
  19. {civics_cdf_validator-1.60.dev5 → civics_cdf_validator-1.61.dev1}/setup.cfg +0 -0
  20. {civics_cdf_validator-1.60.dev5 → civics_cdf_validator-1.61.dev1}/setup.py +0 -0
  21. {civics_cdf_validator-1.60.dev5 → civics_cdf_validator-1.61.dev1}/stats.py +0 -0
  22. {civics_cdf_validator-1.60.dev5 → civics_cdf_validator-1.61.dev1}/validator.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: civics_cdf_validator
3
- Version: 1.60.dev5
3
+ Version: 1.61.dev1
4
4
  Summary: Checks if an election feed follows best practices
5
5
  Home-page: https://github.com/google/civics_cdf_validator
6
6
  Author: Google Civics
@@ -225,7 +225,7 @@ class DateRule(BaseRule):
225
225
 
226
226
  def is_date_in_past(self, date):
227
227
  """Check if a date is in the past."""
228
- today = datetime.datetime.utcnow()
228
+ today = datetime.datetime.now(datetime.timezone.utc)
229
229
  today_partial_date = PartialDate(today.year, today.month, today.day)
230
230
  delta = date.is_older_than(today_partial_date)
231
231
  return delta > 0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: civics_cdf_validator
3
- Version: 1.60.dev5
3
+ Version: 1.61.dev1
4
4
  Summary: Checks if an election feed follows best practices
5
5
  Home-page: https://github.com/google/civics_cdf_validator
6
6
  Author: Google Civics
@@ -32,6 +32,8 @@ import networkx
32
32
  import pycountry
33
33
  from six.moves.urllib.parse import urlparse
34
34
 
35
+
36
+ _XML_TRUE_VALUES = frozenset(["true", "1"])
35
37
  _PARTY_LEADERSHIP_TYPES = ["party-leader-id", "party-chair-id"]
36
38
  _INDEPENDENT_PARTY_NAMES = frozenset(["independent", "nonpartisan"])
37
39
  _IDREF_TYPES = frozenset(["xs:IDREF", "xs:IDREFS"])
@@ -58,6 +60,7 @@ _INTERNATIONALIZED_TEXT_ELEMENTS_WITH_ONLY_ONE_TEXT_PER_LANGUAGE = [
58
60
  "BallotTitle",
59
61
  "ConStatement",
60
62
  "Directions",
63
+ "DisplayName",
61
64
  "EffectOfAbstain",
62
65
  "FullName",
63
66
  "FullText",
@@ -285,6 +288,20 @@ def country_code_is_valid(country_code):
285
288
  )
286
289
 
287
290
 
291
def _get_type_or_other_type(element):
  """Resolves the effective type of an element with Type/OtherType children.

  The stripped text of the direct `Type` child is used unless it is exactly
  "other"; in that case the stripped text of the direct `OtherType` child is
  used instead. A child that `element_has_text` rejects counts as "".

  Args:
    element: XML element whose `Type` and `OtherType` children are inspected.

  Returns:
    The resolved type string, which may be empty.
  """

  def _stripped_child_text(tag):
    child = element.find(tag)
    if element_has_text(child):
      return child.text.strip()
    return ""

  declared_type = _stripped_child_text("Type")
  if declared_type != "other":
    return declared_type
  return _stripped_child_text("OtherType")
303
+
304
+
288
305
  class Schema(base.TreeRule):
289
306
  """Checks if election file validates against the provided schema."""
290
307
 
@@ -1213,42 +1230,6 @@ class SingularPartySelection(base.BaseRule):
1213
1230
  )
1214
1231
 
1215
1232
 
1216
- class PartiesHaveValidColors(base.BaseRule):
1217
- """Each Party should have a valid hex integer less than 16^6, without a leading '#'.
1218
-
1219
- A Party object that has no Color or an invalid Color should be picked up
1220
- within this class and returned to the user as a warning.
1221
- """
1222
-
1223
- def elements(self):
1224
- return ["Party"]
1225
-
1226
- def check(self, element):
1227
- colors = element.findall("Color")
1228
- if not colors:
1229
- return
1230
- if len(colors) > 1:
1231
- raise loggers.ElectionWarning.from_message(
1232
- "The Party has more than one color.", [element]
1233
- )
1234
- color_val = colors[0].text
1235
- if not color_val:
1236
- raise loggers.ElectionWarning.from_message(
1237
- "Color tag is missing a value.", [colors[0]]
1238
- )
1239
- try:
1240
- int(color_val, 16)
1241
- except ValueError:
1242
- raise loggers.ElectionWarning.from_message(
1243
- "%s is not a valid hex color." % color_val,
1244
- [colors[0]],
1245
- )
1246
- if not re.match("^([A-Fa-f0-9]{6}|[A-Fa-f0-9]{3})$", color_val):
1247
- raise loggers.ElectionWarning.from_message(
1248
- "%s should be a hexadecimal less than 16^6." % color_val, [colors[0]]
1249
- )
1250
-
1251
-
1252
1233
  class PersonHasUniqueFullName(base.BaseRule):
1253
1234
  """A Person should be defined one time in <PersonCollection>.
1254
1235
 
@@ -2175,7 +2156,7 @@ class VoteCountTypesCoherency(base.BaseRule):
2175
2156
  "seats-total",
2176
2157
  "seats-delta",
2177
2158
  "seats-delta-mandate",
2178
- "seats-delta-institutional"
2159
+ "seats-delta-institutional",
2179
2160
  }
2180
2161
  # Ibid.
2181
2162
  CAND_VC_TYPES = {"candidate-votes"}
@@ -2562,7 +2543,8 @@ class OfficeHasjurisdictionSameAsElectoralDistrict(base.BaseRule):
2562
2543
 
2563
2544
  def check(self, element):
2564
2545
  jurisdiction_values = get_entity_info_for_value_type(
2565
- element, "jurisdiction-id")
2546
+ element, "jurisdiction-id"
2547
+ )
2566
2548
  jurisdiction_values = [
2567
2549
  j_id.strip() for j_id in jurisdiction_values if j_id.strip()
2568
2550
  ]
@@ -3693,8 +3675,7 @@ class MissingFieldsInfo(base.MissingFieldRule):
3693
3675
  return 0
3694
3676
 
3695
3677
  def element_field_mapping(self):
3696
- return {
3697
- }
3678
+ return {}
3698
3679
 
3699
3680
 
3700
3681
  class PartySpanMultipleCountries(base.BaseRule):
@@ -3755,9 +3736,7 @@ class NonExecutiveOfficeShouldHaveGovernmentBody(base.BaseRule):
3755
3736
  officeholder_tenure_collection_element = self.get_elements_by_class(
3756
3737
  election_tree, "OfficeHolderTenureCollection"
3757
3738
  )
3758
- role_element = self.get_elements_by_class(
3759
- election_tree, "Role"
3760
- )
3739
+ role_element = self.get_elements_by_class(election_tree, "Role")
3761
3740
  if officeholder_tenure_collection_element or role_element:
3762
3741
  self.is_post_office_split_feed = True
3763
3742
 
@@ -3783,9 +3762,7 @@ class ExecutiveOfficeShouldNotHaveGovernmentBody(base.BaseRule):
3783
3762
  officeholder_tenure_collection_element = self.get_elements_by_class(
3784
3763
  election_tree, "OfficeHolderTenureCollection"
3785
3764
  )
3786
- role_element = self.get_elements_by_class(
3787
- election_tree, "Role"
3788
- )
3765
+ role_element = self.get_elements_by_class(election_tree, "Role")
3789
3766
  if officeholder_tenure_collection_element or role_element:
3790
3767
  self.is_post_office_split_feed = True
3791
3768
 
@@ -4233,6 +4210,204 @@ class ContestStartDateContainsCorrespondingEndDate(base.DateRule):
4233
4210
  )
4234
4211
 
4235
4212
 
4213
class ValidatePollsCloseDatetimes(base.BaseRule):
  """Checks that LatestPollsClose is not before EarliestPollsClose.

  The rule applies only to a Contest that carries text for both elements.
  Both values are parsed with `datetime.datetime.fromisoformat`.
  """

  def elements(self):
    return ["Contest"]

  def check(self, element):
    """Validates the ordering of the two PollsClose datetimes of a Contest.

    Args:
      element: The Contest element under validation.

    Raises:
      loggers.ElectionError: If either value cannot be parsed, if the two
        values cannot be compared (one carries a UTC offset and the other does
        not), or if LatestPollsClose is before EarliestPollsClose.
    """
    earliest_polls_close_element = element.find("EarliestPollsClose")
    latest_polls_close_element = element.find("LatestPollsClose")

    if not element_has_text(
        earliest_polls_close_element
    ) or not element_has_text(latest_polls_close_element):
      return

    contest_id = element.get("objectId")
    earliest_polls_close_text = earliest_polls_close_element.text.strip()
    latest_polls_close_text = latest_polls_close_element.text.strip()

    # Only the parsing lives in this try block so that the handler cannot
    # accidentally swallow an unrelated ValueError.
    try:
      earliest_polls_close = datetime.datetime.fromisoformat(
          earliest_polls_close_text
      )
      latest_polls_close = datetime.datetime.fromisoformat(
          latest_polls_close_text
      )
    except ValueError as e:
      raise loggers.ElectionError.from_message(
          "Invalid PollsClose datetime format in Contest"
          f" {contest_id}: {e}",
          [element],
      ) from e

    # Python refuses to order an offset-naive datetime against an offset-aware
    # one (TypeError). Report that as a feed error instead of crashing.
    try:
      latest_is_before_earliest = latest_polls_close < earliest_polls_close
    except TypeError as e:
      raise loggers.ElectionError.from_message(
          f"Cannot compare LatestPollsClose ({latest_polls_close_text}) with"
          f" EarliestPollsClose ({earliest_polls_close_text}) for Contest"
          f" {contest_id}: {e}",
          [element],
      ) from e

    if latest_is_before_earliest:
      raise loggers.ElectionError.from_message(
          f"LatestPollsClose ({latest_polls_close_text}) must not be before"
          f" EarliestPollsClose ({earliest_polls_close_text}) for Contest"
          f" {contest_id}.",
          [element],
      )
4252
+
4253
+
4254
class ValidateResultsExpected(base.BaseRule):
  """Checks that ResultsExpected is not before the first ResultsReportingStage.

  The ResultsExpected datetime must not be before the ExpectedStartDateTime
  of the earliest ResultsReportingStage, excluding the "no-results" stage.
  Stages without an ExpectedStartDateTime are ignored.
  """

  def elements(self):
    return ["Contest"]

  def check(self, element):
    """Validates ResultsExpected against the earliest reporting stage start.

    Args:
      element: The Contest element under validation.

    Raises:
      loggers.ElectionError: If ResultsExpected or a stage start cannot be
        parsed, if the datetimes cannot be compared (mix of values with and
        without a UTC offset), or if ResultsExpected is before the earliest
        stage start.
    """
    results_expected_element = element.find("ResultsExpected")
    if not element_has_text(results_expected_element):
      return

    contest_id = element.get("objectId")
    results_expected_text = results_expected_element.text.strip()

    try:
      results_expected = datetime.datetime.fromisoformat(results_expected_text)
    except ValueError as e:
      raise loggers.ElectionError.from_message(
          "Invalid ResultsExpected datetime format in Contest"
          f" {contest_id}: {e}",
          [element],
      ) from e

    stage_collection = element.find("ResultsReportingStageCollection")
    if stage_collection is None:
      return

    # (parsed start, original text) for every stage that takes part in the
    # comparison, in document order.
    stage_starts = []
    for stage in stage_collection.findall("ResultsReportingStage"):
      stage_type_element = stage.find("StageType")
      # Resolve the stage type once; a missing or empty StageType becomes ""
      # so the error message below can never dereference None.
      stage_type = (
          stage_type_element.text.strip()
          if element_has_text(stage_type_element)
          else ""
      )
      if stage_type == "no-results":
        continue

      start_element = stage.find("ExpectedStartDateTime")
      if not element_has_text(start_element):
        continue

      start_text = start_element.text.strip()
      try:
        start = datetime.datetime.fromisoformat(start_text)
      except ValueError as e:
        raise loggers.ElectionError.from_message(
            "Invalid ExpectedStartDateTime datetime format for the"
            f" '{stage_type}' ResultsReportingStage in"
            f" Contest {contest_id}: {e}",
            [element],
        ) from e
      stage_starts.append((start, start_text))

    if not stage_starts:
      return

    # Ordering an offset-naive datetime against an offset-aware one raises
    # TypeError; surface that as a feed error rather than a crash. min() keeps
    # the first of equal values, matching a strict "<" scan.
    try:
      earliest_start, earliest_start_text = min(
          stage_starts, key=lambda pair: pair[0]
      )
      results_expected_too_early = results_expected < earliest_start
    except TypeError as e:
      raise loggers.ElectionError.from_message(
          f"Cannot compare ResultsExpected ({results_expected_text}) with the"
          " ExpectedStartDateTime values of the ResultsReportingStages for"
          f" Contest {contest_id}: {e}",
          [element],
      ) from e

    if results_expected_too_early:
      raise loggers.ElectionError.from_message(
          f"ResultsExpected ({results_expected_text}) must not be before the"
          f" ExpectedStartDateTime ({earliest_start_text}) of the earliest"
          f" ResultsReportingStage for Contest {contest_id}.",
          [element],
      )
4319
+
4320
+
4321
class ValidateResultsEmbargoEnd(base.BaseRule):
  """Checks that ResultsEmbargoEnd is not after the official stage start.

  The comparison uses the ExpectedStartDateTime of the first "official"
  ResultsReportingStage that has one; later official stages are not examined.
  """

  def elements(self):
    return ["Contest"]

  def check(self, element):
    """Validates ResultsEmbargoEnd against the official reporting stage.

    Args:
      element: The Contest element under validation.

    Raises:
      loggers.ElectionError: If ResultsEmbargoEnd or the official stage start
        cannot be parsed, if the two cannot be compared (one carries a UTC
        offset and the other does not), or if the official stage starts
        before the embargo ends.
    """
    results_embargo_end_element = element.find("ResultsEmbargoEnd")
    if not element_has_text(results_embargo_end_element):
      return

    contest_id = element.get("objectId")
    results_embargo_end_text = results_embargo_end_element.text.strip()

    try:
      results_embargo_end = datetime.datetime.fromisoformat(
          results_embargo_end_text
      )
    except ValueError as e:
      raise loggers.ElectionError.from_message(
          "Invalid ResultsEmbargoEnd datetime format in Contest"
          f" {contest_id}: {e}",
          [element],
      ) from e

    stage_collection = element.find("ResultsReportingStageCollection")
    if stage_collection is None:
      return

    official_start = None
    official_start_text = None
    for stage in stage_collection.findall("ResultsReportingStage"):
      stage_type_element = stage.find("StageType")
      if not (
          element_has_text(stage_type_element)
          and stage_type_element.text.strip() == "official"
      ):
        continue

      start_element = stage.find("ExpectedStartDateTime")
      if not element_has_text(start_element):
        continue

      start_text = start_element.text.strip()
      try:
        official_start = datetime.datetime.fromisoformat(start_text)
      except ValueError as e:
        raise loggers.ElectionError.from_message(
            "Invalid ExpectedStartDateTime datetime format for the"
            " 'official' ResultsReportingStage in Contest"
            f" {contest_id}: {e}",
            [element],
        ) from e
      official_start_text = start_text
      # Only the first official stage with a start time is considered.
      break

    if official_start is None:
      return

    # Ordering an offset-naive datetime against an offset-aware one raises
    # TypeError; report it as a feed error instead of crashing.
    try:
      embargo_ends_too_late = official_start < results_embargo_end
    except TypeError as e:
      raise loggers.ElectionError.from_message(
          f"Cannot compare ResultsEmbargoEnd ({results_embargo_end_text}) with"
          f" the ExpectedStartDateTime ({official_start_text}) of the official"
          f" ResultsReportingStage for Contest {contest_id}: {e}",
          [element],
      ) from e

    if embargo_ends_too_late:
      raise loggers.ElectionError.from_message(
          f"ResultsEmbargoEnd ({results_embargo_end_text}) must not be after"
          f" the ExpectedStartDateTime ({official_start_text}) of the official"
          f" ResultsReportingStage for Contest {contest_id}.",
          [element],
      )
4381
+
4382
+
4383
class ResultsReportingStagesMustHaveUniqueType(base.BaseRule):
  """Flags StageType values shared by several stages of one collection.

  Stages are grouped by the stripped text of their StageType child; stages
  whose StageType is rejected by `element_has_text` are left out. Every group
  with more than one stage yields one log entry listing those stages.
  """

  def elements(self):
    return ["ResultsReportingStageCollection"]

  def check(self, element):
    grouped_stages = collections.defaultdict(list)
    for reporting_stage in element.findall("ResultsReportingStage"):
      type_element = reporting_stage.find("StageType")
      if not element_has_text(type_element):
        continue
      grouped_stages[type_element.text.strip()].append(reporting_stage)

    duplicate_entries = [
        loggers.LogEntry(
            f"Duplicate ResultsReportingStage StageType '{stage_type}'"
            " found in the same ResultsReportingStageCollection.",
            same_type_stages,
        )
        for stage_type, same_type_stages in grouped_stages.items()
        if len(same_type_stages) > 1
    ]
    if duplicate_entries:
      raise loggers.ElectionError(duplicate_entries)
4409
+
4410
+
4236
4411
  class CandidateContestTypesAreCompatible(base.BaseRule):
4237
4412
  """CandidateContest Type values cannot have both a general and primary type."""
4238
4413
 
@@ -4425,6 +4600,27 @@ class SourceDirPathsAreUnique(base.BaseRule):
4425
4600
  raise loggers.ElectionError(error_log)
4426
4601
 
4427
4602
 
4603
class SqsQueueNameRequiresS3SourceDirPath(base.BaseRule):
  """If SqsQueueName is set, SourceDirPath must be set and be an s3 path."""

  def elements(self):
    return ["Feed"]

  def check(self, element):
    """Validates the SourceDirPath of a Feed that declares an SqsQueueName.

    Args:
      element: The Feed element under validation.

    Raises:
      loggers.ElectionError: If SqsQueueName has text but SourceDirPath is
        missing, empty, or does not start with "s3://" (case-insensitive).
    """
    sqs_queue_name = element.find("SqsQueueName")
    if not element_has_text(sqs_queue_name):
      return

    source_dir_path = element.find("SourceDirPath")
    has_s3_source_dir_path = element_has_text(
        source_dir_path
    ) and source_dir_path.text.strip().lower().startswith("s3://")
    if has_s3_source_dir_path:
      return

    # A Feed without a FeedId child must still produce this rule's error, not
    # an AttributeError raised while building the message.
    feed_id_element = element.find("FeedId")
    feed_id = feed_id_element.text if feed_id_element is not None else None
    raise loggers.ElectionError.from_message(
        "If SqsQueueName is set, SourceDirPath must also be set and must be"
        " an s3 path for feed {}.".format(feed_id),
        [element],
    )
4622
+
4623
+
4428
4624
  class ElectionEventDatesAreSequential(base.DateRule):
4429
4625
  """Dates in an ElectionEvent element should be sequential."""
4430
4626
 
@@ -4903,15 +5099,240 @@ class FeedElementsShouldHaveSubElementsBasedOnType(base.BaseRule):
4903
5099
  "ElectionEventCollection should exist for %s feed %s."
4904
5100
  % (feed_type, feed_id)
4905
5101
  )
4906
- if not element.find("ElectionEventCollection").findall(
4907
- "ElectionEvent"
4908
- ):
5102
+ if not element.find("ElectionEventCollection").findall("ElectionEvent"):
4909
5103
  raise loggers.ElectionError.from_message(
4910
5104
  "ElectionEventCollection should have at least one ElectionEvent"
4911
5105
  " for %s feed %s." % (feed_type, feed_id)
4912
5106
  )
4913
5107
 
4914
5108
 
5109
class NotEmptyUniqueDataSourceUris(base.BaseRule):
  """Checks that DataSource Uris are non-empty and globally unique.

  Every Uri child of every DataSource in the collection must have text, and
  no two different DataSource elements may share the same stripped Uri text.
  """

  def elements(self):
    return ["DataSourceCollection"]

  def check(self, element):
    """Collects empty-Uri and duplicate-Uri problems for one collection.

    Args:
      element: The DataSourceCollection element under validation.

    Raises:
      loggers.ElectionError: With one entry per empty Uri and one entry per
        Uri value that is used by more than one DataSource.
    """
    data_sources_by_uri = collections.defaultdict(set)
    error_log = []

    for data_source in element.findall("DataSource"):
      datasource_id = data_source.get("objectId")
      for uri_element in data_source.findall("Uri"):
        if not element_has_text(uri_element):
          error_log.append(
              loggers.LogEntry(
                  "DataSource {} has an empty Uri.".format(datasource_id),
                  [data_source],
              )
          )
          continue
        # A set keeps a DataSource that repeats its own Uri from being
        # reported as a duplicate of itself.
        data_sources_by_uri[uri_element.text.strip()].add(data_source)

    def _object_id(data_source):
      # A DataSource without objectId sorts and prints as "" so that a missing
      # attribute cannot raise TypeError in sorted() or str.join().
      return data_source.get("objectId") or ""

    for uri, data_sources in data_sources_by_uri.items():
      if len(data_sources) <= 1:
        continue
      sorted_data_sources = sorted(data_sources, key=_object_id)
      data_source_ids = [_object_id(ds) for ds in sorted_data_sources]
      error_log.append(
          loggers.LogEntry(
              "DataSource entities {} have duplicate Uri '{}'.".format(
                  ", ".join(data_source_ids), uri
              ),
              sorted_data_sources,
          )
      )

    if error_log:
      raise loggers.ElectionError(error_log)
5153
+
5154
+
5155
class UniqueDataSourceLanguages(base.BaseRule):
  """Checks that Uri elements have unique languages within a DataSource.

  Every Uri must carry a non-blank `language` attribute, and a language may
  be used by at most one Uri of the same DataSource.
  """

  def elements(self):
    return ["DataSourceCollection"]

  def check(self, element):
    """Collects missing and duplicate Uri languages for one collection.

    Args:
      element: The DataSourceCollection element under validation.

    Raises:
      loggers.ElectionError: With one entry per Uri that has no language and
        one entry per Uri that repeats a language already used in its
        DataSource. Each entry references the offending Uri element.
    """
    error_log = []

    for data_source in element.findall("DataSource"):
      data_source_id = data_source.get("objectId")
      seen_uri_languages = set()
      for uri_element in data_source.findall("Uri"):
        # Strip before testing for emptiness so that a whitespace-only
        # attribute is reported as missing instead of being tracked as "".
        language = (uri_element.get("language") or "").strip()
        if not language:
          error_log.append(
              loggers.LogEntry(
                  "DataSource {} has a Uri element without a language.".format(
                      data_source_id
                  ),
                  [uri_element],
              )
          )
          continue
        if language in seen_uri_languages:
          # Point at the repeated Uri itself, as the missing-language branch
          # does, rather than at the whole collection.
          error_log.append(
              loggers.LogEntry(
                  "DataSource {} has multiple Uri elements with the same"
                  " language '{}'.".format(data_source_id, language),
                  [uri_element],
              )
          )
        else:
          seen_uri_languages.add(language)

    if error_log:
      raise loggers.ElectionError(error_log)
5193
+
5194
+
5195
class UniqueDataSourceDisplayNames(base.BaseRule):
  """Checks that DataSource entities have globally unique DisplayNames.

  Every Text child of a DataSource's DisplayName must have text, and no two
  different DataSource elements may share the same stripped Text value.
  """

  def elements(self):
    return ["DataSourceCollection"]

  def check(self, element):
    """Collects empty and duplicate DisplayName texts for one collection.

    Args:
      element: The DataSourceCollection element under validation.

    Raises:
      loggers.ElectionError: With one entry per empty DisplayName Text and
        one entry per name that is used by more than one DataSource.
    """
    data_sources_by_name = collections.defaultdict(set)
    error_log = []

    for data_source in element.findall("DataSource"):
      data_source_id = data_source.get("objectId")
      display_name_element = data_source.find("DisplayName")
      if display_name_element is None:
        # Nothing to compare. Whether DisplayName is mandatory is not decided
        # here; this rule only looks at uniqueness and empty texts.
        continue
      for text_element in display_name_element.findall("Text"):
        if not element_has_text(text_element):
          error_log.append(
              loggers.LogEntry(
                  "DataSource {} has a DisplayName element without"
                  " text.".format(data_source_id),
                  [data_source],
              )
          )
          continue
        data_sources_by_name[text_element.text.strip()].add(data_source)

    def _object_id(data_source):
      # A DataSource without objectId sorts and prints as "" so that a missing
      # attribute cannot raise TypeError in sorted() or str.join().
      return data_source.get("objectId") or ""

    for name_text, data_sources in data_sources_by_name.items():
      if len(data_sources) <= 1:
        continue
      sorted_data_sources = sorted(data_sources, key=_object_id)
      datasource_ids = [_object_id(ds) for ds in sorted_data_sources]
      error_log.append(
          loggers.LogEntry(
              "DataSource entities {} have duplicate DisplayName '{}'.".format(
                  ", ".join(datasource_ids), name_text
              ),
              sorted_data_sources,
          )
      )

    if error_log:
      raise loggers.ElectionError(error_log)
5239
+
5240
+
5241
def _get_attribution_depth(element):
  """Returns the number of levels in the Attribution tree rooted at element.

  The root counts as level 1; only direct `Attribution` children are followed
  at each level. The tree is walked with an explicit stack.
  """
  deepest_level = 0
  pending = [(element, 1)]
  while pending:
    node, level = pending.pop()
    if level > deepest_level:
      deepest_level = level
    pending.extend(
        (nested, level + 1) for nested in node.findall("Attribution")
    )
  return deepest_level
5247
+
5248
+
5249
def _get_datasource_id(element):
  """Returns the stripped DataSourceId text of element, or "" if unusable.

  Only the first direct `DataSourceId` child is read; "" is returned when
  `element_has_text` rejects it.
  """
  id_element = element.find("DataSourceId")
  if not element_has_text(id_element):
    return ""
  return id_element.text.strip()
5253
+
5254
+
5255
class AttributionDepthLimit(base.BaseRule):
  """Limits each top-level Attribution of a stage to three levels of depth.

  Only Attribution elements that are direct children of the
  ResultsReportingStage are treated as roots; each root's depth is measured
  with `_get_attribution_depth`.
  """

  def elements(self):
    return ["ResultsReportingStage"]

  def check(self, element):
    too_deep_entries = []
    # findall() with a bare tag name only matches direct children.
    for top_level in element.findall("Attribution"):
      levels = _get_attribution_depth(top_level)
      if levels <= 3:
        continue
      root_source_id = _get_datasource_id(top_level)
      too_deep_entries.append(
          loggers.LogEntry(
              f"Attribution starting with DataSourceId '{root_source_id}'"
              f" has a depth of {levels}, exceeding the limit of 3.",
              [top_level],
          )
      )
    if too_deep_entries:
      raise loggers.ElectionError(too_deep_entries)
5277
+
5278
+
5279
def _canonicalize_cycle(cycle):
  """Rotates a cycle so that it starts at its smallest element.

  The relative order of the nodes is kept; when the smallest value occurs
  several times, its first occurrence becomes the start. An empty cycle
  yields an empty list.
  """
  if not cycle:
    return []
  # min() over indices returns the first index holding the smallest value.
  start = min(range(len(cycle)), key=cycle.__getitem__)
  return cycle[start:] + cycle[:start]
5286
+
5287
+
5288
class AttributionContainsNoCycles(base.TreeRule):
  """Checks that nested Attribution elements never form a DataSourceId cycle.

  A directed graph is built with one node per DataSourceId found on an
  Attribution and one edge from each Attribution's DataSourceId to the
  DataSourceId of each of its direct Attribution children. Every simple cycle
  in that graph is reported.
  """

  def check(self):
    attribution_graph = networkx.DiGraph()

    for parent in self.get_elements_by_class(
        self.election_tree, "Attribution"
    ):
      parent_source = _get_datasource_id(parent)
      if not parent_source:
        continue
      attribution_graph.add_node(parent_source)
      for nested in parent.findall("Attribution"):
        nested_source = _get_datasource_id(nested)
        if nested_source:
          attribution_graph.add_edge(parent_source, nested_source)

    # A node lies on a cycle when its strongly connected component has more
    # than one member, or when it is a lone node with an edge to itself.
    cyclic_nodes = set()
    for component in networkx.strongly_connected_components(attribution_graph):
      if len(component) > 1:
        cyclic_nodes.update(component)
      else:
        (only_node,) = component
        if attribution_graph.has_edge(only_node, only_node):
          cyclic_nodes.add(only_node)

    if not cyclic_nodes:
      return

    # Enumerate cycles only among nodes already known to be cyclic.
    cyclic_subgraph = attribution_graph.subgraph(sorted(cyclic_nodes))
    cycle_entries = []
    for raw_cycle in networkx.simple_cycles(cyclic_subgraph):
      canonical = _canonicalize_cycle(raw_cycle)
      # The first node is repeated at the end to show the loop closing.
      cycle_entries.append(
          loggers.LogEntry(
              f"Cycle detected in Attribution: {' -> '.join(canonical)} ->"
              f" {canonical[0]}"
          )
      )
    raise loggers.ElectionError(cycle_entries)
5334
+
5335
+
4915
5336
  class RuleSet(enum.Enum):
4916
5337
  """Names for sets of rules used to validate a particular feed type."""
4917
5338
 
@@ -4924,6 +5345,144 @@ class RuleSet(enum.Enum):
4924
5345
  VOTER_INFORMATION = 7
4925
5346
 
4926
5347
 
5348
class ValidateSpecialBallotSelectionCountedInTotal(base.BaseRule):
  """Enforces the CountedInTotal constraints of special ballot selections.

  BlankBallotSelection, NullBallotSelection and NoneOfTheAboveBallotSelection
  must carry a CountedInTotal child with text, while AggregateBallotSelection
  must not contain a CountedInTotal child at all.
  """

  def elements(self):
    return [
        "BlankBallotSelection",
        "NullBallotSelection",
        "NoneOfTheAboveBallotSelection",
        "AggregateBallotSelection",
    ]

  def check(self, element):
    selection_tag = element.tag
    counted_in_total_element = element.find("CountedInTotal")

    if selection_tag == "AggregateBallotSelection":
      # Mere presence of the child is an error here, even without text.
      if counted_in_total_element is not None:
        raise loggers.ElectionError.from_message(
            "AggregateBallotSelection must not have CountedInTotal set.",
            [element],
        )
      return

    requires_explicit_value = selection_tag in (
        "BlankBallotSelection",
        "NullBallotSelection",
        "NoneOfTheAboveBallotSelection",
    )
    if requires_explicit_value and not element_has_text(
        counted_in_total_element
    ):
      raise loggers.ElectionError.from_message(
          f"{selection_tag} must have an explicit value for CountedInTotal.",
          [element],
      )
5382
+
5383
+
5384
class ValidateIncludeInAggregationBallotSelections(base.BaseRule):
  """Validates BallotSelections with IncludedInAggregation.

  Checks that the sum of all vote counts for the BallotSelections marked as
  IncludedInAggregation is not greater than the vote count of the
  AggregateBallotSelection on the same Contest, per vote count type and
  GpUnit. Also requires that if IncludedInAggregation is set on any
  BallotSelection then the AggregateBallotSelection is present on the Contest.
  """

  def elements(self):
    return ["CandidateContest", "PartyContest"]

  def _gather_vote_counts(self, element):
    """Sums the vote counts of a selection per (type, GpUnit id).

    Args:
      element: A selection element that may hold a VoteCountsCollection.

    Returns:
      A defaultdict(float) keyed by (vote count type, GpUnitId text). The type
      is resolved with `_get_type_or_other_type`; a missing GpUnitId is "".
      VoteCounts entries without a Count text are skipped.
    """
    count_by_type_and_gp_unit = collections.defaultdict(float)
    vote_counts_collection = element.find("VoteCountsCollection")
    if vote_counts_collection is None:
      return count_by_type_and_gp_unit

    for vote_counts in vote_counts_collection.findall("VoteCounts"):
      count_element = vote_counts.find("Count")
      if not element_has_text(count_element):
        continue
      count = float(count_element.text)

      vote_count_type = _get_type_or_other_type(vote_counts)
      gp_unit_id_element = vote_counts.find("GpUnitId")
      gp_unit_id = (
          gp_unit_id_element.text.strip()
          if element_has_text(gp_unit_id_element)
          else ""
      )
      count_by_type_and_gp_unit[(vote_count_type, gp_unit_id)] += count

    return count_by_type_and_gp_unit

  def check(self, element):
    """Validates the aggregated selections of one contest.

    Args:
      element: The CandidateContest or PartyContest element under validation.

    Raises:
      loggers.ElectionError: If selections are marked IncludedInAggregation
        but no AggregateBallotSelection child exists, or if for some (type,
        GpUnit) pair the marked selections sum to more than the aggregate.
    """
    contest_id = element.get("objectId")

    candidate_selections = self.get_elements_by_class(
        element, "CandidateSelection"
    )
    party_selections = self.get_elements_by_class(element, "PartySelection")
    all_selections = candidate_selections + party_selections

    included_selections = []
    for selection in all_selections:
      included_in_aggregation_element = selection.find("IncludedInAggregation")
      # Strip before matching so that surrounding whitespace or newlines in
      # the element text do not hide a true value.
      if (
          element_has_text(included_in_aggregation_element)
          and included_in_aggregation_element.text.strip() in _XML_TRUE_VALUES
      ):
        included_selections.append(selection)
    if not included_selections:
      return

    aggregate_selection = element.find("AggregateBallotSelection")
    if aggregate_selection is None:
      raise loggers.ElectionError.from_message(
          f"Contest {contest_id} has selections marked as IncludedInAggregation"
          " but is missing AggregateBallotSelection.",
          [element],
      )

    aggregate_count_by_type_and_gp_unit = self._gather_vote_counts(
        aggregate_selection
    )

    selections_count_sum_by_type_and_gp_unit = collections.defaultdict(float)
    for selection in included_selections:
      for type_and_gp_unit, count in self._gather_vote_counts(
          selection
      ).items():
        selections_count_sum_by_type_and_gp_unit[type_and_gp_unit] += count

    error_log = []
    for (
        type_and_gp_unit,
        total_count,
    ) in selections_count_sum_by_type_and_gp_unit.items():
      # .get() reads the aggregate without inserting keys into the defaultdict;
      # a pair the aggregate does not report counts as 0.
      aggregate_count = aggregate_count_by_type_and_gp_unit.get(
          type_and_gp_unit, 0.0
      )
      if total_count > aggregate_count:
        resolved_type, gp_unit_id = type_and_gp_unit
        error_log.append(
            loggers.LogEntry(
                f"In Contest {contest_id}, the sum of vote counts"
                f" ({total_count}) for selections marked as"
                " IncludedInAggregation exceeds the vote count"
                f" ({aggregate_count}) for the AggregateBallotSelection for"
                f" vote count type='{resolved_type}' (GpUnit: '{gp_unit_id}').",
                [element],
            )
        )

    if error_log:
      raise loggers.ElectionError(error_log)
5484
+
5485
+
4927
5486
  # To add new rules, create a new class, inherit the base rule,
4928
5487
  # and add it to the correct rule list.
4929
5488
  COMMON_RULES = (
@@ -4984,6 +5543,8 @@ COMMON_RULES = (
4984
5543
 
4985
5544
  ELECTION_RULES = COMMON_RULES + (
4986
5545
  # go/keep-sorted start
5546
+ AttributionContainsNoCycles,
5547
+ AttributionDepthLimit,
4987
5548
  BallotTitle,
4988
5549
  CandidateContestTypesAreCompatible,
4989
5550
  CandidatesReferencedInRelatedContests,
@@ -5016,17 +5577,25 @@ ELECTION_RULES = COMMON_RULES + (
5016
5577
  MissingPartyNameTranslation,
5017
5578
  MultipleCandidatesPointToTheSamePersonInTheSameContest,
5018
5579
  MultipleInternationalizedTextWithSameLanguageCode,
5580
+ NotEmptyUniqueDataSourceUris,
5019
5581
  OfficeHasjurisdictionSameAsElectoralDistrict,
5020
- PartiesHaveValidColors,
5021
5582
  PartisanPrimary,
5022
5583
  PartisanPrimaryHeuristic,
5023
5584
  PercentSum,
5024
5585
  ProperBallotSelection,
5586
+ ResultsReportingStagesMustHaveUniqueType,
5025
5587
  SelfDeclaredCandidateMethod,
5026
5588
  SingularPartySelection,
5027
5589
  SubsequentContestIdIsValidRelatedContest,
5590
+ UniqueDataSourceDisplayNames,
5591
+ UniqueDataSourceLanguages,
5028
5592
  ValidateDuplicateColors,
5593
+ ValidateIncludeInAggregationBallotSelections,
5029
5594
  ValidateInfoUriAnnotation,
5595
+ ValidatePollsCloseDatetimes,
5596
+ ValidateResultsEmbargoEnd,
5597
+ ValidateResultsExpected,
5598
+ ValidateSpecialBallotSelectionCountedInTotal,
5030
5599
  VoteCountTypesCoherency,
5031
5600
  VoteCountValidSeatsDeltaTypes,
5032
5601
  WinnerCountLimit,
@@ -5077,6 +5646,7 @@ METADATA_RULES = (
5077
5646
  Schema,
5078
5647
  SourceDirPathMustBeSetAfterInitialDeliveryDate,
5079
5648
  SourceDirPathsAreUnique,
5649
+ SqsQueueNameRequiresS3SourceDirPath,
5080
5650
  UniqueLabel,
5081
5651
  # go/keep-sorted end
5082
5652
  )
@@ -5,4 +5,4 @@ No dependencies should be added to this module.
5
5
  See https://packaging.python.org/guides/single-sourcing-package-version/
6
6
  """
7
7
 
8
- __version__ = '1.60.dev5'
8
+ __version__ = '1.61.dev1'