civics-cdf-validator 1.60.dev6__tar.gz → 1.61.dev1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {civics_cdf_validator-1.60.dev6/civics_cdf_validator.egg-info → civics_cdf_validator-1.61.dev1}/PKG-INFO +1 -1
- {civics_cdf_validator-1.60.dev6 → civics_cdf_validator-1.61.dev1}/base.py +1 -1
- {civics_cdf_validator-1.60.dev6 → civics_cdf_validator-1.61.dev1/civics_cdf_validator.egg-info}/PKG-INFO +1 -1
- {civics_cdf_validator-1.60.dev6 → civics_cdf_validator-1.61.dev1}/rules.py +620 -13
- {civics_cdf_validator-1.60.dev6 → civics_cdf_validator-1.61.dev1}/version.py +1 -1
- {civics_cdf_validator-1.60.dev6 → civics_cdf_validator-1.61.dev1}/CONTRIBUTING.md +0 -0
- {civics_cdf_validator-1.60.dev6 → civics_cdf_validator-1.61.dev1}/LICENSE-2.0.txt +0 -0
- {civics_cdf_validator-1.60.dev6 → civics_cdf_validator-1.61.dev1}/MANIFEST.in +0 -0
- {civics_cdf_validator-1.60.dev6 → civics_cdf_validator-1.61.dev1}/README.md +0 -0
- {civics_cdf_validator-1.60.dev6 → civics_cdf_validator-1.61.dev1}/__init__.py +0 -0
- {civics_cdf_validator-1.60.dev6 → civics_cdf_validator-1.61.dev1}/civics_cdf_validator.egg-info/SOURCES.txt +0 -0
- {civics_cdf_validator-1.60.dev6 → civics_cdf_validator-1.61.dev1}/civics_cdf_validator.egg-info/dependency_links.txt +0 -0
- {civics_cdf_validator-1.60.dev6 → civics_cdf_validator-1.61.dev1}/civics_cdf_validator.egg-info/entry_points.txt +0 -0
- {civics_cdf_validator-1.60.dev6 → civics_cdf_validator-1.61.dev1}/civics_cdf_validator.egg-info/requires.txt +0 -0
- {civics_cdf_validator-1.60.dev6 → civics_cdf_validator-1.61.dev1}/civics_cdf_validator.egg-info/top_level.txt +0 -0
- {civics_cdf_validator-1.60.dev6 → civics_cdf_validator-1.61.dev1}/gpunit_rules.py +0 -0
- {civics_cdf_validator-1.60.dev6 → civics_cdf_validator-1.61.dev1}/loggers.py +0 -0
- {civics_cdf_validator-1.60.dev6 → civics_cdf_validator-1.61.dev1}/office_utils.py +0 -0
- {civics_cdf_validator-1.60.dev6 → civics_cdf_validator-1.61.dev1}/setup.cfg +0 -0
- {civics_cdf_validator-1.60.dev6 → civics_cdf_validator-1.61.dev1}/setup.py +0 -0
- {civics_cdf_validator-1.60.dev6 → civics_cdf_validator-1.61.dev1}/stats.py +0 -0
- {civics_cdf_validator-1.60.dev6 → civics_cdf_validator-1.61.dev1}/validator.py +0 -0
|
@@ -225,7 +225,7 @@ class DateRule(BaseRule):
|
|
|
225
225
|
|
|
226
226
|
def is_date_in_past(self, date):
|
|
227
227
|
"""Check if a date is in the past."""
|
|
228
|
-
today = datetime.datetime.
|
|
228
|
+
today = datetime.datetime.now(datetime.timezone.utc)
|
|
229
229
|
today_partial_date = PartialDate(today.year, today.month, today.day)
|
|
230
230
|
delta = date.is_older_than(today_partial_date)
|
|
231
231
|
return delta > 0
|
|
@@ -32,6 +32,8 @@ import networkx
|
|
|
32
32
|
import pycountry
|
|
33
33
|
from six.moves.urllib.parse import urlparse
|
|
34
34
|
|
|
35
|
+
|
|
36
|
+
_XML_TRUE_VALUES = frozenset(["true", "1"])
|
|
35
37
|
_PARTY_LEADERSHIP_TYPES = ["party-leader-id", "party-chair-id"]
|
|
36
38
|
_INDEPENDENT_PARTY_NAMES = frozenset(["independent", "nonpartisan"])
|
|
37
39
|
_IDREF_TYPES = frozenset(["xs:IDREF", "xs:IDREFS"])
|
|
@@ -58,6 +60,7 @@ _INTERNATIONALIZED_TEXT_ELEMENTS_WITH_ONLY_ONE_TEXT_PER_LANGUAGE = [
|
|
|
58
60
|
"BallotTitle",
|
|
59
61
|
"ConStatement",
|
|
60
62
|
"Directions",
|
|
63
|
+
"DisplayName",
|
|
61
64
|
"EffectOfAbstain",
|
|
62
65
|
"FullName",
|
|
63
66
|
"FullText",
|
|
@@ -285,6 +288,20 @@ def country_code_is_valid(country_code):
|
|
|
285
288
|
)
|
|
286
289
|
|
|
287
290
|
|
|
291
|
+
def _get_type_or_other_type(element):
  """Returns the effective type of an element with Type/OtherType children.

  The stripped text of the "Type" child is returned unless it is exactly
  "other", in which case the stripped text of the "OtherType" child is
  returned instead. A child for which element_has_text() is false contributes
  an empty string.

  Args:
    element: An XML element that may have "Type" and "OtherType" children.

  Returns:
    The resolved type string; may be empty.
  """

  def _stripped_child_text(tag):
    child = element.find(tag)
    if element_has_text(child):
      return child.text.strip()
    return ""

  declared_type = _stripped_child_text("Type")
  if declared_type != "other":
    return declared_type
  return _stripped_child_text("OtherType")
|
|
303
|
+
|
|
304
|
+
|
|
288
305
|
class Schema(base.TreeRule):
|
|
289
306
|
"""Checks if election file validates against the provided schema."""
|
|
290
307
|
|
|
@@ -2139,7 +2156,7 @@ class VoteCountTypesCoherency(base.BaseRule):
|
|
|
2139
2156
|
"seats-total",
|
|
2140
2157
|
"seats-delta",
|
|
2141
2158
|
"seats-delta-mandate",
|
|
2142
|
-
"seats-delta-institutional"
|
|
2159
|
+
"seats-delta-institutional",
|
|
2143
2160
|
}
|
|
2144
2161
|
# Ibid.
|
|
2145
2162
|
CAND_VC_TYPES = {"candidate-votes"}
|
|
@@ -2526,7 +2543,8 @@ class OfficeHasjurisdictionSameAsElectoralDistrict(base.BaseRule):
|
|
|
2526
2543
|
|
|
2527
2544
|
def check(self, element):
|
|
2528
2545
|
jurisdiction_values = get_entity_info_for_value_type(
|
|
2529
|
-
element, "jurisdiction-id"
|
|
2546
|
+
element, "jurisdiction-id"
|
|
2547
|
+
)
|
|
2530
2548
|
jurisdiction_values = [
|
|
2531
2549
|
j_id.strip() for j_id in jurisdiction_values if j_id.strip()
|
|
2532
2550
|
]
|
|
@@ -3657,8 +3675,7 @@ class MissingFieldsInfo(base.MissingFieldRule):
|
|
|
3657
3675
|
return 0
|
|
3658
3676
|
|
|
3659
3677
|
def element_field_mapping(self):
|
|
3660
|
-
return {
|
|
3661
|
-
}
|
|
3678
|
+
return {}
|
|
3662
3679
|
|
|
3663
3680
|
|
|
3664
3681
|
class PartySpanMultipleCountries(base.BaseRule):
|
|
@@ -3719,9 +3736,7 @@ class NonExecutiveOfficeShouldHaveGovernmentBody(base.BaseRule):
|
|
|
3719
3736
|
officeholder_tenure_collection_element = self.get_elements_by_class(
|
|
3720
3737
|
election_tree, "OfficeHolderTenureCollection"
|
|
3721
3738
|
)
|
|
3722
|
-
role_element = self.get_elements_by_class(
|
|
3723
|
-
election_tree, "Role"
|
|
3724
|
-
)
|
|
3739
|
+
role_element = self.get_elements_by_class(election_tree, "Role")
|
|
3725
3740
|
if officeholder_tenure_collection_element or role_element:
|
|
3726
3741
|
self.is_post_office_split_feed = True
|
|
3727
3742
|
|
|
@@ -3747,9 +3762,7 @@ class ExecutiveOfficeShouldNotHaveGovernmentBody(base.BaseRule):
|
|
|
3747
3762
|
officeholder_tenure_collection_element = self.get_elements_by_class(
|
|
3748
3763
|
election_tree, "OfficeHolderTenureCollection"
|
|
3749
3764
|
)
|
|
3750
|
-
role_element = self.get_elements_by_class(
|
|
3751
|
-
election_tree, "Role"
|
|
3752
|
-
)
|
|
3765
|
+
role_element = self.get_elements_by_class(election_tree, "Role")
|
|
3753
3766
|
if officeholder_tenure_collection_element or role_element:
|
|
3754
3767
|
self.is_post_office_split_feed = True
|
|
3755
3768
|
|
|
@@ -4197,6 +4210,204 @@ class ContestStartDateContainsCorrespondingEndDate(base.DateRule):
|
|
|
4197
4210
|
)
|
|
4198
4211
|
|
|
4199
4212
|
|
|
4213
|
+
class ValidatePollsCloseDatetimes(base.BaseRule):
  """Checks that LatestPollsClose is not before EarliestPollsClose.

  The rule is a no-op for a Contest unless both EarliestPollsClose and
  LatestPollsClose carry text.
  """

  def elements(self):
    return ["Contest"]

  def check(self, element):
    """Validates the ordering of the two polls-close datetimes of a Contest.

    Args:
      element: The Contest element being checked.

    Raises:
      loggers.ElectionError: If either value cannot be parsed by
        datetime.datetime.fromisoformat, if the two values cannot be compared
        with each other, or if LatestPollsClose is earlier than
        EarliestPollsClose.
    """
    earliest_polls_close_element = element.find("EarliestPollsClose")
    latest_polls_close_element = element.find("LatestPollsClose")
    if not (
        element_has_text(earliest_polls_close_element)
        and element_has_text(latest_polls_close_element)
    ):
      return

    contest_id = element.get("objectId")
    earliest_polls_close_text = earliest_polls_close_element.text.strip()
    latest_polls_close_text = latest_polls_close_element.text.strip()

    # Only the parsing lives in this try block so that the ordering error
    # raised further down can never be mistaken for a format problem.
    try:
      earliest_polls_close = datetime.datetime.fromisoformat(
          earliest_polls_close_text
      )
      latest_polls_close = datetime.datetime.fromisoformat(
          latest_polls_close_text
      )
    except ValueError as e:
      raise loggers.ElectionError.from_message(
          "Invalid PollsClose datetime format in Contest"
          f" {contest_id}: {e}",
          [element],
      )

    # Ordering an offset-aware datetime against a naive one raises TypeError;
    # report it as a feed error instead of letting the validator crash.
    try:
      latest_is_before_earliest = latest_polls_close < earliest_polls_close
    except TypeError as e:
      raise loggers.ElectionError.from_message(
          "EarliestPollsClose and LatestPollsClose cannot be compared in"
          f" Contest {contest_id}; both must either include or omit a UTC"
          f" offset: {e}",
          [element],
      )

    if latest_is_before_earliest:
      raise loggers.ElectionError.from_message(
          f"LatestPollsClose ({latest_polls_close_text}) must not be before"
          f" EarliestPollsClose ({earliest_polls_close_text}) for Contest"
          f" {contest_id}.",
          [element],
      )
|
|
4252
|
+
|
|
4253
|
+
|
|
4254
|
+
class ValidateResultsExpected(base.BaseRule):
  """Checks that ResultsExpected is not before the first ResultsReportingStage.

  The ResultsExpected datetime must not be before the ExpectedStartDateTime
  of the earliest ResultsReportingStage excluding the no-results stage.
  """

  def elements(self):
    return ["Contest"]

  def check(self, element):
    """Validates ResultsExpected against the Contest's reporting stages.

    The rule is a no-op when ResultsExpected has no text, when the Contest has
    no ResultsReportingStageCollection, or when no stage other than
    "no-results" carries an ExpectedStartDateTime.

    Args:
      element: The Contest element being checked.

    Raises:
      loggers.ElectionError: If a datetime cannot be parsed by
        datetime.datetime.fromisoformat, if the datetimes cannot be compared
        with each other, or if ResultsExpected is earlier than the earliest
        stage start.
    """
    results_expected_element = element.find("ResultsExpected")
    if not element_has_text(results_expected_element):
      return

    contest_id = element.get("objectId")
    results_expected_text = results_expected_element.text.strip()
    try:
      results_expected = datetime.datetime.fromisoformat(results_expected_text)
    except ValueError as e:
      raise loggers.ElectionError.from_message(
          "Invalid ResultsExpected datetime format in Contest"
          f" {contest_id}: {e}",
          [element],
      )

    stage_collection = element.find("ResultsReportingStageCollection")
    if stage_collection is None:
      return

    # (parsed start, original text) for every stage that is not "no-results".
    stage_starts = []
    for stage in stage_collection.findall("ResultsReportingStage"):
      stage_type_element = stage.find("StageType")
      # Resolve the type once; a stage without a usable StageType is still
      # checked, and must not break the error message built below.
      stage_type = (
          stage_type_element.text.strip()
          if element_has_text(stage_type_element)
          else ""
      )
      if stage_type == "no-results":
        continue

      start_element = stage.find("ExpectedStartDateTime")
      if not element_has_text(start_element):
        continue

      start_text = start_element.text.strip()
      try:
        start = datetime.datetime.fromisoformat(start_text)
      except ValueError as e:
        raise loggers.ElectionError.from_message(
            "Invalid ExpectedStartDateTime datetime format for the"
            f" '{stage_type}' ResultsReportingStage in"
            f" Contest {contest_id}: {e}",
            [element],
        )
      stage_starts.append((start, start_text))

    if not stage_starts:
      return

    # Ordering offset-aware against naive datetimes raises TypeError; report
    # it as a feed error instead of letting the validator crash. min() keeps
    # the first of several equal starts.
    try:
      earliest_start, earliest_start_text = min(
          stage_starts, key=lambda start_and_text: start_and_text[0]
      )
      results_too_early = results_expected < earliest_start
    except TypeError as e:
      raise loggers.ElectionError.from_message(
          "ResultsExpected and ExpectedStartDateTime values cannot be compared"
          f" in Contest {contest_id}; all must either include or omit a UTC"
          f" offset: {e}",
          [element],
      )

    if results_too_early:
      raise loggers.ElectionError.from_message(
          f"ResultsExpected ({results_expected_text}) must not be before the"
          f" ExpectedStartDateTime ({earliest_start_text}) of the earliest"
          f" ResultsReportingStage for Contest {contest_id}.",
          [element],
      )
|
|
4319
|
+
|
|
4320
|
+
|
|
4321
|
+
class ValidateResultsEmbargoEnd(base.BaseRule):
  """Checks that ResultsEmbargoEnd is not after the official ResultsReportingStage start."""

  def elements(self):
    return ["Contest"]

  def check(self, element):
    """Validates ResultsEmbargoEnd against the "official" reporting stage.

    Only the first stage whose StageType is "official" and that carries an
    ExpectedStartDateTime is considered. The rule is a no-op when
    ResultsEmbargoEnd has no text, when there is no
    ResultsReportingStageCollection, or when no such stage exists.

    Args:
      element: The Contest element being checked.

    Raises:
      loggers.ElectionError: If a datetime cannot be parsed by
        datetime.datetime.fromisoformat, if the two datetimes cannot be
        compared with each other, or if the official stage starts before
        ResultsEmbargoEnd.
    """
    results_embargo_end_element = element.find("ResultsEmbargoEnd")
    if not element_has_text(results_embargo_end_element):
      return

    contest_id = element.get("objectId")
    results_embargo_end_text = results_embargo_end_element.text.strip()
    try:
      results_embargo_end = datetime.datetime.fromisoformat(
          results_embargo_end_text
      )
    except ValueError as e:
      raise loggers.ElectionError.from_message(
          "Invalid ResultsEmbargoEnd datetime format in Contest"
          f" {contest_id}: {e}",
          [element],
      )

    stage_collection = element.find("ResultsReportingStageCollection")
    if stage_collection is None:
      return

    official_start = None
    official_start_text = None
    for stage in stage_collection.findall("ResultsReportingStage"):
      stage_type_element = stage.find("StageType")
      if not (
          element_has_text(stage_type_element)
          and stage_type_element.text.strip() == "official"
      ):
        continue

      start_element = stage.find("ExpectedStartDateTime")
      if not element_has_text(start_element):
        continue

      start_text = start_element.text.strip()
      try:
        official_start = datetime.datetime.fromisoformat(start_text)
      except ValueError as e:
        raise loggers.ElectionError.from_message(
            "Invalid ExpectedStartDateTime datetime format for the"
            " 'official' ResultsReportingStage in Contest"
            f" {contest_id}: {e}",
            [element],
        )
      official_start_text = start_text
      break

    if official_start is None:
      return

    # Ordering an offset-aware datetime against a naive one raises TypeError;
    # report it as a feed error instead of letting the validator crash.
    try:
      embargo_ends_too_late = official_start < results_embargo_end
    except TypeError as e:
      raise loggers.ElectionError.from_message(
          "ResultsEmbargoEnd and the 'official' ExpectedStartDateTime cannot"
          f" be compared in Contest {contest_id}; both must either include or"
          f" omit a UTC offset: {e}",
          [element],
      )

    if embargo_ends_too_late:
      raise loggers.ElectionError.from_message(
          f"ResultsEmbargoEnd ({results_embargo_end_text}) must not be after"
          f" the ExpectedStartDateTime ({official_start_text}) of the official"
          f" ResultsReportingStage for Contest {contest_id}.",
          [element],
      )
|
|
4381
|
+
|
|
4382
|
+
|
|
4383
|
+
class ResultsReportingStagesMustHaveUniqueType(base.BaseRule):
  """Checks that StageType values are unique within a stage collection.

  Each ResultsReportingStageCollection is inspected on its own; stages whose
  StageType has no text are ignored.
  """

  def elements(self):
    return ["ResultsReportingStageCollection"]

  def check(self, element):
    """Raises an ElectionError with one entry per repeated StageType."""
    grouped_stages = collections.defaultdict(list)
    for stage in element.findall("ResultsReportingStage"):
      type_element = stage.find("StageType")
      if not element_has_text(type_element):
        continue
      grouped_stages[type_element.text.strip()].append(stage)

    duplicates = [
        loggers.LogEntry(
            f"Duplicate ResultsReportingStage StageType '{stage_type}'"
            " found in the same ResultsReportingStageCollection.",
            stages,
        )
        for stage_type, stages in grouped_stages.items()
        if len(stages) > 1
    ]
    if duplicates:
      raise loggers.ElectionError(duplicates)
|
|
4409
|
+
|
|
4410
|
+
|
|
4200
4411
|
class CandidateContestTypesAreCompatible(base.BaseRule):
|
|
4201
4412
|
"""CandidateContest Type values cannot have both a general and primary type."""
|
|
4202
4413
|
|
|
@@ -4389,6 +4600,27 @@ class SourceDirPathsAreUnique(base.BaseRule):
|
|
|
4389
4600
|
raise loggers.ElectionError(error_log)
|
|
4390
4601
|
|
|
4391
4602
|
|
|
4603
|
+
class SqsQueueNameRequiresS3SourceDirPath(base.BaseRule):
  """If SqsQueueName is set, SourceDirPath must also be set and must be an s3 path."""

  def elements(self):
    return ["Feed"]

  def check(self, element):
    """Validates that a Feed with an SqsQueueName has an s3 SourceDirPath.

    Args:
      element: The Feed element being checked.

    Raises:
      loggers.ElectionError: If SqsQueueName has text but SourceDirPath has no
        text or does not start with "s3://" (compared case-insensitively
        after stripping whitespace).
    """
    if not element_has_text(element.find("SqsQueueName")):
      return

    source_dir_path = element.find("SourceDirPath")
    if element_has_text(
        source_dir_path
    ) and source_dir_path.text.strip().lower().startswith("s3://"):
      return

    # A missing FeedId must not turn this report into an AttributeError; it is
    # rendered the same way as a FeedId element without text.
    feed_id_element = element.find("FeedId")
    feed_id = feed_id_element.text if feed_id_element is not None else None
    raise loggers.ElectionError.from_message(
        "If SqsQueueName is set, SourceDirPath must also be set and must be"
        " an s3 path for feed {}.".format(feed_id),
        [element],
    )
|
|
4622
|
+
|
|
4623
|
+
|
|
4392
4624
|
class ElectionEventDatesAreSequential(base.DateRule):
|
|
4393
4625
|
"""Dates in an ElectionEvent element should be sequential."""
|
|
4394
4626
|
|
|
@@ -4867,15 +5099,240 @@ class FeedElementsShouldHaveSubElementsBasedOnType(base.BaseRule):
|
|
|
4867
5099
|
"ElectionEventCollection should exist for %s feed %s."
|
|
4868
5100
|
% (feed_type, feed_id)
|
|
4869
5101
|
)
|
|
4870
|
-
if not element.find("ElectionEventCollection").findall(
|
|
4871
|
-
"ElectionEvent"
|
|
4872
|
-
):
|
|
5102
|
+
if not element.find("ElectionEventCollection").findall("ElectionEvent"):
|
|
4873
5103
|
raise loggers.ElectionError.from_message(
|
|
4874
5104
|
"ElectionEventCollection should have at least one ElectionEvent"
|
|
4875
5105
|
" for %s feed %s." % (feed_type, feed_id)
|
|
4876
5106
|
)
|
|
4877
5107
|
|
|
4878
5108
|
|
|
5109
|
+
class NotEmptyUniqueDataSourceUris(base.BaseRule):
  """Checks that DataSource Uris are non-empty and not shared between entities.

  Uniqueness is evaluated across all DataSource children of a single
  DataSourceCollection, on the stripped Uri text.
  """

  def elements(self):
    return ["DataSourceCollection"]

  def check(self, element):
    """Raises an ElectionError listing empty and duplicated Uris."""
    # Maps a stripped Uri to the set of DataSource elements that declare it.
    sources_by_uri = collections.defaultdict(set)
    problems = []

    for source in element.findall("DataSource"):
      source_id = source.get("objectId")
      for uri_node in source.findall("Uri"):
        if element_has_text(uri_node):
          sources_by_uri[uri_node.text.strip()].add(source)
        else:
          problems.append(
              loggers.LogEntry(
                  "DataSource {} has an empty Uri.".format(source_id),
                  [source],
              )
          )

    for uri, sharing_sources in sources_by_uri.items():
      if len(sharing_sources) > 1:
        # Sorting by objectId keeps the reported order stable.
        ordered_sources = sorted(
            sharing_sources, key=lambda ds: ds.get("objectId")
        )
        joined_ids = ", ".join(ds.get("objectId") for ds in ordered_sources)
        problems.append(
            loggers.LogEntry(
                "DataSource entities {} have duplicate Uri '{}'.".format(
                    joined_ids, uri
                ),
                ordered_sources,
            )
        )

    if problems:
      raise loggers.ElectionError(problems)
|
|
5153
|
+
|
|
5154
|
+
|
|
5155
|
+
class UniqueDataSourceLanguages(base.BaseRule):
  """Checks that Uri elements have unique languages within a DataSource."""

  def elements(self):
    return ["DataSourceCollection"]

  def check(self, element):
    """Validates the language attribute of every Uri in the collection.

    Args:
      element: The DataSourceCollection element being checked.

    Raises:
      loggers.ElectionError: With one entry per Uri that has no (or a blank)
        language attribute and one entry per Uri that repeats a language
        already used by an earlier Uri of the same DataSource.
    """
    error_log = []

    for data_source in element.findall("DataSource"):
      data_source_id = data_source.get("objectId")
      seen_uri_languages = set()
      for uri_element in data_source.findall("Uri"):
        # Strip before testing for emptiness so that a whitespace-only
        # attribute is reported as missing rather than tracked as "".
        language = (uri_element.get("language") or "").strip()
        if not language:
          error_log.append(
              loggers.LogEntry(
                  "DataSource {} has a Uri element without a language.".format(
                      data_source_id
                  ),
                  [uri_element],
              )
          )
          continue
        if language in seen_uri_languages:
          # Attach the repeated Uri itself, not the whole collection, so the
          # entry points at the offending element.
          error_log.append(
              loggers.LogEntry(
                  "DataSource {} has multiple Uri elements with the same"
                  " language '{}'.".format(data_source_id, language),
                  [uri_element],
              )
          )
        else:
          seen_uri_languages.add(language)

    if error_log:
      raise loggers.ElectionError(error_log)
|
|
5193
|
+
|
|
5194
|
+
|
|
5195
|
+
class UniqueDataSourceDisplayNames(base.BaseRule):
  """Checks that DataSource entities have globally unique DisplayNames."""

  def elements(self):
    return ["DataSourceCollection"]

  def check(self, element):
    """Validates DisplayName texts across the DataSources of a collection.

    Args:
      element: The DataSourceCollection element being checked.

    Raises:
      loggers.ElectionError: With one entry per DisplayName Text that has no
        text and one entry per stripped text shared by several DataSource
        elements.
    """
    # Maps a stripped DisplayName text to the DataSource elements using it.
    data_sources_by_name = collections.defaultdict(set)
    error_log = []

    for data_source in element.findall("DataSource"):
      data_source_id = data_source.get("objectId")
      display_name_element = data_source.find("DisplayName")
      if display_name_element is None:
        # No DisplayName means nothing to compare for uniqueness; skip it
        # rather than crash on the missing element.
        continue
      for text_element in display_name_element.findall("Text"):
        if not element_has_text(text_element):
          error_log.append(
              loggers.LogEntry(
                  "DataSource {} has a DisplayName element without"
                  " text.".format(data_source_id),
                  [data_source],
              )
          )
          continue
        name_text = text_element.text.strip()
        data_sources_by_name[name_text].add(data_source)

    for name_text, data_sources in data_sources_by_name.items():
      if len(data_sources) <= 1:
        continue
      # Sorting by objectId keeps the reported order stable.
      sorted_data_sources = sorted(
          data_sources, key=lambda ds: ds.get("objectId")
      )
      datasource_ids = [ds.get("objectId") for ds in sorted_data_sources]
      error_log.append(
          loggers.LogEntry(
              "DataSource entities {} have duplicate DisplayName '{}'.".format(
                  ", ".join(datasource_ids), name_text
              ),
              sorted_data_sources,
          )
      )

    if error_log:
      raise loggers.ElectionError(error_log)
|
|
5239
|
+
|
|
5240
|
+
|
|
5241
|
+
def _get_attribution_depth(element):
|
|
5242
|
+
"""Helper to recursively get the maximum depth of an Attribution tree."""
|
|
5243
|
+
children = element.findall("Attribution")
|
|
5244
|
+
if not children:
|
|
5245
|
+
return 1
|
|
5246
|
+
return 1 + max(_get_attribution_depth(child) for child in children)
|
|
5247
|
+
|
|
5248
|
+
|
|
5249
|
+
def _get_datasource_id(element):
  """Returns the stripped DataSourceId child text, or "" when it has none."""
  id_node = element.find("DataSourceId")
  if not element_has_text(id_node):
    return ""
  return id_node.text.strip()
|
|
5253
|
+
|
|
5254
|
+
|
|
5255
|
+
class AttributionDepthLimit(base.BaseRule):
  """Checks that no top-level Attribution of a stage nests deeper than three levels."""

  def elements(self):
    return ["ResultsReportingStage"]

  def check(self, element):
    """Raises an ElectionError with one entry per over-deep Attribution."""
    violations = []
    # findall() only yields direct children, i.e. the top-level Attributions.
    for top_level in element.findall("Attribution"):
      levels = _get_attribution_depth(top_level)
      if levels <= 3:
        continue
      root_source_id = _get_datasource_id(top_level)
      violations.append(
          loggers.LogEntry(
              f"Attribution starting with DataSourceId '{root_source_id}'"
              f" has a depth of {levels}, exceeding the limit of 3.",
              [top_level],
          )
      )
    if violations:
      raise loggers.ElectionError(violations)
|
|
5277
|
+
|
|
5278
|
+
|
|
5279
|
+
def _canonicalize_cycle(cycle):
|
|
5280
|
+
"""Canonicalizes a cycle by rotating it to start with the min element."""
|
|
5281
|
+
if not cycle:
|
|
5282
|
+
return []
|
|
5283
|
+
min_node = min(cycle)
|
|
5284
|
+
min_idx = cycle.index(min_node)
|
|
5285
|
+
return cycle[min_idx:] + cycle[:min_idx]
|
|
5286
|
+
|
|
5287
|
+
|
|
5288
|
+
class AttributionContainsNoCycles(base.TreeRule):
  """Checks that DataSourceIds of nested Attributions never form a cycle.

  A directed graph is built with an edge from the DataSourceId of every
  Attribution to the DataSourceId of each of its direct Attribution children;
  Attributions without a DataSourceId are ignored.
  """

  def check(self):
    """Raises an ElectionError with one entry per simple cycle found."""
    graph = networkx.DiGraph()
    for attribution in self.get_elements_by_class(
        self.election_tree, "Attribution"
    ):
      parent_id = _get_datasource_id(attribution)
      if not parent_id:
        continue
      graph.add_node(parent_id)
      for nested in attribution.findall("Attribution"):
        nested_id = _get_datasource_id(nested)
        if nested_id:
          graph.add_edge(parent_id, nested_id)

    # A strongly connected component is cyclic when it spans several nodes or
    # when its only node points at itself.
    cyclic_nodes = set()
    for component in networkx.strongly_connected_components(graph):
      if len(component) > 1 or any(
          graph.has_edge(node, node) for node in component
      ):
        cyclic_nodes.update(component)

    if not cyclic_nodes:
      return

    # Enumerate cycles only on the cyclic part of the graph to keep the
    # search small on large feeds.
    cyclic_subgraph = graph.subgraph(sorted(cyclic_nodes))
    error_log = []
    for canonical in map(
        _canonicalize_cycle, networkx.simple_cycles(cyclic_subgraph)
    ):
      # The first node is repeated at the end to show the loop closing.
      path = " -> ".join(canonical)
      error_log.append(
          loggers.LogEntry(
              f"Cycle detected in Attribution: {path} -> {canonical[0]}"
          )
      )
    raise loggers.ElectionError(error_log)
|
|
5334
|
+
|
|
5335
|
+
|
|
4879
5336
|
class RuleSet(enum.Enum):
|
|
4880
5337
|
"""Names for sets of rules used to validate a particular feed type."""
|
|
4881
5338
|
|
|
@@ -4888,6 +5345,144 @@ class RuleSet(enum.Enum):
|
|
|
4888
5345
|
VOTER_INFORMATION = 7
|
|
4889
5346
|
|
|
4890
5347
|
|
|
5348
|
+
class ValidateSpecialBallotSelectionCountedInTotal(base.BaseRule):
  """Enforces constraints on CountedInTotal for special ballot selections.

  BlankBallotSelection, NullBallotSelection and NoneOfTheAboveBallotSelection
  must carry a CountedInTotal child with text, whereas
  AggregateBallotSelection must not have a CountedInTotal child at all.
  """

  def elements(self):
    return [
        "BlankBallotSelection",
        "NullBallotSelection",
        "NoneOfTheAboveBallotSelection",
        "AggregateBallotSelection",
    ]

  def check(self, element):
    """Raises an ElectionError when the selection violates its constraint."""
    tag = element.tag
    counted_in_total = element.find("CountedInTotal")

    if tag == "AggregateBallotSelection":
      if counted_in_total is not None:
        raise loggers.ElectionError.from_message(
            "AggregateBallotSelection must not have CountedInTotal set.",
            [element],
        )
      return

    requires_explicit_value = tag in (
        "BlankBallotSelection",
        "NullBallotSelection",
        "NoneOfTheAboveBallotSelection",
    )
    if requires_explicit_value and not element_has_text(counted_in_total):
      raise loggers.ElectionError.from_message(
          f"{tag} must have an explicit value for CountedInTotal.",
          [element],
      )
|
|
5382
|
+
|
|
5383
|
+
|
|
5384
|
+
class ValidateIncludeInAggregationBallotSelections(base.BaseRule):
  """Validates BallotSelections with IncludedInAggregation.

  Checks that the sum of all vote counts for a BallotSelection with
  IncludedInAggregation must not be > the total vote counts for the
  AggregateBallotSelection on that same Contest for the same vote count type.
  Also requires that if IncludedInAggregation is set on any BallotSelection then
  the AggregateBallotSelection must also be present on that Contest.
  """

  def elements(self):
    return ["CandidateContest", "PartyContest"]

  def _gather_vote_counts(self, element):
    """Gathers vote counts from a selection element grouped by type.

    Args:
      element: A selection element that may have a VoteCountsCollection child.

    Returns:
      A defaultdict(float) mapping (vote count type, GpUnitId text) to the sum
      of the Count values of the matching VoteCounts. VoteCounts whose Count
      has no text are skipped; a GpUnitId without text is keyed as "".
    """
    count_by_type_and_gp_unit = collections.defaultdict(float)
    vote_counts_collection = element.find("VoteCountsCollection")
    if vote_counts_collection is None:
      return count_by_type_and_gp_unit

    for vote_counts in vote_counts_collection.findall("VoteCounts"):
      count_element = vote_counts.find("Count")
      if not element_has_text(count_element):
        continue
      # NOTE(review): a non-numeric Count raises ValueError here; presumably
      # schema validation rejects such feeds first - confirm.
      count = float(count_element.text)

      vote_count_type = _get_type_or_other_type(vote_counts)
      gp_unit_id_element = vote_counts.find("GpUnitId")
      gp_unit_id = (
          gp_unit_id_element.text.strip()
          if element_has_text(gp_unit_id_element)
          else ""
      )

      count_by_type_and_gp_unit[(vote_count_type, gp_unit_id)] += count

    return count_by_type_and_gp_unit

  def check(self, element):
    """Compares included selections' counts with the aggregate selection.

    Args:
      element: The CandidateContest or PartyContest element being checked.

    Raises:
      loggers.ElectionError: If a selection is marked IncludedInAggregation
        but the contest has no AggregateBallotSelection child, or if, for any
        (vote count type, GpUnit) pair, the included selections sum to more
        than the aggregate selection's count.
    """
    contest_id = element.get("objectId")

    candidate_selections = self.get_elements_by_class(
        element, "CandidateSelection"
    )
    party_selections = self.get_elements_by_class(element, "PartySelection")
    all_selections = candidate_selections + party_selections

    included_selections = []
    for selection in all_selections:
      included_in_aggregation_element = selection.find("IncludedInAggregation")
      # xs:boolean values may be padded with whitespace, so strip before
      # matching against the accepted "true" spellings.
      if (
          element_has_text(included_in_aggregation_element)
          and included_in_aggregation_element.text.strip() in _XML_TRUE_VALUES
      ):
        included_selections.append(selection)
    if not included_selections:
      return

    aggregate_selection = element.find("AggregateBallotSelection")
    if aggregate_selection is None:
      raise loggers.ElectionError.from_message(
          f"Contest {contest_id} has selections marked as IncludedInAggregation"
          " but is missing AggregateBallotSelection.",
          [element],
      )

    aggregate_count_by_type_and_gp_unit = self._gather_vote_counts(
        aggregate_selection
    )

    selections_count_sum_by_type_and_gp_unit = collections.defaultdict(float)
    for selection in included_selections:
      for (
          type_and_gp_unit,
          count,
      ) in self._gather_vote_counts(selection).items():
        selections_count_sum_by_type_and_gp_unit[type_and_gp_unit] += count

    error_log = []
    for (
        type_and_gp_unit,
        total_count,
    ) in selections_count_sum_by_type_and_gp_unit.items():
      # A pair the aggregate selection does not report counts as zero.
      aggregate_count = aggregate_count_by_type_and_gp_unit.get(
          type_and_gp_unit, 0.0
      )
      if total_count > aggregate_count:
        resolved_type, gp_unit_id = type_and_gp_unit
        error_log.append(
            loggers.LogEntry(
                f"In Contest {contest_id}, the sum of vote counts"
                f" ({total_count}) for selections marked as"
                " IncludedInAggregation exceeds the vote count"
                f" ({aggregate_count}) for the AggregateBallotSelection for"
                f" vote count type='{resolved_type}' (GpUnit: '{gp_unit_id}').",
                [element],
            )
        )

    if error_log:
      raise loggers.ElectionError(error_log)
|
|
5484
|
+
|
|
5485
|
+
|
|
4891
5486
|
# To add new rules, create a new class, inherit the base rule,
|
|
4892
5487
|
# and add it to the correct rule list.
|
|
4893
5488
|
COMMON_RULES = (
|
|
@@ -4948,6 +5543,8 @@ COMMON_RULES = (
|
|
|
4948
5543
|
|
|
4949
5544
|
ELECTION_RULES = COMMON_RULES + (
|
|
4950
5545
|
# go/keep-sorted start
|
|
5546
|
+
AttributionContainsNoCycles,
|
|
5547
|
+
AttributionDepthLimit,
|
|
4951
5548
|
BallotTitle,
|
|
4952
5549
|
CandidateContestTypesAreCompatible,
|
|
4953
5550
|
CandidatesReferencedInRelatedContests,
|
|
@@ -4980,16 +5577,25 @@ ELECTION_RULES = COMMON_RULES + (
|
|
|
4980
5577
|
MissingPartyNameTranslation,
|
|
4981
5578
|
MultipleCandidatesPointToTheSamePersonInTheSameContest,
|
|
4982
5579
|
MultipleInternationalizedTextWithSameLanguageCode,
|
|
5580
|
+
NotEmptyUniqueDataSourceUris,
|
|
4983
5581
|
OfficeHasjurisdictionSameAsElectoralDistrict,
|
|
4984
5582
|
PartisanPrimary,
|
|
4985
5583
|
PartisanPrimaryHeuristic,
|
|
4986
5584
|
PercentSum,
|
|
4987
5585
|
ProperBallotSelection,
|
|
5586
|
+
ResultsReportingStagesMustHaveUniqueType,
|
|
4988
5587
|
SelfDeclaredCandidateMethod,
|
|
4989
5588
|
SingularPartySelection,
|
|
4990
5589
|
SubsequentContestIdIsValidRelatedContest,
|
|
5590
|
+
UniqueDataSourceDisplayNames,
|
|
5591
|
+
UniqueDataSourceLanguages,
|
|
4991
5592
|
ValidateDuplicateColors,
|
|
5593
|
+
ValidateIncludeInAggregationBallotSelections,
|
|
4992
5594
|
ValidateInfoUriAnnotation,
|
|
5595
|
+
ValidatePollsCloseDatetimes,
|
|
5596
|
+
ValidateResultsEmbargoEnd,
|
|
5597
|
+
ValidateResultsExpected,
|
|
5598
|
+
ValidateSpecialBallotSelectionCountedInTotal,
|
|
4993
5599
|
VoteCountTypesCoherency,
|
|
4994
5600
|
VoteCountValidSeatsDeltaTypes,
|
|
4995
5601
|
WinnerCountLimit,
|
|
@@ -5040,6 +5646,7 @@ METADATA_RULES = (
|
|
|
5040
5646
|
Schema,
|
|
5041
5647
|
SourceDirPathMustBeSetAfterInitialDeliveryDate,
|
|
5042
5648
|
SourceDirPathsAreUnique,
|
|
5649
|
+
SqsQueueNameRequiresS3SourceDirPath,
|
|
5043
5650
|
UniqueLabel,
|
|
5044
5651
|
# go/keep-sorted end
|
|
5045
5652
|
)
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|