civics-cdf-validator 1.43.dev4__tar.gz → 1.43.dev6__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {civics_cdf_validator-1.43.dev4/civics_cdf_validator.egg-info → civics_cdf_validator-1.43.dev6}/PKG-INFO +2 -1
- {civics_cdf_validator-1.43.dev4 → civics_cdf_validator-1.43.dev6}/README.md +7 -0
- {civics_cdf_validator-1.43.dev4 → civics_cdf_validator-1.43.dev6/civics_cdf_validator.egg-info}/PKG-INFO +2 -1
- {civics_cdf_validator-1.43.dev4 → civics_cdf_validator-1.43.dev6}/civics_cdf_validator.egg-info/requires.txt +1 -0
- {civics_cdf_validator-1.43.dev4 → civics_cdf_validator-1.43.dev6}/rules.py +218 -0
- {civics_cdf_validator-1.43.dev4 → civics_cdf_validator-1.43.dev6}/setup.py +1 -0
- {civics_cdf_validator-1.43.dev4 → civics_cdf_validator-1.43.dev6}/validator.py +1 -7
- {civics_cdf_validator-1.43.dev4 → civics_cdf_validator-1.43.dev6}/version.py +1 -1
- {civics_cdf_validator-1.43.dev4 → civics_cdf_validator-1.43.dev6}/CONTRIBUTING.md +0 -0
- {civics_cdf_validator-1.43.dev4 → civics_cdf_validator-1.43.dev6}/LICENSE-2.0.txt +0 -0
- {civics_cdf_validator-1.43.dev4 → civics_cdf_validator-1.43.dev6}/MANIFEST.in +0 -0
- {civics_cdf_validator-1.43.dev4 → civics_cdf_validator-1.43.dev6}/__init__.py +0 -0
- {civics_cdf_validator-1.43.dev4 → civics_cdf_validator-1.43.dev6}/base.py +0 -0
- {civics_cdf_validator-1.43.dev4 → civics_cdf_validator-1.43.dev6}/civics_cdf_validator.egg-info/SOURCES.txt +0 -0
- {civics_cdf_validator-1.43.dev4 → civics_cdf_validator-1.43.dev6}/civics_cdf_validator.egg-info/dependency_links.txt +0 -0
- {civics_cdf_validator-1.43.dev4 → civics_cdf_validator-1.43.dev6}/civics_cdf_validator.egg-info/entry_points.txt +0 -0
- {civics_cdf_validator-1.43.dev4 → civics_cdf_validator-1.43.dev6}/civics_cdf_validator.egg-info/top_level.txt +0 -0
- {civics_cdf_validator-1.43.dev4 → civics_cdf_validator-1.43.dev6}/gpunit_rules.py +0 -0
- {civics_cdf_validator-1.43.dev4 → civics_cdf_validator-1.43.dev6}/loggers.py +0 -0
- {civics_cdf_validator-1.43.dev4 → civics_cdf_validator-1.43.dev6}/office_utils.py +0 -0
- {civics_cdf_validator-1.43.dev4 → civics_cdf_validator-1.43.dev6}/setup.cfg +0 -0
- {civics_cdf_validator-1.43.dev4 → civics_cdf_validator-1.43.dev6}/stats.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: civics_cdf_validator
|
|
3
|
-
Version: 1.43.dev4
|
|
3
|
+
Version: 1.43.dev6
|
|
4
4
|
Summary: Checks if an election feed follows best practices
|
|
5
5
|
Home-page: https://github.com/google/civics_cdf_validator
|
|
6
6
|
Author: Google Civics
|
|
@@ -15,6 +15,7 @@ Requires-Dist: requests>=2.10
|
|
|
15
15
|
Requires-Dist: pygithub>=1.28
|
|
16
16
|
Requires-Dist: networkx>=2.6.3
|
|
17
17
|
Requires-Dist: pycountry==22.1.10
|
|
18
|
+
Requires-Dist: frozendict>=2.4.4
|
|
18
19
|
Requires-Dist: six
|
|
19
20
|
|
|
20
21
|
civics_cdf_validator is a script that checks if an election data feed follows best practices and outputs errors, warnings and info messages for common issues.
|
|
@@ -32,6 +32,7 @@ You can use `civics_cdf_validator` to check different types of feed:
|
|
|
32
32
|
* Results
|
|
33
33
|
* Committee
|
|
34
34
|
* Election Dates
|
|
35
|
+
* Metadata
|
|
35
36
|
|
|
36
37
|
## List rules
|
|
37
38
|
|
|
@@ -81,6 +82,12 @@ default). Examples:
|
|
|
81
82
|
civics_cdf_validator validate results_file.xml --xsd civics_cdf_spec.xsd --rule_set election_results
|
|
82
83
|
```
|
|
83
84
|
|
|
85
|
+
* Validate a metadata feed:
|
|
86
|
+
|
|
87
|
+
```
|
|
88
|
+
civics_cdf_validator validate metadata_file.xml --xsd metadata_spec.xsd --rule_set metadata
|
|
89
|
+
```
|
|
90
|
+
|
|
84
91
|
One can choose to only validate one or more comma separated rules by using the `-i` flag
|
|
85
92
|
|
|
86
93
|
```
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: civics_cdf_validator
|
|
3
|
-
Version: 1.43.dev4
|
|
3
|
+
Version: 1.43.dev6
|
|
4
4
|
Summary: Checks if an election feed follows best practices
|
|
5
5
|
Home-page: https://github.com/google/civics_cdf_validator
|
|
6
6
|
Author: Google Civics
|
|
@@ -15,6 +15,7 @@ Requires-Dist: requests>=2.10
|
|
|
15
15
|
Requires-Dist: pygithub>=1.28
|
|
16
16
|
Requires-Dist: networkx>=2.6.3
|
|
17
17
|
Requires-Dist: pycountry==22.1.10
|
|
18
|
+
Requires-Dist: frozendict>=2.4.4
|
|
18
19
|
Requires-Dist: six
|
|
19
20
|
|
|
20
21
|
civics_cdf_validator is a script that checks if an election data feed follows best practices and outputs errors, warnings and info messages for common issues.
|
|
@@ -25,9 +25,11 @@ from civics_cdf_validator import base
|
|
|
25
25
|
from civics_cdf_validator import gpunit_rules
|
|
26
26
|
from civics_cdf_validator import loggers
|
|
27
27
|
from civics_cdf_validator import office_utils
|
|
28
|
+
from frozendict import frozendict
|
|
28
29
|
import language_tags
|
|
29
30
|
from lxml import etree
|
|
30
31
|
import networkx
|
|
32
|
+
import pycountry
|
|
31
33
|
from six.moves.urllib.parse import urlparse
|
|
32
34
|
|
|
33
35
|
_PARTY_LEADERSHIP_TYPES = ["party-leader-id", "party-chair-id"]
|
|
@@ -75,6 +77,14 @@ _EXECUTIVE_OFFICE_ROLES = frozenset([
|
|
|
75
77
|
"deputy head of government",
|
|
76
78
|
])
|
|
77
79
|
|
|
80
|
+
# Allowed FeedLongevity values per FeedType. Keys and values are written in
# lower case with hyphens; callers normalize feed text the same way before
# looking it up.
_VALID_FEED_LONGEVITY_BY_FEED_TYPE = frozendict((
    ("committee", ["evergreen"]),
    ("election-dates", ["evergreen"]),
    ("election-results", ["limited", "yearly"]),
    ("officeholder", ["evergreen"]),
    ("pre-election", ["limited", "yearly"]),
))
|
|
87
|
+
|
|
78
88
|
|
|
79
89
|
def _is_executive_office(office_roles):
|
|
80
90
|
return not _EXECUTIVE_OFFICE_ROLES.isdisjoint(office_roles)
|
|
@@ -179,6 +189,14 @@ def element_has_text(element):
|
|
|
179
189
|
and not element.text.isspace())
|
|
180
190
|
|
|
181
191
|
|
|
192
|
+
def country_code_is_valid(country_code):
  """Checks whether a country code is "eu" or an ISO 3166-1 alpha-2 code.

  The check is case-insensitive, so callers may pass either the lower-case
  form (as the command line validation does) or the upper-case form (as the
  FeedHasValidCountryCode rule does).

  Args:
    country_code: the two-letter code to check.

  Returns:
    True if the code is "eu" (in any case) or is known to pycountry as an
    alpha-2 country code; False otherwise, including for empty or None input.
  """
  if not country_code:
    return False
  # EU is part of ISO 3166/MA
  if country_code.lower() == "eu":
    return True
  # Upper case is the canonical alpha-2 spelling.
  return pycountry.countries.get(alpha_2=country_code.upper()) is not None
|
|
198
|
+
|
|
199
|
+
|
|
182
200
|
class Schema(base.TreeRule):
|
|
183
201
|
"""Checks if election file validates against the provided schema."""
|
|
184
202
|
|
|
@@ -3573,6 +3591,199 @@ class AffiliationHasEitherPartyOrPerson(base.BaseRule):
|
|
|
3573
3591
|
)
|
|
3574
3592
|
|
|
3575
3593
|
|
|
3594
|
+
class FeedTypeHasValidFeedLongevity(base.BaseRule):
  """Feeds types should have valid corresponding FeedLongevity.

  FeedType and FeedLongevity are lower-cased and have underscores replaced by
  hyphens before being looked up in _VALID_FEED_LONGEVITY_BY_FEED_TYPE. Feed
  types that are not listed there are not checked.
  """

  def elements(self):
    return ["Feed"]

  def check(self, element):
    type_node = element.find("FeedType")
    longevity_node = element.find("FeedLongevity")
    # Nothing to compare unless both values are present.
    if not element_has_text(type_node):
      return
    if not element_has_text(longevity_node):
      return

    feed_type = type_node.text.lower().replace("_", "-")
    feed_longevity = longevity_node.text.lower().replace("_", "-")
    if feed_type not in _VALID_FEED_LONGEVITY_BY_FEED_TYPE:
      return

    allowed_longevities = _VALID_FEED_LONGEVITY_BY_FEED_TYPE[feed_type]
    if feed_longevity in allowed_longevities:
      return

    template = (
        "Feed type {} has invalid feed longevity {}. Valid feed"
        " longevities for this type are {}"
    )
    raise loggers.ElectionError.from_message(
        template.format(feed_type, feed_longevity, allowed_longevities),
        [element],
    )
|
|
3622
|
+
|
|
3623
|
+
|
|
3624
|
+
class FeedIdsAreUnique(base.BaseRule):
  """FeedId should be unique.

  Every Feed whose FeedId text was already seen on an earlier Feed of the same
  FeedCollection is reported; Feeds without FeedId text are ignored.
  """

  def elements(self):
    return ["FeedCollection"]

  def check(self, element):
    template = (
        "FeedId {} appears multiple times in the metadata feed. Feed ids"
        " must be unique."
    )
    seen_ids = set()
    duplicates = []
    for feed in element.findall("Feed"):
      id_node = feed.find("FeedId")
      if not element_has_text(id_node):
        continue
      feed_id = id_node.text
      if feed_id in seen_ids:
        duplicates.append(loggers.LogEntry(template.format(feed_id), [feed]))
      else:
        seen_ids.add(feed_id)

    if duplicates:
      raise loggers.ElectionError(duplicates)
|
|
3651
|
+
|
|
3652
|
+
|
|
3653
|
+
class SourceDirPathsAreUnique(base.BaseRule):
  """All SourceDirPaths should be unique.

  Every Feed whose SourceDirPath text was already seen on an earlier Feed of
  the same FeedCollection is reported; Feeds without SourceDirPath text are
  ignored.
  """

  def elements(self):
    return ["FeedCollection"]

  def check(self, element):
    template = (
        "SourceDirPath {} appears multiple times in the metadata feed."
        " SourceDirPaths must be unique."
    )
    seen_paths = set()
    duplicates = []
    for feed in element.findall("Feed"):
      path_node = feed.find("SourceDirPath")
      if not element_has_text(path_node):
        continue
      path = path_node.text
      if path in seen_paths:
        duplicates.append(loggers.LogEntry(template.format(path), [feed]))
      else:
        seen_paths.add(path)

    if duplicates:
      raise loggers.ElectionError(duplicates)
|
|
3680
|
+
|
|
3681
|
+
|
|
3682
|
+
class ElectionEventDatesAreSequential(base.DateRule):
  """Dates in an ElectionEvent element should be sequential.

  After running the DateRule helpers (gather_dates, check_end_after_start),
  an error is logged when StartDate is older than FullDeliveryDate and when
  FullDeliveryDate is older than InitialDeliveryDate.
  """

  def elements(self):
    return ["ElectionEvent"]

  def check(self, element):
    self.reset_instance_vars()
    self.gather_dates(element)
    self.check_end_after_start()

    parse_date = base.PartialDate.init_partial_date
    full_node = element.find("FullDeliveryDate")
    initial_node = element.find("InitialDeliveryDate")
    has_full_delivery = element_has_text(full_node)

    # StartDate vs. FullDeliveryDate.
    if has_full_delivery and self.start_date:
      full_delivery = parse_date(full_node.text)
      if self.start_date.is_older_than(full_delivery) > 0:
        self.error_log.append(
            loggers.LogEntry(
                "StartDate is older than FullDeliveryDate", [element]
            )
        )

    # FullDeliveryDate vs. InitialDeliveryDate.
    if element_has_text(initial_node) and has_full_delivery:
      initial_delivery = parse_date(initial_node.text)
      full_delivery = parse_date(full_node.text)
      if full_delivery.is_older_than(initial_delivery) > 0:
        self.error_log.append(
            loggers.LogEntry(
                "FullDeliveryDate is older than InitialDeliveryDate",
                [element],
            )
        )

    if self.error_log:
      raise loggers.ElectionError(self.error_log)
|
|
3724
|
+
|
|
3725
|
+
|
|
3726
|
+
class OfficeHolderSubFeedDatesAreSequential(base.DateRule):
  """Dates in an OfficeHolderSubFeed element should be sequential.

  Raises an error when FullDeliveryDate is older than InitialDeliveryDate.
  Nothing is checked unless both dates have text.
  """

  def elements(self):
    return ["OfficeHolderSubFeed"]

  def check(self, element):
    initial_node = element.find("InitialDeliveryDate")
    full_node = element.find("FullDeliveryDate")
    if not (element_has_text(initial_node) and element_has_text(full_node)):
      return

    parse_date = base.PartialDate.init_partial_date
    initial_delivery = parse_date(initial_node.text)
    full_delivery = parse_date(full_node.text)
    if full_delivery.is_older_than(initial_delivery) > 0:
      raise loggers.ElectionError.from_message(
          "FullDeliveryDate is older than InitialDeliveryDate", [element]
      )
|
|
3748
|
+
|
|
3749
|
+
|
|
3750
|
+
class FeedHasValidCountryCode(base.BaseRule):
  """Feeds should have valid country code.

  The CountryCode text of each Feed must be accepted by
  country_code_is_valid(). Feeds without CountryCode text are not checked.
  """

  def elements(self):
    return ["Feed"]

  def check(self, element):
    """Raises an ElectionError if the Feed's CountryCode is not valid.

    Args:
      element: the Feed element to check.

    Raises:
      loggers.ElectionError: if CountryCode has text that is not a valid
        country code.
    """
    country_code_element = element.find("CountryCode")
    if not element_has_text(country_code_element):
      return
    # The upper-case form is only used for reporting.
    country_code = country_code_element.text.upper()
    # country_code_is_valid() compares against the lower-case literal "eu",
    # so it must be given the lower-case form; otherwise a feed declaring
    # "EU" would be reported as invalid.
    if not country_code_is_valid(country_code.lower()):
      raise loggers.ElectionError.from_message(
          "Invalid country code {}.".format(country_code),
          [element],
      )
|
|
3765
|
+
|
|
3766
|
+
|
|
3767
|
+
class FeedInactiveDateSetForNonEvergreenFeed(base.BaseRule):
  """All non-evergreen feeds should have a FeedInactiveDate set.

  A Feed whose FeedLongevity text is anything other than "evergreen"
  (compared case-insensitively) must have FeedInactiveDate text. Feeds
  without FeedLongevity text are not checked.
  """

  def elements(self):
    return ["Feed"]

  def check(self, element):
    """Raises an ElectionError if a non-evergreen Feed lacks FeedInactiveDate.

    Args:
      element: the Feed element to check.

    Raises:
      loggers.ElectionError: if the feed is not evergreen and has no
        FeedInactiveDate text.
    """
    feed_longevity = element.find("FeedLongevity")
    if not element_has_text(feed_longevity):
      return
    if feed_longevity.text.lower() == "evergreen":
      return
    if element_has_text(element.find("FeedInactiveDate")):
      return

    # FeedId may be absent; look it up defensively so that the missing
    # FeedInactiveDate is still reported instead of an AttributeError.
    feed_id_element = element.find("FeedId")
    feed_id = feed_id_element.text if feed_id_element is not None else None
    raise loggers.ElectionError.from_message(
        "FeedInactiveDate is not set for non-evergreen feed with FeedId {}."
        .format(feed_id),
        [element],
    )
|
|
3785
|
+
|
|
3786
|
+
|
|
3576
3787
|
class UnreferencedEntitiesBase(base.TreeRule):
|
|
3577
3788
|
"""All non-top-level entities in a feed should be referenced by at least one other entity.
|
|
3578
3789
|
|
|
@@ -3831,6 +4042,13 @@ METADATA_RULES = (
|
|
|
3831
4042
|
Encoding,
|
|
3832
4043
|
OptionalAndEmpty,
|
|
3833
4044
|
UniqueLabel,
|
|
4045
|
+
FeedTypeHasValidFeedLongevity,
|
|
4046
|
+
FeedIdsAreUnique,
|
|
4047
|
+
SourceDirPathsAreUnique,
|
|
4048
|
+
ElectionEventDatesAreSequential,
|
|
4049
|
+
OfficeHolderSubFeedDatesAreSequential,
|
|
4050
|
+
FeedHasValidCountryCode,
|
|
4051
|
+
FeedInactiveDateSetForNonEvergreenFeed,
|
|
3834
4052
|
)
|
|
3835
4053
|
|
|
3836
4054
|
ALL_RULES = frozenset(
|
|
@@ -33,7 +33,6 @@ from civics_cdf_validator import gpunit_rules
|
|
|
33
33
|
from civics_cdf_validator import loggers
|
|
34
34
|
from civics_cdf_validator import rules
|
|
35
35
|
from civics_cdf_validator import version
|
|
36
|
-
import pycountry
|
|
37
36
|
|
|
38
37
|
_REGISTRY_KEY = "registry"
|
|
39
38
|
_METADATA_KEY = "metadata"
|
|
@@ -82,14 +81,9 @@ def _validate_country_codes(parser, arg):
|
|
|
82
81
|
"""
|
|
83
82
|
country_code = arg.strip().lower()
|
|
84
83
|
|
|
85
|
-
|
|
86
|
-
if country_code == "eu":
|
|
84
|
+
if rules.country_code_is_valid(country_code):
|
|
87
85
|
return country_code
|
|
88
86
|
|
|
89
|
-
for country in pycountry.countries:
|
|
90
|
-
if country_code == country.alpha_2.lower():
|
|
91
|
-
return country_code
|
|
92
|
-
|
|
93
87
|
parser.error(
|
|
94
88
|
"Invalid country code. Please make sure it is listed under the officially"
|
|
95
89
|
" assigned ISO 3166-1 alpha-2 codes."
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|