civics-cdf-validator 1.43.dev4__tar.gz → 1.43.dev6__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (22) hide show
  1. {civics_cdf_validator-1.43.dev4/civics_cdf_validator.egg-info → civics_cdf_validator-1.43.dev6}/PKG-INFO +2 -1
  2. {civics_cdf_validator-1.43.dev4 → civics_cdf_validator-1.43.dev6}/README.md +7 -0
  3. {civics_cdf_validator-1.43.dev4 → civics_cdf_validator-1.43.dev6/civics_cdf_validator.egg-info}/PKG-INFO +2 -1
  4. {civics_cdf_validator-1.43.dev4 → civics_cdf_validator-1.43.dev6}/civics_cdf_validator.egg-info/requires.txt +1 -0
  5. {civics_cdf_validator-1.43.dev4 → civics_cdf_validator-1.43.dev6}/rules.py +218 -0
  6. {civics_cdf_validator-1.43.dev4 → civics_cdf_validator-1.43.dev6}/setup.py +1 -0
  7. {civics_cdf_validator-1.43.dev4 → civics_cdf_validator-1.43.dev6}/validator.py +1 -7
  8. {civics_cdf_validator-1.43.dev4 → civics_cdf_validator-1.43.dev6}/version.py +1 -1
  9. {civics_cdf_validator-1.43.dev4 → civics_cdf_validator-1.43.dev6}/CONTRIBUTING.md +0 -0
  10. {civics_cdf_validator-1.43.dev4 → civics_cdf_validator-1.43.dev6}/LICENSE-2.0.txt +0 -0
  11. {civics_cdf_validator-1.43.dev4 → civics_cdf_validator-1.43.dev6}/MANIFEST.in +0 -0
  12. {civics_cdf_validator-1.43.dev4 → civics_cdf_validator-1.43.dev6}/__init__.py +0 -0
  13. {civics_cdf_validator-1.43.dev4 → civics_cdf_validator-1.43.dev6}/base.py +0 -0
  14. {civics_cdf_validator-1.43.dev4 → civics_cdf_validator-1.43.dev6}/civics_cdf_validator.egg-info/SOURCES.txt +0 -0
  15. {civics_cdf_validator-1.43.dev4 → civics_cdf_validator-1.43.dev6}/civics_cdf_validator.egg-info/dependency_links.txt +0 -0
  16. {civics_cdf_validator-1.43.dev4 → civics_cdf_validator-1.43.dev6}/civics_cdf_validator.egg-info/entry_points.txt +0 -0
  17. {civics_cdf_validator-1.43.dev4 → civics_cdf_validator-1.43.dev6}/civics_cdf_validator.egg-info/top_level.txt +0 -0
  18. {civics_cdf_validator-1.43.dev4 → civics_cdf_validator-1.43.dev6}/gpunit_rules.py +0 -0
  19. {civics_cdf_validator-1.43.dev4 → civics_cdf_validator-1.43.dev6}/loggers.py +0 -0
  20. {civics_cdf_validator-1.43.dev4 → civics_cdf_validator-1.43.dev6}/office_utils.py +0 -0
  21. {civics_cdf_validator-1.43.dev4 → civics_cdf_validator-1.43.dev6}/setup.cfg +0 -0
  22. {civics_cdf_validator-1.43.dev4 → civics_cdf_validator-1.43.dev6}/stats.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: civics_cdf_validator
3
- Version: 1.43.dev4
3
+ Version: 1.43.dev6
4
4
  Summary: Checks if an election feed follows best practices
5
5
  Home-page: https://github.com/google/civics_cdf_validator
6
6
  Author: Google Civics
@@ -15,6 +15,7 @@ Requires-Dist: requests>=2.10
15
15
  Requires-Dist: pygithub>=1.28
16
16
  Requires-Dist: networkx>=2.6.3
17
17
  Requires-Dist: pycountry==22.1.10
18
+ Requires-Dist: frozendict>=2.4.4
18
19
  Requires-Dist: six
19
20
 
20
21
  civics_cdf_validator is a script that checks if an election data feed follows best practices and outputs errors, warnings and info messages for common issues.
@@ -32,6 +32,7 @@ You can use `civics_cdf_validator` to check different types of feed:
32
32
  * Results
33
33
  * Committee
34
34
  * Election Dates
35
+ * Metadata
35
36
 
36
37
  ## List rules
37
38
 
@@ -81,6 +82,12 @@ default). Examples:
81
82
  civics_cdf_validator validate results_file.xml --xsd civics_cdf_spec.xsd --rule_set election_results
82
83
  ```
83
84
 
85
+ * Validate a metadata feed:
86
+
87
+ ```
88
+ civics_cdf_validator validate metadata_file.xml --xsd metadata_spec.xsd --rule_set metadata
89
+ ```
90
+
84
91
  One can choose to only validate one or more comma separated rules by using the `-i` flag
85
92
 
86
93
  ```
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: civics_cdf_validator
3
- Version: 1.43.dev4
3
+ Version: 1.43.dev6
4
4
  Summary: Checks if an election feed follows best practices
5
5
  Home-page: https://github.com/google/civics_cdf_validator
6
6
  Author: Google Civics
@@ -15,6 +15,7 @@ Requires-Dist: requests>=2.10
15
15
  Requires-Dist: pygithub>=1.28
16
16
  Requires-Dist: networkx>=2.6.3
17
17
  Requires-Dist: pycountry==22.1.10
18
+ Requires-Dist: frozendict>=2.4.4
18
19
  Requires-Dist: six
19
20
 
20
21
  civics_cdf_validator is a script that checks if an election data feed follows best practices and outputs errors, warnings and info messages for common issues.
@@ -4,4 +4,5 @@ requests>=2.10
4
4
  pygithub>=1.28
5
5
  networkx>=2.6.3
6
6
  pycountry==22.1.10
7
+ frozendict>=2.4.4
7
8
  six
@@ -25,9 +25,11 @@ from civics_cdf_validator import base
25
25
  from civics_cdf_validator import gpunit_rules
26
26
  from civics_cdf_validator import loggers
27
27
  from civics_cdf_validator import office_utils
28
+ from frozendict import frozendict
28
29
  import language_tags
29
30
  from lxml import etree
30
31
  import networkx
32
+ import pycountry
31
33
  from six.moves.urllib.parse import urlparse
32
34
 
33
35
  _PARTY_LEADERSHIP_TYPES = ["party-leader-id", "party-chair-id"]
@@ -75,6 +77,14 @@ _EXECUTIVE_OFFICE_ROLES = frozenset([
75
77
  "deputy head of government",
76
78
  ])
77
79
 
80
# Allowed FeedLongevity values for each FeedType. Keys and values are the
# lower-cased, hyphen-separated forms that FeedTypeHasValidFeedLongevity
# compares against. Feed types absent from this mapping are not checked.
_VALID_FEED_LONGEVITY_BY_FEED_TYPE = frozendict({
    "committee": ["evergreen"],
    "election-dates": ["evergreen"],
    "election-results": ["limited", "yearly"],
    "officeholder": ["evergreen"],
    "pre-election": ["limited", "yearly"],
})
87
+
78
88
 
79
89
  def _is_executive_office(office_roles):
80
90
  return not _EXECUTIVE_OFFICE_ROLES.isdisjoint(office_roles)
@@ -179,6 +189,14 @@ def element_has_text(element):
179
189
  and not element.text.isspace())
180
190
 
181
191
 
192
def country_code_is_valid(country_code):
  """Returns whether a country code is "eu" or an ISO 3166-1 alpha-2 code.

  The check is case-insensitive: callers in this package pass both
  lower-cased codes (command line validation) and upper-cased codes (feed
  rules), and both forms must be treated the same way.

  Args:
    country_code: the two-letter country code string to validate.

  Returns:
    True if the code is "eu" in any casing, or if pycountry knows a country
    whose alpha_2 value matches the code; False otherwise.
  """
  # Normalise once so the EU special case below matches "EU" as well as "eu".
  normalized_code = country_code.lower()
  # EU is part of ISO 3166/MA
  return (
      normalized_code == "eu"
      or pycountry.countries.get(alpha_2=normalized_code) is not None
  )
198
+
199
+
182
200
  class Schema(base.TreeRule):
183
201
  """Checks if election file validates against the provided schema."""
184
202
 
@@ -3573,6 +3591,199 @@ class AffiliationHasEitherPartyOrPerson(base.BaseRule):
3573
3591
  )
3574
3592
 
3575
3593
 
3594
class FeedTypeHasValidFeedLongevity(base.BaseRule):
  """Feed types should have a valid corresponding FeedLongevity.

  The FeedType and FeedLongevity texts are lower-cased and underscores are
  replaced by hyphens before they are looked up in
  _VALID_FEED_LONGEVITY_BY_FEED_TYPE. Feeds whose type is not listed there,
  or that lack either value, are not checked.
  """

  def elements(self):
    return ["Feed"]

  def check(self, element):
    type_node = element.find("FeedType")
    longevity_node = element.find("FeedLongevity")
    # Nothing to compare unless both values are present.
    if not element_has_text(type_node):
      return
    if not element_has_text(longevity_node):
      return

    feed_type = type_node.text.lower().replace("_", "-")
    feed_longevity = longevity_node.text.lower().replace("_", "-")
    if feed_type not in _VALID_FEED_LONGEVITY_BY_FEED_TYPE:
      return

    allowed_longevities = _VALID_FEED_LONGEVITY_BY_FEED_TYPE[feed_type]
    if feed_longevity in allowed_longevities:
      return

    raise loggers.ElectionError.from_message(
        "Feed type {} has invalid feed longevity {}. Valid feed"
        " longevities for this type are {}".format(
            feed_type,
            feed_longevity,
            allowed_longevities,
        ),
        [element],
    )
3622
+
3623
+
3624
class FeedIdsAreUnique(base.BaseRule):
  """Every FeedId inside a FeedCollection should appear only once.

  One log entry is recorded for each Feed whose FeedId was already seen on an
  earlier Feed; feeds without FeedId text are ignored.
  """

  def elements(self):
    return ["FeedCollection"]

  def check(self, element):
    seen_ids = set()
    duplicates = []
    for feed in element.findall("Feed"):
      id_node = feed.find("FeedId")
      if not element_has_text(id_node):
        continue
      feed_id = id_node.text
      if feed_id in seen_ids:
        duplicates.append(
            loggers.LogEntry(
                "FeedId {} appears multiple times in the metadata feed. Feed ids"
                " must be unique.".format(feed_id),
                [feed],
            )
        )
      seen_ids.add(feed_id)

    if duplicates:
      raise loggers.ElectionError(duplicates)
3651
+
3652
+
3653
class SourceDirPathsAreUnique(base.BaseRule):
  """Every SourceDirPath inside a FeedCollection should appear only once.

  One log entry is recorded for each Feed whose SourceDirPath was already
  seen on an earlier Feed; feeds without SourceDirPath text are ignored.
  """

  def elements(self):
    return ["FeedCollection"]

  def check(self, element):
    seen_paths = set()
    duplicates = []
    for feed in element.findall("Feed"):
      path_node = feed.find("SourceDirPath")
      if not element_has_text(path_node):
        continue
      source_dir_path = path_node.text
      if source_dir_path in seen_paths:
        duplicates.append(
            loggers.LogEntry(
                "SourceDirPath {} appears multiple times in the metadata feed."
                " SourceDirPaths must be unique.".format(source_dir_path),
                [feed],
            )
        )
      seen_paths.add(source_dir_path)

    if duplicates:
      raise loggers.ElectionError(duplicates)
3680
+
3681
+
3682
class ElectionEventDatesAreSequential(base.DateRule):
  """Dates in an ElectionEvent element should be sequential.

  On top of the start/end check inherited from base.DateRule, this rule
  records an error when StartDate is older than FullDeliveryDate and when
  FullDeliveryDate is older than InitialDeliveryDate.
  """

  def elements(self):
    return ["ElectionEvent"]

  def check(self, element):
    self.reset_instance_vars()
    self.gather_dates(element)
    self.check_end_after_start()

    full_node = element.find("FullDeliveryDate")
    has_full_delivery = element_has_text(full_node)

    # self.start_date is presumably populated by gather_dates() above.
    if has_full_delivery and self.start_date:
      full_delivery = base.PartialDate.init_partial_date(full_node.text)
      if self.start_date.is_older_than(full_delivery) > 0:
        self.error_log.append(
            loggers.LogEntry(
                "StartDate is older than FullDeliveryDate",
                [element],
            )
        )

    initial_node = element.find("InitialDeliveryDate")
    if element_has_text(initial_node) and has_full_delivery:
      initial_delivery = base.PartialDate.init_partial_date(initial_node.text)
      full_delivery = base.PartialDate.init_partial_date(full_node.text)
      if full_delivery.is_older_than(initial_delivery) > 0:
        self.error_log.append(
            loggers.LogEntry(
                "FullDeliveryDate is older than InitialDeliveryDate",
                [element],
            )
        )

    if self.error_log:
      raise loggers.ElectionError(self.error_log)
3724
+
3725
+
3726
class OfficeHolderSubFeedDatesAreSequential(base.DateRule):
  """Dates in an OfficeHolderSubFeed element should be sequential.

  Raises an error when FullDeliveryDate is older than InitialDeliveryDate.
  The check is skipped unless both dates are present.
  """

  def elements(self):
    return ["OfficeHolderSubFeed"]

  def check(self, element):
    initial_node = element.find("InitialDeliveryDate")
    if not element_has_text(initial_node):
      return
    full_node = element.find("FullDeliveryDate")
    if not element_has_text(full_node):
      return

    initial_delivery = base.PartialDate.init_partial_date(initial_node.text)
    full_delivery = base.PartialDate.init_partial_date(full_node.text)
    if full_delivery.is_older_than(initial_delivery) > 0:
      raise loggers.ElectionError.from_message(
          "FullDeliveryDate is older than InitialDeliveryDate",
          [element],
      )
3748
+
3749
+
3750
class FeedHasValidCountryCode(base.BaseRule):
  """Feeds should have a valid country code.

  The CountryCode text must be accepted by country_code_is_valid; feeds
  without CountryCode text are not checked.
  """

  def elements(self):
    return ["Feed"]

  def check(self, element):
    """Raises ElectionError if the Feed's CountryCode is not valid.

    Args:
      element: the Feed element to inspect.

    Raises:
      loggers.ElectionError: if CountryCode has text that is not a valid
        country code.
    """
    country_code_element = element.find("CountryCode")
    if not element_has_text(country_code_element):
      return

    # The upper-cased form is kept for the error message only.
    country_code = country_code_element.text.upper()
    # Validate the lower-cased form, as the command line validation does:
    # the helper's EU special case is written in lower case, so passing the
    # upper-cased value would reject "EU".
    if not country_code_is_valid(country_code.lower()):
      raise loggers.ElectionError.from_message(
          "Invalid country code {}.".format(country_code),
          [element],
      )
3765
+
3766
+
3767
class FeedInactiveDateSetForNonEvergreenFeed(base.BaseRule):
  """All non-evergreen feeds should have a FeedInactiveDate set."""

  def elements(self):
    return ["Feed"]

  def check(self, element):
    """Raises ElectionError if a non-evergreen Feed lacks FeedInactiveDate.

    Args:
      element: the Feed element to inspect.

    Raises:
      loggers.ElectionError: if FeedLongevity has text other than
        "evergreen" (case-insensitive) and FeedInactiveDate has no text.
    """
    feed_longevity = element.find("FeedLongevity")
    if not element_has_text(feed_longevity):
      return
    if feed_longevity.text.lower() == "evergreen":
      return
    if element_has_text(element.find("FeedInactiveDate")):
      return

    # A Feed may lack FeedId entirely; reading .text on the missing element
    # would raise AttributeError and hide the real validation error. Fall
    # back to None, which is what an empty FeedId element already yields.
    feed_id_element = element.find("FeedId")
    feed_id = feed_id_element.text if feed_id_element is not None else None
    raise loggers.ElectionError.from_message(
        "FeedInactiveDate is not set for non-evergreen feed with FeedId {}."
        .format(feed_id),
        [element],
    )
3785
+
3786
+
3576
3787
  class UnreferencedEntitiesBase(base.TreeRule):
3577
3788
  """All non-top-level entities in a feed should be referenced by at least one other entity.
3578
3789
 
@@ -3831,6 +4042,13 @@ METADATA_RULES = (
3831
4042
  Encoding,
3832
4043
  OptionalAndEmpty,
3833
4044
  UniqueLabel,
4045
+ FeedTypeHasValidFeedLongevity,
4046
+ FeedIdsAreUnique,
4047
+ SourceDirPathsAreUnique,
4048
+ ElectionEventDatesAreSequential,
4049
+ OfficeHolderSubFeedDatesAreSequential,
4050
+ FeedHasValidCountryCode,
4051
+ FeedInactiveDateSetForNonEvergreenFeed,
3834
4052
  )
3835
4053
 
3836
4054
  ALL_RULES = frozenset(
@@ -58,6 +58,7 @@ setup(
58
58
  'pygithub>=1.28',
59
59
  'networkx>=2.6.3',
60
60
  'pycountry==22.1.10',
61
+ 'frozendict>=2.4.4',
61
62
  'six',
62
63
  ],
63
64
  setup_requires=['pytest-runner'],
@@ -33,7 +33,6 @@ from civics_cdf_validator import gpunit_rules
33
33
  from civics_cdf_validator import loggers
34
34
  from civics_cdf_validator import rules
35
35
  from civics_cdf_validator import version
36
- import pycountry
37
36
 
38
37
  _REGISTRY_KEY = "registry"
39
38
  _METADATA_KEY = "metadata"
@@ -82,14 +81,9 @@ def _validate_country_codes(parser, arg):
82
81
  """
83
82
  country_code = arg.strip().lower()
84
83
 
85
- # EU is part of ISO 3166/MA
86
- if country_code == "eu":
84
+ if rules.country_code_is_valid(country_code):
87
85
  return country_code
88
86
 
89
- for country in pycountry.countries:
90
- if country_code == country.alpha_2.lower():
91
- return country_code
92
-
93
87
  parser.error(
94
88
  "Invalid country code. Please make sure it is listed under the officially"
95
89
  " assigned ISO 3166-1 alpha-2 codes."
@@ -5,4 +5,4 @@ No dependencies should be added to this module.
5
5
  See https://packaging.python.org/guides/single-sourcing-package-version/
6
6
  """
7
7
 
8
- __version__ = '1.43.dev4'
8
+ __version__ = '1.43.dev6'