PyPI - followthemoney - Versions diffs - 4.3.0__py3-none-any.whl → 4.5.0__py3-none-any.whl - Mend

followthemoney 4.3.0__py3-none-any.whl → 4.5.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (45) hide show

followthemoney/__init__.py +1 -1
followthemoney/compare.py +6 -0
followthemoney/dataset/dataset.py +18 -0
followthemoney/entity.py +29 -15
followthemoney/mapping/csv.py +3 -1
followthemoney/model.py +6 -5
followthemoney/property.py +23 -4
followthemoney/proxy.py +32 -11
followthemoney/schema/Company.yaml +5 -0
followthemoney/schema/CryptoWallet.yaml +4 -0
followthemoney/schema/Image.yaml +7 -0
followthemoney/schema/LegalEntity.yaml +10 -0
followthemoney/schema/Organization.yaml +5 -0
followthemoney/schema/Person.yaml +4 -0
followthemoney/schema/PublicBody.yaml +4 -0
followthemoney/schema/Thing.yaml +3 -2
followthemoney/schema.py +16 -2
followthemoney/settings.py +19 -0
followthemoney/statement/entity.py +31 -7
followthemoney/statement/serialize.py +18 -13
followthemoney/statement/statement.py +151 -42
followthemoney/statement/util.py +23 -2
followthemoney/types/address.py +3 -3
followthemoney/types/checksum.py +3 -3
followthemoney/types/country.py +19 -4
followthemoney/types/date.py +13 -3
followthemoney/types/entity.py +3 -3
followthemoney/types/gender.py +6 -6
followthemoney/types/identifier.py +8 -8
followthemoney/types/ip.py +3 -3
followthemoney/types/json.py +2 -2
followthemoney/types/language.py +3 -3
followthemoney/types/mimetype.py +3 -3
followthemoney/types/name.py +3 -3
followthemoney/types/number.py +2 -2
followthemoney/types/phone.py +3 -3
followthemoney/types/string.py +2 -2
followthemoney/types/topic.py +6 -3
followthemoney/types/url.py +3 -3
followthemoney/util.py +6 -14
{followthemoney-4.3.0.dist-info → followthemoney-4.5.0.dist-info}/METADATA +3 -3
{followthemoney-4.3.0.dist-info → followthemoney-4.5.0.dist-info}/RECORD +45 -44
{followthemoney-4.3.0.dist-info → followthemoney-4.5.0.dist-info}/WHEEL +1 -1
{followthemoney-4.3.0.dist-info → followthemoney-4.5.0.dist-info}/entry_points.txt +0 -0
{followthemoney-4.3.0.dist-info → followthemoney-4.5.0.dist-info}/licenses/LICENSE +0 -0

followthemoney/__init__.py CHANGED Viewed

@@ -9,7 +9,7 @@ from followthemoney.statement import Statement, StatementEntity, SE
 from followthemoney.dataset import Dataset, DefaultDataset, DS
 from followthemoney.util import set_model_locale
-__version__ = "4.3.0"
+__version__ = "4.5.0"
 # Data model singleton
 model = Model.instance()

followthemoney/compare.py CHANGED Viewed

@@ -77,6 +77,12 @@ def compare(
     weights: Weights = COMPARE_WEIGHTS,
 ) -> float:
     """Compare two entities and return a match score."""
+    if left.checksum == right.checksum:
+        # Check if there is any data at all (ie any basis for making a decision),
+        # if so, return a perfect match. This avoids marking two empty entities
+        # as matching. Bit ambiguous, but practical.
+        if len(left.properties) > 0 and len(right.properties) > 0:
+            return 1.0
     scores = compare_scores(left, right)
     return _compare(scores, weights)

followthemoney/dataset/dataset.py CHANGED Viewed

@@ -1,3 +1,4 @@
+from pathlib import Path
 import yaml
 import logging
 from functools import cached_property
@@ -38,6 +39,8 @@ class DatasetModel(BaseModel):
     coverage: DataCoverage | None = None
     resources: List[DataResource] = []
     children: Set[str] = set()
+    deprecation: Optional[str] = None
+    deprecated: bool = False
     @field_validator("name", mode="after")
     @classmethod
@@ -57,6 +60,18 @@ class DatasetModel(BaseModel):
             data["children"] = children
         return data
+    @model_validator(mode="after")
+    def evaluate_data(self) -> "DatasetModel":
+        # derive deprecated from deprecation notice:
+        if self.deprecation is not None:
+            self.deprecation = self.deprecation.strip()
+            if not len(self.deprecation):
+                self.deprecation = None
+        self.deprecated = self.deprecation is not None or self.deprecated
+        if self.deprecated and (self.coverage is None or self.coverage.end is None):
+            raise ValueError("Deprecated dataset coverage must have an end date.")
+        return self
     def get_resource(self, name: str) -> DataResource:
         for res in self.resources:
             if res.name == name:
@@ -121,10 +136,13 @@ class Dataset:
     ) -> DS:
         from followthemoney.dataset.catalog import DataCatalog
+        path = Path(path)
         with open(path, "r") as fh:
             data = yaml.safe_load(fh)
             if catalog is None:
                 catalog = DataCatalog(cls, {})
+            if "name" not in data:
+                data["name"] = path.stem
             return catalog.make_dataset(data)
     @classmethod

followthemoney/entity.py CHANGED Viewed

@@ -5,6 +5,7 @@ from rigour.names import pick_name
 from followthemoney.proxy import EntityProxy
 from followthemoney.schema import Schema
 from followthemoney.statement import BASE_ID, Statement
+from followthemoney.util import HASH_ENCODING
 VE = TypeVar("VE", bound="ValueEntity")
@@ -42,25 +43,28 @@ class ValueEntity(EntityProxy):
         key_prefix: Optional[str] = None,
         cleaned: bool = True,
     ):
+        self._caption: Optional[str] = data.pop("caption", None)
+        self.datasets: Set[str] = set(data.pop("datasets", []))
+        self.referents: Set[str] = set(data.pop("referents", []))
+        self.first_seen: Optional[str] = data.pop("first_seen", None)
+        self.last_seen: Optional[str] = data.pop("last_seen", None)
+        self.last_change: Optional[str] = data.pop("last_change", None)
         super().__init__(schema, data, key_prefix=key_prefix, cleaned=cleaned)
-        self._caption: Optional[str] = data.get("caption")
-        self.datasets: Set[str] = set(data.get("datasets", []))
-        self.referents: Set[str] = set(data.get("referents", []))
-        self.first_seen: Optional[str] = data.get("first_seen")
-        self.last_seen: Optional[str] = data.get("last_seen")
-        self.last_change: Optional[str] = data.get("last_change")
         # add data from statement dict if present.
         # this updates the dataset and referents set
         for stmt_data in data.pop("statements", []):
             stmt = Statement.from_dict(stmt_data)
+            prop = schema.get(stmt.prop)
+            if prop is None:
+                continue
             self.datasets.add(stmt.dataset)
             if stmt.schema != self.schema.name:
                 self.schema = schema.model.common_schema(self.schema, stmt.schema)
             if stmt.entity_id != self.id:
                 self.referents.add(stmt.entity_id)
             if stmt.prop != BASE_ID:
-                self.add(stmt.prop, stmt.value)
+                self.unsafe_add(prop, stmt.value, cleaned=cleaned)
     def merge(self: VE, other: EntityProxy) -> VE:
         merged = super().merge(other)
@@ -78,15 +82,25 @@ class ValueEntity(EntityProxy):
             merged.last_change = max(changed, default=None)
         return merged
+    @property
+    def checksum(self) -> str:
+        digest = self._checksum_digest()
+        for dataset in sorted(self.datasets):
+            digest.update(dataset.encode(HASH_ENCODING))
+            digest.update(b"\x1e")
+        for referent in sorted(self.referents):
+            digest.update(referent.encode(HASH_ENCODING))
+            digest.update(b"\x1e")
+        if self.last_change is not None:
+            digest.update(self.last_change.encode(HASH_ENCODING))
+        return digest.hexdigest()
     def to_dict(self) -> Dict[str, Any]:
-        data: Dict[str, Any] = {
-            "id": self.id,
-            "caption": self._caption or self.caption,
-            "schema": self.schema.name,
-            "properties": self.properties,
-            "referents": list(self.referents),
-            "datasets": list(self.datasets),
-        }
+        data = super().to_dict()
+        data["referents"] = list(self.referents)
+        data["datasets"] = list(self.datasets)
+        if self._caption is not None:
+            data["caption"] = self._caption
         if self.first_seen is not None:
             data["first_seen"] = self.first_seen
         if self.last_seen is not None:

followthemoney/mapping/csv.py CHANGED Viewed

@@ -9,6 +9,7 @@ from typing import TYPE_CHECKING, cast
 from typing import Any, Dict, Generator, ItemsView, Iterable, List, Optional, Set, Tuple
 from followthemoney.mapping.source import Record, Source
+from followthemoney.settings import USER_AGENT
 from followthemoney.util import sanitize_text
 from followthemoney.exc import InvalidMapping
@@ -64,7 +65,8 @@ class CSVSource(Source):
         parsed_url = urlparse(url)
         log.info("Loading: %s", url)
         if parsed_url.scheme in ["http", "https"]:
-            res = requests.get(url, stream=True)
+            headers = {"User-Agent": USER_AGENT}
+            res = requests.get(url, stream=True, headers=headers)
             if not res.ok:
                 raise InvalidMapping("Failed to open CSV: %s" % url)
             # if res.encoding is None:

followthemoney/model.py CHANGED Viewed

@@ -3,12 +3,15 @@ import yaml
 from functools import cache
 from typing import TYPE_CHECKING, Any
 from typing import Dict, Generator, Iterator, Optional, Set, TypedDict, Union
+from rigour.env import ENCODING
 from followthemoney.types import registry
 from followthemoney.types.common import PropertyType, PropertyTypeToDict
 from followthemoney.schema import Schema, SchemaToDict
 from followthemoney.property import Property
 from followthemoney.exc import InvalidModel, InvalidData
+from followthemoney.settings import MODEL_PATH
+from followthemoney.util import const
 if TYPE_CHECKING:
     from followthemoney.proxy import EntityProxy
@@ -46,10 +49,7 @@ class Model(object):
     @classmethod
     def instance(cls) -> "Model":
         if cls._instance is None:
-            model_path = os.path.dirname(__file__)
-            model_path = os.path.join(model_path, "schema")
-            model_path = os.environ.get("FTM_MODEL_PATH", model_path)
-            cls._instance = cls(model_path)
+            cls._instance = cls(MODEL_PATH)
         return cls._instance
     def generate(self) -> None:
@@ -67,11 +67,12 @@ class Model(object):
                     schema.properties[prop.name] = prop
     def _load(self, filepath: str) -> None:
-        with open(filepath, "r", encoding="utf-8") as fh:
+        with open(filepath, "r", encoding=ENCODING) as fh:
             data = yaml.safe_load(fh)
             if not isinstance(data, dict):
                 raise InvalidModel("Model file is not a mapping: %s" % filepath)
             for name, config in data.items():
+                name = const(name)
                 self.schemata[name] = Schema(self, name, config)
     def get(self, name: Union[str, Schema]) -> Optional[Schema]:

followthemoney/property.py CHANGED Viewed

@@ -1,8 +1,9 @@
 import re
 from banal import is_mapping, as_bool
+from rigour.ids import get_identifier_format
 from typing import TYPE_CHECKING, Any, List, Optional, TypedDict
-from followthemoney.exc import InvalidModel
+from followthemoney.exc import InvalidData, InvalidModel
 from followthemoney.types import registry
 from followthemoney.util import gettext, get_entity_id, const
@@ -86,17 +87,16 @@ class Property:
         self.schema = schema
         #: Machine-readable name for this property.
-        self.name = const(name)
+        self.name = name
         if not check_property_name(self.name):
             raise InvalidModel("Invalid name: %s" % self.name)
         #: Qualified property name, which also includes the schema name.
         self.qname = const("%s:%s" % (schema.name, self.name))
-        self._hash = hash("<Property(%r)>" % self.qname)
         self._label = data.get("label", name)
         self._description = data.get("description")
+        self._hash = hash("<Property(%r)>" % self.qname)
         #: This property is deprecated and should not be used.
         self.deprecated = as_bool(data.get("deprecated", False))
@@ -157,6 +157,13 @@ class Property:
                     raise InvalidModel("Invalid reverse: %s" % self)
                 self.reverse = self.range._add_reverse(model, self._reverse, self)
+        if self.type == registry.identifier and self.format is not None:
+            format_ = get_identifier_format(self.format)
+            if format_ is None or format_.NAME != self.format:
+                raise InvalidModel("Invalid identifier format: %s" % self.format)
+            # Internalize the string:
+            self.format = format_.NAME
     @property
     def label(self) -> str:
         """User-facing title for this property."""
@@ -229,6 +236,18 @@ class Property:
             data["format"] = self.format
         return data
+    def __reduce__(self) -> Any:
+        return (self._reconstruct, (self.qname,))
+    @classmethod
+    def _reconstruct(cls, qname: str) -> "Property":
+        from followthemoney.model import Model
+        prop = Model.instance().get_qname(qname)
+        if prop is None:
+            raise InvalidData("Unknown property: %r" % qname)
+        return prop
     def __repr__(self) -> str:
         return "<Property(%r)>" % self.qname

followthemoney/proxy.py CHANGED Viewed

@@ -1,3 +1,4 @@
+import hashlib
 import logging
 from typing import TYPE_CHECKING, cast, Any
 from typing import Dict, Generator, List, Optional, Set, Tuple, Union, Type, TypeVar
@@ -10,13 +11,14 @@ from followthemoney.types import registry
 from followthemoney.types.common import PropertyType
 from followthemoney.property import Property
 from followthemoney.value import string_list, Values
-from followthemoney.util import sanitize_text, gettext
+from followthemoney.util import HASH_ENCODING, sanitize_text, gettext
 from followthemoney.util import merge_context, make_entity_id
 from followthemoney.model import Model
 from followthemoney.schema import Schema
 if TYPE_CHECKING:
     from followthemoney.model import Model
+    from hashlib import _Hash
 log = logging.getLogger(__name__)
 P = Union[Property, str]
@@ -403,13 +405,10 @@ class EntityProxy(object):
         schema and any contextual values that were handed in initially. The resulting
         dictionary can be used to make a new proxy, and it is commonly written to disk
         or a database."""
-        data = dict(self.context)
-        extra = {
-            "id": self.id,
-            "schema": self.schema.name,
-            "properties": self.properties,
-        }
-        data.update(extra)
+        data: Dict[str, Any] = dict(self.context)
+        data["id"] = self.id
+        data["schema"] = self.schema.name
+        data["properties"] = self.properties
         return data
     def to_full_dict(self, matchable: bool = False) -> Dict[str, Any]:
@@ -440,6 +439,28 @@ class EntityProxy(object):
             self.add(prop, values, cleaned=True, quiet=True)
         return self
+    def _checksum_digest(self) -> "_Hash":
+        """Create a SHA1 digest of the entity's ID, schema and properties for
+        change detection. This is returned as a hashlib digest object so that
+        it can be subclassed."""
+        digest = hashlib.sha1()
+        if self.id is not None:
+            digest.update(self.id.encode(HASH_ENCODING))
+        digest.update(self.schema.name.encode(HASH_ENCODING))
+        for prop in sorted(self._properties.keys()):
+            digest.update(prop.encode(HASH_ENCODING))
+            for value in sorted(self._properties[prop]):
+                digest.update(value.encode(HASH_ENCODING))
+                digest.update(b"\x1e")
+            digest.update(b"\x1f")
+        return digest
+    @property
+    def checksum(self) -> str:
+        """A SHA1 checksum hexdigest representing the current state of the
+        entity proxy. This can be used for change detection."""
+        return self._checksum_digest().hexdigest()
     def __getstate__(self) -> Dict[str, Any]:
         data = {slot: getattr(self, slot) for slot in self.__slots__}
         data["schema"] = self.schema.name
@@ -462,14 +483,14 @@ class EntityProxy(object):
         return self._size
     def __hash__(self) -> int:
-        if not self.id:
-            raise RuntimeError("Cannot hash entity without an ID")
+        if self.id is None:
+            raise RuntimeError("Unhashable entity proxy without ID.")
         return hash(self.id)
     def __eq__(self, other: Any) -> bool:
         try:
             if self.id is None or other.id is None:
-                raise RuntimeError("Cannot compare entities without IDs.")
+                raise RuntimeError("Cannot compare entity proxies without IDs.")
             return bool(self.id == other.id)
         except AttributeError:
             return False

followthemoney/schema/Company.yaml CHANGED Viewed

@@ -18,6 +18,11 @@ Company:
     - name
   caption:
     - name
+    - alias
+    - abbreviation
+    - weakAlias
+    - previousName
+    - registrationNumber
   properties:
     jurisdiction:
       label: Jurisdiction

followthemoney/schema/CryptoWallet.yaml CHANGED Viewed

@@ -26,6 +26,10 @@ CryptoWallet:
       maxLength: 128
     privateKey:
       label: Private key
+    accountId:
+      label: Account ID
+      description: Platform-specific user/account identifier
+      type: identifier
     creationDate:
       label: Creation date
       type: date

followthemoney/schema/Image.yaml CHANGED Viewed

@@ -1,4 +1,7 @@
 Image:
+  # This schema defines an image file entity within the FollowTheMoney data model.
+  # If a `checksum` property is present, consider loading it from an Aleph archive
+  # or FtM data lake. Otherwise, use `sourceUrl` to fetch the image directly.
   extends:
     - Document
   label: Image
@@ -23,3 +26,7 @@ Image:
         label: "Images"
       type: entity
       range: Person
+    credit:
+      label: "Credit"
+      description: "The credit or attribution for the image."
+      type: string

followthemoney/schema/LegalEntity.yaml CHANGED Viewed

@@ -17,6 +17,10 @@ LegalEntity:
     - name
   caption:
     - name
+    - alias
+    - abbreviation
+    - weakAlias
+    - previousName
     - email
     - phone
     - registrationNumber
@@ -26,6 +30,12 @@ LegalEntity:
     end:
       - dissolutionDate
   properties:
+    abbreviation:
+      label: Abbreviation
+      type: name
+      description: "Abbreviated name or acronym"
+      # TODO: is un-matchable wise? The idea is to handle it like `weakAlias` rather than `alias`.
+      matchable: false
     email:
       label: E-Mail
       type: email

followthemoney/schema/Organization.yaml CHANGED Viewed

@@ -17,6 +17,11 @@ Organization:
     - name
   caption:
     - name
+    - alias
+    - abbreviation
+    - weakAlias
+    - previousName
+    - registrationNumber
   properties:
     cageCode:
       label: CAGE

followthemoney/schema/Person.yaml CHANGED Viewed

@@ -14,6 +14,10 @@ Person:
     - name
   caption:
     - name
+    - alias
+    - previousName
+    - weakAlias
+    - abbreviation
     - lastName
     - email
     - phone

followthemoney/schema/PublicBody.yaml CHANGED Viewed

@@ -13,5 +13,9 @@ PublicBody:
     - status
   caption:
     - name
+    - alias
+    - abbreviation
+    - weakAlias
+    - previousName
   required:
     - name

followthemoney/schema/Thing.yaml CHANGED Viewed

@@ -24,7 +24,7 @@ Thing:
       label: Country
       type: country
     alias:
-      label: Other name
+      label: Alias
       type: name
     previousName:
       label: Previous name
@@ -32,6 +32,7 @@ Thing:
     weakAlias:
       label: Weak alias
       type: name
+      description: "A relatively broad or generic alias that should not be used for matching in screening systems. It may still may be useful for identification purposes, particularly in confirming a possible match triggered by other identifier information."
       matchable: false
     sourceUrl:
       label: Source link
@@ -55,7 +56,7 @@ Thing:
     wikidataId:
       label: Wikidata ID
       type: identifier
-      format: qid
+      format: wikidata
       maxLength: 32
     keywords:
       label: Keywords

followthemoney/schema.py CHANGED Viewed

@@ -106,7 +106,7 @@ class Schema:
     def __init__(self, model: "Model", name: str, data: SchemaSpec) -> None:
         #: Machine-readable name of the schema, used for identification.
-        self.name = const(name)
+        self.name = name
         self.model = model
         self._label = data.get("label", name)
         self._plural = data.get("plural", self.label)
@@ -191,6 +191,7 @@ class Schema:
         #: inherited from parent schemata.
         self.properties: Dict[str, Property] = {}
         for pname, prop in data.get("properties", {}).items():
+            pname = const(pname)
             self.properties[pname] = Property(self, pname, prop)
     def generate(self, model: "Model") -> None:
@@ -264,6 +265,7 @@ class Schema:
         name = data.get("name")
         if name is None:
             raise InvalidModel("Unnamed reverse: %s" % other)
+        name = const(name)
         prop = self.get(name)
         if prop is None:
@@ -272,7 +274,7 @@ class Schema:
                 "type": registry.entity.name,
                 "reverse": {"name": other.name},
                 "range": other.schema.name,
-                "hidden": data.get("hidden", other.hidden),
+                "hidden": as_bool(data.get("hidden", other.hidden)),
             }
             prop = Property(self, name, spec)
             prop.stub = True
@@ -466,6 +468,18 @@ class Schema:
         data["properties"] = properties
         return data
+    def __reduce__(self) -> Any:
+        return (self._reconstruct, (self.name,))
+    @classmethod
+    def _reconstruct(cls, name: str) -> "Schema":
+        from followthemoney.model import Model
+        schema = Model.instance().get(name)
+        if schema is None:
+            raise InvalidData("Unknown schema: %r" % name)
+        return schema
     def __eq__(self, other: Any) -> bool:
         """Compare two schemata (via hash)."""
         try:

followthemoney/settings.py ADDED Viewed

@@ -0,0 +1,19 @@
+import os
+import requests
+from typing import List
+from rigour.env import env_opt, env_str
+def get_env_list(name: str, default: List[str] = []) -> List[str]:
+    value = env_opt(name)
+    if value is not None:
+        values = value.split(":")
+        if len(values):
+            return values
+    return default
+MODEL_PATH = os.path.join(os.path.dirname(__file__), "schema")
+MODEL_PATH = env_str("FTM_MODEL_PATH", MODEL_PATH)
+USER_AGENT = env_str("FTM_USER_AGENT", requests.utils.default_user_agent())

followthemoney/statement/entity.py CHANGED Viewed

@@ -1,6 +1,6 @@
 from hashlib import sha1
 from collections.abc import Mapping
-from typing import Any, Dict, List, Optional, Set, Type
+from typing import TYPE_CHECKING, Any, Dict, List, Optional, Set, Type
 from typing import Generator, Iterable, Tuple, TypeVar
 from rigour.langs import LangStr
 from rigour.names.pick import pick_lang_name
@@ -10,7 +10,7 @@ from followthemoney.exc import InvalidData
 from followthemoney.schema import Schema
 from followthemoney.types.common import PropertyType
 from followthemoney.property import Property
-from followthemoney.util import gettext
+from followthemoney.util import HASH_ENCODING, gettext
 from followthemoney.proxy import P
 from followthemoney.types import registry
 from followthemoney.value import string_list, Values
@@ -21,6 +21,9 @@ from followthemoney.statement.util import BASE_ID
 SE = TypeVar("SE", bound="StatementEntity")
+if TYPE_CHECKING:
+    from hashlib import _Hash
 class StatementEntity(EntityProxy):
     """An entity object that can link to a set of datasets that it is sourced from."""
@@ -35,7 +38,12 @@ class StatementEntity(EntityProxy):
         "_statements",
     )
-    def __init__(self, dataset: Dataset, data: Dict[str, Any], cleaned: bool = True):
+    def __init__(
+        self,
+        dataset: Dataset,
+        data: Dict[str, Any],
+        cleaned: bool = True,
+    ) -> None:
         data = dict(data or {})
         schema = Model.instance().get(data.pop("schema", None))
         if schema is None:
@@ -76,8 +84,7 @@ class StatementEntity(EntityProxy):
         for stmts in self._statements.values():
             for stmt in stmts:
                 if stmt.entity_id is None and self.id is not None:
-                    stmt.entity_id = self.id
-                    stmt.id = stmt.generate_key()
+                    stmt = stmt.clone(entity_id=self.id)
                 if stmt.id is None:
                     stmt.id = stmt.generate_key()
                 yield stmt
@@ -97,9 +104,9 @@ class StatementEntity(EntityProxy):
             if stmt.first_seen is not None:
                 first_seen.add(stmt.first_seen)
         if self.id is not None:
-            digest = sha1(self.schema.name.encode("utf-8"))
+            digest = sha1(self.schema.name.encode(HASH_ENCODING))
             for id in sorted(ids):
-                digest.update(id.encode("utf-8"))
+                digest.update(id.encode(HASH_ENCODING))
             checksum = digest.hexdigest()
             # This is to make the last_change value stable across
             # serialisation:
@@ -449,6 +456,23 @@ class StatementEntity(EntityProxy):
         data["statements"] = [stmt.to_dict() for stmt in self.statements]
         return data
+    def _checksum_digest(self) -> "_Hash":
+        """Create a SHA1 digest of the entity's ID, schema and properties for
+        change detection. This is returned as a hashlib digest object so that
+        it can be subclassed."""
+        digest = sha1()
+        if self.id is not None:
+            digest.update(self.id.encode(HASH_ENCODING))
+        statement_ids: List[str] = []
+        for stmts in self._statements.values():
+            for stmt in stmts:
+                if stmt.id is not None:
+                    statement_ids.append(stmt.id)
+        for stmt_id in sorted(statement_ids):
+            digest.update(stmt_id.encode(HASH_ENCODING))
+            digest.update(b"\x1e")
+        return digest
     def __len__(self) -> int:
         return len(list(self._iter_stmt())) + 1

followthemoney 4.3.0__py3-none-any.whl → 4.5.0__py3-none-any.whl

followthemoney 4.3.0__py3-none-any.whl → 4.5.0__py3-none-any.whl