PyPI - followthemoney - Versions diffs - 1.3.6__py3-none-any.whl → 3.8.0__py3-none-any.whl - Mend

followthemoney 1.3.6__py3-none-any.whl → 3.8.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (186)

followthemoney/__init__.py +5 -3
followthemoney/cli/__init__.py +17 -0
followthemoney/cli/aggregate.py +56 -0
followthemoney/cli/cli.py +88 -0
followthemoney/cli/exports.py +121 -0
followthemoney/cli/mapping.py +85 -0
followthemoney/cli/sieve.py +67 -0
followthemoney/cli/util.py +142 -0
followthemoney/compare.py +132 -55
followthemoney/exc.py +19 -6
followthemoney/export/common.py +29 -0
followthemoney/export/csv.py +82 -0
followthemoney/export/excel.py +75 -0
followthemoney/export/graph.py +79 -0
followthemoney/export/neo4j.py +182 -0
followthemoney/export/rdf.py +26 -0
followthemoney/graph.py +308 -0
followthemoney/helpers.py +212 -0
followthemoney/mapping/__init__.py +1 -1
followthemoney/mapping/csv.py +67 -35
followthemoney/mapping/entity.py +116 -44
followthemoney/mapping/property.py +90 -44
followthemoney/mapping/query.py +27 -19
followthemoney/mapping/source.py +15 -5
followthemoney/mapping/sql.py +75 -61
followthemoney/messages.py +13 -7
followthemoney/model.py +108 -56
followthemoney/namespace.py +119 -0
followthemoney/offshore.py +48 -0
followthemoney/ontology.py +77 -0
followthemoney/property.py +204 -71
followthemoney/proxy.py +455 -118
followthemoney/rdf.py +9 -0
followthemoney/schema/Address.yaml +78 -0
followthemoney/schema/Airplane.yaml +17 -10
followthemoney/schema/Analyzable.yaml +54 -0
followthemoney/schema/Article.yaml +16 -0
followthemoney/schema/Assessment.yaml +32 -0
followthemoney/schema/Asset.yaml +10 -4
followthemoney/schema/Associate.yaml +41 -0
followthemoney/schema/Audio.yaml +24 -0
followthemoney/schema/BankAccount.yaml +53 -9
followthemoney/schema/Call.yaml +48 -0
followthemoney/schema/CallForTenders.yaml +117 -0
followthemoney/schema/Company.yaml +37 -12
followthemoney/schema/Contract.yaml +41 -7
followthemoney/schema/ContractAward.yaml +30 -11
followthemoney/schema/CourtCase.yaml +16 -10
followthemoney/schema/CourtCaseParty.yaml +17 -6
followthemoney/schema/CryptoWallet.yaml +48 -0
followthemoney/schema/Debt.yaml +37 -0
followthemoney/schema/Directorship.yaml +17 -4
followthemoney/schema/Document.yaml +72 -139
followthemoney/schema/Documentation.yml +38 -0
followthemoney/schema/EconomicActivity.yaml +32 -17
followthemoney/schema/Email.yaml +76 -0
followthemoney/schema/Employment.yaml +39 -0
followthemoney/schema/Event.yaml +35 -3
followthemoney/schema/Family.yaml +41 -0
followthemoney/schema/Folder.yaml +13 -0
followthemoney/schema/HyperText.yaml +21 -0
followthemoney/schema/Identification.yaml +40 -0
followthemoney/schema/Image.yaml +25 -0
followthemoney/schema/Interest.yaml +3 -6
followthemoney/schema/Interval.yaml +56 -5
followthemoney/schema/LegalEntity.yaml +81 -20
followthemoney/schema/License.yaml +7 -3
followthemoney/schema/Membership.yaml +19 -4
followthemoney/schema/Mention.yaml +54 -0
followthemoney/schema/Message.yaml +73 -0
followthemoney/schema/Note.yaml +23 -0
followthemoney/schema/Occupancy.yaml +40 -0
followthemoney/schema/Organization.yaml +38 -3
followthemoney/schema/Ownership.yaml +16 -4
followthemoney/schema/Package.yaml +17 -0
followthemoney/schema/Page.yaml +43 -0
followthemoney/schema/Pages.yaml +23 -0
followthemoney/schema/Passport.yaml +15 -17
followthemoney/schema/Payment.yaml +38 -7
followthemoney/schema/Person.yaml +61 -5
followthemoney/schema/PlainText.yaml +17 -0
followthemoney/schema/Position.yaml +50 -0
followthemoney/schema/Post.yaml +42 -0
followthemoney/schema/Project.yaml +27 -0
followthemoney/schema/ProjectParticipant.yaml +36 -0
followthemoney/schema/PublicBody.yaml +14 -3
followthemoney/schema/RealEstate.yaml +19 -3
followthemoney/schema/Representation.yaml +17 -6
followthemoney/schema/Sanction.yaml +44 -20
followthemoney/schema/Security.yaml +59 -0
followthemoney/schema/Similar.yaml +37 -0
followthemoney/schema/Succession.yaml +36 -0
followthemoney/schema/Table.yaml +32 -0
followthemoney/schema/TaxRoll.yaml +27 -9
followthemoney/schema/Thing.yaml +69 -13
followthemoney/schema/Trip.yaml +42 -0
followthemoney/schema/UnknownLink.yaml +17 -6
followthemoney/schema/UserAccount.yaml +44 -0
followthemoney/schema/Value.yaml +5 -1
followthemoney/schema/Vehicle.yaml +25 -8
followthemoney/schema/Vessel.yaml +18 -10
followthemoney/schema/Video.yaml +20 -0
followthemoney/schema/Workbook.yaml +18 -0
followthemoney/schema.py +406 -135
followthemoney/translations/ar/LC_MESSAGES/followthemoney.mo +0 -0
followthemoney/translations/ar/LC_MESSAGES/followthemoney.po +2900 -787
followthemoney/translations/bs/LC_MESSAGES/followthemoney.mo +0 -0
followthemoney/translations/bs/LC_MESSAGES/followthemoney.po +2108 -520
followthemoney/translations/de/LC_MESSAGES/followthemoney.mo +0 -0
followthemoney/translations/de/LC_MESSAGES/followthemoney.po +2902 -782
followthemoney/translations/es/LC_MESSAGES/followthemoney.mo +0 -0
followthemoney/translations/es/LC_MESSAGES/followthemoney.po +2893 -779
followthemoney/translations/fr/LC_MESSAGES/followthemoney.mo +0 -0
followthemoney/translations/fr/LC_MESSAGES/followthemoney.po +4362 -0
followthemoney/translations/fr/followthemoney.po +3861 -0
followthemoney/translations/messages.pot +3021 -725
followthemoney/translations/nb/LC_MESSAGES/followthemoney.mo +0 -0
followthemoney/translations/nb/LC_MESSAGES/followthemoney.po +3778 -0
followthemoney/translations/nl/LC_MESSAGES/followthemoney.mo +0 -0
followthemoney/translations/nl/LC_MESSAGES/followthemoney.po +3837 -0
followthemoney/translations/pt_BR/LC_MESSAGES/followthemoney.mo +0 -0
followthemoney/translations/pt_BR/LC_MESSAGES/followthemoney.po +3784 -0
followthemoney/translations/ru/LC_MESSAGES/followthemoney.mo +0 -0
followthemoney/translations/ru/LC_MESSAGES/followthemoney.po +2837 -539
followthemoney/translations/ru/followthemoney.po +4221 -0
followthemoney/translations/tr/LC_MESSAGES/followthemoney.mo +0 -0
followthemoney/translations/tr/LC_MESSAGES/followthemoney.po +2073 -491
followthemoney/types/__init__.py +35 -17
followthemoney/types/address.py +41 -21
followthemoney/types/checksum.py +25 -0
followthemoney/types/common.py +233 -88
followthemoney/types/country.py +89 -56
followthemoney/types/date.py +59 -76
followthemoney/types/email.py +66 -35
followthemoney/types/entity.py +66 -13
followthemoney/types/gender.py +66 -0
followthemoney/types/iban.py +47 -28
followthemoney/types/identifier.py +49 -22
followthemoney/types/ip.py +35 -21
followthemoney/types/json.py +58 -0
followthemoney/types/language.py +124 -37
followthemoney/types/mimetype.py +44 -0
followthemoney/types/name.py +56 -12
followthemoney/types/number.py +30 -0
followthemoney/types/phone.py +92 -34
followthemoney/types/registry.py +52 -0
followthemoney/types/string.py +43 -0
followthemoney/types/topic.py +94 -0
followthemoney/types/url.py +39 -17
followthemoney/util.py +139 -45
followthemoney-3.8.0.dist-info/METADATA +153 -0
followthemoney-3.8.0.dist-info/RECORD +157 -0
{followthemoney-1.3.6.dist-info → followthemoney-3.8.0.dist-info}/WHEEL +1 -2
followthemoney-3.8.0.dist-info/entry_points.txt +17 -0
followthemoney-1.3.6.dist-info/LICENSE.txt → followthemoney-3.8.0.dist-info/licenses/LICENSE +1 -1
followthemoney/link.py +0 -75
followthemoney/schema/Associate.yml +0 -19
followthemoney/schema/Family.yml +0 -19
followthemoney/schema/Land.yml +0 -9
followthemoney/schema/Relationship.yaml +0 -26
followthemoney/types/domain.py +0 -50
followthemoney-1.3.6.dist-info/DESCRIPTION.rst +0 -3
followthemoney-1.3.6.dist-info/METADATA +0 -39
followthemoney-1.3.6.dist-info/RECORD +0 -108
followthemoney-1.3.6.dist-info/entry_points.txt +0 -3
followthemoney-1.3.6.dist-info/metadata.json +0 -1
followthemoney-1.3.6.dist-info/namespace_packages.txt +0 -1
followthemoney-1.3.6.dist-info/top_level.txt +0 -3
ns/ontology.py +0 -128
tests/types/test_addresses.py +0 -24
tests/types/test_common.py +0 -27
tests/types/test_countries.py +0 -21
tests/types/test_dates.py +0 -72
tests/types/test_domains.py +0 -23
tests/types/test_emails.py +0 -30
tests/types/test_entity.py +0 -16
tests/types/test_iban.py +0 -109
tests/types/test_identifiers.py +0 -25
tests/types/test_ip.py +0 -26
tests/types/test_languages.py +0 -20
tests/types/test_names.py +0 -33
tests/types/test_phones.py +0 -24
tests/types/test_registry.py +0 -14
tests/types/test_urls.py +0 -23
{ns → followthemoney/export}/__init__.py +0 -0
/tests/types/__init__.py → /followthemoney/py.typed +0 -0

followthemoney/types/iban.py CHANGED Viewed

@@ -1,39 +1,58 @@
-from rdflib import URIRef
-from normality import stringify
-from schwifty import IBAN
+from typing import Optional, TYPE_CHECKING
+from rigour.ids import IBAN
 from followthemoney.types.common import PropertyType
+from followthemoney.rdf import URIRef, Identifier
+from followthemoney.util import sanitize_text, defer as _
+if TYPE_CHECKING:
+    from followthemoney.proxy import EntityProxy
 class IbanType(PropertyType):
-    name = 'iban'
-    group = 'ibans'
-    prefix = 'iban'
-    strong = False
-    def validate(self, iban, **kwargs):
-        iban = stringify(iban)
-        if iban is None:
-            return False
-        try:
-            IBAN(iban)
-            return True
-        except ValueError as ex:
-            print(ex)
+    """An international bank account number, as defined in ISO 13616. IBANs are
+    managed by SWIFT used in the European SEPA payment system.
+    A notable aspect of IBANs is that they share a country prefix and validation
+    mechanism, but the specific length of an IBAN is dependent on the country
+    code defined in the first two characters: `NO8330001234567` and
+    `CY21002001950000357001234567` are both valid values."""
+    name = "iban"
+    group = "ibans"
+    label = _("IBAN")
+    plural = _("IBANs")
+    matchable = True
+    pivot = True
+    max_length = 64
+    def validate(
+        self, value: str, fuzzy: bool = False, format: Optional[str] = None
+    ) -> bool:
+        text = sanitize_text(value)
+        if text is None:
             return False
-    def clean_text(self, text, **kwargs):
+        return IBAN.is_valid(text)
+    def clean_text(
+        self,
+        text: str,
+        fuzzy: bool = False,
+        format: Optional[str] = None,
+        proxy: Optional["EntityProxy"] = None,
+    ) -> Optional[str]:
         """Create a more clean, but still user-facing version of an
         instance of the type."""
-        return text.replace(" ", "").upper()
+        return IBAN.normalize(text)
+    def country_hint(self, value: str) -> str:
+        return value[:2].lower()
-    def specificity(self, value):
-        return 1
+    def rdf(self, value: str) -> Identifier:
+        return URIRef(self.node_id(value))
-    def country_hint(self, value):
-        value = stringify(value)
-        if value is not None:
-            return value[:2].lower()
+    def node_id(self, value: str) -> str:
+        return f"iban:{value.upper()}"
-    def rdf(self, value):
-        return URIRef('iban:%s' % value)
+    def caption(self, value: str) -> str:
+        return IBAN.format(value)

followthemoney/types/identifier.py CHANGED Viewed

@@ -1,34 +1,61 @@
 import re
-from normality import normalize
+from typing import Optional, TYPE_CHECKING
+from rigour.ids import get_identifier_format_names, get_identifier_format
 from followthemoney.types.common import PropertyType
+from followthemoney.util import dampen, shortest, longest
+from followthemoney.util import defer as _
+if TYPE_CHECKING:
+    from followthemoney.proxy import EntityProxy
 class IdentifierType(PropertyType):
-    """Used for registration numbers, codes etc."""
-    COMPARE_CLEAN = re.compile('[\W_]+')
-    name = 'identifier'
-    group = 'identifiers'
-    prefix = 'ident'
-    strong = False
-    def normalize(self, text, **kwargs):
-        """Normalize for comparison."""
-        ids = super(IdentifierType, self).normalize(text, **kwargs)
-        return [normalize(i) for i in ids]
-    def clean_compare(self, value):
+    """Used for registration numbers and other codes assigned by an authority
+    to identify an entity. This might include tax identifiers and statistical
+    codes.
+    Since identifiers are high-value criteria when comparing two entities, numbers
+    should only be modelled as identifiers if they are long enough to be meaningful.
+    Four- or five-digit industry classifiers create more noise than value."""
+    COMPARE_CLEAN = re.compile(r"[\W_]+")
+    name = "identifier"
+    group = "identifiers"
+    label = _("Identifier")
+    plural = _("Identifiers")
+    matchable = True
+    pivot = True
+    max_length = 64
+    def clean_text(
+        self,
+        text: str,
+        fuzzy: bool = False,
+        format: Optional[str] = None,
+        proxy: Optional["EntityProxy"] = None,
+    ) -> Optional[str]:
+        if format in get_identifier_format_names():
+            format_ = get_identifier_format(format)
+            return format_.normalize(text)
+        return text
+    def clean_compare(self, value: str) -> str:
         # TODO: should this be used for normalization?
-        value = self.COMPARE_CLEAN.sub('', value)
+        value = self.COMPARE_CLEAN.sub("", value)
         return value.lower()
-    def compare(self, left, right):
+    def compare(self, left: str, right: str) -> float:
         left = self.clean_compare(left)
         right = self.clean_compare(right)
         if left == right:
-            return .9
-        if left in right:
-            return .7
-        if right in left:
-            return .7
-        return 0
+            return 1.0
+        elif left in right or right in left:
+            return len(shortest(left, right)) / len(longest(left, right))
+        return 0.0
+    def _specificity(self, value: str) -> float:
+        return dampen(4, 10, value)
+    def node_id(self, value: str) -> str:
+        return f"id:{value}"

followthemoney/types/ip.py CHANGED Viewed

@@ -1,36 +1,50 @@
-from rdflib import URIRef
-from normality import stringify
+from typing import Optional, TYPE_CHECKING
 from ipaddress import ip_address
 from followthemoney.types.common import PropertyType
+from followthemoney.rdf import URIRef, Identifier
+from followthemoney.util import defer as _
+if TYPE_CHECKING:
+    from followthemoney.proxy import EntityProxy
 class IpType(PropertyType):
-    name = 'ip'
-    group = 'ips'
-    prefix = 'ip'
-    strong = False
+    """Internet protocol addresses. This supports both addresses used
+    by the protocol versions 4 (e.g. `192.168.1.143`) and 6
+    (e.g. `0:0:0:0:0:ffff:c0a8:18f`)."""
+    name = "ip"
+    group = "ips"
+    label = _("IP-Address")
+    plural = _("IP-Addresses")
+    matchable = True
+    pivot = True
+    max_length = 64
-    def validate(self, ip, **kwargs):
+    def validate(
+        self, value: str, fuzzy: bool = False, format: Optional[str] = None
+    ) -> bool:
         """Check to see if this is a valid ip address."""
         try:
-            ip_address(ip)
+            ip_address(value)
             return True
         except ValueError:
             return False
-    def clean(self, text, **kwargs):
+    def clean_text(
+        self,
+        text: str,
+        fuzzy: bool = False,
+        format: Optional[str] = None,
+        proxy: Optional["EntityProxy"] = None,
+    ) -> Optional[str]:
         """Create a more clean, but still user-facing version of an
         instance of the type."""
-        text = stringify(text)
-        if text is not None:
-            try:
-                return str(ip_address(text))
-            except ValueError:
-                return None
-    def specificity(self, value):
-        return 1
-    def rdf(self, value):
-        return URIRef('ip:%s' % value)
+        try:
+            return str(ip_address(text))
+        except ValueError:
+            return None
+    def rdf(self, value: str) -> Identifier:
+        return URIRef(f"ip:{value}")

followthemoney/types/json.py ADDED Viewed

@@ -0,0 +1,58 @@
+import json
+from typing import Any, Optional, Sequence, TYPE_CHECKING
+from banal import ensure_list
+from followthemoney.types.common import PropertyType
+from followthemoney.util import sanitize_text, defer as _
+if TYPE_CHECKING:
+    from followthemoney.proxy import EntityProxy
+class JsonType(PropertyType):
+    """An encoded JSON object. This is used to store raw HTTP headers for documents
+    and some other edge cases. It's a really bad idea and we should try to get rid
+    of JSON properties."""
+    name = "json"
+    group = None
+    label = _("Nested data")
+    plural = _("Nested data")
+    matchable = False
+    def pack(self, obj: Any) -> Optional[str]:
+        """Encode a given value to JSON."""
+        # TODO: use a JSON encoder that handles more types?
+        if obj is None:
+            return None
+        return json.dumps(obj)
+    def unpack(self, obj: str) -> Any:
+        """Decode a given JSON object."""
+        try:
+            return json.loads(obj)
+        except Exception:
+            return obj
+    def clean(
+        self,
+        raw: Any,
+        fuzzy: bool = False,
+        format: Optional[str] = None,
+        proxy: Optional["EntityProxy"] = None,
+    ) -> Optional[str]:
+        if not isinstance(raw, str):
+            return self.pack(raw)
+        else:
+            return sanitize_text(raw)
+    def join(self, values: Sequence[str]) -> str:
+        """Turn multiple values into a JSON array."""
+        values = [self.unpack(v) for v in ensure_list(values)]
+        data = self.pack(values)
+        if data is None:
+            return "[]"
+        return data
+    def node_id(self, value: str) -> None:
+        return None

followthemoney/types/language.py CHANGED Viewed

@@ -1,37 +1,124 @@
-from rdflib import URIRef
-from normality import stringify
-from followthemoney.types.common import PropertyType
-from followthemoney.util import get_locale
-class LanguageType(PropertyType):
-    name = 'language'
-    group = 'languages'
-    prefix = 'lang'
-    def __init__(self, *args):
-        self._names = {}
-    @property
-    def names(self):
-        locale = get_locale()
-        if locale not in self._names:
-            self._names[locale] = {}
-            for code, label in locale.languages.items():
-                self._names[locale][code.lower()] = label
-        return self._names[locale]
-    def validate(self, text, **kwargs):
-        text = stringify(text)
-        if text is None:
-            return False
-        return text.lower() in self.names
-    def clean_text(self, text, **kwargs):
-        code = text.lower().strip()
-        if code in self.names:
-            return code
-    def rdf(self, value):
-        return URIRef('iso-639:%s' % value)
+from typing import Optional, TYPE_CHECKING
+from babel.core import Locale
+from rigour.langs import iso_639_alpha3
+from followthemoney.types.common import EnumType, EnumValues
+from followthemoney.rdf import URIRef, Identifier
+from followthemoney.util import defer as _, gettext
+from followthemoney.util import get_env_list
+if TYPE_CHECKING:
+    from followthemoney.proxy import EntityProxy
+class LanguageType(EnumType):
+    """A human written language. This list is arbitrarily limited for some
+    weird upstream technical reasons, but we'll happily accept pull requests
+    for additional languages once there is a specific need for them to be
+    supported."""
+    name = "language"
+    group = "languages"
+    label = _("Language")
+    plural = _("Languages")
+    matchable = False
+    max_length = 16
+    # Language whitelist
+    LANGUAGES = [
+        "eng",
+        "fra",
+        "deu",
+        "rus",
+        "spa",
+        "nld",
+        "ron",
+        "kat",
+        "ara",
+        "tur",
+        "ltz",
+        "ell",
+        "lit",
+        "ukr",
+        "zho",
+        "bel",
+        "bul",
+        "bos",
+        "jpn",
+        "ces",
+        "lav",
+        "por",
+        "pol",
+        "hye",
+        "hrv",
+        "hin",
+        "heb",
+        "uzb",
+        "mon",
+        "urd",
+        "sqi",
+        "kor",
+        "isl",
+        "ita",
+        "est",
+        "nor",
+        "fas",
+        "swa",
+        "slv",
+        "slk",
+        "aze",
+        "tgk",
+        "kaz",
+        "tuk",
+        "kir",
+        "hun",
+        "dan",
+        "afr",
+        "swe",
+        "srp",
+        "ind",
+        "kan",
+        "mkd",
+        "mlt",
+        "msa",
+        "fin",
+        "cat",
+        "nep",
+        "tgl",
+        "fil",
+        "mya",
+        "khm",
+        "cnr",
+    ]
+    LANGUAGES = get_env_list("FTM_LANGUAGES", LANGUAGES)
+    LANGUAGES = [lang.lower().strip() for lang in LANGUAGES]
+    def _locale_names(self, locale: Locale) -> EnumValues:
+        names = {
+            "ara": gettext("Arabic"),
+            "nor": gettext("Norwegian"),
+            "cnr": gettext("Montenegrin"),
+        }
+        for lang in self.LANGUAGES:
+            if lang not in names:
+                names[lang] = lang
+        for code, label in locale.languages.items():
+            code = iso_639_alpha3(code)
+            if code in self.LANGUAGES and names[code] == code:
+                names[code] = label
+        return names
+    def clean_text(
+        self,
+        text: str,
+        fuzzy: bool = False,
+        format: Optional[str] = None,
+        proxy: Optional["EntityProxy"] = None,
+    ) -> Optional[str]:
+        code = iso_639_alpha3(text)
+        if code not in self.LANGUAGES:
+            return None
+        return code
+    def rdf(self, value: str) -> Identifier:
+        return URIRef(f"iso-639:{value}")

followthemoney/types/mimetype.py ADDED Viewed

@@ -0,0 +1,44 @@
+from typing import Optional, TYPE_CHECKING
+from rigour.mime import normalize_mimetype, parse_mimetype
+from rigour.mime import DEFAULT
+from followthemoney.types.common import PropertyType
+from followthemoney.rdf import URIRef, Identifier
+from followthemoney.util import defer as _
+if TYPE_CHECKING:
+    from followthemoney.proxy import EntityProxy
+class MimeType(PropertyType):
+    """A MIME media type are a specification of a content type on a network.
+    Each MIME type is assigned by IANA and consists of two parts: the type
+    and sub-type. Common examples are: `text/plain`, `application/json` and
+    `application/pdf`.
+    MIME type properties do not contain parameters as used in HTTP headers,
+    like `charset=UTF-8`."""
+    name = "mimetype"
+    group = "mimetypes"
+    label = _("MIME-Type")
+    plural = _("MIME-Types")
+    matchable = False
+    def clean_text(
+        self,
+        text: str,
+        fuzzy: bool = False,
+        format: Optional[str] = None,
+        proxy: Optional["EntityProxy"] = None,
+    ) -> Optional[str]:
+        text = normalize_mimetype(text)
+        if text != DEFAULT:
+            return text
+        return None
+    def rdf(self, value: str) -> Identifier:
+        return URIRef(f"urn:mimetype:{value}")
+    def caption(self, value: str) -> str:
+        return parse_mimetype(value).label or value

followthemoney/types/name.py CHANGED Viewed

@@ -1,24 +1,68 @@
-from Levenshtein import jaro_winkler
+from typing import TYPE_CHECKING, Optional, Sequence
+from normality import slugify
 from normality.cleaning import collapse_spaces, strip_quotes
+from rigour.env import MAX_NAME_LENGTH
+from rigour.names import pick_name
+from rigour.text.distance import levenshtein_similarity
+from fingerprints.cleanup import clean_name_light
 from followthemoney.types.common import PropertyType
 from followthemoney.util import dampen
+from followthemoney.util import defer as _
+if TYPE_CHECKING:
+    from followthemoney.proxy import EntityProxy
 class NameType(PropertyType):
-    name = 'name'
-    group = 'names'
-    prefix = 'n'
+    """A name used for a person or company. This is assumed to be as complete
+    a name as available - when a first name, family name or patronymic are given
+    separately, these are stored to string-type properties instead.
+    No validation rules apply, and things having multiple names must be considered
+    a perfectly ordinary case."""
+    name = "name"
+    group = "names"
+    label = _("Name")
+    plural = _("Names")
+    matchable = True
+    pivot = True
+    max_length = MAX_NAME_LENGTH
-    def clean_text(self, name, **kwargs):
+    def clean_text(
+        self,
+        text: str,
+        fuzzy: bool = False,
+        format: Optional[str] = None,
+        proxy: Optional["EntityProxy"] = None,
+    ) -> Optional[str]:
         """Basic clean-up."""
-        name = strip_quotes(name)
-        name = collapse_spaces(name)
-        return name
+        name = strip_quotes(text)
+        return collapse_spaces(name)
-    def specificity(self, value):
+    def pick(self, values: Sequence[str]) -> Optional[str]:
+        """From a set of names, pick the most plausible user-facing one."""
+        return pick_name(list(values))
+    def _specificity(self, value: str) -> float:
         # TODO: insert artificial intelligence here.
-        return dampen(3, 50, value) * .8
+        return dampen(3, 50, value)
+    def compare(self, left: str, right: str) -> float:
+        """Compare two names for similarity."""
+        left_clean = clean_name_light(left)
+        right_clean = clean_name_light(right)
+        if left_clean is None or right_clean is None:
+            return 0.0
+        return levenshtein_similarity(
+            left_clean,
+            right_clean,
+            max_length=self.max_length,
+        )
-    def compare(self, left, right):
-        return jaro_winkler(left, right)
+    def node_id(self, value: str) -> Optional[str]:
+        slug = slugify(value)
+        if slug is None:
+            return None
+        return f"name:{slug}"

followthemoney/types/number.py ADDED Viewed

@@ -0,0 +1,30 @@
+import re
+from typing import Optional
+from followthemoney.types.common import PropertyType
+from followthemoney.util import defer as _
+class NumberType(PropertyType):
+    """A numeric value, like the size of a piece of land, or the value of a
+    contract. Since all property values in FtM are strings, this is also a
+    string and there is no specified format (e.g. `1,000.00` vs. `1.000,00`).
+    In the future we might want to enable annotations for format, units, or
+    even to introduce a separate property type for monetary values."""
+    CAST_RE = re.compile(r"[^0-9\-\.]")
+    name = "number"
+    label = _("Number")
+    plural = _("Numbers")
+    matchable = False
+    def node_id(self, value: str) -> None:
+        return None
+    def to_number(self, value: str) -> Optional[float]:
+        try:
+            value = self.CAST_RE.sub("", value)
+            return float(value)
+        except Exception:
+            return None

followthemoney 1.3.6__py3-none-any.whl → 3.8.0__py3-none-any.whl

followthemoney 1.3.6__py3-none-any.whl → 3.8.0__py3-none-any.whl