PyPI - followthemoney - Versions diffs - 1.3.6__py3-none-any.whl → 3.8.0__py3-none-any.whl - Mend

followthemoney 1.3.6__py3-none-any.whl → 3.8.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (186) hide show

followthemoney/__init__.py +5 -3
followthemoney/cli/__init__.py +17 -0
followthemoney/cli/aggregate.py +56 -0
followthemoney/cli/cli.py +88 -0
followthemoney/cli/exports.py +121 -0
followthemoney/cli/mapping.py +85 -0
followthemoney/cli/sieve.py +67 -0
followthemoney/cli/util.py +142 -0
followthemoney/compare.py +132 -55
followthemoney/exc.py +19 -6
followthemoney/export/common.py +29 -0
followthemoney/export/csv.py +82 -0
followthemoney/export/excel.py +75 -0
followthemoney/export/graph.py +79 -0
followthemoney/export/neo4j.py +182 -0
followthemoney/export/rdf.py +26 -0
followthemoney/graph.py +308 -0
followthemoney/helpers.py +212 -0
followthemoney/mapping/__init__.py +1 -1
followthemoney/mapping/csv.py +67 -35
followthemoney/mapping/entity.py +116 -44
followthemoney/mapping/property.py +90 -44
followthemoney/mapping/query.py +27 -19
followthemoney/mapping/source.py +15 -5
followthemoney/mapping/sql.py +75 -61
followthemoney/messages.py +13 -7
followthemoney/model.py +108 -56
followthemoney/namespace.py +119 -0
followthemoney/offshore.py +48 -0
followthemoney/ontology.py +77 -0
followthemoney/property.py +204 -71
followthemoney/proxy.py +455 -118
followthemoney/rdf.py +9 -0
followthemoney/schema/Address.yaml +78 -0
followthemoney/schema/Airplane.yaml +17 -10
followthemoney/schema/Analyzable.yaml +54 -0
followthemoney/schema/Article.yaml +16 -0
followthemoney/schema/Assessment.yaml +32 -0
followthemoney/schema/Asset.yaml +10 -4
followthemoney/schema/Associate.yaml +41 -0
followthemoney/schema/Audio.yaml +24 -0
followthemoney/schema/BankAccount.yaml +53 -9
followthemoney/schema/Call.yaml +48 -0
followthemoney/schema/CallForTenders.yaml +117 -0
followthemoney/schema/Company.yaml +37 -12
followthemoney/schema/Contract.yaml +41 -7
followthemoney/schema/ContractAward.yaml +30 -11
followthemoney/schema/CourtCase.yaml +16 -10
followthemoney/schema/CourtCaseParty.yaml +17 -6
followthemoney/schema/CryptoWallet.yaml +48 -0
followthemoney/schema/Debt.yaml +37 -0
followthemoney/schema/Directorship.yaml +17 -4
followthemoney/schema/Document.yaml +72 -139
followthemoney/schema/Documentation.yml +38 -0
followthemoney/schema/EconomicActivity.yaml +32 -17
followthemoney/schema/Email.yaml +76 -0
followthemoney/schema/Employment.yaml +39 -0
followthemoney/schema/Event.yaml +35 -3
followthemoney/schema/Family.yaml +41 -0
followthemoney/schema/Folder.yaml +13 -0
followthemoney/schema/HyperText.yaml +21 -0
followthemoney/schema/Identification.yaml +40 -0
followthemoney/schema/Image.yaml +25 -0
followthemoney/schema/Interest.yaml +3 -6
followthemoney/schema/Interval.yaml +56 -5
followthemoney/schema/LegalEntity.yaml +81 -20
followthemoney/schema/License.yaml +7 -3
followthemoney/schema/Membership.yaml +19 -4
followthemoney/schema/Mention.yaml +54 -0
followthemoney/schema/Message.yaml +73 -0
followthemoney/schema/Note.yaml +23 -0
followthemoney/schema/Occupancy.yaml +40 -0
followthemoney/schema/Organization.yaml +38 -3
followthemoney/schema/Ownership.yaml +16 -4
followthemoney/schema/Package.yaml +17 -0
followthemoney/schema/Page.yaml +43 -0
followthemoney/schema/Pages.yaml +23 -0
followthemoney/schema/Passport.yaml +15 -17
followthemoney/schema/Payment.yaml +38 -7
followthemoney/schema/Person.yaml +61 -5
followthemoney/schema/PlainText.yaml +17 -0
followthemoney/schema/Position.yaml +50 -0
followthemoney/schema/Post.yaml +42 -0
followthemoney/schema/Project.yaml +27 -0
followthemoney/schema/ProjectParticipant.yaml +36 -0
followthemoney/schema/PublicBody.yaml +14 -3
followthemoney/schema/RealEstate.yaml +19 -3
followthemoney/schema/Representation.yaml +17 -6
followthemoney/schema/Sanction.yaml +44 -20
followthemoney/schema/Security.yaml +59 -0
followthemoney/schema/Similar.yaml +37 -0
followthemoney/schema/Succession.yaml +36 -0
followthemoney/schema/Table.yaml +32 -0
followthemoney/schema/TaxRoll.yaml +27 -9
followthemoney/schema/Thing.yaml +69 -13
followthemoney/schema/Trip.yaml +42 -0
followthemoney/schema/UnknownLink.yaml +17 -6
followthemoney/schema/UserAccount.yaml +44 -0
followthemoney/schema/Value.yaml +5 -1
followthemoney/schema/Vehicle.yaml +25 -8
followthemoney/schema/Vessel.yaml +18 -10
followthemoney/schema/Video.yaml +20 -0
followthemoney/schema/Workbook.yaml +18 -0
followthemoney/schema.py +406 -135
followthemoney/translations/ar/LC_MESSAGES/followthemoney.mo +0 -0
followthemoney/translations/ar/LC_MESSAGES/followthemoney.po +2900 -787
followthemoney/translations/bs/LC_MESSAGES/followthemoney.mo +0 -0
followthemoney/translations/bs/LC_MESSAGES/followthemoney.po +2108 -520
followthemoney/translations/de/LC_MESSAGES/followthemoney.mo +0 -0
followthemoney/translations/de/LC_MESSAGES/followthemoney.po +2902 -782
followthemoney/translations/es/LC_MESSAGES/followthemoney.mo +0 -0
followthemoney/translations/es/LC_MESSAGES/followthemoney.po +2893 -779
followthemoney/translations/fr/LC_MESSAGES/followthemoney.mo +0 -0
followthemoney/translations/fr/LC_MESSAGES/followthemoney.po +4362 -0
followthemoney/translations/fr/followthemoney.po +3861 -0
followthemoney/translations/messages.pot +3021 -725
followthemoney/translations/nb/LC_MESSAGES/followthemoney.mo +0 -0
followthemoney/translations/nb/LC_MESSAGES/followthemoney.po +3778 -0
followthemoney/translations/nl/LC_MESSAGES/followthemoney.mo +0 -0
followthemoney/translations/nl/LC_MESSAGES/followthemoney.po +3837 -0
followthemoney/translations/pt_BR/LC_MESSAGES/followthemoney.mo +0 -0
followthemoney/translations/pt_BR/LC_MESSAGES/followthemoney.po +3784 -0
followthemoney/translations/ru/LC_MESSAGES/followthemoney.mo +0 -0
followthemoney/translations/ru/LC_MESSAGES/followthemoney.po +2837 -539
followthemoney/translations/ru/followthemoney.po +4221 -0
followthemoney/translations/tr/LC_MESSAGES/followthemoney.mo +0 -0
followthemoney/translations/tr/LC_MESSAGES/followthemoney.po +2073 -491
followthemoney/types/__init__.py +35 -17
followthemoney/types/address.py +41 -21
followthemoney/types/checksum.py +25 -0
followthemoney/types/common.py +233 -88
followthemoney/types/country.py +89 -56
followthemoney/types/date.py +59 -76
followthemoney/types/email.py +66 -35
followthemoney/types/entity.py +66 -13
followthemoney/types/gender.py +66 -0
followthemoney/types/iban.py +47 -28
followthemoney/types/identifier.py +49 -22
followthemoney/types/ip.py +35 -21
followthemoney/types/json.py +58 -0
followthemoney/types/language.py +124 -37
followthemoney/types/mimetype.py +44 -0
followthemoney/types/name.py +56 -12
followthemoney/types/number.py +30 -0
followthemoney/types/phone.py +92 -34
followthemoney/types/registry.py +52 -0
followthemoney/types/string.py +43 -0
followthemoney/types/topic.py +94 -0
followthemoney/types/url.py +39 -17
followthemoney/util.py +139 -45
followthemoney-3.8.0.dist-info/METADATA +153 -0
followthemoney-3.8.0.dist-info/RECORD +157 -0
{followthemoney-1.3.6.dist-info → followthemoney-3.8.0.dist-info}/WHEEL +1 -2
followthemoney-3.8.0.dist-info/entry_points.txt +17 -0
followthemoney-1.3.6.dist-info/LICENSE.txt → followthemoney-3.8.0.dist-info/licenses/LICENSE +1 -1
followthemoney/link.py +0 -75
followthemoney/schema/Associate.yml +0 -19
followthemoney/schema/Family.yml +0 -19
followthemoney/schema/Land.yml +0 -9
followthemoney/schema/Relationship.yaml +0 -26
followthemoney/types/domain.py +0 -50
followthemoney-1.3.6.dist-info/DESCRIPTION.rst +0 -3
followthemoney-1.3.6.dist-info/METADATA +0 -39
followthemoney-1.3.6.dist-info/RECORD +0 -108
followthemoney-1.3.6.dist-info/entry_points.txt +0 -3
followthemoney-1.3.6.dist-info/metadata.json +0 -1
followthemoney-1.3.6.dist-info/namespace_packages.txt +0 -1
followthemoney-1.3.6.dist-info/top_level.txt +0 -3
ns/ontology.py +0 -128
tests/types/test_addresses.py +0 -24
tests/types/test_common.py +0 -27
tests/types/test_countries.py +0 -21
tests/types/test_dates.py +0 -72
tests/types/test_domains.py +0 -23
tests/types/test_emails.py +0 -30
tests/types/test_entity.py +0 -16
tests/types/test_iban.py +0 -109
tests/types/test_identifiers.py +0 -25
tests/types/test_ip.py +0 -26
tests/types/test_languages.py +0 -20
tests/types/test_names.py +0 -33
tests/types/test_phones.py +0 -24
tests/types/test_registry.py +0 -14
tests/types/test_urls.py +0 -23
{ns → followthemoney/export}/__init__.py +0 -0
/tests/types/__init__.py → /followthemoney/py.typed +0 -0

followthemoney/types/phone.py CHANGED Viewed

@@ -1,31 +1,54 @@
-from rdflib import URIRef
-from banal import ensure_list
-from phonenumbers import geocoder
+from typing import Iterable, Optional, TYPE_CHECKING
 from phonenumbers import parse as parse_number
-from phonenumbers import is_possible_number, is_valid_number, format_number
-from phonenumbers import PhoneNumberFormat
-from phonenumbers.phonenumberutil import NumberParseException
+from phonenumbers import is_valid_number, format_number
+from phonenumbers import PhoneNumber, PhoneNumberFormat
+from phonenumbers.phonenumberutil import region_code_for_number, NumberParseException
 from followthemoney.types.common import PropertyType
+from followthemoney.rdf import URIRef, Identifier
+from followthemoney.util import defer as _
+from followthemoney.util import dampen
+if TYPE_CHECKING:
+    from followthemoney.proxy import EntityProxy
+# TODO: for json schema export
+# https://stackoverflow.com/questions/6478875/regular-expression-matching-e-164-formatted-phone-numbers
 class PhoneType(PropertyType):
-    name = 'phone'
-    group = 'phones'
-    prefix = 'tel'
-    strong = False
-    def _clean_countries(self, countries, country):
-        result = set([None])
-        countries = ensure_list(countries)
-        countries.extend(ensure_list(country))
-        for country in countries:
-            if isinstance(country, str):
-                country = country.strip().upper()
-                result.add(country)
-        return result
-    def clean_text(self, number, countries=None, country=None, **kwargs):
+    """A phone number in E.164 format. This means that phone numbers always
+    include an international country prefix (e.g. `+38760183628`). The
+    cleaning and validation functions for this try to be smart about it by
+    accepting a list of countries as an argument in order to add the number
+    prefix.
+    When adding a property of this type to an entity, any country-type properties
+    defined for the entity are considered for validation. That means that adding a
+    phone number to an entity before adding a country can have a different
+    validation outcome from doing the two operations the other way around. Always
+    define the country first."""
+    name = "phone"
+    group = "phones"
+    label = _("Phone number")
+    plural = _("Phone numbers")
+    matchable = True
+    pivot = True
+    max_length = 64
+    def _clean_countries(
+        self, proxy: Optional["EntityProxy"]
+    ) -> Iterable[Optional[str]]:
+        yield None
+        if proxy is not None:
+            for country in proxy.countries:
+                yield country.upper()
+    def _parse_number(
+        self, number: str, proxy: Optional["EntityProxy"] = None
+    ) -> Iterable[PhoneNumber]:
         """Parse a phone number and return in international format.
         If no valid phone number can be detected, None is returned. If
@@ -34,24 +57,59 @@ class PhoneType(PropertyType):
         https://github.com/daviddrysdale/python-phonenumbers
         """
-        for code in self._clean_countries(countries, country):
+        for code in self._clean_countries(proxy):
             try:
-                num = parse_number(number, code)
-                if is_possible_number(num):
-                    if is_valid_number(num):
-                        return format_number(num, PhoneNumberFormat.E164)
+                yield parse_number(number, code)
             except NumberParseException:
                 pass
-    def specificity(self, value):
-        return 1
+    def validate(
+        self, value: str, fuzzy: bool = False, format: Optional[str] = None
+    ) -> bool:
+        for num in self._parse_number(value):
+            if is_valid_number(num):
+                return True
+        return False
+    def clean_text(
+        self,
+        text: str,
+        fuzzy: bool = False,
+        format: Optional[str] = None,
+        proxy: Optional["EntityProxy"] = None,
+    ) -> Optional[str]:
+        for num in self._parse_number(text, proxy=proxy):
+            if is_valid_number(num):
+                return str(format_number(num, PhoneNumberFormat.E164))
+        return None
-    def country_hint(self, value):
+    def country_hint(self, value: str) -> Optional[str]:
         try:
             number = parse_number(value)
-            return geocoder.region_code_for_number(number).lower()
+            code = region_code_for_number(number)
+            if code is None:
+                return None
+            return str(code).lower()
         except NumberParseException:
-                pass
+            return None
+    def _specificity(self, value: str) -> float:
+        # TODO: insert artificial intelligence here.
+        return dampen(7, 11, value)
+    def rdf(self, value: str) -> Identifier:
+        node_id = self.node_id(value)
+        if node_id is not None:
+            return URIRef(node_id)
+        raise ValueError("Invalid phone number for serialisation: %s" % value)
+    def node_id(self, value: str) -> Optional[str]:
+        return f"tel:{value}"
-    def rdf(self, value):
-        return URIRef('tel:%s' % value)
+    def caption(self, value: str) -> str:
+        try:
+            number = parse_number(value)
+            formatted = format_number(number, PhoneNumberFormat.INTERNATIONAL)
+            return str(formatted)
+        except NumberParseException:
+            return value

followthemoney/types/registry.py ADDED Viewed

@@ -0,0 +1,52 @@
+from banal import ensure_list
+from typing import Iterable, Set, Dict, Type, Union, List, Optional
+from followthemoney.types.common import PropertyType
+class Registry(object):
+    """This registry keeps the processing helpers for all property types
+    in the system. They are instantiated as singletons when the system is first
+    loaded. The registry can be used to get a type, which can itself then
+    clean, validate or format values of that type."""
+    def __init__(self) -> None:
+        self.named: Dict[str, PropertyType] = {}
+        self.matchable: Set[PropertyType] = set()
+        self.types: Set[PropertyType] = set()
+        self.groups: Dict[str, PropertyType] = {}
+        self.pivots: Set[PropertyType] = set()
+    def add(self, clazz: Type[PropertyType]) -> None:
+        """Add a singleton class."""
+        type_ = clazz()
+        self.named[clazz.name] = type_
+        self.types.add(type_)
+        if type_.matchable:
+            self.matchable.add(type_)
+        if type_.pivot:
+            self.pivots.add(type_)
+        if type_.group is not None:
+            self.groups[type_.group] = type_
+    def get(self, name: Union[str, PropertyType]) -> Optional[PropertyType]:
+        """For a given property type name, get its type object. This can also
+        be used via getattr, e.g. ``registry.phone``."""
+        # Allow transparent re-checking.
+        if isinstance(name, PropertyType):
+            return name
+        return self.named.get(name)
+    def get_types(
+        self, names: Iterable[Union[str, PropertyType]]
+    ) -> List[PropertyType]:
+        """Get the type objects for the given names, skipping unknown names."""
+        names = ensure_list(names)
+        types = [self.get(n) for n in names]
+        return [t for t in types if t is not None]
+    def __getitem__(self, name: str) -> PropertyType:
+        return self.named[name]
+    def __getattr__(self, name: str) -> PropertyType:
+        return self.named[name]

followthemoney/types/string.py ADDED Viewed

@@ -0,0 +1,43 @@
+from followthemoney.types.common import PropertyType
+from followthemoney.util import defer as _
+from followthemoney.util import MEGABYTE
+class StringType(PropertyType):
+    """A simple string property with no additional semantics."""
+    name = "string"
+    label = _("Label")
+    plural = _("Labels")
+    matchable = False
+    max_length = 1024
+    def node_id(self, value: str) -> None:
+        return None
+class TextType(StringType):
+    """Longer text fragments, such as descriptions or document text. Unlike
+    string properties, it might make sense to treat properties of this type as
+    full-text search material."""
+    name = "text"
+    label = _("Text")
+    plural = _("Texts")
+    total_size = 30 * MEGABYTE
+    max_length = 65000
+class HTMLType(StringType):
+    """Properties that contain raw hypertext markup (HTML).
+    User interfaces rendering properties of this type need to take extreme
+    care not to allow attacks such as cross-site scripting. It is recommended
+    to perform server-side sanitisation, or to not render this property at all.
+    """
+    name = "html"
+    label = _("HTML")
+    plural = _("HTMLs")
+    total_size = 30 * MEGABYTE
+    max_length = 65000

followthemoney/types/topic.py ADDED Viewed

@@ -0,0 +1,94 @@
+from babel.core import Locale
+from followthemoney.types.common import EnumType, EnumValues
+from followthemoney.rdf import URIRef, Identifier
+from followthemoney.util import gettext, defer as _
+class TopicType(EnumType):
+    """Topics define a controlled vocabulary of terms applicable to some
+    entities, such as companies and people. They describe categories of
+    journalistic interest which may apply to the given entity, for example
+    if a given person is a criminal or a politician.
+    Besides the informative value, topics are ultimately supposed to bear
+    fruits in the context of graph-based data analysis, where they would
+    enable queries such as _find all paths between a government procurement
+    award and a politician_."""
+    name = "topic"
+    group = "topics"
+    label = _("Topic")
+    plural = _("Topics")
+    matchable = False
+    max_length = 64
+    _TOPICS = {
+        "crime": _("Crime"),
+        "crime.fraud": _("Fraud"),
+        "crime.cyber": _("Cybercrime"),
+        "crime.fin": _("Financial crime"),
+        "crime.env": _("Environmental violations"),
+        "crime.theft": _("Theft"),
+        "crime.war": _("War crimes"),
+        "crime.boss": _("Criminal leadership"),
+        "crime.terror": _("Terrorism"),
+        "crime.traffick": _("Trafficking"),
+        "crime.traffick.drug": _("Drug trafficking"),
+        "crime.traffick.human": _("Human trafficking"),
+        "wanted": _("Wanted"),
+        "corp.offshore": _("Offshore"),
+        "corp.shell": _("Shell company"),
+        "corp.public": _("Public listed company"),
+        "corp.disqual": _("Disqualified"),
+        "gov": _("Government"),
+        "gov.national": _("National government"),
+        "gov.state": _("State government"),
+        "gov.muni": _("Municipal government"),
+        "gov.soe": _("State-owned enterprise"),
+        "gov.igo": _("Intergovernmental organization"),
+        "gov.head": _("Head of government or state"),
+        "gov.admin": _("Civil service"),
+        "gov.executive": _("Executive branch of government"),
+        "gov.legislative": _("Legislative branch of government"),
+        "gov.judicial": _("Judicial branch of government"),
+        "gov.security": _("Security services"),
+        "gov.financial": _("Central banking and financial integrity"),
+        "fin": _("Financial services"),
+        "fin.bank": _("Bank"),
+        "fin.fund": _("Fund"),
+        "fin.adivsor": _("Financial advisor"),
+        "reg.action": _("Regulator action"),
+        "reg.warn": _("Regulator warning"),
+        "role.pep": _("Politician"),
+        "role.pol": _("Non-PEP"),
+        "role.rca": _("Close Associate"),
+        "role.judge": _("Judge"),
+        "role.civil": _("Civil servant"),
+        "role.diplo": _("Diplomat"),
+        "role.lawyer": _("Lawyer"),
+        "role.acct": _("Accountant"),
+        "role.spy": _("Spy"),
+        "role.oligarch": _("Oligarch"),
+        "role.journo": _("Journalist"),
+        "role.act": _("Activist"),
+        "role.lobby": _("Lobbyist"),
+        "pol.party": _("Political party"),
+        "pol.union": _("Union"),
+        "rel": _("Religion"),
+        "mil": _("Military"),
+        "asset.frozen": _("Frozen asset"),
+        "sanction": _("Sanctioned entity"),
+        "sanction.linked": _("Sanction-linked entity"),
+        "sanction.counter": _("Counter-sanctioned entity"),
+        "export.control": _("Export controlled"),
+        "export.risk": _("Trade risk"),
+        "debarment": _("Debarred entity"),
+        "poi": _("Person of interest"),
+    }
+    def _locale_names(self, locale: Locale) -> EnumValues:
+        return {k: gettext(v) for (k, v) in self._TOPICS.items()}
+    def rdf(self, value: str) -> Identifier:
+        return URIRef(f"ftm:topic:{value}")

followthemoney/types/url.py CHANGED Viewed

@@ -1,27 +1,49 @@
-from rdflib import URIRef
-from urlnormalizer import normalize_url, is_valid_url
+from typing import Optional, TYPE_CHECKING
+from rigour.urls import clean_url, compare_urls
 from followthemoney.types.common import PropertyType
+from followthemoney.rdf import URIRef, Identifier
+from followthemoney.util import dampen, defer as _
+if TYPE_CHECKING:
+    from followthemoney.proxy import EntityProxy
 class UrlType(PropertyType):
-    name = 'url'
-    group = 'urls'
-    prefix = 'url'
+    """A uniform resource locator (URL). This will perform some normalisation
+    on the URL so that it's sure to be using valid encoding/quoting, and to
+    make sure the URL has a scheme (e.g. `http`, `https`, ...)."""
+    SCHEMES = ("http", "https", "ftp", "mailto")
+    DEFAULT_SCHEME = "http"
-    def validate(self, url, **kwargs):
-        """Check if `url` is a valid URL."""
-        return is_valid_url(url)
+    name = "url"
+    group = "urls"
+    label = _("URL")
+    plural = _("URLs")
+    matchable = True
+    pivot = True
+    max_length = 4096
-    def clean_text(self, url, **kwargs):
-        """Perform intensive care on URLs, see `urlnormalizer`."""
-        try:
-            return normalize_url(url)
-        except UnicodeDecodeError:
-            return None
+    def clean_text(
+        self,
+        text: str,
+        fuzzy: bool = False,
+        format: Optional[str] = None,
+        proxy: Optional["EntityProxy"] = None,
+    ) -> Optional[str]:
+        """Perform intensive care on URLs to make sure they have a scheme
+        and a host name. If no scheme is given HTTP is assumed."""
+        return clean_url(text)
-    def specificity(self, value):
-        return 1
+    def compare(self, left: str, right: str) -> float:
+        return compare_urls(left, right)
-    def rdf(self, value):
+    def _specificity(self, value: str) -> float:
+        return dampen(10, 120, value)
+    def rdf(self, value: str) -> Identifier:
         return URIRef(value)
+    def node_id(self, value: str) -> Optional[str]:
+        return f"url:{value}"

followthemoney/util.py CHANGED Viewed

@@ -1,63 +1,157 @@
 import os
-from threading import local
-from normality import stringify
+import logging
+from hashlib import sha1
 from babel import Locale
 from gettext import translation
-from rdflib import Namespace
-from banal import is_mapping, is_sequence
-from banal import unique_list, ensure_list
-NAMESPACE = Namespace('https://w3id.org/ftm#')
-DEFAULT_LOCALE = 'en'
-i18n_path = os.path.join(os.path.dirname(__file__), 'translations')
+from threading import local
+from typing import cast, Dict, Any, List, Optional, TypeVar, Union, Sequence
+from normality import stringify
+from normality.cleaning import compose_nfc
+from normality.cleaning import remove_unsafe_chars
+from normality.encoding import DEFAULT_ENCODING
+from banal import is_mapping, unique_list, ensure_list
+MEGABYTE = 1024 * 1024
+DEFAULT_LOCALE = "en"
+ENTITY_ID_LEN = 200
+T = TypeVar("T")
+K = TypeVar("K")
+V = TypeVar("V")
+PathLike = Union[str, os.PathLike[str]]
+i18n_path = os.path.join(os.path.dirname(__file__), "translations")
 state = local()
+log = logging.getLogger(__name__)
+def gettext(*args: Optional[str], **kwargs: Dict[str, str]) -> str:
+    if not hasattr(state, "translation"):
+        set_model_locale(Locale.parse(DEFAULT_LOCALE))
+    return cast(str, state.translation.gettext(*args, **kwargs))
-def gettext(*args, **kwargs):
-    if not hasattr(state, 'translation'):
-        set_model_locale(DEFAULT_LOCALE)
-    return state.translation.gettext(*args, **kwargs)
+def defer(text: str) -> str:
+    return text
-def set_model_locale(locale):
+def set_model_locale(locale: Locale) -> None:
     state.locale = locale
-    state.translation = translation('followthemoney', i18n_path, [locale],
-                                    fallback=True)
+    state.translation = translation(
+        "followthemoney", i18n_path, [str(locale)], fallback=True
+    )
+def get_locale() -> Locale:
+    if not hasattr(state, "locale"):
+        return Locale.parse(DEFAULT_LOCALE)
+    return Locale.parse(state.locale)
+def get_env_list(name: str, default: List[str] = []) -> List[str]:
+    value = stringify(os.environ.get(name))
+    if value is not None:
+        values = value.split(":")
+        if len(values):
+            return values
+    return default
+def sanitize_text(text: Any, encoding: str = DEFAULT_ENCODING) -> Optional[str]:
+    text = stringify(text, encoding_default=encoding)
+    if text is None:
+        return None
+    try:
+        text = compose_nfc(text)
+    except (SystemError, Exception) as ex:
+        log.warning("Cannot NFC text: %s", ex)
+        return None
+    text = remove_unsafe_chars(text)
+    if text is None:
+        return None
+    byte_text = text.encode(DEFAULT_ENCODING, "replace")
+    return cast(str, byte_text.decode(DEFAULT_ENCODING, "replace"))
-def get_locale():
-    if not hasattr(state, 'locale'):
-        return Locale(DEFAULT_LOCALE)
-    return Locale(state.locale)
+def value_list(value: Union[T, Sequence[T]]) -> List[T]:
+    if not isinstance(value, (str, bytes)):
+        try:
+            return [v for v in cast(Sequence[T], value)]
+        except TypeError:
+            pass
+    return [cast(T, value)]
-def key_bytes(key):
+def key_bytes(key: Any) -> bytes:
     """Convert the given data to a value appropriate for hashing."""
-    key = stringify(key) or ''
-    return key.encode('utf-8')
-def merge_data(old, new):
-    """Extend the values of the new doc with extra values from the old."""
-    if is_sequence(old) or is_sequence(new):
-        new = ensure_list(new)
-        new.extend(ensure_list(old))
-        return unique_list(new)
-    if is_mapping(old) or is_mapping(new):
-        old = old if is_mapping(old) else {}
-        new = new if is_mapping(new) else {}
-        keys = set(new.keys())
-        keys.update(old.keys())
-        combined = {}
-        for key in keys:
-            value = merge_data(old.get(key), new.get(key))
-            if value is not None:
-                combined[key] = value
-        return combined
-    return new or old
-def dampen(short, long, text):
+    if isinstance(key, bytes):
+        return key
+    text = stringify(key)
+    if text is None:
+        return b""
+    return text.encode("utf-8")
+def join_text(*parts: Any, sep: str = " ") -> Optional[str]:
+    """Join all the non-null arguments using sep."""
+    texts: List[str] = []
+    for part in parts:
+        text = stringify(part)
+        if text is not None:
+            texts.append(text)
+    if not len(texts):
+        return None
+    return sep.join(texts)
+def get_entity_id(obj: Any) -> Optional[str]:
+    """Given an entity-ish object, try to get the ID."""
+    if is_mapping(obj):
+        obj = obj.get("id")
+    else:
+        try:
+            obj = obj.id
+        except AttributeError:
+            pass
+    return stringify(obj)
+def make_entity_id(*parts: Any, key_prefix: Optional[str] = None) -> Optional[str]:
+    digest = sha1()
+    if key_prefix:
+        digest.update(key_bytes(key_prefix))
+    base = digest.digest()
+    for part in parts:
+        digest.update(key_bytes(part))
+    if digest.digest() == base:
+        return None
+    return digest.hexdigest()
+def merge_context(left: Dict[K, V], right: Dict[K, V]) -> Dict[K, List[V]]:
+    """When merging two entities, make lists of all the duplicate context
+    keys."""
+    combined = {}
+    keys = [*left.keys(), *right.keys()]
+    for key in set(keys):
+        if key in ("caption",):
+            continue
+        lval: List[V] = [i for i in ensure_list(left.get(key)) if i is not None]
+        rval: List[V] = [i for i in ensure_list(right.get(key)) if i is not None]
+        combined[key] = unique_list([*lval, *rval])
+    return combined
+def dampen(short: int, long: int, text: str) -> float:
     length = len(text) - short
     baseline = max(1.0, (long - short))
     return max(0, min(1.0, (length / baseline)))
+def shortest(*texts: str) -> str:
+    return min(texts, key=len)
+def longest(*texts: str) -> str:
+    return max(texts, key=len)

followthemoney 1.3.6__py3-none-any.whl → 3.8.0__py3-none-any.whl

followthemoney 1.3.6__py3-none-any.whl → 3.8.0__py3-none-any.whl