followthemoney 1.3.7__py3-none-any.whl → 3.8.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- followthemoney/__init__.py +5 -3
- followthemoney/cli/__init__.py +17 -0
- followthemoney/cli/aggregate.py +56 -0
- followthemoney/cli/cli.py +88 -0
- followthemoney/cli/exports.py +121 -0
- followthemoney/cli/mapping.py +85 -0
- followthemoney/cli/sieve.py +67 -0
- followthemoney/cli/util.py +142 -0
- followthemoney/compare.py +130 -60
- followthemoney/exc.py +19 -6
- followthemoney/export/common.py +29 -0
- followthemoney/export/csv.py +82 -0
- followthemoney/export/excel.py +75 -0
- followthemoney/export/graph.py +79 -0
- followthemoney/export/neo4j.py +182 -0
- followthemoney/export/rdf.py +26 -0
- followthemoney/graph.py +308 -0
- followthemoney/helpers.py +212 -0
- followthemoney/mapping/__init__.py +1 -1
- followthemoney/mapping/csv.py +67 -35
- followthemoney/mapping/entity.py +116 -44
- followthemoney/mapping/property.py +90 -44
- followthemoney/mapping/query.py +27 -19
- followthemoney/mapping/source.py +15 -5
- followthemoney/mapping/sql.py +75 -61
- followthemoney/messages.py +13 -7
- followthemoney/model.py +108 -56
- followthemoney/namespace.py +119 -0
- followthemoney/offshore.py +48 -0
- followthemoney/ontology.py +77 -0
- followthemoney/property.py +204 -71
- followthemoney/proxy.py +455 -118
- followthemoney/rdf.py +9 -0
- followthemoney/schema/Address.yaml +78 -0
- followthemoney/schema/Airplane.yaml +17 -10
- followthemoney/schema/Analyzable.yaml +54 -0
- followthemoney/schema/Article.yaml +16 -0
- followthemoney/schema/Assessment.yaml +32 -0
- followthemoney/schema/Asset.yaml +10 -4
- followthemoney/schema/Associate.yaml +41 -0
- followthemoney/schema/Audio.yaml +24 -0
- followthemoney/schema/BankAccount.yaml +53 -9
- followthemoney/schema/Call.yaml +48 -0
- followthemoney/schema/CallForTenders.yaml +117 -0
- followthemoney/schema/Company.yaml +37 -12
- followthemoney/schema/Contract.yaml +41 -7
- followthemoney/schema/ContractAward.yaml +30 -11
- followthemoney/schema/CourtCase.yaml +16 -10
- followthemoney/schema/CourtCaseParty.yaml +17 -6
- followthemoney/schema/CryptoWallet.yaml +48 -0
- followthemoney/schema/Debt.yaml +37 -0
- followthemoney/schema/Directorship.yaml +17 -4
- followthemoney/schema/Document.yaml +72 -139
- followthemoney/schema/Documentation.yml +38 -0
- followthemoney/schema/EconomicActivity.yaml +32 -17
- followthemoney/schema/Email.yaml +76 -0
- followthemoney/schema/Employment.yaml +39 -0
- followthemoney/schema/Event.yaml +35 -3
- followthemoney/schema/Family.yaml +41 -0
- followthemoney/schema/Folder.yaml +13 -0
- followthemoney/schema/HyperText.yaml +21 -0
- followthemoney/schema/Identification.yaml +40 -0
- followthemoney/schema/Image.yaml +25 -0
- followthemoney/schema/Interest.yaml +3 -6
- followthemoney/schema/Interval.yaml +56 -5
- followthemoney/schema/LegalEntity.yaml +81 -20
- followthemoney/schema/License.yaml +7 -3
- followthemoney/schema/Membership.yaml +19 -4
- followthemoney/schema/Mention.yaml +54 -0
- followthemoney/schema/Message.yaml +78 -0
- followthemoney/schema/Note.yaml +23 -0
- followthemoney/schema/Occupancy.yaml +44 -0
- followthemoney/schema/Organization.yaml +38 -3
- followthemoney/schema/Ownership.yaml +16 -4
- followthemoney/schema/Package.yaml +17 -0
- followthemoney/schema/Page.yaml +43 -0
- followthemoney/schema/Pages.yaml +23 -0
- followthemoney/schema/Passport.yaml +16 -17
- followthemoney/schema/Payment.yaml +38 -7
- followthemoney/schema/Person.yaml +61 -5
- followthemoney/schema/PlainText.yaml +17 -0
- followthemoney/schema/Position.yaml +50 -0
- followthemoney/schema/Post.yaml +42 -0
- followthemoney/schema/Project.yaml +27 -0
- followthemoney/schema/ProjectParticipant.yaml +36 -0
- followthemoney/schema/PublicBody.yaml +14 -3
- followthemoney/schema/RealEstate.yaml +19 -3
- followthemoney/schema/Representation.yaml +17 -6
- followthemoney/schema/Sanction.yaml +45 -21
- followthemoney/schema/Security.yaml +59 -0
- followthemoney/schema/Similar.yaml +37 -0
- followthemoney/schema/Succession.yaml +36 -0
- followthemoney/schema/Table.yaml +32 -0
- followthemoney/schema/TaxRoll.yaml +27 -9
- followthemoney/schema/Thing.yaml +69 -13
- followthemoney/schema/Trip.yaml +42 -0
- followthemoney/schema/UnknownLink.yaml +17 -6
- followthemoney/schema/UserAccount.yaml +44 -0
- followthemoney/schema/Value.yaml +5 -1
- followthemoney/schema/Vehicle.yaml +25 -8
- followthemoney/schema/Vessel.yaml +18 -10
- followthemoney/schema/Video.yaml +20 -0
- followthemoney/schema/Workbook.yaml +18 -0
- followthemoney/schema.py +436 -135
- followthemoney/translations/ar/LC_MESSAGES/followthemoney.mo +0 -0
- followthemoney/translations/ar/LC_MESSAGES/followthemoney.po +2900 -787
- followthemoney/translations/bs/LC_MESSAGES/followthemoney.mo +0 -0
- followthemoney/translations/bs/LC_MESSAGES/followthemoney.po +2108 -520
- followthemoney/translations/de/LC_MESSAGES/followthemoney.mo +0 -0
- followthemoney/translations/de/LC_MESSAGES/followthemoney.po +2902 -782
- followthemoney/translations/es/LC_MESSAGES/followthemoney.mo +0 -0
- followthemoney/translations/es/LC_MESSAGES/followthemoney.po +2893 -779
- followthemoney/translations/fr/LC_MESSAGES/followthemoney.mo +0 -0
- followthemoney/translations/fr/LC_MESSAGES/followthemoney.po +4362 -0
- followthemoney/translations/fr/followthemoney.po +3861 -0
- followthemoney/translations/messages.pot +3021 -725
- followthemoney/translations/nb/LC_MESSAGES/followthemoney.mo +0 -0
- followthemoney/translations/nb/LC_MESSAGES/followthemoney.po +3778 -0
- followthemoney/translations/nl/LC_MESSAGES/followthemoney.mo +0 -0
- followthemoney/translations/nl/LC_MESSAGES/followthemoney.po +3837 -0
- followthemoney/translations/pt_BR/LC_MESSAGES/followthemoney.mo +0 -0
- followthemoney/translations/pt_BR/LC_MESSAGES/followthemoney.po +3784 -0
- followthemoney/translations/ru/LC_MESSAGES/followthemoney.mo +0 -0
- followthemoney/translations/ru/LC_MESSAGES/followthemoney.po +2837 -539
- followthemoney/translations/ru/followthemoney.po +4221 -0
- followthemoney/translations/tr/LC_MESSAGES/followthemoney.mo +0 -0
- followthemoney/translations/tr/LC_MESSAGES/followthemoney.po +2073 -491
- followthemoney/types/__init__.py +35 -17
- followthemoney/types/address.py +50 -21
- followthemoney/types/checksum.py +25 -0
- followthemoney/types/common.py +233 -88
- followthemoney/types/country.py +50 -56
- followthemoney/types/date.py +59 -76
- followthemoney/types/email.py +66 -35
- followthemoney/types/entity.py +66 -13
- followthemoney/types/gender.py +66 -0
- followthemoney/types/iban.py +47 -28
- followthemoney/types/identifier.py +49 -22
- followthemoney/types/ip.py +35 -21
- followthemoney/types/json.py +58 -0
- followthemoney/types/language.py +124 -37
- followthemoney/types/mimetype.py +44 -0
- followthemoney/types/name.py +56 -12
- followthemoney/types/number.py +30 -0
- followthemoney/types/phone.py +92 -34
- followthemoney/types/registry.py +52 -0
- followthemoney/types/string.py +43 -0
- followthemoney/types/topic.py +94 -0
- followthemoney/types/url.py +39 -17
- followthemoney/util.py +139 -45
- followthemoney-3.8.1.dist-info/METADATA +153 -0
- followthemoney-3.8.1.dist-info/RECORD +157 -0
- {followthemoney-1.3.7.dist-info → followthemoney-3.8.1.dist-info}/WHEEL +1 -2
- followthemoney-3.8.1.dist-info/entry_points.txt +17 -0
- followthemoney-1.3.7.dist-info/LICENSE.txt → followthemoney-3.8.1.dist-info/licenses/LICENSE +1 -1
- followthemoney/link.py +0 -75
- followthemoney/schema/Associate.yml +0 -19
- followthemoney/schema/Family.yml +0 -19
- followthemoney/schema/Land.yml +0 -9
- followthemoney/schema/Relationship.yaml +0 -26
- followthemoney/types/domain.py +0 -50
- followthemoney-1.3.7.dist-info/DESCRIPTION.rst +0 -3
- followthemoney-1.3.7.dist-info/METADATA +0 -39
- followthemoney-1.3.7.dist-info/RECORD +0 -108
- followthemoney-1.3.7.dist-info/entry_points.txt +0 -3
- followthemoney-1.3.7.dist-info/metadata.json +0 -1
- followthemoney-1.3.7.dist-info/namespace_packages.txt +0 -1
- followthemoney-1.3.7.dist-info/top_level.txt +0 -3
- ns/ontology.py +0 -128
- tests/types/test_addresses.py +0 -24
- tests/types/test_common.py +0 -32
- tests/types/test_countries.py +0 -27
- tests/types/test_dates.py +0 -73
- tests/types/test_domains.py +0 -23
- tests/types/test_emails.py +0 -32
- tests/types/test_entity.py +0 -19
- tests/types/test_iban.py +0 -109
- tests/types/test_identifiers.py +0 -27
- tests/types/test_ip.py +0 -29
- tests/types/test_languages.py +0 -23
- tests/types/test_names.py +0 -33
- tests/types/test_phones.py +0 -24
- tests/types/test_registry.py +0 -14
- tests/types/test_urls.py +0 -23
- {ns → followthemoney/export}/__init__.py +0 -0
- /tests/types/__init__.py → /followthemoney/py.typed +0 -0
followthemoney/types/ip.py
CHANGED
|
@@ -1,36 +1,50 @@
|
|
|
1
|
-
from
|
|
2
|
-
from normality import stringify
|
|
1
|
+
from typing import Optional, TYPE_CHECKING
|
|
3
2
|
from ipaddress import ip_address
|
|
4
3
|
|
|
5
4
|
from followthemoney.types.common import PropertyType
|
|
5
|
+
from followthemoney.rdf import URIRef, Identifier
|
|
6
|
+
from followthemoney.util import defer as _
|
|
7
|
+
|
|
8
|
+
if TYPE_CHECKING:
|
|
9
|
+
from followthemoney.proxy import EntityProxy
|
|
6
10
|
|
|
7
11
|
|
|
8
12
|
class IpType(PropertyType):
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
+
"""Internet protocol addresses. This supports both addresses used
|
|
14
|
+
by the protocol versions 4 (e.g. `192.168.1.143`) and 6
|
|
15
|
+
(e.g. `0:0:0:0:0:ffff:c0a8:18f`)."""
|
|
16
|
+
|
|
17
|
+
name = "ip"
|
|
18
|
+
group = "ips"
|
|
19
|
+
label = _("IP-Address")
|
|
20
|
+
plural = _("IP-Addresses")
|
|
21
|
+
matchable = True
|
|
22
|
+
pivot = True
|
|
23
|
+
max_length = 64
|
|
13
24
|
|
|
14
|
-
def validate(
|
|
25
|
+
def validate(
|
|
26
|
+
self, value: str, fuzzy: bool = False, format: Optional[str] = None
|
|
27
|
+
) -> bool:
|
|
15
28
|
"""Check to see if this is a valid ip address."""
|
|
16
29
|
try:
|
|
17
|
-
ip_address(
|
|
30
|
+
ip_address(value)
|
|
18
31
|
return True
|
|
19
32
|
except ValueError:
|
|
20
33
|
return False
|
|
21
34
|
|
|
22
|
-
def
|
|
35
|
+
def clean_text(
|
|
36
|
+
self,
|
|
37
|
+
text: str,
|
|
38
|
+
fuzzy: bool = False,
|
|
39
|
+
format: Optional[str] = None,
|
|
40
|
+
proxy: Optional["EntityProxy"] = None,
|
|
41
|
+
) -> Optional[str]:
|
|
23
42
|
"""Create a more clean, but still user-facing version of an
|
|
24
43
|
instance of the type."""
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
def specificity(self, value):
|
|
33
|
-
return 1
|
|
34
|
-
|
|
35
|
-
def rdf(self, value):
|
|
36
|
-
return URIRef('ip:%s' % value)
|
|
44
|
+
try:
|
|
45
|
+
return str(ip_address(text))
|
|
46
|
+
except ValueError:
|
|
47
|
+
return None
|
|
48
|
+
|
|
49
|
+
def rdf(self, value: str) -> Identifier:
|
|
50
|
+
return URIRef(f"ip:{value}")
|
|
@@ -0,0 +1,58 @@
|
|
|
1
|
+
import json
|
|
2
|
+
from typing import Any, Optional, Sequence, TYPE_CHECKING
|
|
3
|
+
from banal import ensure_list
|
|
4
|
+
|
|
5
|
+
from followthemoney.types.common import PropertyType
|
|
6
|
+
from followthemoney.util import sanitize_text, defer as _
|
|
7
|
+
|
|
8
|
+
if TYPE_CHECKING:
|
|
9
|
+
from followthemoney.proxy import EntityProxy
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
class JsonType(PropertyType):
    """A value kept as an encoded JSON string.

    JSON properties hold raw HTTP headers for documents plus a few other
    edge cases. They are regarded as a bad idea and are meant to be phased
    out over time.
    """

    name = "json"
    group = None
    label = _("Nested data")
    plural = _("Nested data")
    matchable = False

    def pack(self, obj: Any) -> Optional[str]:
        """Serialise ``obj`` to JSON text; ``None`` is passed through as ``None``."""
        # TODO: use a JSON encoder that handles more types?
        return None if obj is None else json.dumps(obj)

    def unpack(self, obj: str) -> Any:
        """Parse JSON text, handing the input back untouched if it cannot be parsed."""
        try:
            decoded = json.loads(obj)
        except Exception:
            # Best effort: anything that is not valid JSON is returned as given.
            return obj
        return decoded

    def clean(
        self,
        raw: Any,
        fuzzy: bool = False,
        format: Optional[str] = None,
        proxy: Optional["EntityProxy"] = None,
    ) -> Optional[str]:
        """Turn a raw value into its stored string form.

        Strings are run through ``sanitize_text``; every other kind of value
        is JSON-encoded with :meth:`pack`. ``fuzzy``, ``format`` and ``proxy``
        are accepted but not used here.
        """
        if isinstance(raw, str):
            return sanitize_text(raw)
        return self.pack(raw)

    def join(self, values: Sequence[str]) -> str:
        """Combine several encoded values into the text of one JSON array."""
        decoded = [self.unpack(item) for item in ensure_list(values)]
        encoded = self.pack(decoded)
        return "[]" if encoded is None else encoded

    def node_id(self, value: str) -> None:
        """JSON values never get a node identifier."""
        return None
|
followthemoney/types/language.py
CHANGED
|
@@ -1,37 +1,124 @@
|
|
|
1
|
-
from
|
|
2
|
-
from
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
from followthemoney.
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
1
|
+
from typing import Optional, TYPE_CHECKING
|
|
2
|
+
from babel.core import Locale
|
|
3
|
+
from rigour.langs import iso_639_alpha3
|
|
4
|
+
|
|
5
|
+
from followthemoney.types.common import EnumType, EnumValues
|
|
6
|
+
from followthemoney.rdf import URIRef, Identifier
|
|
7
|
+
from followthemoney.util import defer as _, gettext
|
|
8
|
+
from followthemoney.util import get_env_list
|
|
9
|
+
|
|
10
|
+
if TYPE_CHECKING:
|
|
11
|
+
from followthemoney.proxy import EntityProxy
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
class LanguageType(EnumType):
|
|
15
|
+
"""A human written language. This list is arbitrarily limited for some
|
|
16
|
+
weird upstream technical reasons, but we'll happily accept pull requests
|
|
17
|
+
for additional languages once there is a specific need for them to be
|
|
18
|
+
supported."""
|
|
19
|
+
|
|
20
|
+
name = "language"
|
|
21
|
+
group = "languages"
|
|
22
|
+
label = _("Language")
|
|
23
|
+
plural = _("Languages")
|
|
24
|
+
matchable = False
|
|
25
|
+
max_length = 16
|
|
26
|
+
|
|
27
|
+
# Language whitelist
|
|
28
|
+
LANGUAGES = [
|
|
29
|
+
"eng",
|
|
30
|
+
"fra",
|
|
31
|
+
"deu",
|
|
32
|
+
"rus",
|
|
33
|
+
"spa",
|
|
34
|
+
"nld",
|
|
35
|
+
"ron",
|
|
36
|
+
"kat",
|
|
37
|
+
"ara",
|
|
38
|
+
"tur",
|
|
39
|
+
"ltz",
|
|
40
|
+
"ell",
|
|
41
|
+
"lit",
|
|
42
|
+
"ukr",
|
|
43
|
+
"zho",
|
|
44
|
+
"bel",
|
|
45
|
+
"bul",
|
|
46
|
+
"bos",
|
|
47
|
+
"jpn",
|
|
48
|
+
"ces",
|
|
49
|
+
"lav",
|
|
50
|
+
"por",
|
|
51
|
+
"pol",
|
|
52
|
+
"hye",
|
|
53
|
+
"hrv",
|
|
54
|
+
"hin",
|
|
55
|
+
"heb",
|
|
56
|
+
"uzb",
|
|
57
|
+
"mon",
|
|
58
|
+
"urd",
|
|
59
|
+
"sqi",
|
|
60
|
+
"kor",
|
|
61
|
+
"isl",
|
|
62
|
+
"ita",
|
|
63
|
+
"est",
|
|
64
|
+
"nor",
|
|
65
|
+
"fas",
|
|
66
|
+
"swa",
|
|
67
|
+
"slv",
|
|
68
|
+
"slk",
|
|
69
|
+
"aze",
|
|
70
|
+
"tgk",
|
|
71
|
+
"kaz",
|
|
72
|
+
"tuk",
|
|
73
|
+
"kir",
|
|
74
|
+
"hun",
|
|
75
|
+
"dan",
|
|
76
|
+
"afr",
|
|
77
|
+
"swe",
|
|
78
|
+
"srp",
|
|
79
|
+
"ind",
|
|
80
|
+
"kan",
|
|
81
|
+
"mkd",
|
|
82
|
+
"mlt",
|
|
83
|
+
"msa",
|
|
84
|
+
"fin",
|
|
85
|
+
"cat",
|
|
86
|
+
"nep",
|
|
87
|
+
"tgl",
|
|
88
|
+
"fil",
|
|
89
|
+
"mya",
|
|
90
|
+
"khm",
|
|
91
|
+
"cnr",
|
|
92
|
+
]
|
|
93
|
+
LANGUAGES = get_env_list("FTM_LANGUAGES", LANGUAGES)
|
|
94
|
+
LANGUAGES = [lang.lower().strip() for lang in LANGUAGES]
|
|
95
|
+
|
|
96
|
+
def _locale_names(self, locale: Locale) -> EnumValues:
|
|
97
|
+
names = {
|
|
98
|
+
"ara": gettext("Arabic"),
|
|
99
|
+
"nor": gettext("Norwegian"),
|
|
100
|
+
"cnr": gettext("Montenegrin"),
|
|
101
|
+
}
|
|
102
|
+
for lang in self.LANGUAGES:
|
|
103
|
+
if lang not in names:
|
|
104
|
+
names[lang] = lang
|
|
105
|
+
for code, label in locale.languages.items():
|
|
106
|
+
code = iso_639_alpha3(code)
|
|
107
|
+
if code in self.LANGUAGES and names[code] == code:
|
|
108
|
+
names[code] = label
|
|
109
|
+
return names
|
|
110
|
+
|
|
111
|
+
def clean_text(
|
|
112
|
+
self,
|
|
113
|
+
text: str,
|
|
114
|
+
fuzzy: bool = False,
|
|
115
|
+
format: Optional[str] = None,
|
|
116
|
+
proxy: Optional["EntityProxy"] = None,
|
|
117
|
+
) -> Optional[str]:
|
|
118
|
+
code = iso_639_alpha3(text)
|
|
119
|
+
if code not in self.LANGUAGES:
|
|
120
|
+
return None
|
|
121
|
+
return code
|
|
122
|
+
|
|
123
|
+
def rdf(self, value: str) -> Identifier:
|
|
124
|
+
return URIRef(f"iso-639:{value}")
|
|
@@ -0,0 +1,44 @@
|
|
|
1
|
+
from typing import Optional, TYPE_CHECKING
|
|
2
|
+
from rigour.mime import normalize_mimetype, parse_mimetype
|
|
3
|
+
from rigour.mime import DEFAULT
|
|
4
|
+
|
|
5
|
+
from followthemoney.types.common import PropertyType
|
|
6
|
+
from followthemoney.rdf import URIRef, Identifier
|
|
7
|
+
from followthemoney.util import defer as _
|
|
8
|
+
|
|
9
|
+
if TYPE_CHECKING:
|
|
10
|
+
from followthemoney.proxy import EntityProxy
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
class MimeType(PropertyType):
    """A MIME media type: the specification of a content type on a network.

    Every MIME type is assigned by IANA and is made up of two parts, the
    type and the sub-type. Common examples are `text/plain`,
    `application/json` and `application/pdf`.

    MIME type properties do not carry the parameters used in HTTP headers,
    such as `charset=UTF-8`.
    """

    name = "mimetype"
    group = "mimetypes"
    label = _("MIME-Type")
    plural = _("MIME-Types")
    matchable = False

    def clean_text(
        self,
        text: str,
        fuzzy: bool = False,
        format: Optional[str] = None,
        proxy: Optional["EntityProxy"] = None,
    ) -> Optional[str]:
        """Normalise ``text`` with ``normalize_mimetype``.

        A result equal to ``DEFAULT`` is rejected and reported as ``None``.
        ``fuzzy``, ``format`` and ``proxy`` are accepted but not used here.
        """
        normalized = normalize_mimetype(text)
        return normalized if normalized != DEFAULT else None

    def rdf(self, value: str) -> Identifier:
        """Wrap the value in a ``urn:mimetype:`` URI reference."""
        return URIRef(f"urn:mimetype:{value}")

    def caption(self, value: str) -> str:
        """Return the parsed type's label, or the raw value when the label is empty."""
        label = parse_mimetype(value).label
        return label or value
|
followthemoney/types/name.py
CHANGED
|
@@ -1,24 +1,68 @@
|
|
|
1
|
-
from
|
|
1
|
+
from typing import TYPE_CHECKING, Optional, Sequence
|
|
2
|
+
from normality import slugify
|
|
2
3
|
from normality.cleaning import collapse_spaces, strip_quotes
|
|
4
|
+
from rigour.env import MAX_NAME_LENGTH
|
|
5
|
+
from rigour.names import pick_name
|
|
6
|
+
from rigour.text.distance import levenshtein_similarity
|
|
7
|
+
from fingerprints.cleanup import clean_name_light
|
|
3
8
|
|
|
4
9
|
from followthemoney.types.common import PropertyType
|
|
5
10
|
from followthemoney.util import dampen
|
|
11
|
+
from followthemoney.util import defer as _
|
|
12
|
+
|
|
13
|
+
if TYPE_CHECKING:
|
|
14
|
+
from followthemoney.proxy import EntityProxy
|
|
6
15
|
|
|
7
16
|
|
|
8
17
|
class NameType(PropertyType):
|
|
9
|
-
name
|
|
10
|
-
|
|
11
|
-
|
|
18
|
+
"""A name used for a person or company. This is assumed to be as complete
|
|
19
|
+
a name as available - when a first name, family name or patronymic are given
|
|
20
|
+
separately, these are stored to string-type properties instead.
|
|
21
|
+
|
|
22
|
+
No validation rules apply, and things having multiple names must be considered
|
|
23
|
+
a perfectly ordinary case."""
|
|
24
|
+
|
|
25
|
+
name = "name"
|
|
26
|
+
group = "names"
|
|
27
|
+
label = _("Name")
|
|
28
|
+
plural = _("Names")
|
|
29
|
+
matchable = True
|
|
30
|
+
pivot = True
|
|
31
|
+
max_length = MAX_NAME_LENGTH
|
|
12
32
|
|
|
13
|
-
def clean_text(
|
|
33
|
+
def clean_text(
|
|
34
|
+
self,
|
|
35
|
+
text: str,
|
|
36
|
+
fuzzy: bool = False,
|
|
37
|
+
format: Optional[str] = None,
|
|
38
|
+
proxy: Optional["EntityProxy"] = None,
|
|
39
|
+
) -> Optional[str]:
|
|
14
40
|
"""Basic clean-up."""
|
|
15
|
-
name = strip_quotes(
|
|
16
|
-
|
|
17
|
-
return name
|
|
41
|
+
name = strip_quotes(text)
|
|
42
|
+
return collapse_spaces(name)
|
|
18
43
|
|
|
19
|
-
def
|
|
44
|
+
def pick(self, values: Sequence[str]) -> Optional[str]:
|
|
45
|
+
"""From a set of names, pick the most plausible user-facing one."""
|
|
46
|
+
return pick_name(list(values))
|
|
47
|
+
|
|
48
|
+
def _specificity(self, value: str) -> float:
|
|
20
49
|
# TODO: insert artificial intelligence here.
|
|
21
|
-
return dampen(3, 50, value)
|
|
50
|
+
return dampen(3, 50, value)
|
|
51
|
+
|
|
52
|
+
def compare(self, left: str, right: str) -> float:
|
|
53
|
+
"""Compare two names for similarity."""
|
|
54
|
+
left_clean = clean_name_light(left)
|
|
55
|
+
right_clean = clean_name_light(right)
|
|
56
|
+
if left_clean is None or right_clean is None:
|
|
57
|
+
return 0.0
|
|
58
|
+
return levenshtein_similarity(
|
|
59
|
+
left_clean,
|
|
60
|
+
right_clean,
|
|
61
|
+
max_length=self.max_length,
|
|
62
|
+
)
|
|
22
63
|
|
|
23
|
-
def
|
|
24
|
-
|
|
64
|
+
def node_id(self, value: str) -> Optional[str]:
|
|
65
|
+
slug = slugify(value)
|
|
66
|
+
if slug is None:
|
|
67
|
+
return None
|
|
68
|
+
return f"name:{slug}"
|
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
import re
|
|
2
|
+
from typing import Optional
|
|
3
|
+
|
|
4
|
+
from followthemoney.types.common import PropertyType
|
|
5
|
+
from followthemoney.util import defer as _
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
class NumberType(PropertyType):
    """A numeric value, e.g. the size of a piece of land or a contract value.

    All property values in FtM are strings, so numbers are strings too and
    no particular format is prescribed (e.g. `1,000.00` vs. `1.000,00`).

    Annotations for format or units, or a dedicated property type for
    monetary values, may be introduced in the future.
    """

    # Matches every character that is not a digit, a minus sign or a dot.
    CAST_RE = re.compile(r"[^0-9\-\.]")
    name = "number"
    label = _("Number")
    plural = _("Numbers")
    matchable = False

    def node_id(self, value: str) -> None:
        """Numbers never get a node identifier."""
        return None

    def to_number(self, value: str) -> Optional[float]:
        """Best-effort conversion of a stored number string to ``float``.

        Everything except digits, ``-`` and ``.`` is removed before parsing.
        ``None`` is returned when the remainder cannot be converted.
        """
        try:
            return float(self.CAST_RE.sub("", value))
        except Exception:
            return None
|
followthemoney/types/phone.py
CHANGED
|
@@ -1,31 +1,54 @@
|
|
|
1
|
-
from
|
|
2
|
-
from banal import ensure_list
|
|
3
|
-
from phonenumbers import geocoder
|
|
1
|
+
from typing import Iterable, Optional, TYPE_CHECKING
|
|
4
2
|
from phonenumbers import parse as parse_number
|
|
5
|
-
from phonenumbers import
|
|
6
|
-
from phonenumbers import PhoneNumberFormat
|
|
7
|
-
from phonenumbers.phonenumberutil import NumberParseException
|
|
3
|
+
from phonenumbers import is_valid_number, format_number
|
|
4
|
+
from phonenumbers import PhoneNumber, PhoneNumberFormat
|
|
5
|
+
from phonenumbers.phonenumberutil import region_code_for_number, NumberParseException
|
|
8
6
|
|
|
9
7
|
from followthemoney.types.common import PropertyType
|
|
8
|
+
from followthemoney.rdf import URIRef, Identifier
|
|
9
|
+
from followthemoney.util import defer as _
|
|
10
|
+
from followthemoney.util import dampen
|
|
11
|
+
|
|
12
|
+
if TYPE_CHECKING:
|
|
13
|
+
from followthemoney.proxy import EntityProxy
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
# TODO: for json schema export
|
|
17
|
+
# https://stackoverflow.com/questions/6478875/regular-expression-matching-e-164-formatted-phone-numbers
|
|
10
18
|
|
|
11
19
|
|
|
12
20
|
class PhoneType(PropertyType):
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
21
|
+
"""A phone number in E.164 format. This means that phone numbers always
|
|
22
|
+
include an international country prefix (e.g. `+38760183628`). The
|
|
23
|
+
cleaning and validation functions for this try to be smart about by
|
|
24
|
+
accepting a list of countries as an argument in order to add the number
|
|
25
|
+
prefix.
|
|
26
|
+
|
|
27
|
+
When adding a property of this type to an entity, any country-type properties
|
|
28
|
+
defined for the entity are considered for validation. That means that adding a
|
|
29
|
+
phone number to an entity before adding a country can have a different
|
|
30
|
+
validation outcome from doing the two operations the other way around. Always
|
|
31
|
+
define the country first."""
|
|
32
|
+
|
|
33
|
+
name = "phone"
|
|
34
|
+
group = "phones"
|
|
35
|
+
label = _("Phone number")
|
|
36
|
+
plural = _("Phone numbers")
|
|
37
|
+
matchable = True
|
|
38
|
+
pivot = True
|
|
39
|
+
max_length = 64
|
|
40
|
+
|
|
41
|
+
def _clean_countries(
|
|
42
|
+
self, proxy: Optional["EntityProxy"]
|
|
43
|
+
) -> Iterable[Optional[str]]:
|
|
44
|
+
yield None
|
|
45
|
+
if proxy is not None:
|
|
46
|
+
for country in proxy.countries:
|
|
47
|
+
yield country.upper()
|
|
48
|
+
|
|
49
|
+
def _parse_number(
|
|
50
|
+
self, number: str, proxy: Optional["EntityProxy"] = None
|
|
51
|
+
) -> Iterable[PhoneNumber]:
|
|
29
52
|
"""Parse a phone number and return in international format.
|
|
30
53
|
|
|
31
54
|
If no valid phone number can be detected, None is returned. If
|
|
@@ -34,24 +57,59 @@ class PhoneType(PropertyType):
|
|
|
34
57
|
|
|
35
58
|
https://github.com/daviddrysdale/python-phonenumbers
|
|
36
59
|
"""
|
|
37
|
-
for code in self._clean_countries(
|
|
60
|
+
for code in self._clean_countries(proxy):
|
|
38
61
|
try:
|
|
39
|
-
|
|
40
|
-
if is_possible_number(num):
|
|
41
|
-
if is_valid_number(num):
|
|
42
|
-
return format_number(num, PhoneNumberFormat.E164)
|
|
62
|
+
yield parse_number(number, code)
|
|
43
63
|
except NumberParseException:
|
|
44
64
|
pass
|
|
45
65
|
|
|
46
|
-
def
|
|
47
|
-
|
|
66
|
+
def validate(
|
|
67
|
+
self, value: str, fuzzy: bool = False, format: Optional[str] = None
|
|
68
|
+
) -> bool:
|
|
69
|
+
for num in self._parse_number(value):
|
|
70
|
+
if is_valid_number(num):
|
|
71
|
+
return True
|
|
72
|
+
return False
|
|
73
|
+
|
|
74
|
+
def clean_text(
|
|
75
|
+
self,
|
|
76
|
+
text: str,
|
|
77
|
+
fuzzy: bool = False,
|
|
78
|
+
format: Optional[str] = None,
|
|
79
|
+
proxy: Optional["EntityProxy"] = None,
|
|
80
|
+
) -> Optional[str]:
|
|
81
|
+
for num in self._parse_number(text, proxy=proxy):
|
|
82
|
+
if is_valid_number(num):
|
|
83
|
+
return str(format_number(num, PhoneNumberFormat.E164))
|
|
84
|
+
return None
|
|
48
85
|
|
|
49
|
-
def country_hint(self, value):
|
|
86
|
+
def country_hint(self, value: str) -> Optional[str]:
|
|
50
87
|
try:
|
|
51
88
|
number = parse_number(value)
|
|
52
|
-
|
|
89
|
+
code = region_code_for_number(number)
|
|
90
|
+
if code is None:
|
|
91
|
+
return None
|
|
92
|
+
return str(code).lower()
|
|
53
93
|
except NumberParseException:
|
|
54
|
-
|
|
94
|
+
return None
|
|
95
|
+
|
|
96
|
+
def _specificity(self, value: str) -> float:
|
|
97
|
+
# TODO: insert artificial intelligence here.
|
|
98
|
+
return dampen(7, 11, value)
|
|
99
|
+
|
|
100
|
+
def rdf(self, value: str) -> Identifier:
|
|
101
|
+
node_id = self.node_id(value)
|
|
102
|
+
if node_id is not None:
|
|
103
|
+
return URIRef(node_id)
|
|
104
|
+
raise ValueError("Invalid phone number for serialisation: %s" % value)
|
|
105
|
+
|
|
106
|
+
def node_id(self, value: str) -> Optional[str]:
|
|
107
|
+
return f"tel:{value}"
|
|
55
108
|
|
|
56
|
-
def
|
|
57
|
-
|
|
109
|
+
def caption(self, value: str) -> str:
|
|
110
|
+
try:
|
|
111
|
+
number = parse_number(value)
|
|
112
|
+
formatted = format_number(number, PhoneNumberFormat.INTERNATIONAL)
|
|
113
|
+
return str(formatted)
|
|
114
|
+
except NumberParseException:
|
|
115
|
+
return value
|
|
@@ -0,0 +1,52 @@
|
|
|
1
|
+
from banal import ensure_list
|
|
2
|
+
from typing import Iterable, Set, Dict, Type, Union, List, Optional
|
|
3
|
+
|
|
4
|
+
from followthemoney.types.common import PropertyType
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
class _UnknownTypeError(AttributeError, KeyError):
    """Raised by ``Registry.__getattr__`` for a name that is not registered.

    It is an ``AttributeError`` because the attribute protocol (``hasattr``,
    ``getattr(obj, name, default)``, ``copy``/``pickle`` probing) requires
    one. It is also a ``KeyError`` because that is what attribute lookup on
    the registry raised before, so existing handlers keep working.
    """


class Registry(object):
    """This registry keeps the processing helpers for all property types
    in the system. They are instantiated as singletons when the system is first
    loaded. The registry can be used to get a type, which can itself then
    clean, validate or format values of that type."""

    def __init__(self) -> None:
        # Type instances keyed by their ``name``.
        self.named: Dict[str, PropertyType] = {}
        # Type instances whose ``matchable`` flag is truthy.
        self.matchable: Set[PropertyType] = set()
        # Every registered type instance.
        self.types: Set[PropertyType] = set()
        # Type instances keyed by their ``group`` (types without one are skipped).
        self.groups: Dict[str, PropertyType] = {}
        # Type instances whose ``pivot`` flag is truthy.
        self.pivots: Set[PropertyType] = set()

    def add(self, clazz: Type[PropertyType]) -> None:
        """Instantiate ``clazz`` once and index the instance.

        The instance is stored under ``clazz.name`` and added to the
        ``matchable``, ``pivots`` and ``groups`` indexes according to its
        ``matchable``, ``pivot`` and ``group`` attributes.
        """
        type_ = clazz()
        self.named[clazz.name] = type_
        self.types.add(type_)
        if type_.matchable:
            self.matchable.add(type_)
        if type_.pivot:
            self.pivots.add(type_)
        if type_.group is not None:
            self.groups[type_.group] = type_

    def get(self, name: Union[str, PropertyType]) -> Optional[PropertyType]:
        """For a given property type name, get its type object, or ``None``
        if no such type is registered. This can also be used via getattr,
        e.g. ``registry.phone``."""
        # Allow transparent re-checking.
        if isinstance(name, PropertyType):
            return name
        return self.named.get(name)

    def get_types(
        self, names: Iterable[Union[str, PropertyType]]
    ) -> List[PropertyType]:
        """Resolve names (or type objects) to a list of type objects.

        Names that are not registered are silently dropped.
        """
        names = ensure_list(names)
        types = [self.get(n) for n in names]
        return [t for t in types if t is not None]

    def __getitem__(self, name: str) -> PropertyType:
        """Return the type registered as ``name``; raises ``KeyError`` if unknown."""
        return self.named[name]

    def __getattr__(self, name: str) -> PropertyType:
        """Return the type registered as ``name`` via attribute access.

        Only called when normal attribute lookup fails. Unknown names raise
        an error that is both an ``AttributeError`` and a ``KeyError``.
        """
        try:
            # Read ``named`` through ``__dict__``: if it is not set yet (for
            # example while ``copy``/``pickle`` rebuild an instance without
            # running ``__init__``), ``self.named`` would re-enter this
            # method and recurse without bound.
            return self.__dict__["named"][name]
        except KeyError:
            raise _UnknownTypeError(name) from None
|