followthemoney 1.3.6__py3-none-any.whl → 3.8.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (186) hide show
  1. followthemoney/__init__.py +5 -3
  2. followthemoney/cli/__init__.py +17 -0
  3. followthemoney/cli/aggregate.py +56 -0
  4. followthemoney/cli/cli.py +88 -0
  5. followthemoney/cli/exports.py +121 -0
  6. followthemoney/cli/mapping.py +85 -0
  7. followthemoney/cli/sieve.py +67 -0
  8. followthemoney/cli/util.py +142 -0
  9. followthemoney/compare.py +132 -55
  10. followthemoney/exc.py +19 -6
  11. followthemoney/export/common.py +29 -0
  12. followthemoney/export/csv.py +82 -0
  13. followthemoney/export/excel.py +75 -0
  14. followthemoney/export/graph.py +79 -0
  15. followthemoney/export/neo4j.py +182 -0
  16. followthemoney/export/rdf.py +26 -0
  17. followthemoney/graph.py +308 -0
  18. followthemoney/helpers.py +212 -0
  19. followthemoney/mapping/__init__.py +1 -1
  20. followthemoney/mapping/csv.py +67 -35
  21. followthemoney/mapping/entity.py +116 -44
  22. followthemoney/mapping/property.py +90 -44
  23. followthemoney/mapping/query.py +27 -19
  24. followthemoney/mapping/source.py +15 -5
  25. followthemoney/mapping/sql.py +75 -61
  26. followthemoney/messages.py +13 -7
  27. followthemoney/model.py +108 -56
  28. followthemoney/namespace.py +119 -0
  29. followthemoney/offshore.py +48 -0
  30. followthemoney/ontology.py +77 -0
  31. followthemoney/property.py +204 -71
  32. followthemoney/proxy.py +455 -118
  33. followthemoney/rdf.py +9 -0
  34. followthemoney/schema/Address.yaml +78 -0
  35. followthemoney/schema/Airplane.yaml +17 -10
  36. followthemoney/schema/Analyzable.yaml +54 -0
  37. followthemoney/schema/Article.yaml +16 -0
  38. followthemoney/schema/Assessment.yaml +32 -0
  39. followthemoney/schema/Asset.yaml +10 -4
  40. followthemoney/schema/Associate.yaml +41 -0
  41. followthemoney/schema/Audio.yaml +24 -0
  42. followthemoney/schema/BankAccount.yaml +53 -9
  43. followthemoney/schema/Call.yaml +48 -0
  44. followthemoney/schema/CallForTenders.yaml +117 -0
  45. followthemoney/schema/Company.yaml +37 -12
  46. followthemoney/schema/Contract.yaml +41 -7
  47. followthemoney/schema/ContractAward.yaml +30 -11
  48. followthemoney/schema/CourtCase.yaml +16 -10
  49. followthemoney/schema/CourtCaseParty.yaml +17 -6
  50. followthemoney/schema/CryptoWallet.yaml +48 -0
  51. followthemoney/schema/Debt.yaml +37 -0
  52. followthemoney/schema/Directorship.yaml +17 -4
  53. followthemoney/schema/Document.yaml +72 -139
  54. followthemoney/schema/Documentation.yml +38 -0
  55. followthemoney/schema/EconomicActivity.yaml +32 -17
  56. followthemoney/schema/Email.yaml +76 -0
  57. followthemoney/schema/Employment.yaml +39 -0
  58. followthemoney/schema/Event.yaml +35 -3
  59. followthemoney/schema/Family.yaml +41 -0
  60. followthemoney/schema/Folder.yaml +13 -0
  61. followthemoney/schema/HyperText.yaml +21 -0
  62. followthemoney/schema/Identification.yaml +40 -0
  63. followthemoney/schema/Image.yaml +25 -0
  64. followthemoney/schema/Interest.yaml +3 -6
  65. followthemoney/schema/Interval.yaml +56 -5
  66. followthemoney/schema/LegalEntity.yaml +81 -20
  67. followthemoney/schema/License.yaml +7 -3
  68. followthemoney/schema/Membership.yaml +19 -4
  69. followthemoney/schema/Mention.yaml +54 -0
  70. followthemoney/schema/Message.yaml +73 -0
  71. followthemoney/schema/Note.yaml +23 -0
  72. followthemoney/schema/Occupancy.yaml +40 -0
  73. followthemoney/schema/Organization.yaml +38 -3
  74. followthemoney/schema/Ownership.yaml +16 -4
  75. followthemoney/schema/Package.yaml +17 -0
  76. followthemoney/schema/Page.yaml +43 -0
  77. followthemoney/schema/Pages.yaml +23 -0
  78. followthemoney/schema/Passport.yaml +15 -17
  79. followthemoney/schema/Payment.yaml +38 -7
  80. followthemoney/schema/Person.yaml +61 -5
  81. followthemoney/schema/PlainText.yaml +17 -0
  82. followthemoney/schema/Position.yaml +50 -0
  83. followthemoney/schema/Post.yaml +42 -0
  84. followthemoney/schema/Project.yaml +27 -0
  85. followthemoney/schema/ProjectParticipant.yaml +36 -0
  86. followthemoney/schema/PublicBody.yaml +14 -3
  87. followthemoney/schema/RealEstate.yaml +19 -3
  88. followthemoney/schema/Representation.yaml +17 -6
  89. followthemoney/schema/Sanction.yaml +44 -20
  90. followthemoney/schema/Security.yaml +59 -0
  91. followthemoney/schema/Similar.yaml +37 -0
  92. followthemoney/schema/Succession.yaml +36 -0
  93. followthemoney/schema/Table.yaml +32 -0
  94. followthemoney/schema/TaxRoll.yaml +27 -9
  95. followthemoney/schema/Thing.yaml +69 -13
  96. followthemoney/schema/Trip.yaml +42 -0
  97. followthemoney/schema/UnknownLink.yaml +17 -6
  98. followthemoney/schema/UserAccount.yaml +44 -0
  99. followthemoney/schema/Value.yaml +5 -1
  100. followthemoney/schema/Vehicle.yaml +25 -8
  101. followthemoney/schema/Vessel.yaml +18 -10
  102. followthemoney/schema/Video.yaml +20 -0
  103. followthemoney/schema/Workbook.yaml +18 -0
  104. followthemoney/schema.py +406 -135
  105. followthemoney/translations/ar/LC_MESSAGES/followthemoney.mo +0 -0
  106. followthemoney/translations/ar/LC_MESSAGES/followthemoney.po +2900 -787
  107. followthemoney/translations/bs/LC_MESSAGES/followthemoney.mo +0 -0
  108. followthemoney/translations/bs/LC_MESSAGES/followthemoney.po +2108 -520
  109. followthemoney/translations/de/LC_MESSAGES/followthemoney.mo +0 -0
  110. followthemoney/translations/de/LC_MESSAGES/followthemoney.po +2902 -782
  111. followthemoney/translations/es/LC_MESSAGES/followthemoney.mo +0 -0
  112. followthemoney/translations/es/LC_MESSAGES/followthemoney.po +2893 -779
  113. followthemoney/translations/fr/LC_MESSAGES/followthemoney.mo +0 -0
  114. followthemoney/translations/fr/LC_MESSAGES/followthemoney.po +4362 -0
  115. followthemoney/translations/fr/followthemoney.po +3861 -0
  116. followthemoney/translations/messages.pot +3021 -725
  117. followthemoney/translations/nb/LC_MESSAGES/followthemoney.mo +0 -0
  118. followthemoney/translations/nb/LC_MESSAGES/followthemoney.po +3778 -0
  119. followthemoney/translations/nl/LC_MESSAGES/followthemoney.mo +0 -0
  120. followthemoney/translations/nl/LC_MESSAGES/followthemoney.po +3837 -0
  121. followthemoney/translations/pt_BR/LC_MESSAGES/followthemoney.mo +0 -0
  122. followthemoney/translations/pt_BR/LC_MESSAGES/followthemoney.po +3784 -0
  123. followthemoney/translations/ru/LC_MESSAGES/followthemoney.mo +0 -0
  124. followthemoney/translations/ru/LC_MESSAGES/followthemoney.po +2837 -539
  125. followthemoney/translations/ru/followthemoney.po +4221 -0
  126. followthemoney/translations/tr/LC_MESSAGES/followthemoney.mo +0 -0
  127. followthemoney/translations/tr/LC_MESSAGES/followthemoney.po +2073 -491
  128. followthemoney/types/__init__.py +35 -17
  129. followthemoney/types/address.py +41 -21
  130. followthemoney/types/checksum.py +25 -0
  131. followthemoney/types/common.py +233 -88
  132. followthemoney/types/country.py +89 -56
  133. followthemoney/types/date.py +59 -76
  134. followthemoney/types/email.py +66 -35
  135. followthemoney/types/entity.py +66 -13
  136. followthemoney/types/gender.py +66 -0
  137. followthemoney/types/iban.py +47 -28
  138. followthemoney/types/identifier.py +49 -22
  139. followthemoney/types/ip.py +35 -21
  140. followthemoney/types/json.py +58 -0
  141. followthemoney/types/language.py +124 -37
  142. followthemoney/types/mimetype.py +44 -0
  143. followthemoney/types/name.py +56 -12
  144. followthemoney/types/number.py +30 -0
  145. followthemoney/types/phone.py +92 -34
  146. followthemoney/types/registry.py +52 -0
  147. followthemoney/types/string.py +43 -0
  148. followthemoney/types/topic.py +94 -0
  149. followthemoney/types/url.py +39 -17
  150. followthemoney/util.py +139 -45
  151. followthemoney-3.8.0.dist-info/METADATA +153 -0
  152. followthemoney-3.8.0.dist-info/RECORD +157 -0
  153. {followthemoney-1.3.6.dist-info → followthemoney-3.8.0.dist-info}/WHEEL +1 -2
  154. followthemoney-3.8.0.dist-info/entry_points.txt +17 -0
  155. followthemoney-1.3.6.dist-info/LICENSE.txt → followthemoney-3.8.0.dist-info/licenses/LICENSE +1 -1
  156. followthemoney/link.py +0 -75
  157. followthemoney/schema/Associate.yml +0 -19
  158. followthemoney/schema/Family.yml +0 -19
  159. followthemoney/schema/Land.yml +0 -9
  160. followthemoney/schema/Relationship.yaml +0 -26
  161. followthemoney/types/domain.py +0 -50
  162. followthemoney-1.3.6.dist-info/DESCRIPTION.rst +0 -3
  163. followthemoney-1.3.6.dist-info/METADATA +0 -39
  164. followthemoney-1.3.6.dist-info/RECORD +0 -108
  165. followthemoney-1.3.6.dist-info/entry_points.txt +0 -3
  166. followthemoney-1.3.6.dist-info/metadata.json +0 -1
  167. followthemoney-1.3.6.dist-info/namespace_packages.txt +0 -1
  168. followthemoney-1.3.6.dist-info/top_level.txt +0 -3
  169. ns/ontology.py +0 -128
  170. tests/types/test_addresses.py +0 -24
  171. tests/types/test_common.py +0 -27
  172. tests/types/test_countries.py +0 -21
  173. tests/types/test_dates.py +0 -72
  174. tests/types/test_domains.py +0 -23
  175. tests/types/test_emails.py +0 -30
  176. tests/types/test_entity.py +0 -16
  177. tests/types/test_iban.py +0 -109
  178. tests/types/test_identifiers.py +0 -25
  179. tests/types/test_ip.py +0 -26
  180. tests/types/test_languages.py +0 -20
  181. tests/types/test_names.py +0 -33
  182. tests/types/test_phones.py +0 -24
  183. tests/types/test_registry.py +0 -14
  184. tests/types/test_urls.py +0 -23
  185. {ns → followthemoney/export}/__init__.py +0 -0
  186. /tests/types/__init__.py → /followthemoney/py.typed +0 -0
@@ -1,30 +1,48 @@
1
+ from followthemoney.types.registry import Registry
1
2
  from followthemoney.types.url import UrlType
2
3
  from followthemoney.types.name import NameType
3
- from followthemoney.types.domain import DomainType
4
4
  from followthemoney.types.email import EmailType
5
5
  from followthemoney.types.ip import IpType
6
- from followthemoney.types.iban import IbanType
7
6
  from followthemoney.types.address import AddressType
8
7
  from followthemoney.types.date import DateType
9
8
  from followthemoney.types.phone import PhoneType
10
9
  from followthemoney.types.country import CountryType
11
10
  from followthemoney.types.language import LanguageType
11
+ from followthemoney.types.mimetype import MimeType
12
+ from followthemoney.types.checksum import ChecksumType
12
13
  from followthemoney.types.identifier import IdentifierType
14
+ from followthemoney.types.iban import IbanType
13
15
  from followthemoney.types.entity import EntityType
14
- from followthemoney.types.common import TextType, Registry
16
+ from followthemoney.types.topic import TopicType
17
+ from followthemoney.types.gender import GenderType
18
+ from followthemoney.types.json import JsonType
19
+ from followthemoney.types.string import TextType
20
+ from followthemoney.types.string import HTMLType
21
+ from followthemoney.types.string import StringType
22
+ from followthemoney.types.number import NumberType
23
+ from followthemoney.types.common import PropertyType
15
24
 
16
25
  registry = Registry()
17
- urls = registry.add(UrlType())
18
- domains = registry.add(DomainType())
19
- emails = registry.add(EmailType())
20
- ips = registry.add(IpType())
21
- ibans = registry.add(IbanType())
22
- addresses = registry.add(AddressType())
23
- dates = registry.add(DateType())
24
- phones = registry.add(PhoneType())
25
- countries = registry.add(CountryType())
26
- languages = registry.add(LanguageType())
27
- identifiers = registry.add(IdentifierType())
28
- entities = registry.add(EntityType())
29
- texts = registry.add(TextType())
30
- names = registry.add(NameType())
26
+ registry.add(UrlType)
27
+ registry.add(NameType)
28
+ registry.add(EmailType)
29
+ registry.add(IpType)
30
+ registry.add(AddressType)
31
+ registry.add(DateType)
32
+ registry.add(PhoneType)
33
+ registry.add(CountryType)
34
+ registry.add(LanguageType)
35
+ registry.add(MimeType)
36
+ registry.add(ChecksumType)
37
+ registry.add(IdentifierType)
38
+ registry.add(IbanType) # TODO: remove
39
+ registry.add(EntityType)
40
+ registry.add(TopicType)
41
+ registry.add(GenderType)
42
+ registry.add(JsonType)
43
+ registry.add(TextType)
44
+ registry.add(HTMLType)
45
+ registry.add(StringType)
46
+ registry.add(NumberType)
47
+
48
+ __all__ = ["PropertyType", "registry"]
@@ -1,31 +1,51 @@
1
1
  import re
2
+ from typing import Optional, TYPE_CHECKING
3
+ from normality import slugify
2
4
  from normality.cleaning import collapse_spaces
3
5
 
4
6
  from followthemoney.types.common import PropertyType
7
+ from followthemoney.util import defer as _
5
8
  from followthemoney.util import dampen
6
9
 
10
+ if TYPE_CHECKING:
11
+ from followthemoney.proxy import EntityProxy
12
+
7
13
 
8
14
  class AddressType(PropertyType):
9
- LINE_BREAKS = re.compile(r'(\r\n|\n|<BR/>|<BR>|\t|ESQ\.,|ESQ,|;)')
10
- COMMATA = re.compile(r'(,\s?[,\.])')
11
- name = 'address'
12
- group = 'addresses'
13
- prefix = 'addr'
15
+ """A geographic address used to describe a location of a residence or post
16
+ box. There is no specified order for the sub-parts of an address (e.g. street,
17
+ city, postal code), and we should consider introducing an Address schema type
18
+ to retain fidelity in cases where address parts are specified."""
19
+
20
+ LINE_BREAKS = re.compile(r"(\r\n|\n|<BR/>|<BR>|\t|ESQ\.,|ESQ,|;)")
21
+ COMMATA = re.compile(r"(,\s?[,\.])")
22
+ name = "address"
23
+ group = "addresses"
24
+ label = _("Address")
25
+ plural = _("Addresses")
26
+ matchable = True
27
+ pivot = True
14
28
 
15
- def clean_text(self, address, **kwargs):
29
+ def clean_text(
30
+ self,
31
+ text: str,
32
+ fuzzy: bool = False,
33
+ format: Optional[str] = None,
34
+ proxy: Optional["EntityProxy"] = None,
35
+ ) -> Optional[str]:
16
36
  """Basic clean-up."""
17
- address = self.LINE_BREAKS.sub(', ', address)
18
- address = self.COMMATA.sub(', ', address)
19
- address = collapse_spaces(address)
20
- if len(address):
21
- return address
22
-
23
- # TODO: normalize well-known parts like "Street", "Road", etc.
24
- # TODO: consider using https://github.com/openvenues/pypostal
25
- # def normalize(self, address, **kwargs):
26
- # """Make the address more compareable."""
27
- # addresses = super(AddressType, self).normalize(address, **kwargs)
28
- # return addresses
29
-
30
- def specificity(self, value):
31
- return dampen(10, 50, value) * .6
37
+ address = self.LINE_BREAKS.sub(", ", text)
38
+ address = self.COMMATA.sub(", ", address)
39
+ collapsed = collapse_spaces(address)
40
+ if collapsed is None or not len(collapsed):
41
+ return None
42
+ return collapsed
43
+
44
+ def _specificity(self, value: str) -> float:
45
+ return dampen(10, 60, value)
46
+
47
+ def node_id(self, value: str) -> Optional[str]:
48
+ slug = slugify(value)
49
+ if slug is None:
50
+ return None
51
+ return f"addr:{value}"
@@ -0,0 +1,25 @@
1
+ from followthemoney.rdf import URIRef, Identifier
2
+ from followthemoney.types.common import PropertyType
3
+ from followthemoney.util import defer as _
4
+
5
+
6
+ class ChecksumType(PropertyType):
7
+ """Content hashes calculated using SHA1. Checksum references are used by
8
+ document-typed entities in Aleph to refer to raw data in the archive
9
+ (e.g. the document from which the entity is extracted).
10
+
11
+ Unfortunately, this has some security implications: in order to avoid people
12
+ getting access to documents for which they know the checksum, properties
13
+ of this type are scrubbed when submitted via the normal API. Checksums can only
14
+ be defined by uploading a document to be ingested."""
15
+
16
+ name = "checksum"
17
+ group = "checksums"
18
+ label = _("Checksum")
19
+ plural = _("Checksums")
20
+ matchable = True
21
+ pivot = True
22
+ max_length = 40
23
+
24
+ def rdf(self, value: str) -> Identifier:
25
+ return URIRef(f"hash:{value}")
@@ -1,131 +1,276 @@
1
+ from inspect import cleandoc
1
2
  from itertools import product
2
- from rdflib import Literal
3
- from banal import ensure_list, is_mapping
3
+ from babel.core import Locale
4
+ from banal import ensure_list
4
5
  from normality import stringify
6
+ from typing import Any, Dict, Optional, Sequence, Callable, TYPE_CHECKING, TypedDict
7
+
8
+ from followthemoney.rdf import Literal, Identifier
9
+ from followthemoney.util import get_locale
10
+ from followthemoney.util import gettext, sanitize_text
11
+
12
+ if TYPE_CHECKING:
13
+ from followthemoney.proxy import EntityProxy
14
+
15
+ EnumValues = Dict[str, str]
16
+
17
+
18
+ class PropertyTypeToDict(TypedDict, total=False):
19
+ label: str
20
+ plural: str
21
+ description: Optional[str]
22
+ maxLength: int
23
+ group: Optional[str]
24
+ matchable: Optional[bool]
25
+ pivot: Optional[bool]
26
+ values: Optional[EnumValues]
5
27
 
6
28
 
7
29
  class PropertyType(object):
8
- """Base class for all types."""
9
- name = None
10
- group = None
11
- prefix = None
12
- strong = False
13
-
14
- def validate(self, text, **kwargs):
15
- """Returns a boolean to indicate if this is a valid instance of
30
+ """Base class for all property types."""
31
+
32
+ name: str = "any"
33
+ """A machine-facing, variable safe name for the given type."""
34
+
35
+ group: Optional[str] = None
36
+ """Groups are used to invert all the properties of an entity that have a
37
+ given type into a single list before indexing them. This way, in Aleph,
38
+ you can query for ``countries:gb`` instead of having to make a set of filters
39
+ like ``properties.jurisdiction:gb OR properties.country:gb OR ...``."""
40
+
41
+ label: str = "Any"
42
+ """A name for this type to be shown to users."""
43
+
44
+ plural: str = "Any"
45
+ """A plural name for this type which can be used in appropriate places in
46
+ a user interface."""
47
+
48
+ matchable: bool = True
49
+ """Matchable types allow properties to be compared with each other in order to
50
+ assess entity similarity. While it makes sense to compare names, countries or
51
+ phone numbers, the same isn't true for raw JSON blobs or descriptive text
52
+ snippets."""
53
+
54
+ pivot: bool = False
55
+ """Pivot property types are like a stronger form of :attr:`~matchable` types:
56
+ they will be used when value-based lookups are used to find commonalities
57
+ between entities. For example, pivot typed-properties are used to show all the
58
+ other entities that mention the same phone number, email address or name as the
59
+ one currently seen by the user."""
60
+
61
+ max_length: int = 250
62
+ """The maximum length of a single value of this type. This is used to warn when
63
+ adding individual values that may be malformed or too long to be stored in
64
+ downstream databases with fixed column lengths. The unit is unicode codepoints
65
+ (not bytes), the output of Python len()."""
66
+
67
+ total_size: Optional[int] = None
68
+ """Some types have overall size limitations in place in order to avoid generating
69
+ entities that are very large (upstream ElasticSearch has a 100MB document limit).
70
+ Once the total size of all properties of this type has exceeded the given limit,
71
+ an entity will refuse to add further values."""
72
+
73
+ @property
74
+ def docs(self) -> Optional[str]:
75
+ if not self.__doc__:
76
+ return None
77
+
78
+ return cleandoc(self.__doc__)
79
+
80
+ def validate(
81
+ self, value: str, fuzzy: bool = False, format: Optional[str] = None
82
+ ) -> bool:
83
+ """Returns a boolean to indicate if the given value is a valid instance of
16
84
  the type."""
17
- cleaned = self.clean(text, **kwargs)
85
+ cleaned = self.clean(value, fuzzy=fuzzy, format=format)
18
86
  return cleaned is not None
19
87
 
20
- def clean(self, text, **kwargs):
21
- """Create a more clean, but still user-facing version of an
22
- instance of the type."""
23
- text = stringify(text)
24
- if text is not None:
25
- return self.clean_text(text, **kwargs)
88
+ def clean(
89
+ self,
90
+ raw: Any,
91
+ fuzzy: bool = False,
92
+ format: Optional[str] = None,
93
+ proxy: Optional["EntityProxy"] = None,
94
+ ) -> Optional[str]:
95
+ """Create a clean version of a value of the type, suitable for storage
96
+ in an entity proxy."""
97
+ text = sanitize_text(raw)
98
+ if text is None:
99
+ return None
100
+ return self.clean_text(text, fuzzy=fuzzy, format=format, proxy=proxy)
26
101
 
27
- def clean_text(self, text, **kwargs):
102
+ def clean_text(
103
+ self,
104
+ text: str,
105
+ fuzzy: bool = False,
106
+ format: Optional[str] = None,
107
+ proxy: Optional["EntityProxy"] = None,
108
+ ) -> Optional[str]:
109
+ """Specific types can apply their own cleaning routines here (this is called
110
+ by ``clean`` after the value has been converted to a string and null values
111
+ have been filtered)."""
28
112
  return text
29
113
 
30
- def normalize(self, text, cleaned=False, **kwargs):
31
- """Create a represenation ideal for comparisons, but not to be
32
- shown to the user."""
33
- if not cleaned:
34
- text = self.clean(text, **kwargs)
35
- return ensure_list(text)
36
-
37
- def normalize_set(self, items, **kwargs):
38
- """Utility to normalize a whole set of values and get unique
39
- values."""
40
- values = set()
41
- for item in ensure_list(items):
42
- values.update(self.normalize(item, **kwargs))
43
- return list(values)
44
-
45
- def specificity(self, value):
46
- return 0
47
-
48
- def compare_safe(self, left, right):
114
+ def join(self, values: Sequence[str]) -> str:
115
+ """Helper function for converting multi-valued FtM data into formats that
116
+ allow only a single value per field (e.g. CSV). This is not fully reversible
117
+ and should be used as a last option."""
118
+ values = ensure_list(values)
119
+ return "; ".join(values)
120
+
121
+ def _specificity(self, value: str) -> float:
122
+ return 1.0
123
+
124
+ def specificity(self, value: Optional[str]) -> float:
125
+ """Return a score for how specific the given value is. This can be used as a
126
+ weighting factor in entity comparisons in order to rate matching property
127
+ values by how specific they are. For example: a longer address is considered
128
+ to be more specific than a short one, a full date more specific than just a
129
+ year number, etc."""
130
+ if not self.matchable or value is None:
131
+ return 0.0
132
+ return self._specificity(value)
133
+
134
+ def compare_safe(self, left: Optional[str], right: Optional[str]) -> float:
135
+ """Compare, but support None values on either side of the comparison."""
49
136
  left = stringify(left)
50
137
  right = stringify(right)
51
138
  if left is None or right is None:
52
- return 0
139
+ return 0.0
53
140
  return self.compare(left, right)
54
141
 
55
- def compare(self, left, right):
142
+ def compare(self, left: str, right: str) -> float:
56
143
  """Comparisons are a float between 0 and 1. They can assume
57
144
  that the given data is cleaned, but not normalised."""
58
145
  if left.lower() == right.lower():
59
- return 1 * self.specificity(left)
60
- return 0
146
+ return 1.0 * self.specificity(left)
147
+ return 0.0
61
148
 
62
- def compare_sets(self, left, right, func=max):
63
- """Compare two sets of values and select a specific result."""
149
+ def compare_sets(
150
+ self,
151
+ left: Sequence[str],
152
+ right: Sequence[str],
153
+ func: Callable[[Sequence[float]], float] = max,
154
+ ) -> float:
155
+ """Compare two sets of values and select the highest-scored result."""
64
156
  results = []
65
- for (l, r) in product(ensure_list(left), ensure_list(right)):
66
- results.append(self.compare_safe(l, r))
157
+ for le, ri in product(ensure_list(left), ensure_list(right)):
158
+ results.append(self.compare(le, ri))
67
159
  if not len(results):
68
- return 0
160
+ return 0.0
69
161
  return func(results)
70
162
 
71
- def country_hint(self, value):
72
- """Determine if the given value allows us to infer a country
73
- that it may be related to."""
163
+ def country_hint(self, value: str) -> Optional[str]:
164
+ """Determine if the given value allows us to infer a country that it may
165
+ be related to (e.g. using a country prefix on a phone number or IBAN)."""
74
166
  return None
75
167
 
76
- def ref(self, value):
77
- """Generate a qualified form for storage in a triplestore."""
78
- if self.prefix is None:
79
- return
80
- if is_mapping(value):
81
- value = value.get('id')
82
- value = stringify(value)
168
+ def rdf(self, value: str) -> Identifier:
169
+ """Return an RDF term to represent the given value - either a string
170
+ literal, or a URI reference."""
171
+ return Literal(value)
172
+
173
+ def pick(self, values: Sequence[str]) -> Optional[str]:
174
+ """Pick the best value to show to the user."""
175
+ raise NotImplementedError
176
+
177
+ def node_id(self, value: str) -> Optional[str]:
178
+ """Return an ID suitable to identify this entity as a typed node in a
179
+ graph representation of some FtM data. It's usually the same as the the
180
+ RDF form."""
181
+ return str(self.rdf(value))
182
+
183
+ def node_id_safe(self, value: Optional[str]) -> Optional[str]:
184
+ """Wrapper for node_id to handle None values."""
83
185
  if value is None:
84
- return
85
- return ':'.join((self.prefix, value))
186
+ return None
187
+ return self.node_id(value)
86
188
 
87
- def rdf(self, value):
88
- return Literal(value)
189
+ def caption(self, value: str) -> Optional[str]:
190
+ """Return a label for the given property value. This is often the same as the
191
+ value, but for types like countries or languages, it would return the label,
192
+ while other values like phone numbers can be formatted to be nicer to read."""
193
+ return value
89
194
 
90
- def __eq__(self, other):
195
+ def to_dict(self) -> PropertyTypeToDict:
196
+ """Return a serialisable description of this data type."""
197
+ data: PropertyTypeToDict = {
198
+ "label": gettext(self.label),
199
+ "plural": gettext(self.plural),
200
+ "description": gettext(self.docs),
201
+ "maxLength": self.max_length,
202
+ }
203
+ if self.group:
204
+ data["group"] = self.group
205
+ if self.matchable:
206
+ data["matchable"] = True
207
+ if self.pivot:
208
+ data["pivot"] = True
209
+ return data
210
+
211
+ def __eq__(self, other: Any) -> bool:
212
+ if not isinstance(other, PropertyType):
213
+ return False
91
214
  return self.name == other.name
92
215
 
93
- def __hash__(self):
216
+ def __hash__(self) -> int:
94
217
  return hash(self.name)
95
218
 
96
- def __str__(self):
219
+ def __str__(self) -> str:
97
220
  return self.name
98
221
 
99
- def __repr__(self):
100
- return '<%s()>' % type(self).__name__
222
+ def __repr__(self) -> str:
223
+ return f"<{self.name}>"
224
+
101
225
 
226
+ class EnumType(PropertyType):
227
+ """Enumerated type properties are used for types which have a defined set
228
+ of possible values, like languages and countries."""
102
229
 
103
- class TextType(PropertyType):
104
- name = 'text'
230
+ def __init__(self) -> None:
231
+ self._names: Dict[Locale, EnumValues] = {}
232
+ self.codes = set(self.names.keys())
105
233
 
234
+ def _locale_names(self, locale: Locale) -> EnumValues:
235
+ return {}
106
236
 
107
- class Registry(object):
237
+ @property
238
+ def names(self) -> EnumValues:
239
+ """Return a mapping from property values to their labels in the current
240
+ locale."""
241
+ locale = get_locale()
242
+ if locale not in self._names:
243
+ self._names[locale] = self._locale_names(locale)
244
+ return self._names[locale]
108
245
 
109
- def __init__(self):
110
- self.prefixes = {}
111
- self.groups = {}
112
- self.names = {}
246
+ def validate(
247
+ self, value: str, fuzzy: bool = False, format: Optional[str] = None
248
+ ) -> bool:
249
+ """Make sure that the given code value is one of the supported set."""
250
+ if value is None:
251
+ return False
252
+ return str(value).lower().strip() in self.codes
113
253
 
114
- def add(self, instance):
115
- setattr(self, instance.name, instance)
116
- self.names[instance.name] = instance
117
- if instance.prefix is not None:
118
- self.prefixes[instance.prefix] = instance
119
- if instance.group is not None:
120
- self.groups[instance.group] = instance
121
- return instance
254
+ def clean_text(
255
+ self,
256
+ code: str,
257
+ fuzzy: bool = False,
258
+ format: Optional[str] = None,
259
+ proxy: Optional["EntityProxy"] = None,
260
+ ) -> Optional[str]:
261
+ """All code values are cleaned to be lowercase and surrounding whitespace is
262
+ removed."""
263
+ code = code.lower().strip()
264
+ if code not in self.codes:
265
+ return None
266
+ return code
122
267
 
123
- def get(self, name):
124
- try:
125
- return getattr(self, name)
126
- except AttributeError:
127
- pass
268
+ def caption(self, value: str) -> str:
269
+ """Given a code value, return the label that should be shown to a user."""
270
+ return self.names.get(value, value)
128
271
 
129
- def deref(self, ref):
130
- prefix, value = ref.split(':', 1)
131
- return self.prefixes.get(prefix), value
272
+ def to_dict(self) -> PropertyTypeToDict:
273
+ """When serialising the model to JSON, include all values."""
274
+ data = super(EnumType, self).to_dict()
275
+ data["values"] = self.names
276
+ return data