followthemoney 1.3.7__py3-none-any.whl → 3.8.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- followthemoney/__init__.py +5 -3
- followthemoney/cli/__init__.py +17 -0
- followthemoney/cli/aggregate.py +56 -0
- followthemoney/cli/cli.py +88 -0
- followthemoney/cli/exports.py +121 -0
- followthemoney/cli/mapping.py +85 -0
- followthemoney/cli/sieve.py +67 -0
- followthemoney/cli/util.py +142 -0
- followthemoney/compare.py +130 -60
- followthemoney/exc.py +19 -6
- followthemoney/export/common.py +29 -0
- followthemoney/export/csv.py +82 -0
- followthemoney/export/excel.py +75 -0
- followthemoney/export/graph.py +79 -0
- followthemoney/export/neo4j.py +182 -0
- followthemoney/export/rdf.py +26 -0
- followthemoney/graph.py +308 -0
- followthemoney/helpers.py +212 -0
- followthemoney/mapping/__init__.py +1 -1
- followthemoney/mapping/csv.py +67 -35
- followthemoney/mapping/entity.py +116 -44
- followthemoney/mapping/property.py +90 -44
- followthemoney/mapping/query.py +27 -19
- followthemoney/mapping/source.py +15 -5
- followthemoney/mapping/sql.py +75 -61
- followthemoney/messages.py +13 -7
- followthemoney/model.py +108 -56
- followthemoney/namespace.py +119 -0
- followthemoney/offshore.py +48 -0
- followthemoney/ontology.py +77 -0
- followthemoney/property.py +204 -71
- followthemoney/proxy.py +455 -118
- followthemoney/rdf.py +9 -0
- followthemoney/schema/Address.yaml +78 -0
- followthemoney/schema/Airplane.yaml +17 -10
- followthemoney/schema/Analyzable.yaml +54 -0
- followthemoney/schema/Article.yaml +16 -0
- followthemoney/schema/Assessment.yaml +32 -0
- followthemoney/schema/Asset.yaml +10 -4
- followthemoney/schema/Associate.yaml +41 -0
- followthemoney/schema/Audio.yaml +24 -0
- followthemoney/schema/BankAccount.yaml +53 -9
- followthemoney/schema/Call.yaml +48 -0
- followthemoney/schema/CallForTenders.yaml +117 -0
- followthemoney/schema/Company.yaml +37 -12
- followthemoney/schema/Contract.yaml +41 -7
- followthemoney/schema/ContractAward.yaml +30 -11
- followthemoney/schema/CourtCase.yaml +16 -10
- followthemoney/schema/CourtCaseParty.yaml +17 -6
- followthemoney/schema/CryptoWallet.yaml +48 -0
- followthemoney/schema/Debt.yaml +37 -0
- followthemoney/schema/Directorship.yaml +17 -4
- followthemoney/schema/Document.yaml +72 -139
- followthemoney/schema/Documentation.yml +38 -0
- followthemoney/schema/EconomicActivity.yaml +32 -17
- followthemoney/schema/Email.yaml +76 -0
- followthemoney/schema/Employment.yaml +39 -0
- followthemoney/schema/Event.yaml +35 -3
- followthemoney/schema/Family.yaml +41 -0
- followthemoney/schema/Folder.yaml +13 -0
- followthemoney/schema/HyperText.yaml +21 -0
- followthemoney/schema/Identification.yaml +40 -0
- followthemoney/schema/Image.yaml +25 -0
- followthemoney/schema/Interest.yaml +3 -6
- followthemoney/schema/Interval.yaml +56 -5
- followthemoney/schema/LegalEntity.yaml +81 -20
- followthemoney/schema/License.yaml +7 -3
- followthemoney/schema/Membership.yaml +19 -4
- followthemoney/schema/Mention.yaml +54 -0
- followthemoney/schema/Message.yaml +78 -0
- followthemoney/schema/Note.yaml +23 -0
- followthemoney/schema/Occupancy.yaml +44 -0
- followthemoney/schema/Organization.yaml +38 -3
- followthemoney/schema/Ownership.yaml +16 -4
- followthemoney/schema/Package.yaml +17 -0
- followthemoney/schema/Page.yaml +43 -0
- followthemoney/schema/Pages.yaml +23 -0
- followthemoney/schema/Passport.yaml +16 -17
- followthemoney/schema/Payment.yaml +38 -7
- followthemoney/schema/Person.yaml +61 -5
- followthemoney/schema/PlainText.yaml +17 -0
- followthemoney/schema/Position.yaml +50 -0
- followthemoney/schema/Post.yaml +42 -0
- followthemoney/schema/Project.yaml +27 -0
- followthemoney/schema/ProjectParticipant.yaml +36 -0
- followthemoney/schema/PublicBody.yaml +14 -3
- followthemoney/schema/RealEstate.yaml +19 -3
- followthemoney/schema/Representation.yaml +17 -6
- followthemoney/schema/Sanction.yaml +45 -21
- followthemoney/schema/Security.yaml +59 -0
- followthemoney/schema/Similar.yaml +37 -0
- followthemoney/schema/Succession.yaml +36 -0
- followthemoney/schema/Table.yaml +32 -0
- followthemoney/schema/TaxRoll.yaml +27 -9
- followthemoney/schema/Thing.yaml +69 -13
- followthemoney/schema/Trip.yaml +42 -0
- followthemoney/schema/UnknownLink.yaml +17 -6
- followthemoney/schema/UserAccount.yaml +44 -0
- followthemoney/schema/Value.yaml +5 -1
- followthemoney/schema/Vehicle.yaml +25 -8
- followthemoney/schema/Vessel.yaml +18 -10
- followthemoney/schema/Video.yaml +20 -0
- followthemoney/schema/Workbook.yaml +18 -0
- followthemoney/schema.py +436 -135
- followthemoney/translations/ar/LC_MESSAGES/followthemoney.mo +0 -0
- followthemoney/translations/ar/LC_MESSAGES/followthemoney.po +2900 -787
- followthemoney/translations/bs/LC_MESSAGES/followthemoney.mo +0 -0
- followthemoney/translations/bs/LC_MESSAGES/followthemoney.po +2108 -520
- followthemoney/translations/de/LC_MESSAGES/followthemoney.mo +0 -0
- followthemoney/translations/de/LC_MESSAGES/followthemoney.po +2902 -782
- followthemoney/translations/es/LC_MESSAGES/followthemoney.mo +0 -0
- followthemoney/translations/es/LC_MESSAGES/followthemoney.po +2893 -779
- followthemoney/translations/fr/LC_MESSAGES/followthemoney.mo +0 -0
- followthemoney/translations/fr/LC_MESSAGES/followthemoney.po +4362 -0
- followthemoney/translations/fr/followthemoney.po +3861 -0
- followthemoney/translations/messages.pot +3021 -725
- followthemoney/translations/nb/LC_MESSAGES/followthemoney.mo +0 -0
- followthemoney/translations/nb/LC_MESSAGES/followthemoney.po +3778 -0
- followthemoney/translations/nl/LC_MESSAGES/followthemoney.mo +0 -0
- followthemoney/translations/nl/LC_MESSAGES/followthemoney.po +3837 -0
- followthemoney/translations/pt_BR/LC_MESSAGES/followthemoney.mo +0 -0
- followthemoney/translations/pt_BR/LC_MESSAGES/followthemoney.po +3784 -0
- followthemoney/translations/ru/LC_MESSAGES/followthemoney.mo +0 -0
- followthemoney/translations/ru/LC_MESSAGES/followthemoney.po +2837 -539
- followthemoney/translations/ru/followthemoney.po +4221 -0
- followthemoney/translations/tr/LC_MESSAGES/followthemoney.mo +0 -0
- followthemoney/translations/tr/LC_MESSAGES/followthemoney.po +2073 -491
- followthemoney/types/__init__.py +35 -17
- followthemoney/types/address.py +50 -21
- followthemoney/types/checksum.py +25 -0
- followthemoney/types/common.py +233 -88
- followthemoney/types/country.py +50 -56
- followthemoney/types/date.py +59 -76
- followthemoney/types/email.py +66 -35
- followthemoney/types/entity.py +66 -13
- followthemoney/types/gender.py +66 -0
- followthemoney/types/iban.py +47 -28
- followthemoney/types/identifier.py +49 -22
- followthemoney/types/ip.py +35 -21
- followthemoney/types/json.py +58 -0
- followthemoney/types/language.py +124 -37
- followthemoney/types/mimetype.py +44 -0
- followthemoney/types/name.py +56 -12
- followthemoney/types/number.py +30 -0
- followthemoney/types/phone.py +92 -34
- followthemoney/types/registry.py +52 -0
- followthemoney/types/string.py +43 -0
- followthemoney/types/topic.py +94 -0
- followthemoney/types/url.py +39 -17
- followthemoney/util.py +139 -45
- followthemoney-3.8.1.dist-info/METADATA +153 -0
- followthemoney-3.8.1.dist-info/RECORD +157 -0
- {followthemoney-1.3.7.dist-info → followthemoney-3.8.1.dist-info}/WHEEL +1 -2
- followthemoney-3.8.1.dist-info/entry_points.txt +17 -0
- followthemoney-1.3.7.dist-info/LICENSE.txt → followthemoney-3.8.1.dist-info/licenses/LICENSE +1 -1
- followthemoney/link.py +0 -75
- followthemoney/schema/Associate.yml +0 -19
- followthemoney/schema/Family.yml +0 -19
- followthemoney/schema/Land.yml +0 -9
- followthemoney/schema/Relationship.yaml +0 -26
- followthemoney/types/domain.py +0 -50
- followthemoney-1.3.7.dist-info/DESCRIPTION.rst +0 -3
- followthemoney-1.3.7.dist-info/METADATA +0 -39
- followthemoney-1.3.7.dist-info/RECORD +0 -108
- followthemoney-1.3.7.dist-info/entry_points.txt +0 -3
- followthemoney-1.3.7.dist-info/metadata.json +0 -1
- followthemoney-1.3.7.dist-info/namespace_packages.txt +0 -1
- followthemoney-1.3.7.dist-info/top_level.txt +0 -3
- ns/ontology.py +0 -128
- tests/types/test_addresses.py +0 -24
- tests/types/test_common.py +0 -32
- tests/types/test_countries.py +0 -27
- tests/types/test_dates.py +0 -73
- tests/types/test_domains.py +0 -23
- tests/types/test_emails.py +0 -32
- tests/types/test_entity.py +0 -19
- tests/types/test_iban.py +0 -109
- tests/types/test_identifiers.py +0 -27
- tests/types/test_ip.py +0 -29
- tests/types/test_languages.py +0 -23
- tests/types/test_names.py +0 -33
- tests/types/test_phones.py +0 -24
- tests/types/test_registry.py +0 -14
- tests/types/test_urls.py +0 -23
- {ns → followthemoney/export}/__init__.py +0 -0
- /tests/types/__init__.py → /followthemoney/py.typed +0 -0
|
@@ -0,0 +1,182 @@
|
|
|
1
|
+
import os
|
|
2
|
+
import json
|
|
3
|
+
import logging
|
|
4
|
+
from typing import Any, Dict, Iterable, List, Optional, Set, TextIO
|
|
5
|
+
import stringcase # type: ignore
|
|
6
|
+
|
|
7
|
+
from followthemoney.export.csv import CSVMixin, CSVWriter
|
|
8
|
+
from followthemoney.export.graph import GraphExporter, DEFAULT_EDGE_TYPES
|
|
9
|
+
from followthemoney.graph import Edge, Node
|
|
10
|
+
from followthemoney.schema import Schema
|
|
11
|
+
from followthemoney.util import PathLike
|
|
12
|
+
|
|
13
|
+
log = logging.getLogger(__name__)
|
|
14
|
+
NEO4J_ADMIN_PATH = os.environ.get("NEO4J_ADMIN_PATH", "neo4j-admin")
|
|
15
|
+
NEO4J_DATABASE_NAME = os.environ.get("NEO4J_DATABASE_NAME", "graph.db")
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
class Neo4JCSVExporter(CSVMixin, GraphExporter):
    """Export the graph as CSV files plus a ``neo4j-admin import`` script.

    Two shared files are opened up front: ``_links`` for property-derived
    edges and ``_nodes`` for reified value nodes. Entities with a proxy are
    written to per-schema files obtained from ``self._get_writer`` (an
    inherited helper that is not visible in this module).
    """

    def __init__(
        self,
        directory: PathLike,
        extra: Optional[List[str]] = None,
        edge_types: Iterable[str] = DEFAULT_EDGE_TYPES,
    ) -> None:
        """Set up the exporter and open the shared link and node CSV files.

        :param directory: output location, handed to ``self._configure``.
        :param extra: additional column headers, handed to ``self._configure``.
        :param edge_types: property type names passed on to the graph exporter.
        """
        super(Neo4JCSVExporter, self).__init__(edge_types=edge_types)
        self._configure(directory, extra=extra)

        self.links_handler, self.links_writer = self._open_csv_file("_links")
        self.links_writer.writerow([":TYPE", ":START_ID", ":END_ID", "weight"])

        self.nodes_handler, self.nodes_writer = self._open_csv_file("_nodes")
        self.nodes_writer.writerow(["id:ID", ":LABEL", "caption"])
        # IDs of value nodes already written to the shared nodes file.
        self.nodes_seen: Set[str] = set()

    def _write_header(self, writer: CSVWriter, schema: Schema) -> None:
        """Write the header row for a per-schema CSV file.

        Edge schemata get relationship columns, all other schemata get node
        columns; both are followed by the extra columns and one column per
        exportable property of the schema.
        """
        if schema.edge:
            headers = ["id", ":TYPE", ":START_ID", ":END_ID"]
        else:
            headers = ["id:ID", ":LABEL", "caption"]

        headers.extend(self.extra)
        headers.extend(prop.name for prop in self.exportable_properties(schema))
        writer.writerow(headers)

    def write_graph(self, extra: Optional[List[str]] = None) -> None:
        """Write all nodes, then all edges, of the buffered graph and reset it.

        :param extra: values for the extra columns, appended to every
            per-schema row written during this call.
        """
        extra_ = extra or []
        for node in self.graph.iternodes():
            self.write_node(node, extra_)

        for edge in self.graph.iteredges():
            self.write_edge(edge, extra_)

        self.graph.flush()

    def write_node(self, node: Node, extra: List[str]) -> None:
        """Write a single graph node; nodes without an ID are skipped."""
        if node.id is None:
            return
        # Reified value nodes go to the shared nodes file, once per ID.
        if not node.is_entity and node.id not in self.nodes_seen:
            row = [node.id, node.type.name, node.caption]
            self.nodes_writer.writerow(row)
            self.nodes_seen.add(node.id)
        # Nodes backed by a proxy get a full row in their schema's file.
        if node.proxy is not None and node.schema is not None:
            label = ";".join(node.schema.names)
            cells = [node.id, label, node.caption]
            cells.extend(extra or [])
            for prop, values in self.exportable_fields(node.proxy):
                cells.append(prop.type.join(values))
            writer = self._get_writer(node.schema)
            writer.writerow(cells)

    def write_edge(self, edge: Edge, extra: List[str]) -> None:
        """Write a single graph edge.

        Property-derived edges are written to the shared links file; edges
        backed by an entity proxy are written to their schema's file.
        """
        if edge.prop is not None:
            type_ = stringcase.constcase(edge.prop.name)
            row = [type_, edge.source_id, edge.target_id, edge.weight]
            self.links_writer.writerow(row)
        if edge.proxy is not None:
            proxy = edge.proxy
            type_ = stringcase.constcase(proxy.schema.name)
            # That potentially may lead to multiple edges with same id
            cells = [proxy.id, type_, edge.source_id, edge.target_id]
            cells.extend(extra or [])

            for prop, values in self.exportable_fields(edge.proxy):
                cells.append(prop.type.join(values))

            writer = self._get_writer(proxy.schema)
            writer.writerow(cells)

    def finalize_graph(self) -> None:
        """Write ``neo4j_import.sh`` and close every CSV file.

        The script lists the shared links/nodes files and each per-schema
        file as ``--relationships`` or ``--nodes`` arguments. File handles
        are closed even if writing the script fails.
        """
        script_path = self.directory.joinpath("neo4j_import.sh")
        try:
            with open(script_path, mode="w") as fp:
                cmd = "{} import --id-type=STRING --database={} \\\n"
                fp.write(cmd.format(NEO4J_ADMIN_PATH, NEO4J_DATABASE_NAME))
                fp.write("\t--multiline-fields=true \\\n")
                cmd = "\t--relationships={} \\\n"
                fp.write(cmd.format(os.path.basename(self.links_handler.name)))
                cmd = "\t--nodes={} \\\n"
                fp.write(cmd.format(os.path.basename(self.nodes_handler.name)))

                for schema, (handle, _writer) in self.handles.items():
                    file_name = os.path.basename(handle.name)
                    if schema.edge:
                        cmd = "\t--relationships={} \\\n"
                    else:
                        cmd = "\t--nodes={} \\\n"
                    fp.write(cmd.format(file_name))
        finally:
            # Release the CSV handles on the error path as well.
            self.links_handler.close()
            self.nodes_handler.close()
            self.close()
|
|
114
|
+
|
|
115
|
+
|
|
116
|
+
class CypherGraphExporter(GraphExporter):
    """Emit the graph as a sequence of Cypher statements for Neo4J.

    Every node and edge becomes its own statement, much like writing
    individual SQL statements, so a CSV-based import is probably the
    better choice for large datasets.
    """

    # https://www.opencypher.org/
    # MATCH (n) DETACH DELETE n;

    def __init__(self, fh: TextIO, edge_types: Iterable[str] = DEFAULT_EDGE_TYPES):
        """Remember the output stream and start with no proxy nodes seen."""
        super().__init__(edge_types=edge_types)
        self.proxy_nodes: Set[str] = set()
        self.fh = fh

    def _to_map(self, data: Dict[str, Any]) -> str:
        """Render the truthy entries of ``data`` as the inside of a Cypher map.

        Values are JSON-encoded; entries with falsy values are left out.
        """
        return ", ".join(
            "%s: %s" % (name, json.dumps(item)) for name, item in data.items() if item
        )

    def write_graph(self) -> None:
        """Write one statement per node and per edge, then reset the graph."""
        emit = self.fh.write
        node_stmt = "MERGE (p { %(id)s }) SET p += { %(map)s } SET p :%(label)s;\n"
        edge_stmt = (
            "MATCH (s { %(source)s }), (t { %(target)s }) "
            "MERGE (s)-[:%(type)s { %(map)s }]->(t);\n"
        )

        for node in self.graph.iternodes():
            # Skip proxy-backed nodes already emitted, and nodes without ID.
            if node.value in self.proxy_nodes or node.id is None:
                continue
            if node.proxy is not None:
                self.proxy_nodes.add(node.value)

            props = self.get_attributes(node)
            props["id"] = node.id
            caption = node.caption
            if caption is not None:
                props["caption"] = caption

            # Label with the schema names, falling back to the value type.
            labels = list(node.schema.names) if node.schema else [node.type.name]
            params = {
                "id": self._to_map({"id": node.id}),
                "map": self._to_map(props),
                "label": ":".join(labels),
            }
            emit(node_stmt % params)

        for edge in self.graph.iteredges():
            props = self.get_attributes(edge)
            props["id"] = edge.id
            props["weight"] = str(edge.weight)
            params = {
                "source": self._to_map({"id": edge.source_id}),
                "target": self._to_map({"id": edge.target_id}),
                "type": stringcase.constcase(edge.type_name),
                "map": self._to_map(props),
            }
            emit(edge_stmt % params)

        self.graph.flush()
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
from rdflib import Graph
|
|
3
|
+
from typing import List, Optional, TextIO
|
|
4
|
+
|
|
5
|
+
from followthemoney.export.common import Exporter
|
|
6
|
+
from followthemoney.proxy import E
|
|
7
|
+
|
|
8
|
+
log = logging.getLogger(__name__)
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
class RDFExporter(Exporter):
    """Write entities to a text stream as N-Triples, one entity at a time."""

    def __init__(self, fh: TextIO, qualified: bool = True) -> None:
        """Keep the output stream and the ``qualified`` flag for later use."""
        super().__init__()
        self.qualified = qualified
        self.fh = fh

    def write(self, proxy: E, extra: Optional[List[str]] = None) -> None:
        """Serialize the triples of ``proxy`` and append them to the stream.

        ``extra`` is accepted but not used here. A failure while
        serializing or writing is logged rather than raised.
        """
        triples = Graph()
        for statement in proxy.triples(qualified=self.qualified):
            triples.add(statement)

        try:
            serialized = triples.serialize(format="nt11")
            line = serialized.strip() + "\n"
            self.fh.write(line)
        except Exception:
            log.exception("Failed to serialize ntriples.")
|
followthemoney/graph.py
ADDED
|
@@ -0,0 +1,308 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Converting FtM data to a property graph data model.
|
|
3
|
+
|
|
4
|
+
This module provides an abstract data object that represents a property
|
|
5
|
+
graph. This is used by the exporter modules to convert data
|
|
6
|
+
to a specific output format, like Cypher or NetworkX.
|
|
7
|
+
"""
|
|
8
|
+
import logging
|
|
9
|
+
from typing import Any, Dict, Generator, Iterable, List, Optional
|
|
10
|
+
|
|
11
|
+
from followthemoney.types import registry
|
|
12
|
+
from followthemoney.types.common import PropertyType
|
|
13
|
+
from followthemoney.schema import Schema
|
|
14
|
+
from followthemoney.proxy import EntityProxy
|
|
15
|
+
from followthemoney.property import Property
|
|
16
|
+
from followthemoney.exc import InvalidModel
|
|
17
|
+
|
|
18
|
+
log = logging.getLogger(__name__)
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
class Node(object):
    """A node represents either an entity that can be rendered as a
    node in a graph, or as a re-ified value, like a name, email
    address or phone number."""

    __slots__ = ["type", "value", "id", "proxy", "schema"]

    def __init__(
        self,
        type_: PropertyType,
        value: str,
        proxy: Optional[EntityProxy] = None,
        schema: Optional[Schema] = None,
    ) -> None:
        """Create a node for ``value`` of the given property type.

        :param type_: the property type of the value.
        :param value: the raw value (the entity ID for entity nodes).
        :param proxy: the entity behind the node, if it has been loaded.
        :param schema: schema to use when no proxy is given; a proxy's own
            schema always takes precedence.
        """
        self.type = type_
        self.value = value
        # No exception is raised here if no ID can be derived: the result of
        # node_id_safe() is stored as-is and other code in this module checks
        # ``node.id is None`` before using a node.
        self.id = type_.node_id_safe(value)
        self.proxy = proxy
        self.schema = schema if proxy is None else proxy.schema

    @property
    def is_entity(self) -> bool:
        """Check to see if the node represents an entity. If this is false, the
        node represents a non-entity property value that has been reified, like
        a phone number or a name."""
        return self.type == registry.entity

    @property
    def caption(self) -> str:
        """A user-facing label for the current node."""
        if self.type == registry.entity and self.proxy is not None:
            return self.proxy.caption
        caption = self.type.caption(self.value)
        # Fall back to the raw value when the type offers no caption.
        return caption or self.value

    def to_dict(self) -> Dict[str, Any]:
        """Return a simple dictionary to reflect this graph node."""
        return {
            "id": self.id,
            "type": self.type.name,
            "value": self.value,
            "caption": self.caption,
        }

    @classmethod
    def from_proxy(cls, proxy: EntityProxy) -> "Node":
        """For a given :class:`~followthemoney.proxy.EntityProxy`, return a node
        based on the entity."""
        return cls(registry.entity, proxy.id, proxy=proxy)

    def __str__(self) -> str:
        return self.caption

    def __repr__(self) -> str:
        return "<Node(%r, %r, %r)>" % (self.id, self.type, self.caption)

    def __hash__(self) -> int:
        return hash(self.id)

    def __eq__(self, other: Any) -> bool:
        """Nodes are equal to any object exposing the same ``id``.

        Objects without an ``id`` attribute (e.g. ``None``) yield
        ``NotImplemented`` so the comparison evaluates to ``False``
        instead of raising ``AttributeError``.
        """
        if not hasattr(other, "id"):
            return NotImplemented
        return bool(self.id == other.id)
|
|
85
|
+
|
|
86
|
+
|
|
87
|
+
class Edge(object):
    """A link between two nodes."""

    __slots__ = [
        "id",
        "weight",
        "source_id",
        "target_id",
        "prop",
        "proxy",
        "schema",
        "graph",
    ]

    def __init__(
        self,
        graph: "Graph",
        source: Node,
        target: Node,
        proxy: Optional[EntityProxy] = None,
        prop: Optional[Property] = None,
        value: Optional[str] = None,
    ):
        """Create an edge from ``source`` to ``target``.

        :param graph: the graph used to resolve the end nodes later on.
        :param source: node the edge starts at.
        :param target: node the edge points to.
        :param proxy: entity that this edge represents, if any; its ID
            becomes part of the edge ID and its schema the edge schema.
        :param prop: property that this edge was derived from, if any.
        :param value: property value; together with ``prop`` it determines
            the weight via ``prop.specificity``.
        """
        self.graph = graph
        self.id = f"{source.id}<>{target.id}"
        self.source_id = source.id
        self.target_id = target.id
        self.weight = 1.0
        self.prop = prop
        self.proxy = proxy
        self.schema: Optional[Schema] = None
        if prop is not None and value is not None:
            self.weight = prop.specificity(value)
        if proxy is not None:
            self.id = f"{source.id}<{proxy.id}>{target.id}"
            self.schema = proxy.schema

    @property
    def source(self) -> Optional[Node]:
        """The graph node from which the edge originates."""
        if self.source_id is None:
            return None
        return self.graph.nodes.get(self.source_id)

    @property
    def source_prop(self) -> Property:
        """Get the entity property originating this edge.

        Prefers the reverse of the schema's source property and falls back
        to the edge's own property; raises ``InvalidModel`` if neither is
        available.
        """
        if self.schema is not None and self.schema.source_prop is not None:
            if self.schema.source_prop.reverse is not None:
                return self.schema.source_prop.reverse
        if self.prop is None:
            raise InvalidModel("Contradiction: %r" % self)
        return self.prop

    @property
    def target(self) -> Optional[Node]:
        """The graph node to which the edge points."""
        if self.target_id is None:
            return None
        return self.graph.nodes.get(self.target_id)

    @property
    def target_prop(self) -> Optional[Property]:
        """Get the property through which the target side sees this edge.

        This is the reverse of the schema's target property or, failing
        that, the reverse of the edge's own property. ``None`` is returned
        when neither a schema target property nor an edge property is set.
        """
        if self.schema is not None and self.schema.target_prop is not None:
            return self.schema.target_prop.reverse
        if self.prop is not None:
            return self.prop.reverse
        # NOTE: this edge points at a value node.
        return None

    @property
    def type_name(self) -> str:
        """Return a machine-readable description of the type of the edge.
        This is either a property name or a schema name."""
        if self.schema is not None:
            return self.schema.name
        if self.prop is None:
            raise InvalidModel("Invalid edge: %r" % self)
        return self.prop.name

    def to_dict(self) -> Dict[str, Optional[str]]:
        """Return a simple dictionary to reflect this graph edge."""
        return {
            "id": self.id,
            "source_id": self.source_id,
            "target_id": self.target_id,
            "type_name": self.type_name,
        }

    def __repr__(self) -> str:
        return "<Edge(%r)>" % self.id

    def __hash__(self) -> int:
        return hash(self.id)

    def __eq__(self, other: Any) -> bool:
        """Edges are equal to any object exposing the same ``id``.

        Objects without an ``id`` attribute (e.g. ``None``) yield
        ``NotImplemented`` so the comparison evaluates to ``False``
        instead of raising ``AttributeError``.
        """
        if not hasattr(other, "id"):
            return NotImplemented
        return bool(self.id == other.id)
|
|
184
|
+
|
|
185
|
+
|
|
186
|
+
class Graph(object):
    """A set of nodes and edges, derived from entities and their properties.
    This represents an alternative interpretation of FtM data as a property
    graph.

    This class is meant to be extensible in order to support additional
    backends, like Aleph.
    """

    def __init__(self, edge_types: Iterable[PropertyType] = registry.pivots) -> None:
        """Create an empty graph.

        :param edge_types: property types whose values are reified into
            nodes; only the matchable ones among them are kept.
        """
        types = registry.get_types(edge_types)
        self.edge_types = [t for t in types if t.matchable]
        self.flush()

    def flush(self) -> None:
        """Remove all nodes, edges and proxies from the graph."""
        self.edges: Dict[str, Edge] = {}
        self.nodes: Dict[str, Node] = {}
        self.proxies: Dict[str, Optional[EntityProxy]] = {}

    def queue(self, id_: str, proxy: Optional[EntityProxy] = None) -> None:
        """Register a reference to an entity in the graph."""
        # A proxy that is already registered is never replaced by ``None``.
        if id_ not in self.proxies or proxy is not None:
            self.proxies[id_] = proxy

    @property
    def queued(self) -> List[str]:
        """Return a list of all the entities which are referenced from the graph
        but that haven't been loaded yet. This can be used to get a list of
        entities that should be included to expand the whole graph by one degree.
        """
        return [i for (i, p) in self.proxies.items() if p is None]

    def _get_node_stub(self, prop: Property, value: str) -> Node:
        """Return the node for ``value``, registering it if it is new.

        Entity references are queued as well. A node for which no ID could
        be derived is returned without being registered.
        """
        if prop.type == registry.entity:
            self.queue(value)
        node = Node(prop.type, value, schema=prop.range)
        if node.id is None:
            return node
        if node.id not in self.nodes:
            self.nodes[node.id] = node
        return self.nodes[node.id]

    def _add_edge(self, proxy: EntityProxy, source: str, target: str) -> None:
        """Add an edge for one source/target pair of an edge-type entity.

        Raises ``InvalidModel`` if the schema of ``proxy`` lacks a source or
        target property.
        """
        source_prop = proxy.schema.source_prop
        target_prop = proxy.schema.target_prop
        # Validate both ends before touching the graph, so that an invalid
        # schema does not leave a dangling stub node behind.
        if source_prop is None or target_prop is None:
            raise InvalidModel("Invalid edge entity: %r" % proxy)
        source_node = self._get_node_stub(source_prop, source)
        target_node = self._get_node_stub(target_prop, target)
        if source_node.id is not None and target_node.id is not None:
            edge = Edge(self, source_node, target_node, proxy=proxy)
            self.edges[edge.id] = edge

    def _add_node(self, proxy: EntityProxy) -> None:
        """Derive a node and its value edges from the given proxy."""
        entity = Node.from_proxy(proxy)
        if entity.id is not None:
            self.nodes[entity.id] = entity
        for prop, value in proxy.itervalues():
            if prop.type not in self.edge_types:
                continue
            node = self._get_node_stub(prop, value)
            if node.id is None:
                continue
            edge = Edge(self, entity, node, prop=prop, value=value)
            # Edges with a weight of zero or less are not stored.
            if edge.weight > 0:
                self.edges[edge.id] = edge

    def add(self, proxy: EntityProxy) -> None:
        """Add an :class:`~followthemoney.proxy.EntityProxy` to the graph and make
        it either a :class:`~followthemoney.graph.Node` or an
        :class:`~followthemoney.graph.Edge`."""
        if proxy is None:
            return
        self.queue(proxy.id, proxy)
        if proxy.schema.edge:
            for source, target in proxy.edgepairs():
                self._add_edge(proxy, source, target)
        else:
            self._add_node(proxy)

    def iternodes(self) -> Iterable[Node]:
        """Iterate all :class:`nodes <followthemoney.graph.Node>` in the graph."""
        return self.nodes.values()

    def iteredges(self) -> Iterable[Edge]:
        """Iterate all :class:`edges <followthemoney.graph.Edge>` in the graph."""
        return self.edges.values()

    def get_outbound(
        self, node: Node, prop: Optional[Property] = None
    ) -> Generator[Edge, None, None]:
        """Get all edges pointed out from the given node.

        If ``prop`` is given, only edges whose source property equals it
        are returned.
        """
        for edge in self.iteredges():
            if edge.source == node:
                if prop and edge.source_prop != prop:
                    continue
                yield edge

    def get_inbound(
        self, node: Node, prop: Optional[Property] = None
    ) -> Generator[Edge, None, None]:
        """Get all edges pointed at the given node.

        If ``prop`` is given, only edges whose target property equals it
        are returned.
        """
        for edge in self.iteredges():
            if edge.target == node:
                if prop and edge.target_prop != prop:
                    continue
                yield edge

    def get_adjacent(
        self, node: Node, prop: Optional[Property] = None
    ) -> Generator[Edge, None, None]:
        """Get all adjacent edges of the given node: outbound ones first,
        then inbound ones."""
        yield from self.get_outbound(node, prop=prop)
        yield from self.get_inbound(node, prop=prop)

    def to_dict(self) -> Dict[str, Any]:
        """Return a dictionary with the graph nodes and edges."""
        return {
            "nodes": [n.to_dict() for n in self.iternodes()],
            "edges": [e.to_dict() for e in self.iteredges()],
        }
|