followthemoney 1.3.7__py3-none-any.whl → 3.8.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (186) hide show
  1. followthemoney/__init__.py +5 -3
  2. followthemoney/cli/__init__.py +17 -0
  3. followthemoney/cli/aggregate.py +56 -0
  4. followthemoney/cli/cli.py +88 -0
  5. followthemoney/cli/exports.py +121 -0
  6. followthemoney/cli/mapping.py +85 -0
  7. followthemoney/cli/sieve.py +67 -0
  8. followthemoney/cli/util.py +142 -0
  9. followthemoney/compare.py +130 -60
  10. followthemoney/exc.py +19 -6
  11. followthemoney/export/common.py +29 -0
  12. followthemoney/export/csv.py +82 -0
  13. followthemoney/export/excel.py +75 -0
  14. followthemoney/export/graph.py +79 -0
  15. followthemoney/export/neo4j.py +182 -0
  16. followthemoney/export/rdf.py +26 -0
  17. followthemoney/graph.py +308 -0
  18. followthemoney/helpers.py +212 -0
  19. followthemoney/mapping/__init__.py +1 -1
  20. followthemoney/mapping/csv.py +67 -35
  21. followthemoney/mapping/entity.py +116 -44
  22. followthemoney/mapping/property.py +90 -44
  23. followthemoney/mapping/query.py +27 -19
  24. followthemoney/mapping/source.py +15 -5
  25. followthemoney/mapping/sql.py +75 -61
  26. followthemoney/messages.py +13 -7
  27. followthemoney/model.py +108 -56
  28. followthemoney/namespace.py +119 -0
  29. followthemoney/offshore.py +48 -0
  30. followthemoney/ontology.py +77 -0
  31. followthemoney/property.py +204 -71
  32. followthemoney/proxy.py +455 -118
  33. followthemoney/rdf.py +9 -0
  34. followthemoney/schema/Address.yaml +78 -0
  35. followthemoney/schema/Airplane.yaml +17 -10
  36. followthemoney/schema/Analyzable.yaml +54 -0
  37. followthemoney/schema/Article.yaml +16 -0
  38. followthemoney/schema/Assessment.yaml +32 -0
  39. followthemoney/schema/Asset.yaml +10 -4
  40. followthemoney/schema/Associate.yaml +41 -0
  41. followthemoney/schema/Audio.yaml +24 -0
  42. followthemoney/schema/BankAccount.yaml +53 -9
  43. followthemoney/schema/Call.yaml +48 -0
  44. followthemoney/schema/CallForTenders.yaml +117 -0
  45. followthemoney/schema/Company.yaml +37 -12
  46. followthemoney/schema/Contract.yaml +41 -7
  47. followthemoney/schema/ContractAward.yaml +30 -11
  48. followthemoney/schema/CourtCase.yaml +16 -10
  49. followthemoney/schema/CourtCaseParty.yaml +17 -6
  50. followthemoney/schema/CryptoWallet.yaml +48 -0
  51. followthemoney/schema/Debt.yaml +37 -0
  52. followthemoney/schema/Directorship.yaml +17 -4
  53. followthemoney/schema/Document.yaml +72 -139
  54. followthemoney/schema/Documentation.yml +38 -0
  55. followthemoney/schema/EconomicActivity.yaml +32 -17
  56. followthemoney/schema/Email.yaml +76 -0
  57. followthemoney/schema/Employment.yaml +39 -0
  58. followthemoney/schema/Event.yaml +35 -3
  59. followthemoney/schema/Family.yaml +41 -0
  60. followthemoney/schema/Folder.yaml +13 -0
  61. followthemoney/schema/HyperText.yaml +21 -0
  62. followthemoney/schema/Identification.yaml +40 -0
  63. followthemoney/schema/Image.yaml +25 -0
  64. followthemoney/schema/Interest.yaml +3 -6
  65. followthemoney/schema/Interval.yaml +56 -5
  66. followthemoney/schema/LegalEntity.yaml +81 -20
  67. followthemoney/schema/License.yaml +7 -3
  68. followthemoney/schema/Membership.yaml +19 -4
  69. followthemoney/schema/Mention.yaml +54 -0
  70. followthemoney/schema/Message.yaml +78 -0
  71. followthemoney/schema/Note.yaml +23 -0
  72. followthemoney/schema/Occupancy.yaml +44 -0
  73. followthemoney/schema/Organization.yaml +38 -3
  74. followthemoney/schema/Ownership.yaml +16 -4
  75. followthemoney/schema/Package.yaml +17 -0
  76. followthemoney/schema/Page.yaml +43 -0
  77. followthemoney/schema/Pages.yaml +23 -0
  78. followthemoney/schema/Passport.yaml +16 -17
  79. followthemoney/schema/Payment.yaml +38 -7
  80. followthemoney/schema/Person.yaml +61 -5
  81. followthemoney/schema/PlainText.yaml +17 -0
  82. followthemoney/schema/Position.yaml +50 -0
  83. followthemoney/schema/Post.yaml +42 -0
  84. followthemoney/schema/Project.yaml +27 -0
  85. followthemoney/schema/ProjectParticipant.yaml +36 -0
  86. followthemoney/schema/PublicBody.yaml +14 -3
  87. followthemoney/schema/RealEstate.yaml +19 -3
  88. followthemoney/schema/Representation.yaml +17 -6
  89. followthemoney/schema/Sanction.yaml +45 -21
  90. followthemoney/schema/Security.yaml +59 -0
  91. followthemoney/schema/Similar.yaml +37 -0
  92. followthemoney/schema/Succession.yaml +36 -0
  93. followthemoney/schema/Table.yaml +32 -0
  94. followthemoney/schema/TaxRoll.yaml +27 -9
  95. followthemoney/schema/Thing.yaml +69 -13
  96. followthemoney/schema/Trip.yaml +42 -0
  97. followthemoney/schema/UnknownLink.yaml +17 -6
  98. followthemoney/schema/UserAccount.yaml +44 -0
  99. followthemoney/schema/Value.yaml +5 -1
  100. followthemoney/schema/Vehicle.yaml +25 -8
  101. followthemoney/schema/Vessel.yaml +18 -10
  102. followthemoney/schema/Video.yaml +20 -0
  103. followthemoney/schema/Workbook.yaml +18 -0
  104. followthemoney/schema.py +436 -135
  105. followthemoney/translations/ar/LC_MESSAGES/followthemoney.mo +0 -0
  106. followthemoney/translations/ar/LC_MESSAGES/followthemoney.po +2900 -787
  107. followthemoney/translations/bs/LC_MESSAGES/followthemoney.mo +0 -0
  108. followthemoney/translations/bs/LC_MESSAGES/followthemoney.po +2108 -520
  109. followthemoney/translations/de/LC_MESSAGES/followthemoney.mo +0 -0
  110. followthemoney/translations/de/LC_MESSAGES/followthemoney.po +2902 -782
  111. followthemoney/translations/es/LC_MESSAGES/followthemoney.mo +0 -0
  112. followthemoney/translations/es/LC_MESSAGES/followthemoney.po +2893 -779
  113. followthemoney/translations/fr/LC_MESSAGES/followthemoney.mo +0 -0
  114. followthemoney/translations/fr/LC_MESSAGES/followthemoney.po +4362 -0
  115. followthemoney/translations/fr/followthemoney.po +3861 -0
  116. followthemoney/translations/messages.pot +3021 -725
  117. followthemoney/translations/nb/LC_MESSAGES/followthemoney.mo +0 -0
  118. followthemoney/translations/nb/LC_MESSAGES/followthemoney.po +3778 -0
  119. followthemoney/translations/nl/LC_MESSAGES/followthemoney.mo +0 -0
  120. followthemoney/translations/nl/LC_MESSAGES/followthemoney.po +3837 -0
  121. followthemoney/translations/pt_BR/LC_MESSAGES/followthemoney.mo +0 -0
  122. followthemoney/translations/pt_BR/LC_MESSAGES/followthemoney.po +3784 -0
  123. followthemoney/translations/ru/LC_MESSAGES/followthemoney.mo +0 -0
  124. followthemoney/translations/ru/LC_MESSAGES/followthemoney.po +2837 -539
  125. followthemoney/translations/ru/followthemoney.po +4221 -0
  126. followthemoney/translations/tr/LC_MESSAGES/followthemoney.mo +0 -0
  127. followthemoney/translations/tr/LC_MESSAGES/followthemoney.po +2073 -491
  128. followthemoney/types/__init__.py +35 -17
  129. followthemoney/types/address.py +50 -21
  130. followthemoney/types/checksum.py +25 -0
  131. followthemoney/types/common.py +233 -88
  132. followthemoney/types/country.py +50 -56
  133. followthemoney/types/date.py +59 -76
  134. followthemoney/types/email.py +66 -35
  135. followthemoney/types/entity.py +66 -13
  136. followthemoney/types/gender.py +66 -0
  137. followthemoney/types/iban.py +47 -28
  138. followthemoney/types/identifier.py +49 -22
  139. followthemoney/types/ip.py +35 -21
  140. followthemoney/types/json.py +58 -0
  141. followthemoney/types/language.py +124 -37
  142. followthemoney/types/mimetype.py +44 -0
  143. followthemoney/types/name.py +56 -12
  144. followthemoney/types/number.py +30 -0
  145. followthemoney/types/phone.py +92 -34
  146. followthemoney/types/registry.py +52 -0
  147. followthemoney/types/string.py +43 -0
  148. followthemoney/types/topic.py +94 -0
  149. followthemoney/types/url.py +39 -17
  150. followthemoney/util.py +139 -45
  151. followthemoney-3.8.1.dist-info/METADATA +153 -0
  152. followthemoney-3.8.1.dist-info/RECORD +157 -0
  153. {followthemoney-1.3.7.dist-info → followthemoney-3.8.1.dist-info}/WHEEL +1 -2
  154. followthemoney-3.8.1.dist-info/entry_points.txt +17 -0
  155. followthemoney-1.3.7.dist-info/LICENSE.txt → followthemoney-3.8.1.dist-info/licenses/LICENSE +1 -1
  156. followthemoney/link.py +0 -75
  157. followthemoney/schema/Associate.yml +0 -19
  158. followthemoney/schema/Family.yml +0 -19
  159. followthemoney/schema/Land.yml +0 -9
  160. followthemoney/schema/Relationship.yaml +0 -26
  161. followthemoney/types/domain.py +0 -50
  162. followthemoney-1.3.7.dist-info/DESCRIPTION.rst +0 -3
  163. followthemoney-1.3.7.dist-info/METADATA +0 -39
  164. followthemoney-1.3.7.dist-info/RECORD +0 -108
  165. followthemoney-1.3.7.dist-info/entry_points.txt +0 -3
  166. followthemoney-1.3.7.dist-info/metadata.json +0 -1
  167. followthemoney-1.3.7.dist-info/namespace_packages.txt +0 -1
  168. followthemoney-1.3.7.dist-info/top_level.txt +0 -3
  169. ns/ontology.py +0 -128
  170. tests/types/test_addresses.py +0 -24
  171. tests/types/test_common.py +0 -32
  172. tests/types/test_countries.py +0 -27
  173. tests/types/test_dates.py +0 -73
  174. tests/types/test_domains.py +0 -23
  175. tests/types/test_emails.py +0 -32
  176. tests/types/test_entity.py +0 -19
  177. tests/types/test_iban.py +0 -109
  178. tests/types/test_identifiers.py +0 -27
  179. tests/types/test_ip.py +0 -29
  180. tests/types/test_languages.py +0 -23
  181. tests/types/test_names.py +0 -33
  182. tests/types/test_phones.py +0 -24
  183. tests/types/test_registry.py +0 -14
  184. tests/types/test_urls.py +0 -23
  185. {ns → followthemoney/export}/__init__.py +0 -0
  186. /tests/types/__init__.py → /followthemoney/py.typed +0 -0
@@ -0,0 +1,182 @@
1
+ import os
2
+ import json
3
+ import logging
4
+ from typing import Any, Dict, Iterable, List, Optional, Set, TextIO
5
+ import stringcase # type: ignore
6
+
7
+ from followthemoney.export.csv import CSVMixin, CSVWriter
8
+ from followthemoney.export.graph import GraphExporter, DEFAULT_EDGE_TYPES
9
+ from followthemoney.graph import Edge, Node
10
+ from followthemoney.schema import Schema
11
+ from followthemoney.util import PathLike
12
+
13
# Module-level logger for the Neo4J exporters.
log = logging.getLogger(__name__)

# Both values are interpolated into the generated ``neo4j_import.sh`` script
# and can be overridden through environment variables of the same name.
NEO4J_ADMIN_PATH = os.getenv("NEO4J_ADMIN_PATH", "neo4j-admin")
NEO4J_DATABASE_NAME = os.getenv("NEO4J_DATABASE_NAME", "graph.db")
16
+
17
+
18
class Neo4JCSVExporter(CSVMixin, GraphExporter):
    """Write the graph as CSV files plus a ``neo4j-admin import`` script.

    Two shared files are always created: ``_links`` for property-derived
    edges and ``_nodes`` for reified value nodes. Entities are written to
    per-schema files obtained from ``_get_writer``.
    """

    def __init__(
        self,
        directory: PathLike,
        extra: Optional[List[str]] = None,
        edge_types: Iterable[str] = DEFAULT_EDGE_TYPES,
    ) -> None:
        """Set up the output directory and open the two shared CSV files.

        :param directory: target directory, handed to ``_configure``.
        :param extra: names of additional columns, handed to ``_configure``.
        :param edge_types: property types that are turned into graph edges.
        """
        super().__init__(edge_types=edge_types)
        self._configure(directory, extra=extra)

        # Shared file for edges derived from property values.
        links = self._open_csv_file("_links")
        self.links_handler, self.links_writer = links
        self.links_writer.writerow([":TYPE", ":START_ID", ":END_ID", "weight"])

        # Shared file for non-entity (value) nodes.
        nodes = self._open_csv_file("_nodes")
        self.nodes_handler, self.nodes_writer = nodes
        self.nodes_writer.writerow(["id:ID", ":LABEL", "caption"])

        # IDs of value nodes already written, to avoid duplicate rows.
        self.nodes_seen: Set[str] = set()

    def _write_header(self, writer: CSVWriter, schema: Schema) -> None:
        """Write the header row of a per-schema file.

        Edge schemata get relationship columns, all others get node columns;
        both are followed by the extra columns and the exportable properties.
        """
        if schema.edge:
            columns = ["id", ":TYPE", ":START_ID", ":END_ID"]
        else:
            columns = ["id:ID", ":LABEL", "caption"]
        columns.extend(self.extra)
        columns.extend(prop.name for prop in self.exportable_properties(schema))
        writer.writerow(columns)

    def write_graph(self, extra: Optional[List[str]] = None) -> None:
        """Write every node and edge currently in the graph, then flush it.

        :param extra: cell values appended to each entity row (defaults to
            an empty list).
        """
        row_extra = extra if extra else []
        for node in self.graph.iternodes():
            self.write_node(node, row_extra)

        for edge in self.graph.iteredges():
            self.write_edge(edge, row_extra)

        self.graph.flush()

    def write_node(self, node: Node, extra: List[str]) -> None:
        """Write one graph node; nodes without an ID are ignored.

        Value nodes go to the shared nodes file (once per ID). Nodes that
        carry a proxy and a schema are written to their per-schema file.
        """
        node_id = node.id
        if node_id is None:
            return

        is_new_value = not node.is_entity and node_id not in self.nodes_seen
        if is_new_value:
            self.nodes_writer.writerow([node_id, node.type.name, node.caption])
            self.nodes_seen.add(node_id)

        if node.proxy is None or node.schema is None:
            return

        # Neo4J accepts multiple labels separated by semicolons.
        cells = [node_id, ";".join(node.schema.names), node.caption]
        cells.extend(extra or [])
        cells.extend(
            prop.type.join(values)
            for prop, values in self.exportable_fields(node.proxy)
        )
        self._get_writer(node.schema).writerow(cells)

    def write_edge(self, edge: Edge, extra: List[str]) -> None:
        """Write one graph edge.

        Property-derived edges go to the shared links file; edges backed by
        an entity proxy go to the per-schema file of that entity.
        """
        if edge.prop is not None:
            link_type = stringcase.constcase(edge.prop.name)
            self.links_writer.writerow(
                [link_type, edge.source_id, edge.target_id, edge.weight]
            )

        entity = edge.proxy
        if entity is None:
            return

        # NOTE: one entity can yield several source/target pairs, so the
        # same ID may appear on more than one row.
        rel_type = stringcase.constcase(entity.schema.name)
        cells = [entity.id, rel_type, edge.source_id, edge.target_id]
        cells.extend(extra or [])
        cells.extend(
            prop.type.join(values)
            for prop, values in self.exportable_fields(entity)
        )
        self._get_writer(entity.schema).writerow(cells)

    def finalize_graph(self) -> None:
        """Generate ``neo4j_import.sh`` and close all open CSV files.

        The script lists the shared files first, followed by one
        ``--relationships`` or ``--nodes`` option per per-schema file.
        """
        script_path = self.directory.joinpath("neo4j_import.sh")
        with open(script_path, mode="w") as script:
            script.write(
                f"{NEO4J_ADMIN_PATH} import --id-type=STRING "
                f"--database={NEO4J_DATABASE_NAME} \\\n"
            )
            script.write("\t--multiline-fields=true \\\n")
            links_name = os.path.basename(self.links_handler.name)
            script.write(f"\t--relationships={links_name} \\\n")
            nodes_name = os.path.basename(self.nodes_handler.name)
            script.write(f"\t--nodes={nodes_name} \\\n")

            for schema, (handle, _writer) in self.handles.items():
                option = "relationships" if schema.edge else "nodes"
                file_name = os.path.basename(handle.name)
                script.write(f"\t--{option}={file_name} \\\n")

        self.links_handler.close()
        self.nodes_handler.close()
        self.close()
114
+
115
+
116
class CypherGraphExporter(GraphExporter):
    """Export the graph as a sequence of Cypher statements for Neo4J.

    Each node and edge becomes its own statement, much like a SQL dump of
    individual inserts, so a CSV-based import is likely the better choice
    for large datasets.
    """

    # Language reference: https://www.opencypher.org/
    # Statement to remove all nodes: MATCH (n) DETACH DELETE n;

    def __init__(self, fh: TextIO, edge_types: Iterable[str] = DEFAULT_EDGE_TYPES):
        """
        :param fh: text stream the generated statements are written to.
        :param edge_types: property types that are turned into graph edges.
        """
        super().__init__(edge_types=edge_types)
        self.fh = fh
        # Values of proxy-backed nodes that have already been written.
        self.proxy_nodes: Set[str] = set()

    def _to_map(self, data: Dict[str, Any]) -> str:
        """Render ``data`` as the inside of a Cypher map literal.

        Values are JSON-encoded; entries with a falsy value are left out.
        """
        pairs = [
            "%s: %s" % (key, json.dumps(value))
            for key, value in data.items()
            if value
        ]
        return ", ".join(pairs)

    def write_graph(self) -> None:
        """Export queries for each graph element, then flush the graph."""
        node_query = "MERGE (p { %(id)s }) " "SET p += { %(map)s } SET p :%(label)s;\n"
        edge_query = (
            "MATCH (s { %(source)s }), (t { %(target)s }) "
            "MERGE (s)-[:%(type)s { %(map)s }]->(t);\n"
        )

        for node in self.graph.iternodes():
            # Skip proxy nodes written earlier and nodes without an ID.
            if node.value in self.proxy_nodes or node.id is None:
                continue
            if node.proxy is not None:
                self.proxy_nodes.add(node.value)

            props = self.get_attributes(node)
            props["id"] = node.id
            caption = node.caption
            if caption is not None:
                props["caption"] = caption

            # Entities are labelled with their schema names, value nodes
            # with the name of their property type.
            labels = list(node.schema.names) if node.schema else [node.type.name]
            params = {
                "id": self._to_map({"id": node.id}),
                "map": self._to_map(props),
                "label": ":".join(labels),
            }
            self.fh.write(node_query % params)

        for edge in self.graph.iteredges():
            props = self.get_attributes(edge)
            props["id"] = edge.id
            props["weight"] = str(edge.weight)
            params = {
                "source": self._to_map({"id": edge.source_id}),
                "target": self._to_map({"id": edge.target_id}),
                "type": stringcase.constcase(edge.type_name),
                "map": self._to_map(props),
            }
            self.fh.write(edge_query % params)

        self.graph.flush()
@@ -0,0 +1,26 @@
1
+ import logging
2
+ from rdflib import Graph
3
+ from typing import List, Optional, TextIO
4
+
5
+ from followthemoney.export.common import Exporter
6
+ from followthemoney.proxy import E
7
+
8
+ log = logging.getLogger(__name__)
9
+
10
+
11
class RDFExporter(Exporter):
    """Serialize entities as N-Triples onto a text stream."""

    def __init__(self, fh: TextIO, qualified: bool = True) -> None:
        """
        :param fh: text stream the serialized triples are written to.
        :param qualified: passed through to ``proxy.triples()``.
        """
        super().__init__()
        self.fh = fh
        self.qualified = qualified

    def write(self, proxy: E, extra: Optional[List[str]] = None) -> None:
        """Write all triples of ``proxy`` to the output stream.

        Serialization and write errors are logged rather than raised, so
        one bad entity does not abort the export. ``extra`` is accepted but
        not used by this exporter.
        """
        rdf_graph = Graph()
        triples = proxy.triples(qualified=self.qualified)
        for statement in triples:
            rdf_graph.add(statement)

        try:
            serialized = rdf_graph.serialize(format="nt11").strip()
            self.fh.write(serialized + "\n")
        except Exception:
            # Deliberately broad: this is a best-effort export.
            log.exception("Failed to serialize ntriples.")
@@ -0,0 +1,308 @@
1
+ """
2
+ Converting FtM data to a property graph data model.
3
+
4
+ This module provides an abstract data object that represents a property
5
+ graph. This is used by the exporter modules to convert data
6
+ to a specific output format, like Cypher or NetworkX.
7
+ """
8
+ import logging
9
+ from typing import Any, Dict, Generator, Iterable, List, Optional
10
+
11
+ from followthemoney.types import registry
12
+ from followthemoney.types.common import PropertyType
13
+ from followthemoney.schema import Schema
14
+ from followthemoney.proxy import EntityProxy
15
+ from followthemoney.property import Property
16
+ from followthemoney.exc import InvalidModel
17
+
18
+ log = logging.getLogger(__name__)
19
+
20
+
21
class Node(object):
    """A node represents either an entity that can be rendered as a
    node in a graph, or as a re-ified value, like a name, email
    address or phone number."""

    __slots__ = ["type", "value", "id", "proxy", "schema"]

    def __init__(
        self,
        type_: PropertyType,
        value: str,
        proxy: Optional[EntityProxy] = None,
        schema: Optional[Schema] = None,
    ) -> None:
        """
        :param type_: property type of the value this node stands for.
        :param value: the raw value (an entity ID for entity nodes).
        :param proxy: the entity behind the node, if it has been loaded.
        :param schema: schema to assume when no proxy is given; when a
            proxy is given, its schema is used instead.
        """
        self.type = type_
        self.value = value
        # The ID may be None; Graph checks for this and does not register
        # such nodes or create edges for them.
        self.id = type_.node_id_safe(value)
        self.proxy = proxy
        self.schema = schema if proxy is None else proxy.schema

    @property
    def is_entity(self) -> bool:
        """Check to see if the node represents an entity. If this is false, the
        node represents a non-entity property value that has been reified, like
        a phone number or a name."""
        return self.type == registry.entity

    @property
    def caption(self) -> str:
        """A user-facing label for the current node."""
        if self.type == registry.entity and self.proxy is not None:
            return self.proxy.caption
        caption = self.type.caption(self.value)
        # Fall back to the raw value if the type offers no caption.
        return caption or self.value

    def to_dict(self) -> Dict[str, Any]:
        """Return a simple dictionary to reflect this graph node."""
        return {
            "id": self.id,
            "type": self.type.name,
            "value": self.value,
            "caption": self.caption,
        }

    @classmethod
    def from_proxy(cls, proxy: EntityProxy) -> "Node":
        """For a given :class:`~followthemoney.proxy.EntityProxy`, return a node
        based on the entity."""
        return cls(registry.entity, proxy.id, proxy=proxy)

    def __str__(self) -> str:
        return self.caption

    def __repr__(self) -> str:
        return "<Node(%r, %r, %r)>" % (self.id, self.type, self.caption)

    def __hash__(self) -> int:
        return hash(self.id)

    def __eq__(self, other: Any) -> bool:
        """Nodes are equal when their IDs are equal.

        Returns ``NotImplemented`` for non-node operands, so comparisons
        such as ``node == None`` evaluate to ``False`` instead of raising
        ``AttributeError``.
        """
        if not isinstance(other, Node):
            return NotImplemented
        return bool(self.id == other.id)
85
+
86
+
87
class Edge(object):
    """A link between two nodes.

    An edge is derived either from a property value on an entity (``prop``
    is set) or from an entity proxy passed in by the graph (``proxy`` and
    ``schema`` are set).
    """

    __slots__ = [
        "id",
        "weight",
        "source_id",
        "target_id",
        "prop",
        "proxy",
        "schema",
        "graph",
    ]

    def __init__(
        self,
        graph: "Graph",
        source: Node,
        target: Node,
        proxy: Optional[EntityProxy] = None,
        prop: Optional[Property] = None,
        value: Optional[str] = None,
    ):
        """
        :param graph: the graph owning this edge, used to resolve nodes.
        :param source: node the edge originates from.
        :param target: node the edge points to.
        :param proxy: entity proxy this edge was created from, if any.
        :param prop: property that produced the edge, if any.
        :param value: property value; together with ``prop`` it determines
            the edge weight.
        """
        self.graph = graph
        self.id = f"{source.id}<>{target.id}"
        self.source_id = source.id
        self.target_id = target.id
        self.weight = 1.0
        self.prop = prop
        self.proxy = proxy
        self.schema: Optional[Schema] = None
        if prop is not None and value is not None:
            self.weight = prop.specificity(value)
        if proxy is not None:
            # Include the entity ID so that several entities linking the
            # same pair of nodes get distinct edge IDs.
            self.id = f"{source.id}<{proxy.id}>{target.id}"
            self.schema = proxy.schema

    @property
    def source(self) -> Optional[Node]:
        """The graph node from which the edge originates."""
        if self.source_id is None:
            return None
        return self.graph.nodes.get(self.source_id)

    @property
    def source_prop(self) -> Property:
        """Get the entity property originating this edge.

        :raises InvalidModel: if neither the schema nor ``prop`` yields one.
        """
        if self.schema is not None and self.schema.source_prop is not None:
            if self.schema.source_prop.reverse is not None:
                return self.schema.source_prop.reverse
        if self.prop is None:
            raise InvalidModel("Contradiction: %r" % self)
        return self.prop

    @property
    def target(self) -> Optional[Node]:
        """The graph node to which the edge points."""
        if self.target_id is None:
            return None
        return self.graph.nodes.get(self.target_id)

    @property
    def target_prop(self) -> Optional[Property]:
        """Get the property on the target side of this edge.

        This is the reverse of the schema's target property or, failing
        that, the reverse of ``prop``. It is ``None`` when neither is set.
        """
        if self.schema is not None and self.schema.target_prop is not None:
            return self.schema.target_prop.reverse
        if self.prop is not None:
            return self.prop.reverse
        # NOTE: this edge points at a value node.
        return None

    @property
    def type_name(self) -> str:
        """Return a machine-readable description of the type of the edge.
        This is either a property name or a schema name.

        :raises InvalidModel: if the edge has neither a schema nor a property.
        """
        if self.schema is not None:
            return self.schema.name
        if self.prop is None:
            raise InvalidModel("Invalid edge: %r" % self)
        return self.prop.name

    def to_dict(self) -> Dict[str, Optional[str]]:
        """Return a simple dictionary to reflect this graph edge."""
        return {
            "id": self.id,
            "source_id": self.source_id,
            "target_id": self.target_id,
            "type_name": self.type_name,
        }

    def __repr__(self) -> str:
        return "<Edge(%r)>" % self.id

    def __hash__(self) -> int:
        return hash(self.id)

    def __eq__(self, other: Any) -> bool:
        """Edges are equal when their IDs are equal.

        Returns ``NotImplemented`` for non-edge operands, so comparisons
        such as ``edge == None`` evaluate to ``False`` instead of raising
        ``AttributeError``.
        """
        if not isinstance(other, Edge):
            return NotImplemented
        return bool(self.id == other.id)
184
+
185
+
186
class Graph(object):
    """A collection of nodes and edges built from entities and their
    property values, i.e. FtM data viewed as a property graph.

    The class is intended to be subclassed to support additional backends,
    like Aleph.
    """

    def __init__(self, edge_types: Iterable[PropertyType] = registry.pivots) -> None:
        """
        :param edge_types: property types whose values are reified into
            nodes and edges; only matchable types are kept.
        """
        resolved = registry.get_types(edge_types)
        self.edge_types = [type_ for type_ in resolved if type_.matchable]
        self.flush()

    def flush(self) -> None:
        """Remove all nodes, edges and proxies from the graph."""
        self.edges: Dict[str, Edge] = {}
        self.nodes: Dict[str, Node] = {}
        # Maps entity ID to its proxy, or None while it is only referenced.
        self.proxies: Dict[str, Optional[EntityProxy]] = {}

    def queue(self, id_: str, proxy: Optional[EntityProxy] = None) -> None:
        """Register a reference to an entity in the graph.

        A stored proxy is never replaced by ``None``.
        """
        if proxy is not None or id_ not in self.proxies:
            self.proxies[id_] = proxy

    @property
    def queued(self) -> List[str]:
        """List the IDs of entities that are referenced from the graph but
        have not been loaded yet. Loading them expands the graph by one
        degree.
        """
        return [
            entity_id
            for entity_id, loaded in self.proxies.items()
            if loaded is None
        ]

    def _get_node_stub(self, prop: Property, value: str) -> Node:
        """Return the node for ``value``, registering it if it is new.

        Entity references are queued. A node without an ID is returned
        without being registered.
        """
        if prop.type == registry.entity:
            self.queue(value)
        stub = Node(prop.type, value, schema=prop.range)
        if stub.id is None:
            return stub
        # Keep a node that already exists (it may carry a proxy).
        return self.nodes.setdefault(stub.id, stub)

    def _add_edge(self, proxy: EntityProxy, source: str, target: str) -> None:
        """Create an edge for one source/target pair of an edge entity.

        :raises InvalidModel: if the schema lacks a source or target property.
        """
        schema = proxy.schema
        if schema.source_prop is None:
            raise InvalidModel("Invalid edge entity: %r" % proxy)
        start = self._get_node_stub(schema.source_prop, source)
        if schema.target_prop is None:
            raise InvalidModel("Invalid edge entity: %r" % proxy)
        end = self._get_node_stub(schema.target_prop, target)
        if start.id is None or end.id is None:
            return
        link = Edge(self, start, end, proxy=proxy)
        self.edges[link.id] = link

    def _add_node(self, proxy: EntityProxy) -> None:
        """Derive a node and its value edges from the given proxy."""
        entity_node = Node.from_proxy(proxy)
        if entity_node.id is not None:
            self.nodes[entity_node.id] = entity_node

        for prop, value in proxy.itervalues():
            if prop.type not in self.edge_types:
                continue
            value_node = self._get_node_stub(prop, value)
            if value_node.id is None:
                continue
            link = Edge(self, entity_node, value_node, prop=prop, value=value)
            # Edges with a non-positive weight are dropped.
            if link.weight > 0:
                self.edges[link.id] = link

    def add(self, proxy: EntityProxy) -> None:
        """Add an :class:`~followthemoney.proxy.EntityProxy` to the graph,
        as :class:`~followthemoney.graph.Edge` objects if its schema is an
        edge schema and as a :class:`~followthemoney.graph.Node` otherwise.
        ``None`` is ignored."""
        if proxy is None:
            return
        self.queue(proxy.id, proxy)
        if not proxy.schema.edge:
            self._add_node(proxy)
            return
        for source, target in proxy.edgepairs():
            self._add_edge(proxy, source, target)

    def iternodes(self) -> Iterable[Node]:
        """Iterate all :class:`nodes <followthemoney.graph.Node>` in the graph."""
        return self.nodes.values()

    def iteredges(self) -> Iterable[Edge]:
        """Iterate all :class:`edges <followthemoney.graph.Edge>` in the graph."""
        return self.edges.values()

    def get_outbound(
        self, node: Node, prop: Optional[Property] = None
    ) -> Generator[Edge, None, None]:
        """Yield all edges that start at the given node, optionally limited
        to those whose source property is ``prop``."""
        for edge in self.iteredges():
            starts_here = edge.source == node
            if starts_here and not (prop and edge.source_prop != prop):
                yield edge

    def get_inbound(
        self, node: Node, prop: Optional[Property] = None
    ) -> Generator[Edge, None, None]:
        """Yield all edges that point at the given node, optionally limited
        to those whose target property is ``prop``."""
        for edge in self.iteredges():
            ends_here = edge.target == node
            if ends_here and not (prop and edge.target_prop != prop):
                yield edge

    def get_adjacent(
        self, node: Node, prop: Optional[Property] = None
    ) -> Generator[Edge, None, None]:
        """Yield the outbound edges of the given node, then its inbound ones."""
        for lookup in (self.get_outbound, self.get_inbound):
            yield from lookup(node, prop=prop)

    def to_dict(self) -> Dict[str, Any]:
        """Return a dictionary with the graph nodes and edges."""
        node_dicts = [node.to_dict() for node in self.iternodes()]
        edge_dicts = [edge.to_dict() for edge in self.iteredges()]
        return {"nodes": node_dicts, "edges": edge_dicts}