followthemoney 3.8.4__py3-none-any.whl → 4.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (84) hide show
  1. followthemoney/__init__.py +30 -10
  2. followthemoney/cli/__init__.py +3 -12
  3. followthemoney/cli/aggregate.py +1 -1
  4. followthemoney/cli/cli.py +1 -1
  5. followthemoney/cli/exports.py +6 -2
  6. followthemoney/cli/mapping.py +6 -4
  7. followthemoney/cli/sieve.py +1 -1
  8. followthemoney/cli/statement.py +62 -0
  9. followthemoney/cli/util.py +2 -3
  10. followthemoney/compare.py +26 -16
  11. followthemoney/dataset/__init__.py +17 -0
  12. followthemoney/dataset/catalog.py +77 -0
  13. followthemoney/dataset/coverage.py +29 -0
  14. followthemoney/dataset/dataset.py +137 -0
  15. followthemoney/dataset/publisher.py +25 -0
  16. followthemoney/dataset/resource.py +30 -0
  17. followthemoney/dataset/util.py +58 -0
  18. followthemoney/entity.py +73 -0
  19. followthemoney/exc.py +6 -0
  20. followthemoney/export/common.py +3 -3
  21. followthemoney/export/csv.py +10 -12
  22. followthemoney/export/neo4j.py +1 -1
  23. followthemoney/export/rdf.py +57 -5
  24. followthemoney/graph.py +6 -4
  25. followthemoney/mapping/csv.py +6 -18
  26. followthemoney/mapping/sql.py +3 -4
  27. followthemoney/model.py +36 -9
  28. followthemoney/namespace.py +3 -1
  29. followthemoney/ontology.py +18 -16
  30. followthemoney/property.py +12 -15
  31. followthemoney/proxy.py +44 -65
  32. followthemoney/schema/Analyzable.yaml +2 -3
  33. followthemoney/schema/BankAccount.yaml +2 -3
  34. followthemoney/schema/Company.yaml +0 -6
  35. followthemoney/schema/Contract.yaml +0 -1
  36. followthemoney/schema/CryptoWallet.yaml +1 -1
  37. followthemoney/schema/Document.yaml +0 -6
  38. followthemoney/schema/Interval.yaml +7 -0
  39. followthemoney/schema/LegalEntity.yaml +6 -0
  40. followthemoney/schema/License.yaml +2 -0
  41. followthemoney/schema/Page.yaml +0 -1
  42. followthemoney/schema/Person.yaml +0 -5
  43. followthemoney/schema/Sanction.yaml +1 -0
  44. followthemoney/schema/Thing.yaml +0 -2
  45. followthemoney/schema/UserAccount.yaml +6 -3
  46. followthemoney/schema.py +27 -39
  47. followthemoney/statement/__init__.py +19 -0
  48. followthemoney/statement/entity.py +437 -0
  49. followthemoney/statement/serialize.py +245 -0
  50. followthemoney/statement/statement.py +256 -0
  51. followthemoney/statement/util.py +31 -0
  52. followthemoney/types/__init__.py +66 -23
  53. followthemoney/types/address.py +3 -3
  54. followthemoney/types/checksum.py +3 -7
  55. followthemoney/types/common.py +9 -14
  56. followthemoney/types/country.py +3 -7
  57. followthemoney/types/date.py +21 -11
  58. followthemoney/types/email.py +0 -4
  59. followthemoney/types/entity.py +5 -11
  60. followthemoney/types/gender.py +6 -10
  61. followthemoney/types/identifier.py +9 -3
  62. followthemoney/types/ip.py +5 -9
  63. followthemoney/types/json.py +2 -2
  64. followthemoney/types/language.py +3 -7
  65. followthemoney/types/mimetype.py +4 -8
  66. followthemoney/types/name.py +7 -8
  67. followthemoney/types/number.py +88 -6
  68. followthemoney/types/phone.py +4 -11
  69. followthemoney/types/string.py +4 -4
  70. followthemoney/types/topic.py +3 -7
  71. followthemoney/types/url.py +5 -10
  72. followthemoney/util.py +12 -13
  73. followthemoney/value.py +67 -0
  74. {followthemoney-3.8.4.dist-info → followthemoney-4.0.0.dist-info}/METADATA +38 -34
  75. {followthemoney-3.8.4.dist-info → followthemoney-4.0.0.dist-info}/RECORD +78 -69
  76. {followthemoney-3.8.4.dist-info → followthemoney-4.0.0.dist-info}/entry_points.txt +1 -0
  77. {followthemoney-3.8.4.dist-info → followthemoney-4.0.0.dist-info}/licenses/LICENSE +1 -0
  78. followthemoney/offshore.py +0 -48
  79. followthemoney/rdf.py +0 -9
  80. followthemoney/schema/Assessment.yaml +0 -32
  81. followthemoney/schema/Post.yaml +0 -42
  82. followthemoney/types/iban.py +0 -58
  83. followthemoney/types/registry.py +0 -52
  84. {followthemoney-3.8.4.dist-info → followthemoney-4.0.0.dist-info}/WHEEL +0 -0
@@ -0,0 +1,58 @@
1
+ from normality import slugify
2
+ from typing import Annotated, Any
3
+ from pydantic import BeforeValidator
4
+
5
+ from followthemoney.types import registry
6
+
7
+
8
def dataset_name_check(value: str) -> str:
    """Validate a dataset name without altering it.

    A name is accepted only when it is already identical to its own slug, as
    produced by ``slugify`` with ``_`` as the separator. Non-compliant names
    are never converted or cleaned; they are rejected so that the user is
    forced to fix the name.

    Returns the unchanged ``value``. Raises ``ValueError`` if the name does
    not match its slug.
    """
    slug = slugify(value, sep="_")
    if slug != value:
        message = "Invalid %s: %r" % ("dataset name", value)
        raise ValueError(message)
    return value
15
+
16
+
17
def type_check_date(value: Any) -> str:
    """Check that the given value is a valid date string.

    The value is run through ``registry.date.clean`` and the cleaned form is
    returned. Raises ``ValueError`` when cleaning yields ``None``.
    """
    cleaned = registry.date.clean(value)
    if cleaned is None:
        # Wrap the value in a 1-tuple: as a bare ``%`` operand, a tuple input
        # would be unpacked as format arguments and raise TypeError (or show
        # the wrong repr) instead of the intended ValueError.
        raise ValueError("Invalid date: %r" % (value,))
    return cleaned


# ``str`` annotation whose raw input is first passed through
# ``type_check_date`` via a ``BeforeValidator``.
PartialDate = Annotated[str, BeforeValidator(type_check_date)]
26
+
27
+
28
def type_check_country(value: Any) -> str:
    """Check that the given value is a valid country code.

    The value is run through ``registry.country.clean`` and the cleaned form
    is returned. Raises ``ValueError`` when cleaning yields ``None``.
    """
    cleaned = registry.country.clean(value)
    if cleaned is None:
        # Wrap the value in a 1-tuple: as a bare ``%`` operand, a tuple input
        # would be unpacked as format arguments and raise TypeError (or show
        # the wrong repr) instead of the intended ValueError.
        raise ValueError("Invalid country code: %r" % (value,))
    return cleaned


# ``str`` annotation whose raw input is first passed through
# ``type_check_country`` via a ``BeforeValidator``.
CountryCode = Annotated[str, BeforeValidator(type_check_country)]
37
+
38
+
39
class Named:
    """Object that is compared, ordered, hashed and displayed by its ``name``."""

    name: str

    def __init__(self, name: str) -> None:
        self.name = name

    def __eq__(self, other: Any) -> bool:
        # Objects are equal when their names are; if a ``name`` attribute is
        # missing the comparison is simply False.
        try:
            return bool(self.name == other.name)
        except AttributeError:
            return False

    def __lt__(self, other: Any) -> bool:
        # Ordering is delegated to the ``__lt__`` of this object's name.
        own_name = self.name
        return own_name.__lt__(other.name)

    def __hash__(self) -> int:
        # Hash by name so that it stays consistent with ``__eq__``.
        return hash(self.name)

    def __repr__(self) -> str:
        return f"<{self.__class__.__name__}({self.name!r})>"
@@ -0,0 +1,73 @@
1
+ from typing import Any, Dict, List, Optional, Set, TypeVar
2
+
3
+ from rigour.names import pick_name
4
+
5
+ from followthemoney.proxy import EntityProxy
6
+ from followthemoney.schema import Schema
7
+ from followthemoney.statement.util import BASE_ID
8
+
9
+ VE = TypeVar("VE", bound="ValueEntity")
10
+
11
+
12
+ def _defined(*args: Optional[str]) -> List[str]:
13
+ return [arg for arg in args if arg is not None]
14
+
15
+
16
class ValueEntity(EntityProxy):
    """
    This class has the extended attributes from `StatementEntity` but without
    statements. Useful for streaming around. Starting from followthemoney 4.0,
    applications should use this entity class as the base class.
    """

    def __init__(
        self,
        schema: Schema,
        data: Dict[str, Any],
        key_prefix: Optional[str] = None,
        cleaned: bool = True,
    ):
        """Build the entity from ``schema`` and a ``data`` dictionary.

        ``schema``, ``data``, ``key_prefix`` and ``cleaned`` are forwarded to
        ``EntityProxy.__init__``. In addition, the optional keys ``caption``,
        ``datasets``, ``referents``, ``first_seen``, ``last_seen``,
        ``last_change`` and ``statements`` are read from ``data``. Note that
        ``statements`` is popped, i.e. removed from the caller's dictionary.
        """
        super().__init__(schema, data, key_prefix=key_prefix, cleaned=cleaned)
        self._caption: Optional[str] = data.get("caption")
        self.datasets: Set[str] = set(data.get("datasets", []))
        self.referents: Set[str] = set(data.get("referents", []))
        self.first_seen: Optional[str] = data.get("first_seen")
        self.last_seen: Optional[str] = data.get("last_seen")
        self.last_change: Optional[str] = data.get("last_change")

        # add data from statement dict if present.
        # this updates the dataset and referents set
        for stmt_data in data.pop("statements", []):
            self.datasets.add(stmt_data["dataset"])
            # A statement recorded under a different entity ID marks that ID
            # as a referent of this entity.
            if stmt_data["entity_id"] != self.id:
                self.referents.add(stmt_data["entity_id"])
            # Statements on the BASE_ID pseudo-property carry no property value.
            if stmt_data["prop"] != BASE_ID:
                self.add(stmt_data["prop"], stmt_data["value"])

    def merge(self: "ValueEntity", other: "ValueEntity") -> "ValueEntity":
        """Merge ``other`` into this entity and return the merged entity.

        On top of ``EntityProxy.merge``, a caption is picked from both
        captions, referents and datasets are unioned, ``first_seen`` becomes
        the smallest defined value and ``last_seen`` / ``last_change`` the
        largest defined value (``None`` when neither side defines one).
        """
        merged = super().merge(other)
        merged._caption = pick_name(_defined(self._caption, other._caption))
        merged.referents.update(other.referents)
        merged.datasets.update(other.datasets)
        # Write the timestamps to the object returned by ``super().merge()``,
        # like every other attribute above, so that the returned entity is the
        # one that carries the complete merged state.
        first_seen = _defined(self.first_seen, other.first_seen)
        last_seen = _defined(self.last_seen, other.last_seen)
        last_change = _defined(self.last_change, other.last_change)
        merged.first_seen = min(first_seen, default=None)
        merged.last_seen = max(last_seen, default=None)
        merged.last_change = max(last_change, default=None)
        return merged

    def to_dict(self) -> Dict[str, Any]:
        """Return the entity as a dictionary.

        ``first_seen``, ``last_seen`` and ``last_change`` are only included
        when they are set. The caption is the stored caption if there is one,
        otherwise the ``caption`` attribute of the entity.
        """
        data: Dict[str, Any] = {
            "id": self.id,
            "caption": self._caption or self.caption,
            "schema": self.schema.name,
            "properties": self.properties,
            "referents": list(self.referents),
            "datasets": list(self.datasets),
        }
        if self.first_seen is not None:
            data["first_seen"] = self.first_seen
        if self.last_seen is not None:
            data["last_seen"] = self.last_seen
        if self.last_change is not None:
            data["last_change"] = self.last_change
        return data
followthemoney/exc.py CHANGED
@@ -11,6 +11,12 @@ class FollowTheMoneyException(Exception):
11
11
  pass
12
12
 
13
13
 
14
+ class MetadataException(FollowTheMoneyException):
15
+ """An exception raised by dataset metadata validation."""
16
+
17
+ pass
18
+
19
+
14
20
  class InvalidData(FollowTheMoneyException):
15
21
  """Schema validation errors will be caught by the API."""
16
22
 
@@ -1,6 +1,6 @@
1
1
  from typing import Generator, List, Optional, Tuple
2
2
  from followthemoney.property import Property
3
- from followthemoney.proxy import E
3
+ from followthemoney.proxy import EntityProxy
4
4
  from followthemoney.schema import Schema
5
5
  from followthemoney.types import registry
6
6
 
@@ -17,12 +17,12 @@ class Exporter(object):
17
17
  yield prop
18
18
 
19
19
  def exportable_fields(
20
- self, proxy: E
20
+ self, proxy: EntityProxy
21
21
  ) -> Generator[Tuple[Property, List[str]], None, None]:
22
22
  for prop in self.exportable_properties(proxy.schema):
23
23
  yield prop, proxy.get(prop)
24
24
 
25
- def write(self, proxy: E, extra: Optional[List[str]] = None) -> None:
25
+ def write(self, proxy: EntityProxy, extra: Optional[List[str]] = None) -> None:
26
26
  raise NotImplementedError
27
27
 
28
28
  def finalize(self) -> None:
@@ -1,21 +1,19 @@
1
1
  import csv
2
-
3
- try:
4
- from _csv import _writer as csv_writer
5
- except ImportError:
6
- # Python 3.8/3.9 work-around:
7
- from _csv import writer as csv_writer # type: ignore
8
-
9
- from io import TextIOWrapper
10
2
  from pathlib import Path
11
- from typing import Dict, List, Optional, Tuple
3
+ from io import TextIOWrapper
4
+ from typing import Any, Dict, List, Optional, Protocol, Tuple
12
5
 
13
- from followthemoney.proxy import E
6
+ from followthemoney.proxy import EntityProxy
14
7
  from followthemoney.export.common import Exporter
15
8
  from followthemoney.schema import Schema
16
9
  from followthemoney.util import PathLike
17
10
 
18
- CSVWriter = csv_writer
11
+
12
+ class CSVWriter(Protocol):
13
+ @property
14
+ def dialect(self) -> Any: ...
15
+ def writerow(self, row: Any) -> Any: ...
16
+ def writerows(self, rows: Any) -> None: ...
19
17
 
20
18
 
21
19
  class CSVMixin(object):
@@ -69,7 +67,7 @@ class CSVExporter(Exporter, CSVMixin):
69
67
  headers.append(prop.name)
70
68
  writer.writerow(headers)
71
69
 
72
- def write(self, proxy: E, extra: Optional[List[str]] = None) -> None:
70
+ def write(self, proxy: EntityProxy, extra: Optional[List[str]] = None) -> None:
73
71
  writer = self._get_writer(proxy.schema)
74
72
  cells = [proxy.id]
75
73
  cells.extend(extra or [])
@@ -150,7 +150,7 @@ class CypherGraphExporter(GraphExporter):
150
150
  labels = list(node.schema.names)
151
151
  else:
152
152
  labels = [node.type.name]
153
- cypher = "MERGE (p { %(id)s }) " "SET p += { %(map)s } SET p :%(label)s;\n"
153
+ cypher = "MERGE (p { %(id)s }) SET p += { %(map)s } SET p :%(label)s;\n"
154
154
  self.fh.write(
155
155
  cypher
156
156
  % {
@@ -1,23 +1,75 @@
1
1
  import logging
2
- from rdflib import Graph
3
- from typing import List, Optional, TextIO
2
+ from prefixdate import Precision
3
+ from rdflib import Graph, Namespace
4
+ from rdflib.term import Identifier, URIRef, Literal
5
+ from rdflib import RDF, SKOS, XSD
6
+ from typing import Generator, List, Optional, TextIO, Tuple
4
7
 
5
8
  from followthemoney.export.common import Exporter
6
- from followthemoney.proxy import E
9
+ from followthemoney.types import registry
10
+ from followthemoney.proxy import EntityProxy
7
11
 
8
12
  log = logging.getLogger(__name__)
13
+ Triple = Tuple[Identifier, Identifier, Identifier]
14
+ NS = Namespace("https://schema.followthemoney.tech/#")
9
15
 
10
16
 
11
17
  class RDFExporter(Exporter):
18
+ """Export the entity as RDF N-Triples."""
19
+
20
+ TYPE_PREFIXES = {
21
+ registry.checksum: "hash:",
22
+ registry.country: "http://id.loc.gov/vocabulary/countries/",
23
+ registry.email: "mailto:",
24
+ registry.entity: "e:",
25
+ registry.gender: "gender:",
26
+ registry.ip: "ip:",
27
+ registry.identifier: "id:",
28
+ registry.language: "http://lexvo.org/id/iso639-3/",
29
+ registry.mimetype: "urn:mimetype:",
30
+ registry.phone: "tel:",
31
+ registry.topic: "ftm:topic:",
32
+ }
33
+
12
34
  def __init__(self, fh: TextIO, qualified: bool = True) -> None:
13
35
  super(RDFExporter, self).__init__()
14
36
  self.fh = fh
15
37
  self.qualified = qualified
16
38
 
17
- def write(self, proxy: E, extra: Optional[List[str]] = None) -> None:
39
+ def entity_triples(self, proxy: EntityProxy) -> Generator[Triple, None, None]:
40
+ if proxy.id is None or proxy.schema is None:
41
+ return
42
+ entity_prefix = self.TYPE_PREFIXES[registry.entity]
43
+ uri = URIRef(f"{entity_prefix}{proxy.id}")
44
+ yield (uri, RDF.type, NS[proxy.schema.name])
45
+ if self.qualified:
46
+ caption = proxy.caption
47
+ if caption != proxy.schema.label:
48
+ yield (uri, SKOS.prefLabel, Literal(caption))
49
+ for prop, value in proxy.itervalues():
50
+ if prop.type in self.TYPE_PREFIXES:
51
+ prefix = self.TYPE_PREFIXES[prop.type]
52
+ if prop.type == registry.identifier and prop.format is not None:
53
+ prefix = f"{prefix}{prop.format}:"
54
+ obj: Identifier = URIRef(f"{prefix}{value}")
55
+ elif prop.type == registry.date:
56
+ if len(value) < Precision.HOUR.value:
57
+ obj = Literal(value, datatype=XSD.date)
58
+ else:
59
+ obj = Literal(value, datatype=XSD.dateTime)
60
+ elif prop.type == registry.url:
61
+ obj = URIRef(value)
62
+ else:
63
+ obj = Literal(value)
64
+ if self.qualified:
65
+ yield (uri, NS[prop.qname], obj)
66
+ else:
67
+ yield (uri, URIRef(prop.name), obj)
68
+
69
+ def write(self, proxy: EntityProxy, extra: Optional[List[str]] = None) -> None:
18
70
  graph = Graph()
19
71
 
20
- for triple in proxy.triples(qualified=self.qualified):
72
+ for triple in self.entity_triples(proxy):
21
73
  graph.add(triple)
22
74
  try:
23
75
  nt = graph.serialize(format="nt11").strip()
followthemoney/graph.py CHANGED
@@ -5,6 +5,7 @@ This module provides an abstract data object that represents a property
5
5
  graph. This is used by the exporter modules to convert data
6
6
  to a specific output format, like Cypher or NetworkX.
7
7
  """
8
+
8
9
  import logging
9
10
  from typing import Any, Dict, Generator, Iterable, List, Optional
10
11
 
@@ -69,6 +70,8 @@ class Node(object):
69
70
  def from_proxy(cls, proxy: EntityProxy) -> "Node":
70
71
  """For a given :class:`~followthemoney.proxy.EntityProxy`, return a node
71
72
  based on the entity."""
73
+ if proxy.id is None:
74
+ raise InvalidModel("Invalid entity proxy: %r" % proxy)
72
75
  return cls(registry.entity, proxy.id, proxy=proxy)
73
76
 
74
77
  def __str__(self) -> str:
@@ -193,8 +196,7 @@ class Graph(object):
193
196
  """
194
197
 
195
198
  def __init__(self, edge_types: Iterable[PropertyType] = registry.pivots) -> None:
196
- types = registry.get_types(edge_types)
197
- self.edge_types = [t for t in types if t.matchable]
199
+ self.edge_types = [t for t in edge_types if t.matchable]
198
200
  self.flush()
199
201
 
200
202
  def flush(self) -> None:
@@ -256,11 +258,11 @@ class Graph(object):
256
258
  """Add an :class:`~followthemoney.proxy.EntityProxy` to the graph and make
257
259
  it either a :class:`~followthemoney.graph.Node` or an
258
260
  :class:`~followthemoney.graph.Edge`."""
259
- if proxy is None:
261
+ if proxy is None or proxy.id is None:
260
262
  return
261
263
  self.queue(proxy.id, proxy)
262
264
  if proxy.schema.edge:
263
- for (source, target) in proxy.edgepairs():
265
+ for source, target in proxy.edgepairs():
264
266
  self._add_edge(proxy, source, target)
265
267
  else:
266
268
  self._add_node(proxy)
@@ -1,24 +1,12 @@
1
1
  import io
2
2
  import os
3
3
  import logging
4
- from banal.lists import ensure_list
5
4
  import requests
6
5
  from csv import DictReader
7
6
  from urllib.parse import urlparse
8
- from banal import keys_values
9
- from typing import (
10
- TYPE_CHECKING,
11
- Any,
12
- Dict,
13
- Generator,
14
- ItemsView,
15
- Iterable,
16
- List,
17
- Optional,
18
- Set,
19
- Tuple,
20
- cast,
21
- )
7
+ from banal import keys_values, ensure_list
8
+ from typing import TYPE_CHECKING, cast
9
+ from typing import Any, Dict, Generator, ItemsView, Iterable, List, Optional, Set, Tuple
22
10
 
23
11
  from followthemoney.mapping.source import Record, Source
24
12
  from followthemoney.util import sanitize_text
@@ -48,16 +36,16 @@ class CSVSource(Source):
48
36
 
49
37
  def _parse_filters(self, filters: ItemsView[str, Any]) -> FilterList:
50
38
  filters_set: FilterList = []
51
- for (key, value) in filters:
39
+ for key, value in filters:
52
40
  values = set(cast(List[Optional[str]], ensure_list(value)))
53
41
  filters_set.append((key, values))
54
42
  return filters_set
55
43
 
56
44
  def check_filters(self, data: Record) -> bool:
57
- for (k, v) in self.filters_set:
45
+ for k, v in self.filters_set:
58
46
  if data.get(k) not in v:
59
47
  return False
60
- for (k, v) in self.filters_not_set:
48
+ for k, v in self.filters_not_set:
61
49
  if data.get(k) in v:
62
50
  return False
63
51
  return True
@@ -3,8 +3,7 @@ import logging
3
3
  from uuid import uuid4
4
4
  from typing import TYPE_CHECKING, Any, Dict, Generator, List, Optional, Union, cast
5
5
  from banal import ensure_list, is_listish, keys_values
6
- from sqlalchemy import MetaData, func
7
- from sqlalchemy.future import select
6
+ from sqlalchemy import MetaData, func, select
8
7
  from sqlalchemy.engine import Engine, create_engine
9
8
  from sqlalchemy.sql.elements import Label
10
9
  from sqlalchemy.pool import NullPool
@@ -68,7 +67,7 @@ class SQLSource(Source):
68
67
  return table.refs[ref]
69
68
  raise InvalidMapping("Missing reference: %s" % ref)
70
69
 
71
- def apply_filters(self, q: Select) -> Select:
70
+ def apply_filters(self, q: Select[Any]) -> Select[Any]:
72
71
  for col, val in self.filters:
73
72
  if is_listish(val):
74
73
  q = q.where(self.get_column(col).in_(val))
@@ -88,7 +87,7 @@ class SQLSource(Source):
88
87
  q = q.where(left == right)
89
88
  return q
90
89
 
91
- def compose_query(self) -> Select:
90
+ def compose_query(self) -> Select[Any]:
92
91
  columns = [self.get_column(r) for r in self.query.refs]
93
92
  q = select(*columns)
94
93
  q = q.select_from(*[t.alias for t in self.tables])
followthemoney/model.py CHANGED
@@ -1,16 +1,19 @@
1
1
  import os
2
2
  import yaml
3
3
  from functools import lru_cache
4
- from typing import Any, Dict, Generator, Iterator, Optional, Set, TypedDict, Union
4
+ from typing import TYPE_CHECKING, Any
5
+ from typing import Dict, Generator, Iterator, Optional, Set, TypedDict, Union
5
6
 
6
7
  from followthemoney.types import registry
7
8
  from followthemoney.types.common import PropertyType, PropertyTypeToDict
8
9
  from followthemoney.schema import Schema, SchemaToDict
9
10
  from followthemoney.property import Property
10
- from followthemoney.mapping import QueryMapping
11
- from followthemoney.proxy import EntityProxy
12
11
  from followthemoney.exc import InvalidModel, InvalidData
13
12
 
13
+ if TYPE_CHECKING:
14
+ from followthemoney.proxy import EntityProxy
15
+ from followthemoney.mapping import QueryMapping
16
+
14
17
 
15
18
  class ModelToDict(TypedDict):
16
19
  schemata: Dict[str, SchemaToDict]
@@ -22,6 +25,8 @@ class Model(object):
22
25
  provides some helper functions to find schemata, properties or to instantiate
23
26
  entity proxies based on the schema metadata."""
24
27
 
28
+ _instance: Optional["Model"] = None
29
+
25
30
  __slots__ = ("path", "schemata", "properties", "qnames")
26
31
 
27
32
  def __init__(self, path: str) -> None:
@@ -38,6 +43,15 @@ class Model(object):
38
43
  self._load(os.path.join(path, filename))
39
44
  self.generate()
40
45
 
46
+ @classmethod
47
+ def instance(cls) -> "Model":
48
+ if cls._instance is None:
49
+ model_path = os.path.dirname(__file__)
50
+ model_path = os.path.join(model_path, "schema")
51
+ model_path = os.environ.get("FTM_MODEL_PATH", model_path)
52
+ cls._instance = cls(model_path)
53
+ return cls._instance
54
+
41
55
  def generate(self) -> None:
42
56
  """Loading the model is a weird process because the schemata reference
43
57
  each other in complex ways, so the generation process cannot be fully
@@ -89,13 +103,15 @@ class Model(object):
89
103
 
90
104
  def make_mapping(
91
105
  self, mapping: Dict[str, Any], key_prefix: Optional[str] = None
92
- ) -> QueryMapping:
106
+ ) -> "QueryMapping":
93
107
  """Parse a mapping that applies (tabular) source data to the model."""
108
+ from followthemoney.mapping import QueryMapping
109
+
94
110
  return QueryMapping(self, mapping, key_prefix=key_prefix)
95
111
 
96
112
  def map_entities(
97
113
  self, mapping: Dict[str, Any], key_prefix: Optional[str] = None
98
- ) -> Generator[EntityProxy, None, None]:
114
+ ) -> Generator["EntityProxy", None, None]:
99
115
  """Given a mapping, yield a series of entities from the data source."""
100
116
  gen = self.make_mapping(mapping, key_prefix=key_prefix)
101
117
  for record in gen.source.records:
@@ -127,20 +143,31 @@ class Model(object):
127
143
  msg = "No common schema: %s and %s"
128
144
  raise InvalidData(msg % (left, right))
129
145
 
146
+ def matchable_schemata(self) -> Set[Schema]:
147
+ """Return a list of all schemata that are matchable."""
148
+ return set([s for s in self.schemata.values() if s.matchable])
149
+
130
150
  def make_entity(
131
151
  self, schema: Union[str, Schema], key_prefix: Optional[str] = None
132
- ) -> EntityProxy:
152
+ ) -> "EntityProxy":
133
153
  """Instantiate an empty entity proxy of the given schema type."""
134
- return EntityProxy(self, {"schema": schema}, key_prefix=key_prefix)
154
+ from followthemoney.proxy import EntityProxy
155
+
156
+ schema_ = self.get(schema)
157
+ if schema_ is None:
158
+ raise InvalidData("Schema does not exist: %s" % schema)
159
+ return EntityProxy(schema_, {}, key_prefix=key_prefix)
135
160
 
136
- def get_proxy(self, data: Dict[str, Any], cleaned: bool = True) -> EntityProxy:
161
+ def get_proxy(self, data: Dict[str, Any], cleaned: bool = True) -> "EntityProxy":
137
162
  """Create an entity proxy to reflect the entity data in the given
138
163
  dictionary. If ``cleaned`` is disabled, all property values are
139
164
  fully re-validated and normalised. Use this if handling input data
140
165
  from an untrusted source."""
166
+ from followthemoney.proxy import EntityProxy
167
+
141
168
  if isinstance(data, EntityProxy):
142
169
  return data
143
- return EntityProxy.from_dict(self, data, cleaned=cleaned)
170
+ return EntityProxy.from_dict(data, cleaned=cleaned)
144
171
 
145
172
  def to_dict(self) -> ModelToDict:
146
173
  """Return metadata for all schemata and properties, in a serializable form."""
@@ -22,6 +22,7 @@ that the combined ID is specific to a dataset, without needing an (expensive)
22
22
  index look up of each ID first. It can also be generated on the client or
23
23
  the server without compromising isolation.
24
24
  """
25
+
25
26
  import hmac
26
27
  from typing import Any, Optional, Tuple, Union
27
28
 
@@ -95,7 +96,8 @@ class Namespace(object):
95
96
  """Rewrite an entity proxy so all IDs mentioned are limited to
96
97
  the namespace."""
97
98
  signed = proxy.clone()
98
- signed.id = self.sign(proxy.id)
99
+ if proxy.id is not None:
100
+ signed.id = self.sign(proxy.id)
99
101
  if not shallow:
100
102
  for prop in proxy.iterprops():
101
103
  if prop.type != registry.entity:
@@ -1,15 +1,16 @@
1
1
  import sys
2
2
  from datetime import datetime
3
- from rdflib import Graph, URIRef, Literal
3
+ from rdflib import Graph, URIRef, Literal, Namespace
4
4
  from rdflib.namespace import OWL, DCTERMS, RDF, RDFS, XSD
5
5
 
6
6
  from followthemoney import model
7
7
  from followthemoney.property import Property
8
8
  from followthemoney.schema import Schema
9
9
  from followthemoney.types import registry
10
- from followthemoney.rdf import NS
11
10
  from followthemoney.util import PathLike
12
11
 
12
+ NS = Namespace("https://schema.followthemoney.tech/#")
13
+
13
14
 
14
15
  class Ontology(object):
15
16
  def __init__(self) -> None:
@@ -32,37 +33,38 @@ class Ontology(object):
32
33
  self.add_class(schema)
33
34
 
34
35
  def add_class(self, schema: Schema) -> None:
35
- self.graph.add((schema.uri, RDF.type, RDFS.Class))
36
- self.graph.add((schema.uri, RDFS.isDefinedBy, self.uri))
36
+ suri = NS[schema.name]
37
+ self.graph.add((suri, RDF.type, RDFS.Class))
38
+ self.graph.add((suri, RDFS.isDefinedBy, self.uri))
37
39
  for parent in schema.extends:
38
- self.graph.add((schema.uri, RDFS.subClassOf, parent.uri))
40
+ self.graph.add((suri, RDFS.subClassOf, NS[parent.name]))
39
41
 
40
- self.graph.add((schema.uri, RDFS.label, Literal(schema.label)))
42
+ self.graph.add((suri, RDFS.label, Literal(schema.label)))
41
43
  if schema.description is not None:
42
44
  description = Literal(schema.description)
43
- self.graph.add((schema.uri, RDFS.comment, description))
45
+ self.graph.add((suri, RDFS.comment, description))
44
46
 
45
47
  for _, prop in sorted(schema.properties.items()):
46
48
  self.add_property(prop)
47
49
 
48
50
  def add_property(self, prop: Property) -> None:
49
- self.graph.add((prop.uri, RDF.type, RDF.Property))
50
- self.graph.add((prop.uri, RDFS.isDefinedBy, self.uri))
51
+ puri = NS[prop.qname]
52
+ self.graph.add((puri, RDF.type, RDF.Property))
53
+ self.graph.add((puri, RDFS.isDefinedBy, self.uri))
51
54
 
52
- self.graph.add((prop.uri, RDFS.label, Literal(prop.label)))
55
+ self.graph.add((puri, RDFS.label, Literal(prop.label)))
53
56
  if prop.description is not None:
54
- self.graph.add((prop.uri, RDFS.comment, Literal(prop.description)))
57
+ self.graph.add((puri, RDFS.comment, Literal(prop.description)))
55
58
 
56
- self.graph.add((prop.uri, RDFS.domain, prop.schema.uri))
59
+ self.graph.add((puri, RDFS.domain, NS[prop.schema.name]))
57
60
  if prop.range is not None:
58
61
  range = model.get(prop.range)
59
62
  if range is not None:
60
- range_uri = range.uri
61
- self.graph.add((prop.uri, RDFS.range, range_uri))
63
+ self.graph.add((puri, RDFS.range, NS[range.name]))
62
64
  if prop.reverse is not None:
63
- self.graph.add((prop.uri, OWL.inverseOf, prop.reverse.uri))
65
+ self.graph.add((puri, OWL.inverseOf, NS[prop.reverse.qname]))
64
66
  if prop.type == registry.date:
65
- self.graph.add((prop.uri, RDFS.range, XSD.dateTime))
67
+ self.graph.add((puri, RDFS.range, XSD.dateTime))
66
68
 
67
69
  def write_namespace_docs(self, path: PathLike) -> None:
68
70
  xml_fn = "%s/ftm.xml" % path