followthemoney 1.3.6__py3-none-any.whl → 3.8.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- followthemoney/__init__.py +5 -3
- followthemoney/cli/__init__.py +17 -0
- followthemoney/cli/aggregate.py +56 -0
- followthemoney/cli/cli.py +88 -0
- followthemoney/cli/exports.py +121 -0
- followthemoney/cli/mapping.py +85 -0
- followthemoney/cli/sieve.py +67 -0
- followthemoney/cli/util.py +142 -0
- followthemoney/compare.py +132 -55
- followthemoney/exc.py +19 -6
- followthemoney/export/common.py +29 -0
- followthemoney/export/csv.py +82 -0
- followthemoney/export/excel.py +75 -0
- followthemoney/export/graph.py +79 -0
- followthemoney/export/neo4j.py +182 -0
- followthemoney/export/rdf.py +26 -0
- followthemoney/graph.py +308 -0
- followthemoney/helpers.py +212 -0
- followthemoney/mapping/__init__.py +1 -1
- followthemoney/mapping/csv.py +67 -35
- followthemoney/mapping/entity.py +116 -44
- followthemoney/mapping/property.py +90 -44
- followthemoney/mapping/query.py +27 -19
- followthemoney/mapping/source.py +15 -5
- followthemoney/mapping/sql.py +75 -61
- followthemoney/messages.py +13 -7
- followthemoney/model.py +108 -56
- followthemoney/namespace.py +119 -0
- followthemoney/offshore.py +48 -0
- followthemoney/ontology.py +77 -0
- followthemoney/property.py +204 -71
- followthemoney/proxy.py +455 -118
- followthemoney/rdf.py +9 -0
- followthemoney/schema/Address.yaml +78 -0
- followthemoney/schema/Airplane.yaml +17 -10
- followthemoney/schema/Analyzable.yaml +54 -0
- followthemoney/schema/Article.yaml +16 -0
- followthemoney/schema/Assessment.yaml +32 -0
- followthemoney/schema/Asset.yaml +10 -4
- followthemoney/schema/Associate.yaml +41 -0
- followthemoney/schema/Audio.yaml +24 -0
- followthemoney/schema/BankAccount.yaml +53 -9
- followthemoney/schema/Call.yaml +48 -0
- followthemoney/schema/CallForTenders.yaml +117 -0
- followthemoney/schema/Company.yaml +37 -12
- followthemoney/schema/Contract.yaml +41 -7
- followthemoney/schema/ContractAward.yaml +30 -11
- followthemoney/schema/CourtCase.yaml +16 -10
- followthemoney/schema/CourtCaseParty.yaml +17 -6
- followthemoney/schema/CryptoWallet.yaml +48 -0
- followthemoney/schema/Debt.yaml +37 -0
- followthemoney/schema/Directorship.yaml +17 -4
- followthemoney/schema/Document.yaml +72 -139
- followthemoney/schema/Documentation.yml +38 -0
- followthemoney/schema/EconomicActivity.yaml +32 -17
- followthemoney/schema/Email.yaml +76 -0
- followthemoney/schema/Employment.yaml +39 -0
- followthemoney/schema/Event.yaml +35 -3
- followthemoney/schema/Family.yaml +41 -0
- followthemoney/schema/Folder.yaml +13 -0
- followthemoney/schema/HyperText.yaml +21 -0
- followthemoney/schema/Identification.yaml +40 -0
- followthemoney/schema/Image.yaml +25 -0
- followthemoney/schema/Interest.yaml +3 -6
- followthemoney/schema/Interval.yaml +56 -5
- followthemoney/schema/LegalEntity.yaml +81 -20
- followthemoney/schema/License.yaml +7 -3
- followthemoney/schema/Membership.yaml +19 -4
- followthemoney/schema/Mention.yaml +54 -0
- followthemoney/schema/Message.yaml +73 -0
- followthemoney/schema/Note.yaml +23 -0
- followthemoney/schema/Occupancy.yaml +40 -0
- followthemoney/schema/Organization.yaml +38 -3
- followthemoney/schema/Ownership.yaml +16 -4
- followthemoney/schema/Package.yaml +17 -0
- followthemoney/schema/Page.yaml +43 -0
- followthemoney/schema/Pages.yaml +23 -0
- followthemoney/schema/Passport.yaml +15 -17
- followthemoney/schema/Payment.yaml +38 -7
- followthemoney/schema/Person.yaml +61 -5
- followthemoney/schema/PlainText.yaml +17 -0
- followthemoney/schema/Position.yaml +50 -0
- followthemoney/schema/Post.yaml +42 -0
- followthemoney/schema/Project.yaml +27 -0
- followthemoney/schema/ProjectParticipant.yaml +36 -0
- followthemoney/schema/PublicBody.yaml +14 -3
- followthemoney/schema/RealEstate.yaml +19 -3
- followthemoney/schema/Representation.yaml +17 -6
- followthemoney/schema/Sanction.yaml +44 -20
- followthemoney/schema/Security.yaml +59 -0
- followthemoney/schema/Similar.yaml +37 -0
- followthemoney/schema/Succession.yaml +36 -0
- followthemoney/schema/Table.yaml +32 -0
- followthemoney/schema/TaxRoll.yaml +27 -9
- followthemoney/schema/Thing.yaml +69 -13
- followthemoney/schema/Trip.yaml +42 -0
- followthemoney/schema/UnknownLink.yaml +17 -6
- followthemoney/schema/UserAccount.yaml +44 -0
- followthemoney/schema/Value.yaml +5 -1
- followthemoney/schema/Vehicle.yaml +25 -8
- followthemoney/schema/Vessel.yaml +18 -10
- followthemoney/schema/Video.yaml +20 -0
- followthemoney/schema/Workbook.yaml +18 -0
- followthemoney/schema.py +406 -135
- followthemoney/translations/ar/LC_MESSAGES/followthemoney.mo +0 -0
- followthemoney/translations/ar/LC_MESSAGES/followthemoney.po +2900 -787
- followthemoney/translations/bs/LC_MESSAGES/followthemoney.mo +0 -0
- followthemoney/translations/bs/LC_MESSAGES/followthemoney.po +2108 -520
- followthemoney/translations/de/LC_MESSAGES/followthemoney.mo +0 -0
- followthemoney/translations/de/LC_MESSAGES/followthemoney.po +2902 -782
- followthemoney/translations/es/LC_MESSAGES/followthemoney.mo +0 -0
- followthemoney/translations/es/LC_MESSAGES/followthemoney.po +2893 -779
- followthemoney/translations/fr/LC_MESSAGES/followthemoney.mo +0 -0
- followthemoney/translations/fr/LC_MESSAGES/followthemoney.po +4362 -0
- followthemoney/translations/fr/followthemoney.po +3861 -0
- followthemoney/translations/messages.pot +3021 -725
- followthemoney/translations/nb/LC_MESSAGES/followthemoney.mo +0 -0
- followthemoney/translations/nb/LC_MESSAGES/followthemoney.po +3778 -0
- followthemoney/translations/nl/LC_MESSAGES/followthemoney.mo +0 -0
- followthemoney/translations/nl/LC_MESSAGES/followthemoney.po +3837 -0
- followthemoney/translations/pt_BR/LC_MESSAGES/followthemoney.mo +0 -0
- followthemoney/translations/pt_BR/LC_MESSAGES/followthemoney.po +3784 -0
- followthemoney/translations/ru/LC_MESSAGES/followthemoney.mo +0 -0
- followthemoney/translations/ru/LC_MESSAGES/followthemoney.po +2837 -539
- followthemoney/translations/ru/followthemoney.po +4221 -0
- followthemoney/translations/tr/LC_MESSAGES/followthemoney.mo +0 -0
- followthemoney/translations/tr/LC_MESSAGES/followthemoney.po +2073 -491
- followthemoney/types/__init__.py +35 -17
- followthemoney/types/address.py +41 -21
- followthemoney/types/checksum.py +25 -0
- followthemoney/types/common.py +233 -88
- followthemoney/types/country.py +89 -56
- followthemoney/types/date.py +59 -76
- followthemoney/types/email.py +66 -35
- followthemoney/types/entity.py +66 -13
- followthemoney/types/gender.py +66 -0
- followthemoney/types/iban.py +47 -28
- followthemoney/types/identifier.py +49 -22
- followthemoney/types/ip.py +35 -21
- followthemoney/types/json.py +58 -0
- followthemoney/types/language.py +124 -37
- followthemoney/types/mimetype.py +44 -0
- followthemoney/types/name.py +56 -12
- followthemoney/types/number.py +30 -0
- followthemoney/types/phone.py +92 -34
- followthemoney/types/registry.py +52 -0
- followthemoney/types/string.py +43 -0
- followthemoney/types/topic.py +94 -0
- followthemoney/types/url.py +39 -17
- followthemoney/util.py +139 -45
- followthemoney-3.8.0.dist-info/METADATA +153 -0
- followthemoney-3.8.0.dist-info/RECORD +157 -0
- {followthemoney-1.3.6.dist-info → followthemoney-3.8.0.dist-info}/WHEEL +1 -2
- followthemoney-3.8.0.dist-info/entry_points.txt +17 -0
- followthemoney-1.3.6.dist-info/LICENSE.txt → followthemoney-3.8.0.dist-info/licenses/LICENSE +1 -1
- followthemoney/link.py +0 -75
- followthemoney/schema/Associate.yml +0 -19
- followthemoney/schema/Family.yml +0 -19
- followthemoney/schema/Land.yml +0 -9
- followthemoney/schema/Relationship.yaml +0 -26
- followthemoney/types/domain.py +0 -50
- followthemoney-1.3.6.dist-info/DESCRIPTION.rst +0 -3
- followthemoney-1.3.6.dist-info/METADATA +0 -39
- followthemoney-1.3.6.dist-info/RECORD +0 -108
- followthemoney-1.3.6.dist-info/entry_points.txt +0 -3
- followthemoney-1.3.6.dist-info/metadata.json +0 -1
- followthemoney-1.3.6.dist-info/namespace_packages.txt +0 -1
- followthemoney-1.3.6.dist-info/top_level.txt +0 -3
- ns/ontology.py +0 -128
- tests/types/test_addresses.py +0 -24
- tests/types/test_common.py +0 -27
- tests/types/test_countries.py +0 -21
- tests/types/test_dates.py +0 -72
- tests/types/test_domains.py +0 -23
- tests/types/test_emails.py +0 -30
- tests/types/test_entity.py +0 -16
- tests/types/test_iban.py +0 -109
- tests/types/test_identifiers.py +0 -25
- tests/types/test_ip.py +0 -26
- tests/types/test_languages.py +0 -20
- tests/types/test_names.py +0 -33
- tests/types/test_phones.py +0 -24
- tests/types/test_registry.py +0 -14
- tests/types/test_urls.py +0 -23
- {ns → followthemoney/export}/__init__.py +0 -0
- /tests/types/__init__.py → /followthemoney/py.typed +0 -0
followthemoney/__init__.py
CHANGED
|
@@ -3,12 +3,14 @@ import os
|
|
|
3
3
|
from followthemoney.model import Model
|
|
4
4
|
from followthemoney.util import set_model_locale
|
|
5
5
|
|
|
6
|
-
__version__ =
|
|
6
|
+
__version__ = "3.8.0"
|
|
7
|
+
|
|
7
8
|
|
|
8
9
|
model_path = os.path.dirname(__file__)
|
|
9
|
-
model_path = os.path.join(model_path,
|
|
10
|
+
model_path = os.path.join(model_path, "schema")
|
|
11
|
+
model_path = os.environ.get("FTM_MODEL_PATH", model_path)
|
|
10
12
|
|
|
11
13
|
# Data model singleton
|
|
12
14
|
model = Model(model_path)
|
|
13
15
|
|
|
14
|
-
__all__ = [model, set_model_locale]
|
|
16
|
+
__all__ = ["model", "set_model_locale"]
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
import sys
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
def load_entry_points() -> None:
    """Load every entry point registered in the ``followthemoney.cli`` group.

    Each entry point is loaded purely for its import side effect; the loaded
    objects themselves are discarded.
    """
    # Compare the version as a tuple. Testing major and minor separately
    # (major >= 3 and minor >= 10) would send any future X.0 release down
    # the legacy branch because its minor component is below 10.
    if sys.version_info >= (3, 10):
        from importlib.metadata import entry_points

        for ep in entry_points().select(group="followthemoney.cli"):
            ep.load()
    else:
        # Older interpreters: discover the same group through pkg_resources.
        from pkg_resources import iter_entry_points

        for ep_ in iter_entry_points("followthemoney.cli"):
            ep_.load()
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
load_entry_points()
|
|
@@ -0,0 +1,56 @@
|
|
|
1
|
+
import click
|
|
2
|
+
from pathlib import Path
|
|
3
|
+
from typing import Dict, Optional, Type
|
|
4
|
+
|
|
5
|
+
from followthemoney.proxy import EntityProxy, E
|
|
6
|
+
from followthemoney.namespace import Namespace
|
|
7
|
+
from followthemoney.cli.cli import cli
|
|
8
|
+
from followthemoney.cli.util import InPath, OutPath, path_entities
|
|
9
|
+
from followthemoney.cli.util import path_writer, write_entity
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
def sorted_aggregate(path: Path, outpath: Path, entity_type: Type[E]) -> None:
    """Merge consecutive fragments that share an ID and write the results.

    The input stream is expected to be sorted by entity ID, so only the
    entity currently being assembled has to be kept in memory.
    """
    current: Optional[E] = None
    with path_writer(outpath) as outfh:
        for fragment in path_entities(path, entity_type=entity_type):
            if current is not None and fragment.id == current.id:
                # Same ID as the pending entity: fold the fragment into it.
                current = current.merge(fragment)
                continue
            if current is not None:
                # The ID changed, so the pending entity is complete.
                write_entity(outfh, current)
            current = fragment

        # Flush whatever entity was still being assembled at end of input.
        if current is not None:
            write_entity(outfh, current)
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
@cli.command("aggregate", help="Aggregate multiple fragments of entities")
@click.option("-i", "--infile", type=InPath, default="-")
@click.option("-o", "--outfile", type=OutPath, default="-")
def aggregate(infile: Path, outfile: Path) -> None:
    """Merge all fragments with the same ID in memory, then write each once.

    Unlike the sorted variant, the input does not need to be ordered; every
    distinct entity is buffered until the whole input has been read.
    """
    namespace = Namespace(None)
    merged: Dict[str, EntityProxy] = {}
    try:
        with path_writer(outfile) as outfh:
            for fragment in path_entities(infile, EntityProxy):
                fragment = namespace.apply(fragment)
                known = merged.get(fragment.id)
                if known is None:
                    merged[fragment.id] = fragment
                else:
                    known.merge(fragment)

            for combined in merged.values():
                write_entity(outfh, combined)
    except BrokenPipeError:
        raise click.Abort()
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
@cli.command("sorted-aggregate", help="Aggregate sorted fragments of entities")
@click.option("-i", "--infile", type=InPath, default="-")
@click.option("-o", "--outfile", type=OutPath, default="-")
def sorted_aggregate_(infile: Path, outfile: Path) -> None:
    """CLI wrapper that runs ``sorted_aggregate`` over ``EntityProxy`` objects."""
    sorted_aggregate(path=infile, outpath=outfile, entity_type=EntityProxy)
|
|
@@ -0,0 +1,88 @@
|
|
|
1
|
+
import sys
|
|
2
|
+
import click
|
|
3
|
+
import orjson
|
|
4
|
+
import logging
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
from typing import Optional, BinaryIO, List, Any, Dict
|
|
7
|
+
from banal import ensure_list
|
|
8
|
+
|
|
9
|
+
from followthemoney import model
|
|
10
|
+
from followthemoney.namespace import Namespace
|
|
11
|
+
from followthemoney.cli.util import InPath, OutPath, path_entities
|
|
12
|
+
from followthemoney.cli.util import path_writer, write_entity
|
|
13
|
+
from followthemoney.proxy import EntityProxy
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
@click.group(help="Utility for FollowTheMoney graph data")
def cli() -> None:
    """Root command group; configures INFO-level logging on stderr."""
    logging.basicConfig(
        format="%(name)s [%(levelname)s] %(message)s",
        level=logging.INFO,
        stream=sys.stderr,
    )
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
@cli.command("dump-model", help="Export the current schema model")
@click.option("-o", "--outfile", type=click.File("wb"), default="-")
def dump_model(outfile: BinaryIO) -> None:
    """Serialize the model's dict form as indented, key-sorted JSON."""
    payload = orjson.dumps(
        model.to_dict(),
        option=orjson.OPT_INDENT_2 | orjson.OPT_SORT_KEYS,
    )
    outfile.write(payload)
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
@cli.command("validate", help="Re-parse and validate the given data")
@click.option("-i", "--infile", type=InPath, default="-")
@click.option("-o", "--outfile", type=OutPath, default="-")
def validate(infile: Path, outfile: Path) -> None:
    """Rebuild every input entity from scratch and write the rebuilt copy.

    Entities are read with ``cleaned=False`` and each value is re-added to a
    fresh entity of the same schema via ``add``.
    """
    try:
        with path_writer(outfile) as outfh:
            for raw in path_entities(infile, EntityProxy, cleaned=False):
                checked = model.make_entity(raw.schema)
                checked.id = raw.id
                for prop, value in raw.itervalues():
                    checked.add(prop, value)
                write_entity(outfh, checked)
    except BrokenPipeError:
        raise click.Abort()
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
@cli.command("import-vis", help="Load a .VIS file and get entities")
@click.option("-i", "--infile", type=InPath, default="-")  # noqa
@click.option("-o", "--outfile", type=OutPath, default="-")  # noqa
def import_vis(infile: Path, outfile: Path) -> None:
    """Extract the entities stored in a .VIS JSON document.

    Entities are taken from the top-level ``entities`` key or, failing that,
    from ``layout.entities``. The input is read and parsed completely before
    the output is opened, so a bad input does not leave a truncated output.

    Raises:
        click.ClickException: neither ``entities`` nor ``layout`` is present.
        click.Abort: the output pipe was closed by the reader.
    """
    # "-" is the option default and means stdin; passing it to open() would
    # look for a file literally named "-".
    if str(infile) == "-":
        raw = click.get_binary_stream("stdin").read()
    else:
        with open(infile, "rb") as infh:
            raw = infh.read()

    data: Dict[str, Any] = orjson.loads(raw)
    if "entities" in data:
        entities: List[Dict[str, Any]] = data["entities"]
    elif "layout" in data:
        entities = data.get("layout", {}).get("entities", data)
    else:
        raise click.ClickException("No entities found in VIS file")

    try:
        with path_writer(outfile) as outfh:
            for entity_data in ensure_list(entities):
                entity = EntityProxy.from_dict(model, entity_data)
                write_entity(outfh, entity)
    except BrokenPipeError:
        # Same handling as the other stream commands in this module.
        raise click.Abort()
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
@cli.command("sign", help="Apply a HMAC signature to entity IDs")
@click.option("-i", "--infile", type=InPath, default="-")  # noqa
@click.option("-o", "--outfile", type=OutPath, default="-")  # noqa
@click.option("-s", "--signature", default=None, help="HMAC signature key")  # noqa
def sign(infile: Path, outfile: Path, signature: Optional[str]) -> None:
    """Pass every input entity through a ``Namespace`` built from the key."""
    namespace = Namespace(signature)
    try:
        with path_writer(outfile) as outfh:
            for entity in path_entities(infile, EntityProxy):
                write_entity(outfh, namespace.apply(entity))
    except BrokenPipeError:
        raise click.Abort()
|
|
76
|
+
|
|
77
|
+
|
|
78
|
+
@cli.command(help="Format a stream of entities to make it readable")
@click.option("-i", "--infile", type=InPath, default="-")  # noqa
def pretty(infile: Path) -> None:
    """Print each input entity to stdout as indented JSON, one after another."""
    options = orjson.OPT_INDENT_2 | orjson.OPT_APPEND_NEWLINE
    out = click.get_binary_stream("stdout")
    try:
        for entity in path_entities(infile, EntityProxy):
            out.write(orjson.dumps(entity.to_dict(), option=options))
    except BrokenPipeError:
        raise click.Abort()
|
|
@@ -0,0 +1,121 @@
|
|
|
1
|
+
import click
|
|
2
|
+
from typing import List, TextIO, Generator
|
|
3
|
+
from pathlib import Path
|
|
4
|
+
from contextlib import contextmanager
|
|
5
|
+
|
|
6
|
+
from followthemoney.cli.cli import cli
|
|
7
|
+
from followthemoney.cli.util import InPath, OutPath, export_stream
|
|
8
|
+
from followthemoney.export.csv import CSVExporter
|
|
9
|
+
from followthemoney.export.rdf import RDFExporter
|
|
10
|
+
from followthemoney.export.excel import ExcelExporter
|
|
11
|
+
from followthemoney.export.graph import edge_types, DEFAULT_EDGE_TYPES
|
|
12
|
+
from followthemoney.export.graph import NXGraphExporter
|
|
13
|
+
from followthemoney.export.neo4j import Neo4JCSVExporter
|
|
14
|
+
from followthemoney.export.neo4j import CypherGraphExporter
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
@contextmanager
def text_out(path: Path) -> Generator[TextIO, None, None]:
    """Yield a text handle for writing: stdout for ``-``, else the opened file.

    A file opened here is closed when the context exits; stdout is left open.
    """
    if str(path) != "-":
        with open(path, "w") as fh:
            yield fh
    else:
        yield click.get_text_stream("stdout")
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
@cli.command("export-csv", help="Export to CSV")
@click.option("-i", "--infile", type=InPath, default="-")  # noqa
@click.option(
    "-o",
    "--outdir",
    type=click.Path(file_okay=False, writable=True, path_type=Path),
    default=".",
    help="output directory",
)
def export_csv(infile: Path, outdir: Path) -> None:
    """Stream the input entities through a ``CSVExporter`` bound to ``outdir``."""
    export_stream(CSVExporter(outdir), infile)
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
@cli.command("export-excel", help="Export to Excel")
@click.option("-i", "--infile", type=InPath, default="-")
@click.option(
    "-o",
    "--outfile",
    type=click.Path(dir_okay=False, writable=True, path_type=Path),
    required=True,
)
def export_excel(infile: Path, outfile: Path) -> None:
    """Stream the input entities through an ``ExcelExporter`` for ``outfile``."""
    export_stream(ExcelExporter(outfile), infile)
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
@cli.command("export-rdf", help="Export to RDF NTriples")
@click.option("-i", "--infile", type=InPath, default="-")
@click.option("-o", "--outfile", type=OutPath, default="-")
@click.option(
    "--qualified/--unqualified",
    is_flag=True,
    default=True,
    help="Generate full predicates",
)
def export_rdf(infile: Path, outfile: Path, qualified: bool = True) -> None:
    """Stream the input entities through an ``RDFExporter`` writing text output."""
    with text_out(outfile) as fh:
        export_stream(RDFExporter(fh, qualified=qualified), infile)
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
@cli.command("export-gexf", help="Export to GEXF (Gephi) format")
@click.option("-i", "--infile", type=InPath, default="-")
@click.option("-o", "--outfile", type=OutPath, default="-")
@click.option(
    "-e",
    "--edge-types",
    type=click.Choice(edge_types()),
    multiple=True,
    default=DEFAULT_EDGE_TYPES,
    help="Property types to be reified into graph edges.",
)
def export_gexf(infile: Path, outfile: Path, edge_types: List[str]) -> None:
    """Stream the input entities through an ``NXGraphExporter``."""
    with text_out(outfile) as fh:
        export_stream(NXGraphExporter(fh, edge_types=edge_types), infile)
|
|
83
|
+
|
|
84
|
+
|
|
85
|
+
@cli.command("export-cypher", help="Export to Cypher script")
@click.option("-i", "--infile", type=InPath, default="-")  # noqa
@click.option("-o", "--outfile", type=OutPath, default="-")  # noqa
@click.option(
    "-e",
    "--edge-types",
    type=click.Choice(edge_types()),
    multiple=True,
    default=DEFAULT_EDGE_TYPES,
    help="Property types to be reified into graph edges.",
)
def export_cypher(infile: Path, outfile: Path, edge_types: List[str]) -> None:
    """Stream the input entities through a ``CypherGraphExporter``."""
    with text_out(outfile) as fh:
        export_stream(CypherGraphExporter(fh, edge_types=edge_types), infile)
|
|
100
|
+
|
|
101
|
+
|
|
102
|
+
@cli.command("export-neo4j-bulk", help="Export to Neo4J bulk import")
@click.option("-i", "--infile", type=InPath, default="-")  # noqa
@click.option(
    "-o",
    "--outdir",
    type=click.Path(file_okay=False, writable=True, path_type=Path),
    required=True,
    help="Output directory for Neo4J import script",
)
@click.option(
    "-e",
    "--edge-types",
    type=click.Choice(edge_types()),
    multiple=True,
    default=DEFAULT_EDGE_TYPES,
    help="Property types to be reified into graph edges.",
)
def export_neo4j_bulk(infile: Path, outdir: Path, edge_types: List[str]) -> None:
    """Stream the input entities through a ``Neo4JCSVExporter`` for ``outdir``."""
    export_stream(Neo4JCSVExporter(outdir, edge_types=edge_types), infile)
|
|
@@ -0,0 +1,85 @@
|
|
|
1
|
+
import sys
|
|
2
|
+
import click
|
|
3
|
+
from pathlib import Path
|
|
4
|
+
from banal import keys_values
|
|
5
|
+
from typing import Generator, List, TextIO, Tuple
|
|
6
|
+
from contextlib import contextmanager
|
|
7
|
+
|
|
8
|
+
from followthemoney import model
|
|
9
|
+
from followthemoney.namespace import Namespace
|
|
10
|
+
from followthemoney.mapping.query import QueryMapping
|
|
11
|
+
from followthemoney.mapping.csv import CSVSource
|
|
12
|
+
from followthemoney.cli.cli import cli
|
|
13
|
+
from followthemoney.cli.util import InPath, OutPath, load_mapping_file
|
|
14
|
+
from followthemoney.cli.util import path_writer, write_entity
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
@contextmanager
def input_file(path: Path) -> Generator[TextIO, None, None]:
    """Yield a text handle for reading: ``sys.stdin`` for ``-``, else the file.

    A file opened here is closed when the context exits; stdin is left open.
    """
    if str(path) != "-":
        with open(path, "r") as fh:
            yield fh
    else:
        yield sys.stdin
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
@cli.command("map", help="Execute a mapping file and emit objects")
@click.option("-o", "--outfile", type=OutPath, default="-")
@click.option(
    "--sign/--no-sign",
    is_flag=True,
    default=True,
    help="Apply HMAC signature",
)
@click.argument("mapping_yaml", type=click.Path(exists=True, path_type=Path))
def run_mapping(outfile: Path, mapping_yaml: Path, sign: bool = True) -> None:
    """Run every query of a mapping file and write the generated entities.

    Each top-level key of the mapping file is treated as a dataset name; it
    is used both as the key prefix and as the namespace applied when ``sign``
    is set. Any failure other than a broken pipe is reported as a
    ``click.ClickException`` carrying the error text.
    """
    config = load_mapping_file(mapping_yaml)
    try:
        with path_writer(outfile) as outfh:
            for dataset, meta in config.items():
                namespace = Namespace(dataset)
                for mapping in keys_values(meta, "queries", "query"):
                    for entity in model.map_entities(mapping, key_prefix=dataset):
                        emitted = namespace.apply(entity) if sign else entity
                        write_entity(outfh, emitted)
    except BrokenPipeError:
        raise click.Abort()
    except Exception as exc:
        raise click.ClickException(str(exc))
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
@cli.command("map-csv", help="Map CSV data from stdin and emit objects")
@click.option("-i", "--infile", type=InPath, default="-")
@click.option("-o", "--outfile", type=OutPath, default="-")
@click.option(
    "--sign/--no-sign", is_flag=True, default=True, help="Apply HMAC signature"
)
@click.argument("mapping_yaml", type=click.Path(exists=True, path_type=Path))
def stream_mapping(
    infile: Path, outfile: Path, mapping_yaml: Path, sign: bool = True
) -> None:
    """Apply the queries of a mapping file to CSV records read from ``infile``.

    Every record is offered to every query; records passing a query's source
    filters are mapped and the resulting entities written to ``outfile``,
    passed through the dataset namespace first when ``sign`` is set.

    Raises:
        click.Abort: the output pipe was closed by the reader.
    """
    queries: List[Tuple[str, QueryMapping]] = []
    config = load_mapping_file(mapping_yaml)
    for dataset, meta in config.items():
        for data in keys_values(meta, "queries", "query"):
            # The records come from the input stream, so the configured
            # source is replaced by a placeholder CSV location.
            data.pop("database", None)
            data["csv_url"] = "/dev/null"
            query = model.make_mapping(data, key_prefix=dataset)
            queries.append((dataset, query))

    # Build one namespace per dataset up front instead of re-creating it for
    # every (record, query) pair inside the streaming loop.
    namespaces = {dataset: Namespace(dataset) for dataset, _ in queries}

    try:
        with path_writer(outfile) as outfh:
            with input_file(infile) as fh:
                for record in CSVSource.read_csv(fh):
                    for dataset, query in queries:
                        if not query.source.check_filters(record):  # type: ignore
                            continue
                        ns = namespaces[dataset]
                        for entity in query.map(record).values():
                            if sign:
                                entity = ns.apply(entity)
                            write_entity(outfh, entity)
    except BrokenPipeError:
        raise click.Abort()
|
|
@@ -0,0 +1,67 @@
|
|
|
1
|
+
import click
|
|
2
|
+
from pathlib import Path
|
|
3
|
+
from typing import Iterable, Optional
|
|
4
|
+
|
|
5
|
+
from followthemoney import model
|
|
6
|
+
from followthemoney.proxy import E, EntityProxy
|
|
7
|
+
from followthemoney.types import registry
|
|
8
|
+
from followthemoney.cli.cli import cli
|
|
9
|
+
from followthemoney.cli.util import InPath, OutPath, path_entities
|
|
10
|
+
from followthemoney.cli.util import path_writer, write_entity
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
def sieve_entity(
    entity: EntityProxy,
    schemata: Iterable[str],
    properties: Iterable[str],
    types: Iterable[str],
) -> Optional[EntityProxy]:
    """Drop an entity, or strip selected properties from it.

    Returns ``None`` when the entity's schema ``is_a`` any of ``schemata``.
    Otherwise every property whose name or qualified name is listed in
    ``properties``, or whose type name is listed in ``types``, is popped from
    the entity, and the same (mutated) entity is returned.
    """
    if any(entity.schema.is_a(name) for name in schemata):
        return None
    for prop in entity.iterprops():
        listed = prop.name in properties or prop.qname in properties
        if listed or prop.type.name in types:
            entity.pop(prop, quiet=True)
    return entity
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
@cli.command("sieve", help="Filter out parts of entities.")
@click.option("-i", "--infile", type=InPath, default="-")
@click.option("-o", "--outfile", type=OutPath, default="-")
@click.option(
    "-s",
    "--schema",
    type=click.Choice(list(model.schemata.keys())),
    multiple=True,
    help="Filter out the given schemata.",
)
@click.option(
    "-p",
    "--property",
    multiple=True,
    help="Filter out the given property names.",
)
@click.option(
    "-t",
    "--type",
    type=click.Choice([t.name for t in registry.types]),
    multiple=True,
    help="Filter out the given property types.",
)
def sieve(
    infile: Path,
    outfile: Path,
    schema: Iterable[str],
    property: Iterable[str],
    type: Iterable[str],
) -> None:
    """Run ``sieve_entity`` over the input and write the entities that remain."""
    try:
        with path_writer(outfile) as outfh:
            for entity in path_entities(infile, EntityProxy):
                kept = sieve_entity(entity, schema, property, type)
                if kept is None:
                    # The whole entity was filtered out by its schema.
                    continue
                write_entity(outfh, kept)
    except BrokenPipeError:
        raise click.Abort()
|
|
@@ -0,0 +1,142 @@
|
|
|
1
|
+
from contextlib import contextmanager
|
|
2
|
+
import os
|
|
3
|
+
import json
|
|
4
|
+
import yaml
|
|
5
|
+
import click
|
|
6
|
+
import orjson
|
|
7
|
+
from pathlib import Path
|
|
8
|
+
from warnings import warn
|
|
9
|
+
from typing import Any, BinaryIO, Generator, Optional, TextIO, Type
|
|
10
|
+
from banal import is_mapping, is_listish, ensure_list
|
|
11
|
+
|
|
12
|
+
from followthemoney import model
|
|
13
|
+
from followthemoney.export.common import Exporter
|
|
14
|
+
from followthemoney.proxy import E, EntityProxy
|
|
15
|
+
from followthemoney.util import MEGABYTE, PathLike
|
|
16
|
+
|
|
17
|
+
# Default size limit handed to readline() when reading one line of input.
MAX_LINE = 200 * MEGABYTE
# click parameter types for input and output file options. Directories are
# rejected and "-" is accepted (allow_dash); the path helpers in this module
# map "-" to stdin or stdout.
InPath = click.Path(dir_okay=False, readable=True, path_type=Path, allow_dash=True)
OutPath = click.Path(dir_okay=False, writable=True, path_type=Path, allow_dash=True)
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
def write_object(stream: TextIO, obj: Any) -> None:
    """Deprecated: write ``obj`` to ``stream`` as one line of JSON.

    Objects exposing ``to_dict`` are converted through it first. A
    ``DeprecationWarning`` is emitted on every call.
    """
    warn("write_object() is deprecated.", DeprecationWarning, stacklevel=2)
    payload = obj.to_dict() if hasattr(obj, "to_dict") else obj
    stream.write(json.dumps(payload) + "\n")
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
def write_entity(fh: BinaryIO, entity: E) -> None:
    """Write one entity to ``fh`` as a JSON line with ``id`` as the first key.

    Asserts that the entity has a non-``None`` ID.
    """
    fields = entity.to_dict()
    entity_id = fields.pop("id")
    assert entity_id is not None, fields
    # Rebuild the mapping so that "id" leads and the other keys keep their order.
    ordered = {"id": entity_id, **fields}
    fh.write(orjson.dumps(ordered, option=orjson.OPT_APPEND_NEWLINE))
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
def _read_one(data: Any, cleaned: bool = True) -> Generator[EntityProxy, None, None]:
    """Yield a proxy for ``data`` if it is a mapping with a ``schema`` key.

    Anything else yields nothing.
    """
    if not is_mapping(data) or "schema" not in data:
        return
    yield model.get_proxy(data, cleaned=cleaned)
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
def read_entities(
    stream: TextIO, cleaned: bool = True, max_line: int = MAX_LINE
) -> Generator[EntityProxy, None, None]:
    """Deprecated: yield entities from a text stream of JSON lines.

    Each line may hold a single entity, a list of entities, or a mapping that
    carries them under ``entities`` or ``layout.entities`` (the latter wins
    when both are present). Items without a ``schema`` key are skipped.
    """
    warn("read_entities() is deprecated.", DeprecationWarning, stacklevel=2)
    while line := stream.readline(max_line):
        data = json.loads(line)
        candidates = ensure_list(data)
        if is_mapping(data):
            if "entities" in data:
                candidates = data["entities"]
            if "layout" in data:
                candidates = data.get("layout", {}).get("entities", data)
        for item in ensure_list(candidates):
            yield from _read_one(item, cleaned=cleaned)
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
def read_entity(
    stream: TextIO, cleaned: bool = True, max_line: int = MAX_LINE
) -> Optional[Any]:
    """Deprecated: read one JSON line and return it as an entity if possible.

    Returns ``None`` at end of input, an entity proxy when the decoded line is
    a mapping with a ``schema`` key, and the decoded data itself otherwise.
    """
    warn("read_entity() is deprecated.", DeprecationWarning, stacklevel=2)
    line = stream.readline(max_line)
    if not line:
        return None
    data = json.loads(line)
    # Fall back to the raw decoded data when no proxy could be produced.
    return next(_read_one(data, cleaned=cleaned), data)
|
|
75
|
+
|
|
76
|
+
|
|
77
|
+
def binary_entities(
    fh: BinaryIO, entity_type: Type[E], cleaned: bool = True, max_line: int = MAX_LINE
) -> Generator[E, None, None]:
    """Yield one ``entity_type`` instance per JSON line read from ``fh``.

    Reading stops at the first empty read, i.e. at end of input.
    """
    while True:
        line = fh.readline(max_line)
        if not line:
            break
        yield entity_type.from_dict(model, orjson.loads(line), cleaned=cleaned)
|
|
83
|
+
|
|
84
|
+
|
|
85
|
+
def path_entities(
    path: PathLike,
    entity_type: Type[E],
    cleaned: bool = True,
    max_line: int = MAX_LINE,
) -> Generator[E, None, None]:
    """Yield entities from the file at ``path``, or from stdin when it is ``-``.

    A file opened here stays open only while the generator is being consumed.
    """
    if str(path) != "-":
        with open(path, "rb") as fh:
            yield from binary_entities(
                fh, entity_type, cleaned=cleaned, max_line=max_line
            )
        return
    stdin = click.get_binary_stream("stdin")
    yield from binary_entities(stdin, entity_type, cleaned=cleaned, max_line=max_line)
|
|
97
|
+
|
|
98
|
+
|
|
99
|
+
@contextmanager
def path_writer(path: PathLike) -> Generator[BinaryIO, None, None]:
    """Yield a binary handle for writing: stdout for ``-``, else the opened file.

    A file opened here is closed when the context exits; stdout is left open.
    """
    if str(path) != "-":
        with open(path, "wb") as fh:
            yield fh
    else:
        yield click.get_binary_stream("stdout")
|
|
107
|
+
|
|
108
|
+
|
|
109
|
+
def export_stream(exporter: Exporter, path: Path) -> None:
    """Feed every entity read from ``path`` to ``exporter``, then finalize it.

    ``exporter.finalize()`` runs whether or not the export succeeded. A broken
    pipe is converted into ``click.Abort``.
    """
    try:
        try:
            for entity in path_entities(path, EntityProxy):
                exporter.write(entity)
        except BrokenPipeError:
            raise click.Abort()
    finally:
        exporter.finalize()
|
|
117
|
+
|
|
118
|
+
|
|
119
|
+
def load_mapping_file(file_path: PathLike) -> Any:
    """Load a YAML (or JSON) bulk load mapping file and resolve its includes.

    An empty document is treated as an empty mapping.
    """
    absolute = os.path.abspath(file_path)
    with open(absolute, "r") as fh:
        loaded = yaml.safe_load(fh)
        if not loaded:
            loaded = {}
        return resolve_includes(absolute, loaded)
|
|
125
|
+
|
|
126
|
+
|
|
127
|
+
def resolve_includes(file_path: PathLike, data: Any) -> Any:
    """Expand ``include`` statements found anywhere in a loaded mapping.

    Lists are processed element by element. In a mapping, the ``include`` key
    is removed and each path it names, taken relative to the directory of
    ``file_path``, is loaded and merged into the mapping before its values
    are processed recursively. Other values are returned unchanged.
    """
    if is_listish(data):
        return [resolve_includes(file_path, item) for item in data]
    if not is_mapping(data):
        return data

    for relative in ensure_list(data.pop("include", [])):
        included = os.path.join(os.path.dirname(file_path), relative)
        data.update(load_mapping_file(included))
    # Only existing keys are reassigned here, so the mapping keeps its size
    # while it is being iterated.
    for key in data:
        data[key] = resolve_includes(file_path, data[key])
    return data
|