followthemoney 1.3.7__py3-none-any.whl → 3.8.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (186) hide show
  1. followthemoney/__init__.py +5 -3
  2. followthemoney/cli/__init__.py +17 -0
  3. followthemoney/cli/aggregate.py +56 -0
  4. followthemoney/cli/cli.py +88 -0
  5. followthemoney/cli/exports.py +121 -0
  6. followthemoney/cli/mapping.py +85 -0
  7. followthemoney/cli/sieve.py +67 -0
  8. followthemoney/cli/util.py +142 -0
  9. followthemoney/compare.py +130 -60
  10. followthemoney/exc.py +19 -6
  11. followthemoney/export/common.py +29 -0
  12. followthemoney/export/csv.py +82 -0
  13. followthemoney/export/excel.py +75 -0
  14. followthemoney/export/graph.py +79 -0
  15. followthemoney/export/neo4j.py +182 -0
  16. followthemoney/export/rdf.py +26 -0
  17. followthemoney/graph.py +308 -0
  18. followthemoney/helpers.py +212 -0
  19. followthemoney/mapping/__init__.py +1 -1
  20. followthemoney/mapping/csv.py +67 -35
  21. followthemoney/mapping/entity.py +116 -44
  22. followthemoney/mapping/property.py +90 -44
  23. followthemoney/mapping/query.py +27 -19
  24. followthemoney/mapping/source.py +15 -5
  25. followthemoney/mapping/sql.py +75 -61
  26. followthemoney/messages.py +13 -7
  27. followthemoney/model.py +108 -56
  28. followthemoney/namespace.py +119 -0
  29. followthemoney/offshore.py +48 -0
  30. followthemoney/ontology.py +77 -0
  31. followthemoney/property.py +204 -71
  32. followthemoney/proxy.py +455 -118
  33. followthemoney/rdf.py +9 -0
  34. followthemoney/schema/Address.yaml +78 -0
  35. followthemoney/schema/Airplane.yaml +17 -10
  36. followthemoney/schema/Analyzable.yaml +54 -0
  37. followthemoney/schema/Article.yaml +16 -0
  38. followthemoney/schema/Assessment.yaml +32 -0
  39. followthemoney/schema/Asset.yaml +10 -4
  40. followthemoney/schema/Associate.yaml +41 -0
  41. followthemoney/schema/Audio.yaml +24 -0
  42. followthemoney/schema/BankAccount.yaml +53 -9
  43. followthemoney/schema/Call.yaml +48 -0
  44. followthemoney/schema/CallForTenders.yaml +117 -0
  45. followthemoney/schema/Company.yaml +37 -12
  46. followthemoney/schema/Contract.yaml +41 -7
  47. followthemoney/schema/ContractAward.yaml +30 -11
  48. followthemoney/schema/CourtCase.yaml +16 -10
  49. followthemoney/schema/CourtCaseParty.yaml +17 -6
  50. followthemoney/schema/CryptoWallet.yaml +48 -0
  51. followthemoney/schema/Debt.yaml +37 -0
  52. followthemoney/schema/Directorship.yaml +17 -4
  53. followthemoney/schema/Document.yaml +72 -139
  54. followthemoney/schema/Documentation.yml +38 -0
  55. followthemoney/schema/EconomicActivity.yaml +32 -17
  56. followthemoney/schema/Email.yaml +76 -0
  57. followthemoney/schema/Employment.yaml +39 -0
  58. followthemoney/schema/Event.yaml +35 -3
  59. followthemoney/schema/Family.yaml +41 -0
  60. followthemoney/schema/Folder.yaml +13 -0
  61. followthemoney/schema/HyperText.yaml +21 -0
  62. followthemoney/schema/Identification.yaml +40 -0
  63. followthemoney/schema/Image.yaml +25 -0
  64. followthemoney/schema/Interest.yaml +3 -6
  65. followthemoney/schema/Interval.yaml +56 -5
  66. followthemoney/schema/LegalEntity.yaml +81 -20
  67. followthemoney/schema/License.yaml +7 -3
  68. followthemoney/schema/Membership.yaml +19 -4
  69. followthemoney/schema/Mention.yaml +54 -0
  70. followthemoney/schema/Message.yaml +78 -0
  71. followthemoney/schema/Note.yaml +23 -0
  72. followthemoney/schema/Occupancy.yaml +44 -0
  73. followthemoney/schema/Organization.yaml +38 -3
  74. followthemoney/schema/Ownership.yaml +16 -4
  75. followthemoney/schema/Package.yaml +17 -0
  76. followthemoney/schema/Page.yaml +43 -0
  77. followthemoney/schema/Pages.yaml +23 -0
  78. followthemoney/schema/Passport.yaml +16 -17
  79. followthemoney/schema/Payment.yaml +38 -7
  80. followthemoney/schema/Person.yaml +61 -5
  81. followthemoney/schema/PlainText.yaml +17 -0
  82. followthemoney/schema/Position.yaml +50 -0
  83. followthemoney/schema/Post.yaml +42 -0
  84. followthemoney/schema/Project.yaml +27 -0
  85. followthemoney/schema/ProjectParticipant.yaml +36 -0
  86. followthemoney/schema/PublicBody.yaml +14 -3
  87. followthemoney/schema/RealEstate.yaml +19 -3
  88. followthemoney/schema/Representation.yaml +17 -6
  89. followthemoney/schema/Sanction.yaml +45 -21
  90. followthemoney/schema/Security.yaml +59 -0
  91. followthemoney/schema/Similar.yaml +37 -0
  92. followthemoney/schema/Succession.yaml +36 -0
  93. followthemoney/schema/Table.yaml +32 -0
  94. followthemoney/schema/TaxRoll.yaml +27 -9
  95. followthemoney/schema/Thing.yaml +69 -13
  96. followthemoney/schema/Trip.yaml +42 -0
  97. followthemoney/schema/UnknownLink.yaml +17 -6
  98. followthemoney/schema/UserAccount.yaml +44 -0
  99. followthemoney/schema/Value.yaml +5 -1
  100. followthemoney/schema/Vehicle.yaml +25 -8
  101. followthemoney/schema/Vessel.yaml +18 -10
  102. followthemoney/schema/Video.yaml +20 -0
  103. followthemoney/schema/Workbook.yaml +18 -0
  104. followthemoney/schema.py +436 -135
  105. followthemoney/translations/ar/LC_MESSAGES/followthemoney.mo +0 -0
  106. followthemoney/translations/ar/LC_MESSAGES/followthemoney.po +2900 -787
  107. followthemoney/translations/bs/LC_MESSAGES/followthemoney.mo +0 -0
  108. followthemoney/translations/bs/LC_MESSAGES/followthemoney.po +2108 -520
  109. followthemoney/translations/de/LC_MESSAGES/followthemoney.mo +0 -0
  110. followthemoney/translations/de/LC_MESSAGES/followthemoney.po +2902 -782
  111. followthemoney/translations/es/LC_MESSAGES/followthemoney.mo +0 -0
  112. followthemoney/translations/es/LC_MESSAGES/followthemoney.po +2893 -779
  113. followthemoney/translations/fr/LC_MESSAGES/followthemoney.mo +0 -0
  114. followthemoney/translations/fr/LC_MESSAGES/followthemoney.po +4362 -0
  115. followthemoney/translations/fr/followthemoney.po +3861 -0
  116. followthemoney/translations/messages.pot +3021 -725
  117. followthemoney/translations/nb/LC_MESSAGES/followthemoney.mo +0 -0
  118. followthemoney/translations/nb/LC_MESSAGES/followthemoney.po +3778 -0
  119. followthemoney/translations/nl/LC_MESSAGES/followthemoney.mo +0 -0
  120. followthemoney/translations/nl/LC_MESSAGES/followthemoney.po +3837 -0
  121. followthemoney/translations/pt_BR/LC_MESSAGES/followthemoney.mo +0 -0
  122. followthemoney/translations/pt_BR/LC_MESSAGES/followthemoney.po +3784 -0
  123. followthemoney/translations/ru/LC_MESSAGES/followthemoney.mo +0 -0
  124. followthemoney/translations/ru/LC_MESSAGES/followthemoney.po +2837 -539
  125. followthemoney/translations/ru/followthemoney.po +4221 -0
  126. followthemoney/translations/tr/LC_MESSAGES/followthemoney.mo +0 -0
  127. followthemoney/translations/tr/LC_MESSAGES/followthemoney.po +2073 -491
  128. followthemoney/types/__init__.py +35 -17
  129. followthemoney/types/address.py +50 -21
  130. followthemoney/types/checksum.py +25 -0
  131. followthemoney/types/common.py +233 -88
  132. followthemoney/types/country.py +50 -56
  133. followthemoney/types/date.py +59 -76
  134. followthemoney/types/email.py +66 -35
  135. followthemoney/types/entity.py +66 -13
  136. followthemoney/types/gender.py +66 -0
  137. followthemoney/types/iban.py +47 -28
  138. followthemoney/types/identifier.py +49 -22
  139. followthemoney/types/ip.py +35 -21
  140. followthemoney/types/json.py +58 -0
  141. followthemoney/types/language.py +124 -37
  142. followthemoney/types/mimetype.py +44 -0
  143. followthemoney/types/name.py +56 -12
  144. followthemoney/types/number.py +30 -0
  145. followthemoney/types/phone.py +92 -34
  146. followthemoney/types/registry.py +52 -0
  147. followthemoney/types/string.py +43 -0
  148. followthemoney/types/topic.py +94 -0
  149. followthemoney/types/url.py +39 -17
  150. followthemoney/util.py +139 -45
  151. followthemoney-3.8.1.dist-info/METADATA +153 -0
  152. followthemoney-3.8.1.dist-info/RECORD +157 -0
  153. {followthemoney-1.3.7.dist-info → followthemoney-3.8.1.dist-info}/WHEEL +1 -2
  154. followthemoney-3.8.1.dist-info/entry_points.txt +17 -0
  155. followthemoney-1.3.7.dist-info/LICENSE.txt → followthemoney-3.8.1.dist-info/licenses/LICENSE +1 -1
  156. followthemoney/link.py +0 -75
  157. followthemoney/schema/Associate.yml +0 -19
  158. followthemoney/schema/Family.yml +0 -19
  159. followthemoney/schema/Land.yml +0 -9
  160. followthemoney/schema/Relationship.yaml +0 -26
  161. followthemoney/types/domain.py +0 -50
  162. followthemoney-1.3.7.dist-info/DESCRIPTION.rst +0 -3
  163. followthemoney-1.3.7.dist-info/METADATA +0 -39
  164. followthemoney-1.3.7.dist-info/RECORD +0 -108
  165. followthemoney-1.3.7.dist-info/entry_points.txt +0 -3
  166. followthemoney-1.3.7.dist-info/metadata.json +0 -1
  167. followthemoney-1.3.7.dist-info/namespace_packages.txt +0 -1
  168. followthemoney-1.3.7.dist-info/top_level.txt +0 -3
  169. ns/ontology.py +0 -128
  170. tests/types/test_addresses.py +0 -24
  171. tests/types/test_common.py +0 -32
  172. tests/types/test_countries.py +0 -27
  173. tests/types/test_dates.py +0 -73
  174. tests/types/test_domains.py +0 -23
  175. tests/types/test_emails.py +0 -32
  176. tests/types/test_entity.py +0 -19
  177. tests/types/test_iban.py +0 -109
  178. tests/types/test_identifiers.py +0 -27
  179. tests/types/test_ip.py +0 -29
  180. tests/types/test_languages.py +0 -23
  181. tests/types/test_names.py +0 -33
  182. tests/types/test_phones.py +0 -24
  183. tests/types/test_registry.py +0 -14
  184. tests/types/test_urls.py +0 -23
  185. {ns → followthemoney/export}/__init__.py +0 -0
  186. /tests/types/__init__.py → /followthemoney/py.typed +0 -0
@@ -3,12 +3,14 @@ import os
3
3
  from followthemoney.model import Model
4
4
  from followthemoney.util import set_model_locale
5
5
 
6
- __version__ = '1.3.7'
6
+ __version__ = "3.8.1"
7
+
7
8
 
8
9
  model_path = os.path.dirname(__file__)
9
- model_path = os.path.join(model_path, 'schema')
10
+ model_path = os.path.join(model_path, "schema")
11
+ model_path = os.environ.get("FTM_MODEL_PATH", model_path)
10
12
 
11
13
  # Data model singleton
12
14
  model = Model(model_path)
13
15
 
14
- __all__ = [model, set_model_locale]
16
+ __all__ = ["model", "set_model_locale"]
@@ -0,0 +1,17 @@
1
+ import sys
2
+
3
+
4
def load_entry_points() -> None:
    """Load every entry point registered in the ``followthemoney.cli`` group.

    Each entry point is loaded for its import side effect only; the object
    returned by ``load()`` is discarded.
    """
    # Compare the version as a tuple. Testing the major and minor numbers
    # separately (``major >= 3 and minor >= 10``) would wrongly send a future
    # ``4.0`` interpreter down the legacy branch.
    if sys.version_info >= (3, 10):
        from importlib.metadata import entry_points

        for ep in entry_points().select(group="followthemoney.cli"):
            ep.load()
    else:
        # Older interpreters fall back to the setuptools API.
        from pkg_resources import iter_entry_points

        for ep_ in iter_entry_points("followthemoney.cli"):
            ep_.load()
15
+
16
+
17
+ load_entry_points()
@@ -0,0 +1,56 @@
1
+ import click
2
+ from pathlib import Path
3
+ from typing import Dict, Optional, Type
4
+
5
+ from followthemoney.proxy import EntityProxy, E
6
+ from followthemoney.namespace import Namespace
7
+ from followthemoney.cli.cli import cli
8
+ from followthemoney.cli.util import InPath, OutPath, path_entities
9
+ from followthemoney.cli.util import path_writer, write_entity
10
+
11
+
12
def sorted_aggregate(path: Path, outpath: Path, entity_type: Type[E]) -> None:
    """Merge consecutive fragments that share an ID and write one entity per run.

    The input read from ``path`` must already be sorted by entity ID; only
    adjacent fragments are ever compared."""
    current: Optional[E] = None
    with path_writer(outpath) as outfh:
        for fragment in path_entities(path, entity_type=entity_type):
            if current is None:
                # First fragment of the stream starts the first run.
                current = fragment
            elif fragment.id == current.id:
                current = current.merge(fragment)
            else:
                # The ID changed: the previous run is complete.
                write_entity(outfh, current)
                current = fragment

        # Flush the last run (nothing to do for an empty input).
        if current is not None:
            write_entity(outfh, current)
29
+
30
+
31
@cli.command("aggregate", help="Aggregate multiple fragments of entities")
@click.option("-i", "--infile", type=InPath, default="-")
@click.option("-o", "--outfile", type=OutPath, default="-")
def aggregate(infile: Path, outfile: Path) -> None:
    """Merge all fragments by ID in memory, then write the combined entities.

    Every fragment is passed through ``Namespace(None).apply()`` before it is
    looked up, so merging is keyed on the ID that call produces."""
    namespace = Namespace(None)
    buffer: Dict[str, EntityProxy] = {}
    try:
        with path_writer(outfile) as outfh:
            for fragment in path_entities(infile, EntityProxy):
                fragment = namespace.apply(fragment)
                known = buffer.get(fragment.id)
                if known is None:
                    buffer[fragment.id] = fragment
                else:
                    known.merge(fragment)

            # Output is only produced once the whole input has been read.
            for merged in buffer.values():
                write_entity(outfh, merged)
    except BrokenPipeError:
        raise click.Abort()
50
+
51
+
52
@cli.command("sorted-aggregate", help="Aggregate sorted fragments of entities")
@click.option("-i", "--infile", type=InPath, default="-")
@click.option("-o", "--outfile", type=OutPath, default="-")
def sorted_aggregate_(infile: Path, outfile: Path) -> None:
    """CLI wrapper around ``sorted_aggregate`` using plain ``EntityProxy`` objects."""
    sorted_aggregate(path=infile, outpath=outfile, entity_type=EntityProxy)
@@ -0,0 +1,88 @@
1
+ import sys
2
+ import click
3
+ import orjson
4
+ import logging
5
+ from pathlib import Path
6
+ from typing import Optional, BinaryIO, List, Any, Dict
7
+ from banal import ensure_list
8
+
9
+ from followthemoney import model
10
+ from followthemoney.namespace import Namespace
11
+ from followthemoney.cli.util import InPath, OutPath, path_entities
12
+ from followthemoney.cli.util import path_writer, write_entity
13
+ from followthemoney.proxy import EntityProxy
14
+
15
+
16
@click.group(help="Utility for FollowTheMoney graph data")
def cli() -> None:
    """Root command group; configures INFO-level logging on stderr."""
    logging.basicConfig(
        stream=sys.stderr,
        level=logging.INFO,
        format="%(name)s [%(levelname)s] %(message)s",
    )
20
+
21
+
22
@cli.command("dump-model", help="Export the current schema model")
@click.option("-o", "--outfile", type=click.File("wb"), default="-")
def dump_model(outfile: BinaryIO) -> None:
    """Write ``model.to_dict()`` to ``outfile`` as indented JSON with sorted keys."""
    options = orjson.OPT_SORT_KEYS | orjson.OPT_INDENT_2
    payload = orjson.dumps(model.to_dict(), option=options)
    outfile.write(payload)
27
+
28
+
29
@cli.command("validate", help="Re-parse and validate the given data")
@click.option("-i", "--infile", type=InPath, default="-")
@click.option("-o", "--outfile", type=OutPath, default="-")
def validate(infile: Path, outfile: Path) -> None:
    """Rebuild each input entity value by value and write the result.

    Entities are read with ``cleaned=False``; every value is then re-added
    through ``add()`` on a fresh entity of the same schema and ID."""
    try:
        with path_writer(outfile) as outfh:
            for raw in path_entities(infile, EntityProxy, cleaned=False):
                rebuilt = model.make_entity(raw.schema)
                rebuilt.id = raw.id
                for prop, value in raw.itervalues():
                    rebuilt.add(prop, value)
                write_entity(outfh, rebuilt)
    except BrokenPipeError:
        raise click.Abort()
43
+
44
+
45
@cli.command("import-vis", help="Load a .VIS file and get entities")
@click.option("-i", "--infile", type=InPath, default="-")  # noqa
@click.option("-o", "--outfile", type=OutPath, default="-")  # noqa
def import_vis(infile: Path, outfile: Path) -> None:
    """Extract the entities stored in a .VIS JSON document and write them out.

    Entities are taken from the top-level ``entities`` key or, failing that,
    from ``layout.entities``.

    Raises:
        click.ClickException: if the document has neither key.
        click.Abort: if the output pipe is closed while writing.
    """
    try:
        with path_writer(outfile) as outfh:
            # ``InPath`` accepts "-" (it is also the default) to mean stdin.
            # Passing it to ``open()`` would look for a file literally named
            # "-", so the standard input stream is read explicitly instead.
            if str(infile) == "-":
                raw = click.get_binary_stream("stdin").read()
            else:
                with open(infile, "rb") as infh:
                    raw = infh.read()
            data: Dict[str, Any] = orjson.loads(raw)
            if "entities" in data:
                entities: List[Dict[str, Any]] = data["entities"]
            elif "layout" in data:
                entities = data.get("layout", {}).get("entities", data)
            else:
                raise click.ClickException("No entities found in VIS file")
            for entity_data in ensure_list(entities):
                entity = EntityProxy.from_dict(model, entity_data)
                write_entity(outfh, entity)
    except BrokenPipeError:
        # Same handling as the other streaming commands in this module.
        raise click.Abort()
61
+
62
+
63
@cli.command("sign", help="Apply a HMAC signature to entity IDs")
@click.option("-i", "--infile", type=InPath, default="-")  # noqa
@click.option("-o", "--outfile", type=OutPath, default="-")  # noqa
@click.option("-s", "--signature", default=None, help="HMAC signature key")  # noqa
def sign(infile: Path, outfile: Path, signature: Optional[str]) -> None:
    """Pass every input entity through ``Namespace(signature).apply()`` and write it."""
    namespace = Namespace(signature)
    try:
        with path_writer(outfile) as outfh:
            for entity in path_entities(infile, EntityProxy):
                write_entity(outfh, namespace.apply(entity))
    except BrokenPipeError:
        raise click.Abort()
76
+
77
+
78
@cli.command(help="Format a stream of entities to make it readable")
@click.option("-i", "--infile", type=InPath, default="-")  # noqa
def pretty(infile: Path) -> None:
    """Write each input entity to stdout as indented JSON followed by a newline."""
    out = click.get_binary_stream("stdout")
    options = orjson.OPT_APPEND_NEWLINE | orjson.OPT_INDENT_2
    try:
        for entity in path_entities(infile, EntityProxy):
            out.write(orjson.dumps(entity.to_dict(), option=options))
    except BrokenPipeError:
        raise click.Abort()
@@ -0,0 +1,121 @@
1
+ import click
2
+ from typing import List, TextIO, Generator
3
+ from pathlib import Path
4
+ from contextlib import contextmanager
5
+
6
+ from followthemoney.cli.cli import cli
7
+ from followthemoney.cli.util import InPath, OutPath, export_stream
8
+ from followthemoney.export.csv import CSVExporter
9
+ from followthemoney.export.rdf import RDFExporter
10
+ from followthemoney.export.excel import ExcelExporter
11
+ from followthemoney.export.graph import edge_types, DEFAULT_EDGE_TYPES
12
+ from followthemoney.export.graph import NXGraphExporter
13
+ from followthemoney.export.neo4j import Neo4JCSVExporter
14
+ from followthemoney.export.neo4j import CypherGraphExporter
15
+
16
+
17
@contextmanager
def text_out(path: Path) -> Generator[TextIO, None, None]:
    """Yield a writable text stream: stdout for "-", else the file at ``path``.

    A file opened here is closed when the context exits; stdout is left open."""
    if str(path) != "-":
        with open(path, "w") as fh:
            yield fh
    else:
        yield click.get_text_stream("stdout")
24
+
25
+
26
@cli.command("export-csv", help="Export to CSV")
@click.option("-i", "--infile", type=InPath, default="-")  # noqa
@click.option(
    "-o",
    "--outdir",
    type=click.Path(file_okay=False, writable=True, path_type=Path),
    default=".",
    help="output directory",
)
def export_csv(infile: Path, outdir: Path) -> None:
    """Stream the entities in ``infile`` through a ``CSVExporter`` set up for ``outdir``."""
    export_stream(CSVExporter(outdir), infile)
38
+
39
+
40
@cli.command("export-excel", help="Export to Excel")
@click.option("-i", "--infile", type=InPath, default="-")
@click.option(
    "-o",
    "--outfile",
    type=click.Path(dir_okay=False, writable=True, path_type=Path),
    required=True,
)
def export_excel(infile: Path, outfile: Path) -> None:
    """Stream the entities in ``infile`` through an ``ExcelExporter`` set up for ``outfile``."""
    export_stream(ExcelExporter(outfile), infile)
51
+
52
+
53
@cli.command("export-rdf", help="Export to RDF NTriples")
@click.option("-i", "--infile", type=InPath, default="-")
@click.option("-o", "--outfile", type=OutPath, default="-")
@click.option(
    "--qualified/--unqualified",
    is_flag=True,
    default=True,
    help="Generate full predicates",
)
def export_rdf(infile: Path, outfile: Path, qualified: bool = True) -> None:
    """Stream the entities in ``infile`` through an ``RDFExporter`` writing to ``outfile``."""
    with text_out(outfile) as out:
        export_stream(RDFExporter(out, qualified=qualified), infile)
66
+
67
+
68
@cli.command("export-gexf", help="Export to GEXF (Gephi) format")
@click.option("-i", "--infile", type=InPath, default="-")
@click.option("-o", "--outfile", type=OutPath, default="-")
@click.option(
    "-e",
    "--edge-types",
    type=click.Choice(edge_types()),
    multiple=True,
    default=DEFAULT_EDGE_TYPES,
    help="Property types to be reified into graph edges.",
)
def export_gexf(infile: Path, outfile: Path, edge_types: List[str]) -> None:
    """Stream the entities in ``infile`` through an ``NXGraphExporter`` writing to ``outfile``."""
    with text_out(outfile) as out:
        export_stream(NXGraphExporter(out, edge_types=edge_types), infile)
83
+
84
+
85
@cli.command("export-cypher", help="Export to Cypher script")
@click.option("-i", "--infile", type=InPath, default="-")  # noqa
@click.option("-o", "--outfile", type=OutPath, default="-")  # noqa
@click.option(
    "-e",
    "--edge-types",
    type=click.Choice(edge_types()),
    multiple=True,
    default=DEFAULT_EDGE_TYPES,
    help="Property types to be reified into graph edges.",
)
def export_cypher(infile: Path, outfile: Path, edge_types: List[str]) -> None:
    """Stream the entities in ``infile`` through a ``CypherGraphExporter`` writing to ``outfile``."""
    with text_out(outfile) as out:
        export_stream(CypherGraphExporter(out, edge_types=edge_types), infile)
100
+
101
+
102
@cli.command("export-neo4j-bulk", help="Export to Neo4J bulk import")
@click.option("-i", "--infile", type=InPath, default="-")  # noqa
@click.option(
    "-o",
    "--outdir",
    type=click.Path(file_okay=False, writable=True, path_type=Path),
    required=True,
    help="Output directory for Neo4J import script",
)
@click.option(
    "-e",
    "--edge-types",
    type=click.Choice(edge_types()),
    multiple=True,
    default=DEFAULT_EDGE_TYPES,
    help="Property types to be reified into graph edges.",
)
def export_neo4j_bulk(infile: Path, outdir: Path, edge_types: List[str]) -> None:
    """Stream the entities in ``infile`` through a ``Neo4JCSVExporter`` set up for ``outdir``."""
    export_stream(Neo4JCSVExporter(outdir, edge_types=edge_types), infile)
@@ -0,0 +1,85 @@
1
+ import sys
2
+ import click
3
+ from pathlib import Path
4
+ from banal import keys_values
5
+ from typing import Generator, List, TextIO, Tuple
6
+ from contextlib import contextmanager
7
+
8
+ from followthemoney import model
9
+ from followthemoney.namespace import Namespace
10
+ from followthemoney.mapping.query import QueryMapping
11
+ from followthemoney.mapping.csv import CSVSource
12
+ from followthemoney.cli.cli import cli
13
+ from followthemoney.cli.util import InPath, OutPath, load_mapping_file
14
+ from followthemoney.cli.util import path_writer, write_entity
15
+
16
+
17
@contextmanager
def input_file(path: Path) -> Generator[TextIO, None, None]:
    """Yield a readable text stream: ``sys.stdin`` for "-", else the file at ``path``.

    A file opened here is closed when the context exits; stdin is left open."""
    if str(path) != "-":
        with open(path, "r") as fh:
            yield fh
    else:
        yield sys.stdin
24
+
25
+
26
@cli.command("map", help="Execute a mapping file and emit objects")
@click.option("-o", "--outfile", type=OutPath, default="-")
@click.option(
    "--sign/--no-sign",
    is_flag=True,
    default=True,
    help="Apply HMAC signature",
)
@click.argument("mapping_yaml", type=click.Path(exists=True, path_type=Path))
def run_mapping(outfile: Path, mapping_yaml: Path, sign: bool = True) -> None:
    """Run every query of every dataset in the mapping file and write the entities.

    With ``sign`` enabled, each entity is passed through the dataset's
    ``Namespace`` before being written. Any error other than a broken pipe is
    reported as a ``click.ClickException`` carrying the error's message."""
    config = load_mapping_file(mapping_yaml)
    try:
        with path_writer(outfile) as outfh:
            for dataset, meta in config.items():
                signer = Namespace(dataset)
                for query in keys_values(meta, "queries", "query"):
                    for entity in model.map_entities(query, key_prefix=dataset):
                        if sign:
                            entity = signer.apply(entity)
                        write_entity(outfh, entity)
    except BrokenPipeError:
        raise click.Abort()
    except Exception as exc:
        raise click.ClickException(str(exc))
51
+
52
+
53
@cli.command("map-csv", help="Map CSV data from stdin and emit objects")
@click.option("-i", "--infile", type=InPath, default="-")
@click.option("-o", "--outfile", type=OutPath, default="-")
@click.option(
    "--sign/--no-sign", is_flag=True, default=True, help="Apply HMAC signature"
)
@click.argument("mapping_yaml", type=click.Path(exists=True, path_type=Path))
def stream_mapping(
    infile: Path, outfile: Path, mapping_yaml: Path, sign: bool = True
) -> None:
    """Apply the mapping file's queries to CSV rows read from ``infile``.

    Each row is offered to every query; a query whose source filters accept
    the row maps it to entities, which are optionally signed with the
    dataset's ``Namespace`` and written to ``outfile``."""
    # One namespace per dataset, built up front: the original rebuilt it for
    # every (record, query) pair inside the streaming loop.
    queries: List[Tuple[Namespace, QueryMapping]] = []
    config = load_mapping_file(mapping_yaml)
    for dataset, meta in config.items():
        ns = Namespace(dataset)
        for data in keys_values(meta, "queries", "query"):
            # Rows come from ``infile`` below, so the source named in the
            # mapping is replaced with a placeholder.
            # NOTE(review): presumably "/dev/null" only has to satisfy the
            # CSV source configuration — confirm.
            data.pop("database", None)
            data["csv_url"] = "/dev/null"
            query = model.make_mapping(data, key_prefix=dataset)
            queries.append((ns, query))

    try:
        with path_writer(outfile) as outfh:
            with input_file(infile) as fh:
                for record in CSVSource.read_csv(fh):
                    for ns, query in queries:
                        if query.source.check_filters(record):  # type: ignore
                            entities = query.map(record)
                            for entity in entities.values():
                                if sign:
                                    entity = ns.apply(entity)
                                write_entity(outfh, entity)
    except BrokenPipeError:
        raise click.Abort()
@@ -0,0 +1,67 @@
1
+ import click
2
+ from pathlib import Path
3
+ from typing import Iterable, Optional
4
+
5
+ from followthemoney import model
6
+ from followthemoney.proxy import E, EntityProxy
7
+ from followthemoney.types import registry
8
+ from followthemoney.cli.cli import cli
9
+ from followthemoney.cli.util import InPath, OutPath, path_entities
10
+ from followthemoney.cli.util import path_writer, write_entity
11
+
12
+
13
def sieve_entity(
    entity: "EntityProxy",
    schemata: Iterable[str],
    properties: Iterable[str],
    types: Iterable[str],
) -> Optional["EntityProxy"]:
    """Drop the entity entirely, or strip the unwanted properties from it.

    Args:
        entity: The entity to filter; it is modified in place.
        schemata: Schema names; an entity that ``is_a`` any of them is dropped.
        properties: Property names or qualified names to remove.
        types: Property type names whose properties are removed.

    Returns:
        ``None`` if the entity matches one of ``schemata``, otherwise the same
        ``entity`` object with the matching properties popped.
    """
    for schema in schemata:
        if entity.schema.is_a(schema):
            return None

    # Membership is tested once per property, so the filters are materialised
    # first; a one-shot iterator would otherwise be used up by the first test.
    drop_props = frozenset(properties)
    drop_types = frozenset(types)

    # Iterate over a snapshot: ``pop()`` changes the entity while we walk it.
    for prop in list(entity.iterprops()):
        if (
            prop.name in drop_props
            or prop.qname in drop_props
            or prop.type.name in drop_types
        ):
            entity.pop(prop, quiet=True)
    return entity
28
+
29
+
30
@cli.command("sieve", help="Filter out parts of entities.")
@click.option("-i", "--infile", type=InPath, default="-")
@click.option("-o", "--outfile", type=OutPath, default="-")
@click.option(
    "-s",
    "--schema",
    type=click.Choice(list(model.schemata.keys())),
    multiple=True,
    help="Filter out the given schemata.",
)
@click.option(
    "-p",
    "--property",
    multiple=True,
    help="Filter out the given property names.",
)
@click.option(
    "-t",
    "--type",
    type=click.Choice([t.name for t in registry.types]),
    multiple=True,
    help="Filter out the given property types.",
)
def sieve(
    infile: Path,
    outfile: Path,
    schema: Iterable[str],
    property: Iterable[str],
    type: Iterable[str],
) -> None:
    """Run every input entity through ``sieve_entity`` and write the survivors."""
    try:
        with path_writer(outfile) as outfh:
            for entity in path_entities(infile, EntityProxy):
                kept = sieve_entity(entity, schema, property, type)
                if kept is None:
                    # Entity matched one of the excluded schemata.
                    continue
                write_entity(outfh, kept)
    except BrokenPipeError:
        raise click.Abort()
@@ -0,0 +1,142 @@
1
+ from contextlib import contextmanager
2
+ import os
3
+ import json
4
+ import yaml
5
+ import click
6
+ import orjson
7
+ from pathlib import Path
8
+ from warnings import warn
9
+ from typing import Any, BinaryIO, Generator, Optional, TextIO, Type
10
+ from banal import is_mapping, is_listish, ensure_list
11
+
12
+ from followthemoney import model
13
+ from followthemoney.export.common import Exporter
14
+ from followthemoney.proxy import E, EntityProxy
15
+ from followthemoney.util import MEGABYTE, PathLike
16
+
17
+ MAX_LINE = 200 * MEGABYTE
18
+ InPath = click.Path(dir_okay=False, readable=True, path_type=Path, allow_dash=True)
19
+ OutPath = click.Path(dir_okay=False, writable=True, path_type=Path, allow_dash=True)
20
+
21
+
22
def write_object(stream: TextIO, obj: Any) -> None:
    """Deprecated: write ``obj`` to ``stream`` as a single line of JSON.

    Objects that expose ``to_dict()`` are converted with it before encoding."""
    warn("write_object() is deprecated.", DeprecationWarning, stacklevel=2)
    payload = obj.to_dict() if hasattr(obj, "to_dict") else obj
    stream.write(json.dumps(payload) + "\n")
28
+
29
+
30
def write_entity(fh: BinaryIO, entity: E) -> None:
    """Write ``entity`` to ``fh`` as one newline-terminated JSON document.

    The ``id`` key is placed first in the dictionary that gets serialised."""
    data = entity.to_dict()
    entity_id = data.pop("id")
    assert entity_id is not None, data
    ordered = {"id": entity_id, **data}
    fh.write(orjson.dumps(ordered, option=orjson.OPT_APPEND_NEWLINE))
38
+
39
+
40
def _read_one(data: Any, cleaned: bool = True) -> Generator[EntityProxy, None, None]:
    """Yield a proxy for ``data`` if it is a mapping with a "schema" key, else nothing."""
    if not is_mapping(data) or "schema" not in data:
        return
    yield model.get_proxy(data, cleaned=cleaned)
43
+
44
+
45
def read_entities(
    stream: TextIO, cleaned: bool = True, max_line: int = MAX_LINE
) -> Generator[EntityProxy, None, None]:
    """Deprecated: yield entity proxies from a stream of JSON lines.

    A line may hold a single entity, a list of entities, or a mapping that
    keeps them under ``entities`` or ``layout.entities`` (the latter wins if
    both keys are present). Items without a "schema" key are skipped."""
    warn("read_entities() is deprecated.", DeprecationWarning, stacklevel=2)
    line = stream.readline(max_line)
    while line:
        data = json.loads(line)
        candidates = ensure_list(data)
        if is_mapping(data):
            if "entities" in data:
                candidates = data["entities"]
            if "layout" in data:
                candidates = data.get("layout", {}).get("entities", data)
        for item in ensure_list(candidates):
            yield from _read_one(item, cleaned=cleaned)
        line = stream.readline(max_line)
62
+
63
+
64
def read_entity(
    stream: TextIO, cleaned: bool = True, max_line: int = MAX_LINE
) -> Optional[Any]:
    """Deprecated: read one JSON line and return it as an entity proxy.

    Returns ``None`` at end of input. If the decoded line is not a mapping
    with a "schema" key, the decoded JSON value itself is returned."""
    warn("read_entity() is deprecated.", DeprecationWarning, stacklevel=2)
    line = stream.readline(max_line)
    if not line:
        return None
    data = json.loads(line)
    # ``_read_one`` yields at most one proxy; fall back to the raw data.
    return next(_read_one(data, cleaned=cleaned), data)
75
+
76
+
77
def binary_entities(
    fh: BinaryIO, entity_type: Type[E], cleaned: bool = True, max_line: int = MAX_LINE
) -> Generator[E, None, None]:
    """Yield one ``entity_type`` instance per JSON line read from ``fh``.

    Lines are read with ``readline(max_line)`` until an empty read."""
    while True:
        line = fh.readline(max_line)
        if not line:
            break
        yield entity_type.from_dict(model, orjson.loads(line), cleaned=cleaned)
83
+
84
+
85
def path_entities(
    path: PathLike,
    entity_type: Type[E],
    cleaned: bool = True,
    max_line: int = MAX_LINE,
) -> Generator[E, None, None]:
    """Yield the entities stored as JSON lines at ``path``, or on stdin for "-".

    A file opened here is closed once the generator is exhausted or closed."""
    if str(path) != "-":
        with open(path, "rb") as fh:
            yield from binary_entities(
                fh, entity_type, cleaned=cleaned, max_line=max_line
            )
        return
    stdin = click.get_binary_stream("stdin")
    yield from binary_entities(stdin, entity_type, cleaned=cleaned, max_line=max_line)
97
+
98
+
99
@contextmanager
def path_writer(path: PathLike) -> Generator[BinaryIO, None, None]:
    """Yield a binary output stream: stdout for "-", else the file at ``path``.

    A file opened here is closed when the context exits; stdout is left open."""
    if str(path) != "-":
        with open(path, "wb") as fh:
            yield fh
    else:
        yield click.get_binary_stream("stdout")
107
+
108
+
109
def export_stream(exporter: Exporter, path: Path) -> None:
    """Write every entity read from ``path`` to ``exporter``.

    ``exporter.finalize()`` runs whether or not the loop finishes; a broken
    pipe is turned into ``click.Abort``."""
    try:
        entities = path_entities(path, EntityProxy)
        for proxy in entities:
            exporter.write(proxy)
    except BrokenPipeError:
        raise click.Abort()
    finally:
        exporter.finalize()
117
+
118
+
119
def load_mapping_file(file_path: PathLike) -> Any:
    """Parse a YAML (or JSON) mapping file and expand its ``include`` entries.

    An empty document is treated as an empty mapping."""
    abs_path = os.path.abspath(file_path)
    with open(abs_path, "r") as fh:
        parsed = yaml.safe_load(fh)
    return resolve_includes(abs_path, parsed or {})
125
+
126
+
127
def resolve_includes(file_path: PathLike, data: Any) -> Any:
    """Recursively expand ``include`` keys found in a loaded mapping file.

    Each included path is taken relative to the directory of ``file_path``,
    loaded with ``load_mapping_file`` and merged into the mapping that named
    it, so keys from the included file replace existing ones. Lists and
    nested mappings are processed recursively; mappings are updated in place.
    Any other value is returned unchanged."""
    if is_listish(data):
        return [resolve_includes(file_path, item) for item in data]
    if not is_mapping(data):
        return data

    base_dir = os.path.dirname(file_path)
    for include in ensure_list(data.pop("include", [])):
        data.update(load_mapping_file(os.path.join(base_dir, include)))
    # Walk a copy of the keys; only the values are replaced.
    for key in list(data):
        data[key] = resolve_includes(file_path, data[key])
    return data