followthemoney 4.5.0__py3-none-any.whl → 4.5.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -2,14 +2,14 @@ from followthemoney.entity import ValueEntity, VE
2
2
  from followthemoney.model import Model
3
3
  from followthemoney.schema import Schema
4
4
  from followthemoney.property import Property
5
- from followthemoney.types import registry
5
+ from followthemoney.types import registry, PropertyType
6
6
  from followthemoney.value import Value, Values
7
7
  from followthemoney.proxy import EntityProxy, E
8
8
  from followthemoney.statement import Statement, StatementEntity, SE
9
- from followthemoney.dataset import Dataset, DefaultDataset, DS
9
+ from followthemoney.dataset import Dataset, UndefinedDataset, DS
10
10
  from followthemoney.util import set_model_locale
11
11
 
12
- __version__ = "4.5.0"
12
+ __version__ = "4.5.1"
13
13
 
14
14
  # Data model singleton
15
15
  model = Model.instance()
@@ -20,13 +20,14 @@ __all__ = [
20
20
  "Model",
21
21
  "Schema",
22
22
  "Property",
23
+ "PropertyType",
23
24
  "Value",
24
25
  "Values",
25
26
  "EntityProxy",
26
27
  "E",
27
28
  "registry",
28
29
  "Dataset",
29
- "DefaultDataset",
30
+ "UndefinedDataset",
30
31
  "DS",
31
32
  "Statement",
32
33
  "StatementEntity",
@@ -1,12 +1,12 @@
1
1
  import click
2
2
  from pathlib import Path
3
- from typing import Generator, List
3
+ from typing import Generator, List, Optional
4
4
 
5
5
 
6
6
  from followthemoney.cli.cli import cli
7
7
  from followthemoney.cli.util import InPath, OutPath
8
8
  from followthemoney.cli.util import path_entities, write_entity, path_writer
9
- from followthemoney.dataset import Dataset, DefaultDataset
9
+ from followthemoney.dataset import Dataset, UndefinedDataset
10
10
  from followthemoney.statement import Statement, StatementEntity
11
11
  from followthemoney.statement import FORMATS, CSV
12
12
  from followthemoney.statement import write_statements
@@ -16,12 +16,18 @@ from followthemoney.statement import read_path_statements
16
16
  @cli.command("statements", help="Export entities to statements")
17
17
  @click.argument("path", type=InPath)
18
18
  @click.option("-o", "--outpath", type=OutPath, default="-")
19
- @click.option("-d", "--dataset", type=str, required=True)
19
+ @click.option("-d", "--dataset", type=str)
20
20
  @click.option("-f", "--format", type=click.Choice(FORMATS), default=CSV)
21
- def entity_statements(path: Path, outpath: Path, dataset: str, format: str) -> None:
21
+ def entity_statements(
22
+ path: Path, outpath: Path, dataset: Optional[str], format: str
23
+ ) -> None:
22
24
  def make_statements() -> Generator[Statement, None, None]:
25
+ dataset_ = dataset or Dataset.UNDEFINED
23
26
  for entity in path_entities(path, StatementEntity):
24
- yield from Statement.from_entity(entity, dataset=dataset)
27
+ for stmt in Statement.from_entity(entity, dataset=dataset_):
28
+ if dataset is not None:
29
+ stmt = stmt.clone(dataset=dataset)
30
+ yield stmt
25
31
 
26
32
  with path_writer(outpath) as outfh:
27
33
  write_statements(outfh, format, make_statements())
@@ -43,12 +49,12 @@ def format_statements(
43
49
  @cli.command("aggregate-statements", help="Roll up statements into entities")
44
50
  @click.option("-i", "--infile", type=InPath, default="-")
45
51
  @click.option("-o", "--outpath", type=OutPath, default="-")
46
- @click.option("-d", "--dataset", type=str, default=DefaultDataset.name)
52
+ @click.option("-d", "--dataset", type=str, default=UndefinedDataset.name)
47
53
  @click.option("-f", "--format", type=click.Choice(FORMATS), default=CSV)
48
54
  def statements_aggregate(
49
55
  infile: Path, outpath: Path, dataset: str, format: str
50
56
  ) -> None:
51
- dataset_ = Dataset.make({"name": dataset, "title": dataset})
57
+ dataset_ = Dataset.make({"name": dataset})
52
58
  with path_writer(outpath) as outfh:
53
59
  statements: List[Statement] = []
54
60
  for stmt in read_path_statements(infile, format=format):
@@ -6,7 +6,7 @@ import click
6
6
  import orjson
7
7
  from pathlib import Path
8
8
  from warnings import warn
9
- from typing import Any, BinaryIO, Generator, Optional, TextIO, Type
9
+ from typing import Any, BinaryIO, Generator, List, Optional, TextIO, Type
10
10
  from banal import is_mapping, is_listish, ensure_list
11
11
 
12
12
  from followthemoney.export.common import Exporter
@@ -26,7 +26,7 @@ def write_object(stream: TextIO, obj: Any) -> None:
26
26
  stream.write(data + "\n")
27
27
 
28
28
 
29
- def write_entity(fh: BinaryIO, entity: E) -> None:
29
+ def write_entity(fh: BinaryIO, entity: EntityProxy) -> None:
30
30
  data = entity.to_dict()
31
31
  entity_id = data.pop("id")
32
32
  assert entity_id is not None, data
@@ -131,7 +131,7 @@ def resolve_includes(file_path: PathLike, data: Any) -> Any:
131
131
  if is_listish(data):
132
132
  return [resolve_includes(file_path, i) for i in data]
133
133
  if is_mapping(data):
134
- include_paths = ensure_list(data.pop("include", []))
134
+ include_paths: List[str] = ensure_list(data.pop("include", []))
135
135
  for include_path in include_paths:
136
136
  dir_prefix = os.path.dirname(file_path)
137
137
  include_path = os.path.join(dir_prefix, include_path)
@@ -4,11 +4,11 @@ from followthemoney.dataset.resource import DataResource
4
4
  from followthemoney.dataset.publisher import DataPublisher
5
5
  from followthemoney.dataset.coverage import DataCoverage
6
6
 
7
- DefaultDataset = Dataset.make({"name": "default"})
7
+ UndefinedDataset = Dataset.make({"name": Dataset.UNDEFINED})
8
8
 
9
9
  __all__ = [
10
10
  "Dataset",
11
- "DefaultDataset",
11
+ "UndefinedDataset",
12
12
  "DataCatalog",
13
13
  "DataResource",
14
14
  "DataPublisher",
@@ -83,6 +83,8 @@ class Dataset:
83
83
  """A container for entities, often from one source or related to one topic.
84
84
  A dataset is a set of data, sez W3C."""
85
85
 
86
+ UNDEFINED = "undefined"
87
+
86
88
  def __init__(self: Self, data: Dict[str, Any]) -> None:
87
89
  self.model = DatasetModel.model_validate(data)
88
90
  self.name = self.model.name
@@ -15,7 +15,7 @@ from followthemoney.proxy import P
15
15
  from followthemoney.types import registry
16
16
  from followthemoney.value import string_list, Values
17
17
  from followthemoney.proxy import EntityProxy
18
- from followthemoney.dataset import Dataset, DefaultDataset
18
+ from followthemoney.dataset import Dataset, UndefinedDataset
19
19
  from followthemoney.statement.statement import Statement
20
20
  from followthemoney.statement.util import BASE_ID
21
21
 
@@ -190,6 +190,11 @@ class StatementEntity(EntityProxy):
190
190
  return []
191
191
  return list(self._statements[prop_name])
192
192
 
193
+ @property
194
+ def has_statements(self) -> bool:
195
+ """Return whether the entity has any statements."""
196
+ return len(self._statements) > 0
197
+
193
198
  def set(
194
199
  self,
195
200
  prop: P,
@@ -433,7 +438,7 @@ class StatementEntity(EntityProxy):
433
438
  origins.add(stmt.origin)
434
439
 
435
440
  data["referents"] = list(referents)
436
- data["datasets"] = list(datasets)
441
+ data["datasets"] = [d for d in datasets if d != Dataset.UNDEFINED]
437
442
  if origins:
438
443
  data["origin"] = list(origins)
439
444
 
@@ -484,7 +489,7 @@ class StatementEntity(EntityProxy):
484
489
  default_dataset: Optional[Dataset] = None,
485
490
  ) -> SE:
486
491
  # Exists only for backwards compatibility.
487
- dataset = default_dataset or DefaultDataset
492
+ dataset = default_dataset or UndefinedDataset
488
493
  return cls(dataset, data, cleaned=cleaned)
489
494
 
490
495
  @classmethod
@@ -1,4 +1,5 @@
1
1
  import csv
2
+ import sys
2
3
  import click
3
4
  import orjson
4
5
  import logging
@@ -49,6 +50,7 @@ LEGACY_PACK_COLUMNS = [
49
50
  "first_seen",
50
51
  "last_seen",
51
52
  ]
53
+ csv.field_size_limit(sys.maxsize)
52
54
 
53
55
 
54
56
  def read_json_statements(
@@ -69,6 +71,8 @@ def read_csv_statements(fh: BinaryIO) -> Generator[Statement, None, None]:
69
71
  data["lang"] = None
70
72
  if row.get("original_value") == "":
71
73
  data["original_value"] = None
74
+ if row.get("origin") == "":
75
+ data["origin"] = None
72
76
  yield Statement.from_dict(data)
73
77
 
74
78
 
@@ -101,7 +105,7 @@ def read_pack_statements_decoded(fh: TextIO) -> Generator[Statement, None, None]
101
105
  dataset=data["dataset"],
102
106
  lang=data["lang"] or None,
103
107
  original_value=data["original_value"] or None,
104
- origin=data.get("origin"),
108
+ origin=data.get("origin") or None,
105
109
  first_seen=data["first_seen"],
106
110
  external=data["external"] == "t",
107
111
  canonical_id=data["entity_id"],
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: followthemoney
3
- Version: 4.5.0
3
+ Version: 4.5.1
4
4
  Summary: A data model for anti corruption data modeling and analysis.
5
5
  Project-URL: Documentation, https://followthemoney.tech/
6
6
  Project-URL: Repository, https://github.com/opensanctions/followthemoney.git
@@ -1,4 +1,4 @@
1
- followthemoney/__init__.py,sha256=3Mvq7FWl0cNbTovkJhys0iHU24UdFT44P2AqszoGs-8,856
1
+ followthemoney/__init__.py,sha256=xxSC9m3THm5tFr_l8TY-RAEKb-ymcueZsmV4JVEjPyM,894
2
2
  followthemoney/compare.py,sha256=6y6fqtbbfW7ee4_EVXPcKCIr75GjPqXvtHfvExJ-KK0,6119
3
3
  followthemoney/entity.py,sha256=biAjuuHlwsVT02imAsaWP0YtgdfU8skCntzBU3mgJpg,4052
4
4
  followthemoney/exc.py,sha256=GyMgwY4QVm87hLevDfV7gM1MJsDqfNCi_UQw7F_A8X8,858
@@ -22,12 +22,12 @@ followthemoney/cli/cli.py,sha256=cWSQIrMS0b40uzIveoIfR9CEBbQEwcfonYhDTpioqBM,358
22
22
  followthemoney/cli/exports.py,sha256=arWgIfDhMdEEoAnANOPbgirL68qAR2pCt0otz9MEXmg,4074
23
23
  followthemoney/cli/mapping.py,sha256=PGQ-9T5ss6w6qnZg7IjUZZ3PplY15CcPSxZxkyMFLDM,3370
24
24
  followthemoney/cli/sieve.py,sha256=wLB35fCVp1ArZ7FDTbARevBk8jH4vnp65fyBZU7Lk_k,1937
25
- followthemoney/cli/statement.py,sha256=kjcOzcwWT_cHCwriiYA8B05BJpwVoI3WMJjghzPOCsY,2871
26
- followthemoney/cli/util.py,sha256=C3nGMVY3-9JHSFLn3AGvTNcAdvGcgfFS-7jXIzKg6Ik,4735
27
- followthemoney/dataset/__init__.py,sha256=rOKsI39dccDaYcSa7ASoNKkhmbFYUArxMCRqtrxy2iE,477
25
+ followthemoney/cli/statement.py,sha256=g1Ksi3iS-YsuVA40AnKpIsVDPRc4SovAe1JCoXNIW4g,3041
26
+ followthemoney/cli/util.py,sha256=EyhR6qLIfueG1OK5_DwDA_7mcDPnhNs5dyijbP3WWZY,4762
27
+ followthemoney/dataset/__init__.py,sha256=1-I1MkuTvu72tpZZcxD2GK-jq0_TwNJ29zaFQIo2ORQ,489
28
28
  followthemoney/dataset/catalog.py,sha256=bIpxr0jvJeutNSmCaXREQac7TyvZak2Y_QoCFdCM0d4,3001
29
29
  followthemoney/dataset/coverage.py,sha256=rBnKs7VngCtIuaDqrF5D0ygCHg8NAMkYbmtl7336PSI,724
30
- followthemoney/dataset/dataset.py,sha256=7lP3gz94AAaxX6J7OWlNmvPoWkoKvH7ISwoNqYsC4Go,5495
30
+ followthemoney/dataset/dataset.py,sha256=TV3enQCWJCo2cXZCEz_yPWsF-09W0p_A3RSroRbPWV8,5524
31
31
  followthemoney/dataset/publisher.py,sha256=nexZe9XexV8WI5Id999vf5OH_DPUmiKQ_GT3c59eF44,893
32
32
  followthemoney/dataset/resource.py,sha256=S_-tNjMwHQ8LcSOsZO_xhXD-vLK90wyxtIRBbyCJ0Xo,1164
33
33
  followthemoney/dataset/util.py,sha256=mfVTXdbNnWly6cXo4SjNzHuJK1c1uNBwULYOVg1gK5I,1617
@@ -115,8 +115,8 @@ followthemoney/schema/Vessel.yaml,sha256=zWHUfSK8g6Pz58ZyCaK0AFJ4u_UHjEIUGC4c_7o
115
115
  followthemoney/schema/Video.yaml,sha256=LY3DYMWTHXiAhL0hxBCNCz50cp2sPbUlEhhig5Fbjos,327
116
116
  followthemoney/schema/Workbook.yaml,sha256=iikWPElz4klA7SkWH7eae6xqhbkMCIP_3zdeXzFEMU0,354
117
117
  followthemoney/statement/__init__.py,sha256=7m2VUCAuqNZXIY0WFJRFkw5UG14QuxATL4f_xbqKwhw,633
118
- followthemoney/statement/entity.py,sha256=vznbPTMSWcezYt--bu2RretMresgwp-bUqCsv4w-U90,19568
119
- followthemoney/statement/serialize.py,sha256=PcG2Qf1jYcF_rF1YybYWBhX7NiX6WZ94u1WUv9Mh-Lw,7386
118
+ followthemoney/statement/entity.py,sha256=jwT1OiM6NiGbCWTZIG4wi0a-ROGy-GN6Vpq9pyp5mbs,19759
119
+ followthemoney/statement/serialize.py,sha256=TLpeCrpgFrQ6_AA3tcNxZhcOjlo_-c1MNfh3zE3idj4,7509
120
120
  followthemoney/statement/statement.py,sha256=bAwrrKyYRdJVxRGuXDQIOA7cdEngk1NKUaij_gAwSd4,11876
121
121
  followthemoney/statement/util.py,sha256=jHBwK3FIBynUJZRlBBOHayalAFrqpXf2f2JwkHi0zAU,1450
122
122
  followthemoney/translations/messages.pot,sha256=JhtY9NJ9wP_EAX4APxOqMyvKcX53oIC9kAxBsliJkf4,107703
@@ -162,8 +162,8 @@ followthemoney/types/phone.py,sha256=_HanfxxTV7jp75gZO2evBc9HWwQTxEMQRaoVDcoXDIQ
162
162
  followthemoney/types/string.py,sha256=SEh3xqQCnm377PGvwfR6ao85pHJCNeCUWBKnvccrJ7I,1216
163
163
  followthemoney/types/topic.py,sha256=9FIH_WmwVOFg1CJRBF4KeE6vNTn-QQkzsKU5XaMqNJ0,4604
164
164
  followthemoney/types/url.py,sha256=sSHKtzvm4kc-VTvNCPIDykOG1hUoawhORj6Bklo0a2A,1434
165
- followthemoney-4.5.0.dist-info/METADATA,sha256=dteJlaqiJHM7Du9BTLMiOgl-DEjNp2ewkkFU8nMcCYA,6747
166
- followthemoney-4.5.0.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
167
- followthemoney-4.5.0.dist-info/entry_points.txt,sha256=caoFTlf213jhg5sz3TNSofutjUTzaKtWATuSIdd9Cps,653
168
- followthemoney-4.5.0.dist-info/licenses/LICENSE,sha256=H6_EVXisnJC0-18CjXIaqrBSFq_VH3OnS7u3dccOv6g,1148
169
- followthemoney-4.5.0.dist-info/RECORD,,
165
+ followthemoney-4.5.1.dist-info/METADATA,sha256=kRhvZB4Bvc1aVmGx2n2dyvYoOGhU16uUBqKf69C7bck,6747
166
+ followthemoney-4.5.1.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
167
+ followthemoney-4.5.1.dist-info/entry_points.txt,sha256=caoFTlf213jhg5sz3TNSofutjUTzaKtWATuSIdd9Cps,653
168
+ followthemoney-4.5.1.dist-info/licenses/LICENSE,sha256=H6_EVXisnJC0-18CjXIaqrBSFq_VH3OnS7u3dccOv6g,1148
169
+ followthemoney-4.5.1.dist-info/RECORD,,