nomenklatura-mpt 4.1.9__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- nomenklatura/__init__.py +11 -0
- nomenklatura/cache.py +194 -0
- nomenklatura/cli.py +260 -0
- nomenklatura/conflicting_match.py +80 -0
- nomenklatura/data/er-unstable.pkl +0 -0
- nomenklatura/data/regression-v1.pkl +0 -0
- nomenklatura/db.py +139 -0
- nomenklatura/delta.py +4 -0
- nomenklatura/enrich/__init__.py +94 -0
- nomenklatura/enrich/aleph.py +141 -0
- nomenklatura/enrich/common.py +219 -0
- nomenklatura/enrich/nominatim.py +72 -0
- nomenklatura/enrich/opencorporates.py +233 -0
- nomenklatura/enrich/openfigi.py +124 -0
- nomenklatura/enrich/permid.py +201 -0
- nomenklatura/enrich/wikidata.py +268 -0
- nomenklatura/enrich/yente.py +116 -0
- nomenklatura/exceptions.py +9 -0
- nomenklatura/index/__init__.py +5 -0
- nomenklatura/index/common.py +24 -0
- nomenklatura/index/entry.py +89 -0
- nomenklatura/index/index.py +170 -0
- nomenklatura/index/tokenizer.py +92 -0
- nomenklatura/judgement.py +21 -0
- nomenklatura/kv.py +40 -0
- nomenklatura/matching/__init__.py +47 -0
- nomenklatura/matching/bench.py +32 -0
- nomenklatura/matching/compare/__init__.py +0 -0
- nomenklatura/matching/compare/addresses.py +71 -0
- nomenklatura/matching/compare/countries.py +15 -0
- nomenklatura/matching/compare/dates.py +83 -0
- nomenklatura/matching/compare/gender.py +15 -0
- nomenklatura/matching/compare/identifiers.py +30 -0
- nomenklatura/matching/compare/names.py +157 -0
- nomenklatura/matching/compare/util.py +51 -0
- nomenklatura/matching/compat.py +66 -0
- nomenklatura/matching/erun/__init__.py +0 -0
- nomenklatura/matching/erun/countries.py +42 -0
- nomenklatura/matching/erun/identifiers.py +64 -0
- nomenklatura/matching/erun/misc.py +71 -0
- nomenklatura/matching/erun/model.py +110 -0
- nomenklatura/matching/erun/names.py +126 -0
- nomenklatura/matching/erun/train.py +135 -0
- nomenklatura/matching/erun/util.py +28 -0
- nomenklatura/matching/logic_v1/__init__.py +0 -0
- nomenklatura/matching/logic_v1/identifiers.py +104 -0
- nomenklatura/matching/logic_v1/model.py +76 -0
- nomenklatura/matching/logic_v1/multi.py +21 -0
- nomenklatura/matching/logic_v1/phonetic.py +142 -0
- nomenklatura/matching/logic_v2/__init__.py +0 -0
- nomenklatura/matching/logic_v2/identifiers.py +124 -0
- nomenklatura/matching/logic_v2/model.py +98 -0
- nomenklatura/matching/logic_v2/names/__init__.py +3 -0
- nomenklatura/matching/logic_v2/names/analysis.py +51 -0
- nomenklatura/matching/logic_v2/names/distance.py +181 -0
- nomenklatura/matching/logic_v2/names/magic.py +60 -0
- nomenklatura/matching/logic_v2/names/match.py +195 -0
- nomenklatura/matching/logic_v2/names/pairing.py +81 -0
- nomenklatura/matching/logic_v2/names/util.py +89 -0
- nomenklatura/matching/name_based/__init__.py +4 -0
- nomenklatura/matching/name_based/misc.py +86 -0
- nomenklatura/matching/name_based/model.py +59 -0
- nomenklatura/matching/name_based/names.py +59 -0
- nomenklatura/matching/pairs.py +42 -0
- nomenklatura/matching/regression_v1/__init__.py +0 -0
- nomenklatura/matching/regression_v1/misc.py +75 -0
- nomenklatura/matching/regression_v1/model.py +110 -0
- nomenklatura/matching/regression_v1/names.py +63 -0
- nomenklatura/matching/regression_v1/train.py +87 -0
- nomenklatura/matching/regression_v1/util.py +31 -0
- nomenklatura/matching/svm_v1/__init__.py +5 -0
- nomenklatura/matching/svm_v1/misc.py +94 -0
- nomenklatura/matching/svm_v1/model.py +168 -0
- nomenklatura/matching/svm_v1/names.py +81 -0
- nomenklatura/matching/svm_v1/train.py +186 -0
- nomenklatura/matching/svm_v1/util.py +30 -0
- nomenklatura/matching/types.py +227 -0
- nomenklatura/matching/util.py +62 -0
- nomenklatura/publish/__init__.py +0 -0
- nomenklatura/publish/dates.py +49 -0
- nomenklatura/publish/edges.py +32 -0
- nomenklatura/py.typed +0 -0
- nomenklatura/resolver/__init__.py +6 -0
- nomenklatura/resolver/common.py +2 -0
- nomenklatura/resolver/edge.py +107 -0
- nomenklatura/resolver/identifier.py +60 -0
- nomenklatura/resolver/linker.py +101 -0
- nomenklatura/resolver/resolver.py +565 -0
- nomenklatura/settings.py +17 -0
- nomenklatura/store/__init__.py +41 -0
- nomenklatura/store/base.py +130 -0
- nomenklatura/store/level.py +272 -0
- nomenklatura/store/memory.py +102 -0
- nomenklatura/store/redis_.py +131 -0
- nomenklatura/store/sql.py +219 -0
- nomenklatura/store/util.py +48 -0
- nomenklatura/store/versioned.py +371 -0
- nomenklatura/tui/__init__.py +17 -0
- nomenklatura/tui/app.py +294 -0
- nomenklatura/tui/app.tcss +52 -0
- nomenklatura/tui/comparison.py +81 -0
- nomenklatura/tui/util.py +35 -0
- nomenklatura/util.py +26 -0
- nomenklatura/versions.py +119 -0
- nomenklatura/wikidata/__init__.py +14 -0
- nomenklatura/wikidata/client.py +122 -0
- nomenklatura/wikidata/lang.py +94 -0
- nomenklatura/wikidata/model.py +139 -0
- nomenklatura/wikidata/props.py +70 -0
- nomenklatura/wikidata/qualified.py +49 -0
- nomenklatura/wikidata/query.py +66 -0
- nomenklatura/wikidata/value.py +87 -0
- nomenklatura/xref.py +125 -0
- nomenklatura_mpt-4.1.9.dist-info/METADATA +159 -0
- nomenklatura_mpt-4.1.9.dist-info/RECORD +118 -0
- nomenklatura_mpt-4.1.9.dist-info/WHEEL +4 -0
- nomenklatura_mpt-4.1.9.dist-info/entry_points.txt +3 -0
- nomenklatura_mpt-4.1.9.dist-info/licenses/LICENSE +21 -0
nomenklatura/tui/app.py
ADDED
@@ -0,0 +1,294 @@
|
|
1
|
+
import asyncio
|
2
|
+
from typing import Dict, Optional, Set, Tuple, cast
|
3
|
+
|
4
|
+
from rich.console import RenderableType
|
5
|
+
from rich.text import Text
|
6
|
+
from textual.app import App, ComposeResult
|
7
|
+
from textual.containers import Grid, VerticalScroll
|
8
|
+
from textual.screen import ModalScreen
|
9
|
+
from textual.widget import Widget
|
10
|
+
from textual.widgets import Button, Footer, Label, ListItem, ListView, Static
|
11
|
+
|
12
|
+
from followthemoney import DS, SE
|
13
|
+
|
14
|
+
from nomenklatura.judgement import Judgement
|
15
|
+
from nomenklatura.resolver import Resolver
|
16
|
+
from nomenklatura.resolver.edge import Edge
|
17
|
+
from nomenklatura.store import Store
|
18
|
+
from nomenklatura.tui.comparison import render_comparison
|
19
|
+
|
20
|
+
# Number of judgements requested from the resolver when the history list is
# (re)built by HistoryWidget.reload_history().
HISTORY_LENGTH = 20
|
21
|
+
|
22
|
+
|
23
|
+
class DedupeState(object):
    """Mutable state of an interactive de-duplication session.

    Keeps the resolver, the store and a view onto it, the candidate pair that
    is currently displayed (with its score) and a few display options.
    """

    def __init__(
        self,
        resolver: Resolver[SE],
        store: Store[DS, SE],
        url_base: Optional[str] = None,
    ):
        """Initialise the session state.

        Args:
            resolver: Supplies candidate pairs and records judgements.
            store: Entity store; its default view (``external=True``) is used
                to load the entities that get displayed.
            url_base: Optional URL template handed to the comparison renderer.
        """
        self.store = store
        self.resolver = resolver
        self.view = store.default_view(external=True)
        self.url_base = url_base
        # Whether captions should be latinized when rendered.
        self.latinize = False
        # Transient message shown instead of the comparison, if set.
        self.message: Optional[str] = None
        # Canonical ID pairs that were already rejected as candidates.
        self.ignore: Set[Tuple[str, str]] = set()
        # The candidate pair currently on display (None when there is none).
        self.left: Optional[SE] = None
        self.right: Optional[SE] = None
        self.score = 0.0
        # Pre-merge entities of recently decided pairs, keyed by entity ID.
        self.recents: Dict[str, SE] = dict()

    def load(self) -> bool:
        """Load the next usable candidate pair from the resolver.

        Candidates are skipped (and remembered in ``self.ignore``) when they
        have no score, fail ``resolver.check_candidate``, cannot be loaded
        from the view, or have schemata that are neither equal nor matchable.

        Returns:
            True if a pair was loaded into ``self.left`` / ``self.right`` /
            ``self.score``; False if no candidate qualified, in which case
            ``self.left`` and ``self.right`` are both None.
        """
        self.left = None
        self.right = None
        self.resolver.begin()
        for left_id, right_id, score in self.resolver.get_candidates():
            left_id = self.resolver.get_canonical(left_id)
            right_id = self.resolver.get_canonical(right_id)
            pair = (left_id, right_id)
            if pair in self.ignore:
                continue
            if score is None or not self.resolver.check_candidate(left_id, right_id):
                self.ignore.add(pair)
                continue
            # Work on locals and publish them only once the pair is accepted,
            # so a rejected pair can never linger as the displayed candidate.
            left = self.view.get_entity(left_id)
            right = self.view.get_entity(right_id)
            if left is not None and right is not None:
                if left.schema == right.schema or left.schema.can_match(right.schema):
                    self.left = left
                    self.right = right
                    self.score = score
                    return True
            self.ignore.add(pair)
        return False

    def decide(self, judgement: Judgement) -> None:
        """Record ``judgement`` for the current pair, then load the next one.

        If no complete pair (with IDs) is loaded, nothing is recorded, but the
        resolver is still committed and the next candidate is loaded.
        """
        if self.left is not None and self.left.id is not None:
            if self.right is not None and self.right.id is not None:
                # Since we don't have an unresolved store as well as the resolved one,
                # hold on to pre-merge entities to show in history.
                self.recents[self.left.id] = self.left
                self.recents[self.right.id] = self.right
                canonical_id = self.resolver.decide(
                    self.left.id,
                    self.right.id,
                    judgement=judgement,
                )
                self.store.update(canonical_id)
        self.resolver.commit()
        self.load()

    def edit(self, edge: Edge, judgement: Judgement) -> None:
        """Replace the judgement on an existing edge, then reload candidates.

        Both ends of the edge are updated in the store before the resolver is
        committed.
        """
        self.resolver.decide(edge.source, edge.target, judgement)
        self.store.update(edge.source)
        self.store.update(edge.target)
        self.resolver.commit()
        self.load()
|
90
|
+
|
91
|
+
|
92
|
+
class DedupeAppWidget(Widget):
    """Base widget that exposes the running app's dedupe state."""

    @property
    def dedupe(self) -> DedupeState:
        """The ``DedupeState`` attached to the owning ``DedupeApp``."""
        owner = cast(DedupeApp, self.app)
        return owner.dedupe
|
96
|
+
|
97
|
+
|
98
|
+
class HistoryItem(Static, DedupeAppWidget):
    """Static text block describing one past judgement (a resolver edge)."""

    def __init__(self, edge: Edge) -> None:
        """Build the text for ``edge`` and hand it to ``Static``.

        Captions come from the recently decided entities kept on the dedupe
        state; only the target additionally falls back to the store view.
        """
        self.edge = edge
        state = self.dedupe
        source_id = edge.source.id
        target_id = edge.target.id

        source = state.recents.get(source_id, None)
        target = state.recents.get(target_id, None)
        if target is None:
            target = state.view.get_entity(target_id)

        source_str = f"src: {source_id}"
        if source:
            source_str += f"\n {source.caption}"
        target_str = f"tgt: {target_id}"
        if target:
            target_str += f"\n {target.caption}"

        content = (
            f"{edge.created_at or 'unknown time'}\n"
            f"{source_str}\n"
            f"{target_str}\n"
            f"{edge.user} decided {edge.judgement.value}"
        )
        super().__init__(content)
|
119
|
+
|
120
|
+
|
121
|
+
class ConfirmEditModal(ModalScreen[bool]):
    """Yes/No dialog asking whether an existing judgement should be changed.

    ``edge`` and ``judgement`` must be assigned before the screen is composed.
    The screen is dismissed with True for "Yes" and False otherwise.
    """

    edge: Optional[Edge] = None
    judgement: Optional[Judgement] = None

    def compose(self) -> ComposeResult:
        """Lay out the question and the two buttons in a grid."""
        assert self.edge is not None
        assert self.judgement is not None
        message = f"Change {self.edge.source.id} -> {self.edge.target.id} to {self.judgement.value}?"
        question = Label(message, id="question")
        yes_button = Button("Yes", variant="error", id="yes")
        no_button = Button("No", variant="primary", id="no")
        yield Grid(question, yes_button, no_button, id="dialog")

    def on_button_pressed(self, event: Button.Pressed) -> None:
        """Dismiss with True only when the "yes" button was pressed."""
        confirmed = event.button.id == "yes"
        self.dismiss(confirmed)
|
141
|
+
|
142
|
+
|
143
|
+
class HistoryListView(ListView):
    """List of past judgements with key bindings to re-judge the highlighted one."""

    BINDINGS = [
        ("x", "positive", "Match"),
        ("n", "negative", "No match"),
        ("u", "unsure", "Unsure"),
        ("d", "delete", "No judgement"),
    ]

    async def trigger_edit(self, judgement: Judgement) -> None:
        """Ask the app to change the highlighted entry to ``judgement``.

        Does nothing when no list item is highlighted.
        """
        item = self.highlighted_child
        if item is not None:
            edge = item.query_one(HistoryItem).edge
            app = cast(DedupeApp, self.app)
            await app.edit(edge, judgement)

    async def action_positive(self) -> None:
        """Key binding: mark the highlighted entry as a match."""
        await self.trigger_edit(Judgement.POSITIVE)

    async def action_negative(self) -> None:
        """Key binding: mark the highlighted entry as no match."""
        await self.trigger_edit(Judgement.NEGATIVE)

    async def action_unsure(self) -> None:
        """Key binding: mark the highlighted entry as unsure."""
        await self.trigger_edit(Judgement.UNSURE)

    async def action_delete(self) -> None:
        """Key binding: reset the highlighted entry to no judgement."""
        await self.trigger_edit(Judgement.NO_JUDGEMENT)
|
169
|
+
|
170
|
+
|
171
|
+
class HistoryWidget(DedupeAppWidget):
    """Side panel with a help text and the list of recent judgements.

    The panel starts hidden and is only populated while it is visible.
    """

    list_view: ListView
    is_visible: bool = False

    def compose(self) -> ComposeResult:
        """Create the help text and the (initially empty) history list."""
        self.list_view = HistoryListView()
        help_text = (
            "Tab to toggle between dedupe and history.\n"
            "Arrow up/down to select history to edit."
        )
        yield Static(help_text, classes="help")
        yield self.list_view

    def on_mount(self) -> None:
        """Set the border title, apply the visibility and fill the list."""
        self.border_title = "History"
        self._apply_visibility()
        self.reload_history()

    def toggle_visible(self) -> None:
        """Flip the panel between shown and hidden, refreshing its content."""
        self.is_visible = not self.is_visible
        self._apply_visibility()
        self.reload_history()

    def reload_history(self) -> None:
        """Rebuild the list from the resolver's judgements (only if visible)."""
        if self.is_visible:
            entries = self.list_view
            entries.clear()
            for edge in self.dedupe.resolver.get_judgements(HISTORY_LENGTH):
                entries.append(ListItem(HistoryItem(edge)))
            entries.scroll_home(animate=False)

    def _apply_visibility(self) -> None:
        """Translate ``is_visible`` into the widget's ``display`` style."""
        self.styles.display = "block" if self.is_visible else "none"
|
209
|
+
|
210
|
+
|
211
|
+
class CompareWidget(DedupeAppWidget, can_focus=True):
    """Focusable widget that shows the current candidate comparison."""

    def render(self) -> RenderableType:
        """Render a pending message, the comparison table, or a placeholder.

        A message on the dedupe state takes precedence; without a loaded pair
        the text "No candidates." is shown.
        """
        state = self.dedupe
        if state.message is not None:
            return Text(state.message, justify="center")
        if not (state.left and state.right):
            return Text("No candidates.", justify="center")
        return render_comparison(
            state.view,
            state.left,
            state.right,
            state.score,
            latinize=state.latinize,
            url_base=state.url_base,
        )
|
225
|
+
|
226
|
+
|
227
|
+
class DedupeWidget(Widget):
    """Container holding the scrollable comparison and the history panel."""

    def compose(self) -> ComposeResult:
        """Yield the comparison (inside a vertical scroll) and the history."""
        comparison_pane = VerticalScroll(CompareWidget())
        yield comparison_pane
        history_pane = HistoryWidget()
        yield history_pane
|
231
|
+
|
232
|
+
|
233
|
+
class DedupeApp(App[int]):
    """Textual application for judging candidate pairs interactively.

    ``dedupe`` must be assigned before the app is run; ``compose`` loads the
    first candidate from it.
    """

    CSS_PATH = "app.tcss"
    dedupe: DedupeState

    BINDINGS = [
        ("x", "positive", "Match"),
        ("n", "negative", "No match"),
        ("u", "unsure", "Unsure"),
        ("l", "latinize", "Latinize"),
        ("h", "history", "Toggle History"),
        ("q", "exit_hard", "Quit"),
    ]

    def compose(self) -> ComposeResult:
        """Load the first candidate, then yield the main widget and footer."""
        self.dedupe.load()
        yield DedupeWidget()
        yield Footer()

    def force_render(self) -> None:
        """Refresh the comparison and rebuild/refresh the history panel."""
        comparison = self.query_one(CompareWidget)
        comparison.refresh(layout=True)
        history = self.query_one(HistoryWidget)
        history.reload_history()
        history.refresh(layout=True)

    async def decide(self, judgement: Judgement) -> None:
        """Record ``judgement`` for the current pair and redraw."""
        self.dedupe.decide(judgement)
        self.force_render()

    async def edit(self, edge: Edge, judgement: Judgement) -> None:
        """Ask for confirmation, then change the judgement stored on ``edge``.

        On cancel a notice is shown for one second instead of the comparison.
        """

        async def handle_confirmation(confirmed: bool | None) -> None:
            if not confirmed:
                self.dedupe.message = "Canceled edit."
                self.force_render()
                await asyncio.sleep(1)
                self.dedupe.message = None
                self.force_render()
                return
            self.dedupe.edit(edge, judgement)
            self.force_render()

        dialog = ConfirmEditModal()
        dialog.edge = edge
        dialog.judgement = judgement
        self.app.push_screen(dialog, handle_confirmation)

    async def action_positive(self) -> None:
        """Key binding: judge the current pair as a match."""
        await self.decide(Judgement.POSITIVE)

    async def action_negative(self) -> None:
        """Key binding: judge the current pair as no match."""
        await self.decide(Judgement.NEGATIVE)

    async def action_unsure(self) -> None:
        """Key binding: judge the current pair as unsure."""
        await self.decide(Judgement.UNSURE)

    async def action_latinize(self) -> None:
        """Key binding: toggle latinized captions and redraw."""
        self.dedupe.latinize = not self.dedupe.latinize
        self.force_render()

    async def action_history(self) -> None:
        """Key binding: show or hide the history panel."""
        panel = self.query_one(HistoryWidget)
        panel.toggle_visible()

    async def action_exit_hard(self) -> None:
        """Key binding: exit the app with result 0."""
        self.exit(0)
|
@@ -0,0 +1,52 @@
|
|
1
|
+
/* Confirmation modal: centre its content on the screen. */
ConfirmEditModal {
    align: center middle;
}

/* The Grid with id "dialog" built by ConfirmEditModal: two columns, with the
   question in the first row and the buttons in a fixed-height second row. */
#dialog {
    grid-size: 2;
    grid-gutter: 1 2;
    grid-rows: 1fr 3;
    padding: 0 1;
    width: 60;
    height: 11;
    border: thick $background 80%;
    background: $surface;
}

/* The question Label spans both grid columns. */
#question {
    column-span: 2;
    height: 1fr;
    width: 1fr;
    content-align: center middle;
}


/* One entry in the history list. */
HistoryItem {
    border: solid white;
}

/* The history side panel has a fixed width. */
HistoryWidget {
    width: 50;
    border: solid white;
}

/* Help text (class "help") placed directly inside the history panel. */
HistoryWidget > .help {
    padding: 1
}

/* Comparison and history panel are laid out side by side. */
DedupeWidget {
    layout: horizontal;
}

/* Background used while the comparison has keyboard focus. */
CompareWidget:focus {
    background: #222222;
}

/* The comparison takes the remaining width; its height follows its content. */
CompareWidget {
    width: 1fr;
    height: auto;
}

DedupeApp {
    layout: vertical;
}
|
@@ -0,0 +1,81 @@
|
|
1
|
+
from typing import Optional, Union
|
2
|
+
from normality import latinize_text
|
3
|
+
from rich.table import Table
|
4
|
+
from rich.text import Text
|
5
|
+
from followthemoney import DS, registry, Property
|
6
|
+
from followthemoney import SE, StatementEntity as Entity
|
7
|
+
|
8
|
+
from nomenklatura.store import View
|
9
|
+
from nomenklatura.tui.util import comparison_props
|
10
|
+
|
11
|
+
|
12
|
+
def render_column(entity: Entity) -> Text:
    """Build a column header: the schema label in blue, then the ID in brackets."""
    schema_part = (entity.schema.label, "blue")
    id_part = " [%s]" % entity.id
    return Text.assemble(schema_part, id_part, no_wrap=True)
|
16
|
+
|
17
|
+
|
18
|
+
def render_values(
    view: View[DS, SE], prop: Property, entity: SE, other: SE, latinize: bool
) -> Text:
    """Render the values of ``prop`` on ``entity`` as one styled text.

    Each value is compared against all values of the same type on ``other``
    and coloured by the resulting score: above 0.95 green, above 0.7 orange,
    otherwise the default style. Values are shown in sorted order, separated
    by a middle dot.

    Args:
        view: Used to look up captions of referenced entities.
        prop: The property whose values are rendered.
        entity: The entity the values are read from.
        other: The entity the values are compared against.
        latinize: Whether captions are passed through ``latinize_text``.

    Returns:
        The assembled text; empty if there is nothing to show.
    """
    values = entity.get(prop, quiet=True)
    other_values = other.get_type_values(prop.type)
    text = Text()
    # Tracks whether a caption has been emitted, so that the separator is only
    # placed *between* captions even when earlier values were skipped.
    has_content = False
    for value in sorted(values):
        caption = prop.type.caption(value)
        if prop.type == registry.entity:
            # Prefer the caption of the referenced entity over its raw ID.
            sub = view.get_entity(value)
            if sub is not None:
                caption = sub.caption
        score = prop.type.compare_sets([value], other_values)
        if latinize:
            caption = latinize_text(caption) or caption
        if prop.name == "wikidataId":
            caption = f"https://wikidata.org/wiki/{value}"
        if score > 0.95:
            style = "green1"
        elif score > 0.7:
            style = "orange1"
        else:
            style = "default"
        if caption is None:
            continue
        if has_content:
            text.append(" · ", "gray")
        text.append(caption, style)
        has_content = True
    return text
|
45
|
+
|
46
|
+
|
47
|
+
def render_comparison(
    view: View[DS, SE],
    left: SE,
    right: SE,
    score: float,
    latinize: bool = False,
    url_base: Optional[str] = None,
) -> Union[Table, Text]:
    """Build the side-by-side comparison table for two entities.

    The header row carries the score and both entity headers; then comes one
    row per compared property, a "Sources" row with the dataset names and,
    when ``url_base`` is given, a "URL" row produced by ``url_base % id``.
    A placeholder text is returned if either entity is None.
    """
    if left is None or right is None:
        return Text("No candidates loaded.", justify="center")

    table = Table(expand=True)
    table.add_column("Score: %.3f" % score, justify="right", no_wrap=True, ratio=2)
    for entity in (left, right):
        table.add_column(render_column(entity), ratio=5)

    for prop in comparison_props(left, right):
        table.add_row(
            Text(prop.label, "white bold"),
            render_values(view, prop, left, right, latinize),
            render_values(view, prop, right, left, latinize),
        )

    table.add_row(
        Text("Sources", "grey bold"),
        Text(", ".join(left.datasets)),
        Text(", ".join(right.datasets)),
    )

    if url_base is not None:
        table.add_row(
            Text("URL", "grey bold"),
            Text(url_base % left.id),
            Text(url_base % right.id),
        )

    return table
|
nomenklatura/tui/util.py
ADDED
@@ -0,0 +1,35 @@
|
|
1
|
+
from typing import Generator, Tuple
|
2
|
+
from followthemoney import registry, Property, SE
|
3
|
+
|
4
|
+
# Sort weight per property type, used by comparison_props(): properties are
# sorted ascending by weight, so lower numbers are listed first. Types that
# are not listed here get a weight of 0.
TYPE_ORDER = {
    registry.name: -6,
    registry.identifier: -5,
    registry.date: -4,
    registry.country: -3,
    registry.string: -1,
    registry.text: 3,
}
|
12
|
+
|
13
|
+
|
14
|
+
def comparison_props(left: SE, right: SE) -> Generator[Property, None, None]:
    """Yield the properties to show when comparing ``left`` and ``right``.

    Properties set on either entity are ordered by a weight derived from
    ``TYPE_ORDER`` (reduced by 10 for each of the two schemata that features
    the property), then by label. Hidden properties are left out, as are
    properties marked non-matchable although their type is matchable.
    """
    candidates = set(left.iterprops())
    candidates.update(right.iterprops())

    weights = {p.name: TYPE_ORDER.get(p.type, 0) for p in candidates}
    schemata = (left.schema, right.schema)
    for candidate in candidates:
        featured_in = sum(1 for schema in schemata if candidate.name in schema.featured)
        weights[candidate.name] -= 10 * featured_in

    ordered = sorted(candidates, key=lambda p: (weights[p.name], p.label))
    for prop in ordered:
        if prop.hidden or (prop.type.matchable and not prop.matchable):
            continue
        # if prop.type == registry.entity:
        #     continue
        yield prop
|
nomenklatura/util.py
ADDED
@@ -0,0 +1,26 @@
|
|
1
|
+
import re
|
2
|
+
import os
|
3
|
+
from pathlib import Path
|
4
|
+
from collections.abc import Mapping
|
5
|
+
from typing import Iterable, TypeVar, List, Union, Optional
|
6
|
+
from rigour.time import iso_datetime
|
7
|
+
|
8
|
+
# Generic element type used by unroll().
T = TypeVar("T")
# Absolute path of the "data" directory that sits next to this module.
DATA_PATH = Path(os.path.join(os.path.dirname(__file__), "data")).resolve()
# Matches runs of characters other than upper-case ASCII letters and digits.
ID_CLEAN = re.compile(r"[^A-Z0-9]+", re.UNICODE)
# Optional mapping from string keys to str, bytes or None values
# (presumably HTTP headers - confirm against the callers).
HeadersType = Optional[Mapping[str, Union[str, bytes, None]]]
|
12
|
+
|
13
|
+
|
14
|
+
def iso_to_version(value: str) -> Optional[str]:
    """Convert an ISO timestamp into a ``YYYYMMDDHHMMSS`` version string.

    Returns None when ``iso_datetime`` cannot parse ``value``.
    """
    # Phase this out - it won't be used in new FtM metadata, is used by yente
    parsed = iso_datetime(value)
    return None if parsed is None else parsed.strftime("%Y%m%d%H%M%S")
|
20
|
+
|
21
|
+
|
22
|
+
def unroll(values: Iterable[Iterable[T]]) -> List[T]:
    """Flatten one level of nesting into a new list, keeping the order."""
    return [item for group in values for item in group]
|
nomenklatura/versions.py
ADDED
@@ -0,0 +1,119 @@
|
|
1
|
+
import os
|
2
|
+
import json
|
3
|
+
import string
|
4
|
+
import random
|
5
|
+
from rigour.time import utc_now
|
6
|
+
from typing import Any, List, Iterator, Optional
|
7
|
+
from datetime import datetime
|
8
|
+
|
9
|
+
ALPHABET = string.ascii_lowercase
|
10
|
+
|
11
|
+
|
12
|
+
class Version(object):
    """A class to represent a dataset version, which consists of a timestamp
    and a string tag.

    The string form is ``YYYYMMDDHHMMSS-tag``. Versions compare equal to any
    object whose ``str()`` equals that form.
    """

    __slots__ = ["dt", "tag"]

    def __init__(self, dt: datetime, tag: str) -> None:
        self.dt: datetime = dt
        self.tag: str = tag

    @classmethod
    def new(cls, tag: Optional[str] = None) -> "Version":
        """Create a version stamped with the current UTC time.

        Args:
            tag: Optional tag; it is right-padded with "x" and cut to three
                characters. Without a tag, one is derived from the current
                millisecond plus a random digit.

        Returns:
            A version whose timestamp is naive and truncated to full seconds.
        """
        now = utc_now().replace(tzinfo=None)

        if tag is None:
            tag = cls._auto_tag(now)

        tag = tag.ljust(3, "x")[:3]
        now = now.replace(microsecond=0)
        return cls(now, tag)

    @classmethod
    def _auto_tag(cls, now: datetime) -> str:
        """Derive a three-letter tag from the sub-second part of ``now``."""
        # Milliseconds times ten plus a random digit gives 0..9999, which fits
        # into three base-26 letters.
        tag_num = (now.microsecond // 1000) * 10 + random.randint(0, 9)
        # Pad on the left with the zero digit so that tags created later
        # within the same second also sort later as strings.
        return cls._tag_encode(tag_num).rjust(3, ALPHABET[0])

    @classmethod
    def from_string(cls, id: str) -> "Version":
        """Parse a ``YYYYMMDDHHMMSS-tag`` string.

        Raises:
            ValueError: If there is no "-" separator or the timestamp part
                does not match the expected format.
        """
        if "-" not in id:
            raise ValueError(f"Invalid dataset version: {id}")
        ts, tag = id.split("-", 1)
        # The format has no time zone directive, so the result is naive.
        dt = datetime.strptime(ts, "%Y%m%d%H%M%S")
        return cls(dt, tag)

    @classmethod
    def _tag_encode(cls, number: int, alphabet: str = ALPHABET) -> str:
        """Encode a non-negative integer using ``alphabet`` as the digits.

        With the default alphabet (lower-case ASCII letters) this is base 26,
        where "a" is the zero digit.

        Raises:
            ValueError: If ``number`` is negative.
        """
        if number < 0:
            # An explicit check (not an assert) because the loop below would
            # never terminate for negative input.
            raise ValueError("number must not be negative")
        if number < len(alphabet):
            return alphabet[number]

        encoded = ""
        while number != 0:
            number, i = divmod(number, len(alphabet))
            encoded = alphabet[i] + encoded
        return encoded

    @classmethod
    def from_env(cls, name: str) -> "Version":
        """Read a version from the environment variable ``name``.

        A new version is created when the variable is not set.
        """
        value = os.environ.get(name)
        if value is None:
            return cls.new()
        return cls.from_string(value)

    @property
    def id(self) -> str:
        """The ``YYYYMMDDHHMMSS-tag`` string form of this version."""
        return f"{self.dt.strftime('%Y%m%d%H%M%S')}-{self.tag}"

    def __str__(self) -> str:
        return self.id

    def __repr__(self) -> str:
        return f"Version({self.id})"

    def __eq__(self, other: Any) -> bool:
        return self.id == str(other)

    def __hash__(self) -> int:
        return hash(self.id)
|
80
|
+
|
81
|
+
|
82
|
+
class VersionHistory(object):
    """A class to represent a history of dataset versions.

    The history is treated as immutable: ``append`` returns a new history
    limited to the newest ``max_length`` entries.
    """

    LENGTH = 100

    def __init__(self, items: List[Version], max_length: int = LENGTH) -> None:
        """Wrap ``items`` (oldest first); they are not truncated here."""
        self.items = items
        self.max_length = max_length

    def _newest(self, items: List[Version]) -> List[Version]:
        """Return the last ``max_length`` entries of ``items``."""
        # Computing the start index avoids ``items[-0:]``, which would keep
        # everything when ``max_length`` is 0.
        start = max(0, len(items) - self.max_length)
        return items[start:]

    def append(self, version: Version) -> "VersionHistory":
        """Creates a new history with the given version appended.

        The new history keeps this history's ``max_length``.
        """
        items = list(self.items)
        items.append(version)
        return VersionHistory(self._newest(items), max_length=self.max_length)

    @property
    def latest(self) -> Optional[Version]:
        """The most recently appended version, or None if there is none."""
        if not self.items:
            return None
        return self.items[-1]

    def to_json(self) -> str:
        """Return a JSON representation of the version history.

        The result is an object with one key, ``items``, holding the string
        form of the newest ``max_length`` versions.
        """
        items = [str(run) for run in self._newest(self.items)]
        return json.dumps({"items": items})

    @classmethod
    def from_json(cls, data: str) -> "VersionHistory":
        """Create a version history from a JSON representation.

        A missing ``items`` key yields an empty history. Each item is parsed
        with ``Version.from_string``.
        """
        raw_items = json.loads(data).get("items", [])
        return cls([Version.from_string(item) for item in raw_items])

    def __iter__(self) -> Iterator[Version]:
        return iter(self.items)

    def __len__(self) -> int:
        return len(self.items)
|
@@ -0,0 +1,14 @@
|
|
1
|
+
from nomenklatura.wikidata.client import WikidataClient
|
2
|
+
from nomenklatura.wikidata.lang import LangText
|
3
|
+
from nomenklatura.wikidata.model import Item, Claim
|
4
|
+
from nomenklatura.wikidata.query import SparqlBinding, SparqlResponse, SparqlValue
|
5
|
+
|
6
|
+
__all__ = [
|
7
|
+
"WikidataClient",
|
8
|
+
"LangText",
|
9
|
+
"Item",
|
10
|
+
"Claim",
|
11
|
+
"SparqlBinding",
|
12
|
+
"SparqlResponse",
|
13
|
+
"SparqlValue",
|
14
|
+
]
|