nomenklatura-mpt 4.1.9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (118) hide show
  1. nomenklatura/__init__.py +11 -0
  2. nomenklatura/cache.py +194 -0
  3. nomenklatura/cli.py +260 -0
  4. nomenklatura/conflicting_match.py +80 -0
  5. nomenklatura/data/er-unstable.pkl +0 -0
  6. nomenklatura/data/regression-v1.pkl +0 -0
  7. nomenklatura/db.py +139 -0
  8. nomenklatura/delta.py +4 -0
  9. nomenklatura/enrich/__init__.py +94 -0
  10. nomenklatura/enrich/aleph.py +141 -0
  11. nomenklatura/enrich/common.py +219 -0
  12. nomenklatura/enrich/nominatim.py +72 -0
  13. nomenklatura/enrich/opencorporates.py +233 -0
  14. nomenklatura/enrich/openfigi.py +124 -0
  15. nomenklatura/enrich/permid.py +201 -0
  16. nomenklatura/enrich/wikidata.py +268 -0
  17. nomenklatura/enrich/yente.py +116 -0
  18. nomenklatura/exceptions.py +9 -0
  19. nomenklatura/index/__init__.py +5 -0
  20. nomenklatura/index/common.py +24 -0
  21. nomenklatura/index/entry.py +89 -0
  22. nomenklatura/index/index.py +170 -0
  23. nomenklatura/index/tokenizer.py +92 -0
  24. nomenklatura/judgement.py +21 -0
  25. nomenklatura/kv.py +40 -0
  26. nomenklatura/matching/__init__.py +47 -0
  27. nomenklatura/matching/bench.py +32 -0
  28. nomenklatura/matching/compare/__init__.py +0 -0
  29. nomenklatura/matching/compare/addresses.py +71 -0
  30. nomenklatura/matching/compare/countries.py +15 -0
  31. nomenklatura/matching/compare/dates.py +83 -0
  32. nomenklatura/matching/compare/gender.py +15 -0
  33. nomenklatura/matching/compare/identifiers.py +30 -0
  34. nomenklatura/matching/compare/names.py +157 -0
  35. nomenklatura/matching/compare/util.py +51 -0
  36. nomenklatura/matching/compat.py +66 -0
  37. nomenklatura/matching/erun/__init__.py +0 -0
  38. nomenklatura/matching/erun/countries.py +42 -0
  39. nomenklatura/matching/erun/identifiers.py +64 -0
  40. nomenklatura/matching/erun/misc.py +71 -0
  41. nomenklatura/matching/erun/model.py +110 -0
  42. nomenklatura/matching/erun/names.py +126 -0
  43. nomenklatura/matching/erun/train.py +135 -0
  44. nomenklatura/matching/erun/util.py +28 -0
  45. nomenklatura/matching/logic_v1/__init__.py +0 -0
  46. nomenklatura/matching/logic_v1/identifiers.py +104 -0
  47. nomenklatura/matching/logic_v1/model.py +76 -0
  48. nomenklatura/matching/logic_v1/multi.py +21 -0
  49. nomenklatura/matching/logic_v1/phonetic.py +142 -0
  50. nomenklatura/matching/logic_v2/__init__.py +0 -0
  51. nomenklatura/matching/logic_v2/identifiers.py +124 -0
  52. nomenklatura/matching/logic_v2/model.py +98 -0
  53. nomenklatura/matching/logic_v2/names/__init__.py +3 -0
  54. nomenklatura/matching/logic_v2/names/analysis.py +51 -0
  55. nomenklatura/matching/logic_v2/names/distance.py +181 -0
  56. nomenklatura/matching/logic_v2/names/magic.py +60 -0
  57. nomenklatura/matching/logic_v2/names/match.py +195 -0
  58. nomenklatura/matching/logic_v2/names/pairing.py +81 -0
  59. nomenklatura/matching/logic_v2/names/util.py +89 -0
  60. nomenklatura/matching/name_based/__init__.py +4 -0
  61. nomenklatura/matching/name_based/misc.py +86 -0
  62. nomenklatura/matching/name_based/model.py +59 -0
  63. nomenklatura/matching/name_based/names.py +59 -0
  64. nomenklatura/matching/pairs.py +42 -0
  65. nomenklatura/matching/regression_v1/__init__.py +0 -0
  66. nomenklatura/matching/regression_v1/misc.py +75 -0
  67. nomenklatura/matching/regression_v1/model.py +110 -0
  68. nomenklatura/matching/regression_v1/names.py +63 -0
  69. nomenklatura/matching/regression_v1/train.py +87 -0
  70. nomenklatura/matching/regression_v1/util.py +31 -0
  71. nomenklatura/matching/svm_v1/__init__.py +5 -0
  72. nomenklatura/matching/svm_v1/misc.py +94 -0
  73. nomenklatura/matching/svm_v1/model.py +168 -0
  74. nomenklatura/matching/svm_v1/names.py +81 -0
  75. nomenklatura/matching/svm_v1/train.py +186 -0
  76. nomenklatura/matching/svm_v1/util.py +30 -0
  77. nomenklatura/matching/types.py +227 -0
  78. nomenklatura/matching/util.py +62 -0
  79. nomenklatura/publish/__init__.py +0 -0
  80. nomenklatura/publish/dates.py +49 -0
  81. nomenklatura/publish/edges.py +32 -0
  82. nomenklatura/py.typed +0 -0
  83. nomenklatura/resolver/__init__.py +6 -0
  84. nomenklatura/resolver/common.py +2 -0
  85. nomenklatura/resolver/edge.py +107 -0
  86. nomenklatura/resolver/identifier.py +60 -0
  87. nomenklatura/resolver/linker.py +101 -0
  88. nomenklatura/resolver/resolver.py +565 -0
  89. nomenklatura/settings.py +17 -0
  90. nomenklatura/store/__init__.py +41 -0
  91. nomenklatura/store/base.py +130 -0
  92. nomenklatura/store/level.py +272 -0
  93. nomenklatura/store/memory.py +102 -0
  94. nomenklatura/store/redis_.py +131 -0
  95. nomenklatura/store/sql.py +219 -0
  96. nomenklatura/store/util.py +48 -0
  97. nomenklatura/store/versioned.py +371 -0
  98. nomenklatura/tui/__init__.py +17 -0
  99. nomenklatura/tui/app.py +294 -0
  100. nomenklatura/tui/app.tcss +52 -0
  101. nomenklatura/tui/comparison.py +81 -0
  102. nomenklatura/tui/util.py +35 -0
  103. nomenklatura/util.py +26 -0
  104. nomenklatura/versions.py +119 -0
  105. nomenklatura/wikidata/__init__.py +14 -0
  106. nomenklatura/wikidata/client.py +122 -0
  107. nomenklatura/wikidata/lang.py +94 -0
  108. nomenklatura/wikidata/model.py +139 -0
  109. nomenklatura/wikidata/props.py +70 -0
  110. nomenklatura/wikidata/qualified.py +49 -0
  111. nomenklatura/wikidata/query.py +66 -0
  112. nomenklatura/wikidata/value.py +87 -0
  113. nomenklatura/xref.py +125 -0
  114. nomenklatura_mpt-4.1.9.dist-info/METADATA +159 -0
  115. nomenklatura_mpt-4.1.9.dist-info/RECORD +118 -0
  116. nomenklatura_mpt-4.1.9.dist-info/WHEEL +4 -0
  117. nomenklatura_mpt-4.1.9.dist-info/entry_points.txt +3 -0
  118. nomenklatura_mpt-4.1.9.dist-info/licenses/LICENSE +21 -0
@@ -0,0 +1,294 @@
1
+ import asyncio
2
+ from typing import Dict, Optional, Set, Tuple, cast
3
+
4
+ from rich.console import RenderableType
5
+ from rich.text import Text
6
+ from textual.app import App, ComposeResult
7
+ from textual.containers import Grid, VerticalScroll
8
+ from textual.screen import ModalScreen
9
+ from textual.widget import Widget
10
+ from textual.widgets import Button, Footer, Label, ListItem, ListView, Static
11
+
12
+ from followthemoney import DS, SE
13
+
14
+ from nomenklatura.judgement import Judgement
15
+ from nomenklatura.resolver import Resolver
16
+ from nomenklatura.resolver.edge import Edge
17
+ from nomenklatura.store import Store
18
+ from nomenklatura.tui.comparison import render_comparison
19
+
20
+ HISTORY_LENGTH = 20
21
+
22
+
23
class DedupeState(object):
    """Mutable state behind the interactive dedupe UI.

    Keeps the resolver and store, the candidate pair currently under review
    (``left``/``right`` together with its ``score``), display options, and a
    cache of pre-merge entities used when rendering the judgement history.
    """

    def __init__(
        self,
        resolver: Resolver[SE],
        store: Store[DS, SE],
        url_base: Optional[str] = None,
    ):
        self.store = store
        self.resolver = resolver
        self.view = store.default_view(external=True)
        self.url_base = url_base
        self.latinize = False
        # Transient status text; when set it is rendered instead of a pair.
        self.message: Optional[str] = None
        # Canonical ID pairs that were already rejected as candidates.
        self.ignore: Set[Tuple[str, str]] = set()
        self.left: Optional[SE] = None
        self.right: Optional[SE] = None
        self.score = 0.0
        # Entities as they looked right before a decision, keyed by their ID.
        self.recents: Dict[str, SE] = dict()

    def load(self) -> bool:
        """Select the next candidate pair that can be reviewed.

        Walks the resolver's candidates, skipping (and remembering in
        ``ignore``) pairs without a score, pairs the resolver rejects, pairs
        with an entity missing from the view, and pairs whose schemata can
        not match.

        Returns:
            True if a pair was stored in ``left``/``right``/``score``.
            False if no candidate qualified; ``left`` and ``right`` are then
            both ``None``.
        """
        self.left = None
        self.right = None
        self.resolver.begin()
        for left_id, right_id, score in self.resolver.get_candidates():
            left_id = self.resolver.get_canonical(left_id)
            right_id = self.resolver.get_canonical(right_id)
            pair = (left_id, right_id)
            if pair in self.ignore:
                continue
            if score is None or not self.resolver.check_candidate(left_id, right_id):
                self.ignore.add(pair)
                continue
            # Work on locals and only publish the pair once it is accepted.
            # Assigning to self.left/self.right up front would leave a
            # rejected pair loaded when the loop runs out of candidates.
            left = self.view.get_entity(left_id)
            right = self.view.get_entity(right_id)
            if left is not None and right is not None:
                if left.schema == right.schema or left.schema.can_match(right.schema):
                    self.left = left
                    self.right = right
                    self.score = score
                    return True
            self.ignore.add(pair)
        return False

    def decide(self, judgement: Judgement) -> None:
        """Record ``judgement`` for the loaded pair, then load the next one.

        If no complete pair (both entities with IDs) is loaded, nothing is
        decided, but the resolver is still committed and ``load`` is re-run.
        """
        left, right = self.left, self.right
        has_left = left is not None and left.id is not None
        has_right = right is not None and right.id is not None
        if has_left and has_right:
            # Since we don't have an unresolved store as well as the resolved
            # one, hold on to pre-merge entities to show in history.
            self.recents[left.id] = left
            self.recents[right.id] = right
            canonical_id = self.resolver.decide(
                left.id,
                right.id,
                judgement=judgement,
            )
            self.store.update(canonical_id)
        self.resolver.commit()
        self.load()

    def edit(self, edge: Edge, judgement: Judgement) -> None:
        """Replace the judgement on an existing edge, then reload candidates.

        Both ends of the edge are passed to ``store.update`` before the
        resolver is committed.
        """
        self.resolver.decide(edge.source, edge.target, judgement)
        self.store.update(edge.source)
        self.store.update(edge.target)
        self.resolver.commit()
        self.load()
90
+
91
+
92
class DedupeAppWidget(Widget):
    """Base widget that exposes the owning app's shared dedupe state."""

    @property
    def dedupe(self) -> DedupeState:
        """The ``dedupe`` attribute of the app, which is treated as a ``DedupeApp``."""
        app = cast(DedupeApp, self.app)
        return app.dedupe
96
+
97
+
98
class HistoryItem(Static, DedupeAppWidget):
    """Static text describing one past judgement (a resolver edge)."""

    def __init__(self, edge: Edge) -> None:
        """Render timestamp, both entity IDs (with captions when known),
        and who decided what for ``edge``."""
        self.edge = edge
        state = self.dedupe
        source_id = edge.source.id
        target_id = edge.target.id

        source = state.recents.get(source_id, None)
        target = state.recents.get(target_id, None)
        if target is None:
            # Only the target falls back to a lookup in the view.
            target = state.view.get_entity(target_id)

        source_lines = [f"src: {source_id}"]
        if source:
            source_lines.append(f" {source.caption}")
        target_lines = [f"tgt: {target_id}"]
        if target:
            target_lines.append(f" {target.caption}")

        when = edge.created_at or "unknown time"
        rows = [
            f"{when}",
            "\n".join(source_lines),
            "\n".join(target_lines),
            f"{edge.user} decided {edge.judgement.value}",
        ]
        super().__init__("\n".join(rows))
119
+
120
+
121
class ConfirmEditModal(ModalScreen[bool]):
    """Yes/No dialog asking whether an existing judgement should be changed.

    ``edge`` and ``judgement`` must be assigned before the screen is composed.
    The screen is dismissed with ``True`` for "Yes" and ``False`` otherwise.
    """

    edge: Optional[Edge] = None
    judgement: Optional[Judgement] = None

    def compose(self) -> ComposeResult:
        """Build the dialog grid: the question label and the two buttons."""
        edge, judgement = self.edge, self.judgement
        assert edge is not None
        assert judgement is not None
        message = f"Change {edge.source.id} -> {edge.target.id} to {judgement.value}?"
        question = Label(message, id="question")
        yes_button = Button("Yes", variant="error", id="yes")
        no_button = Button("No", variant="primary", id="no")
        yield Grid(question, yes_button, no_button, id="dialog")

    def on_button_pressed(self, event: Button.Pressed) -> None:
        """Dismiss with whether the pressed button was the one with id "yes"."""
        self.dismiss(event.button.id == "yes")
141
+
142
+
143
class HistoryListView(ListView):
    """List of history items with key bindings to re-judge the highlighted one."""

    BINDINGS = [
        ("x", "positive", "Match"),
        ("n", "negative", "No match"),
        ("u", "unsure", "Unsure"),
        ("d", "delete", "No judgement"),
    ]

    async def trigger_edit(self, judgement: Judgement) -> None:
        """Ask the app to change the highlighted item's edge to ``judgement``.

        Does nothing when no list item is highlighted.
        """
        highlighted = self.highlighted_child
        if highlighted is not None:
            history_item = highlighted.query_one(HistoryItem)
            app = cast(DedupeApp, self.app)
            await app.edit(history_item.edge, judgement)

    async def action_positive(self) -> None:
        """Binding "x": re-judge as a match."""
        await self.trigger_edit(Judgement.POSITIVE)

    async def action_negative(self) -> None:
        """Binding "n": re-judge as no match."""
        await self.trigger_edit(Judgement.NEGATIVE)

    async def action_unsure(self) -> None:
        """Binding "u": re-judge as unsure."""
        await self.trigger_edit(Judgement.UNSURE)

    async def action_delete(self) -> None:
        """Binding "d": reset to no judgement."""
        await self.trigger_edit(Judgement.NO_JUDGEMENT)
169
+
170
+
171
class HistoryWidget(DedupeAppWidget):
    """Panel listing the most recent judgements; hidden until toggled."""

    list_view: ListView
    is_visible: bool = False

    def compose(self) -> ComposeResult:
        """Yield a short help text followed by the history list."""
        self.list_view = HistoryListView()
        help_text = (
            "Tab to toggle between dedupe and history.\n"
            "Arrow up/down to select history to edit."
        )
        yield Static(help_text, classes="help")
        yield self.list_view

    def on_mount(self) -> None:
        """Set the border title and bring display and content in line with
        the current visibility flag."""
        self.border_title = "History"
        self._apply_visibility()
        self.reload_history()

    def _apply_visibility(self) -> None:
        """Map ``is_visible`` onto the widget's ``display`` style."""
        self.styles.display = "block" if self.is_visible else "none"

    def toggle_visible(self) -> None:
        """Flip visibility, then update the display style and the list."""
        self.is_visible = not self.is_visible
        self._apply_visibility()
        self.reload_history()

    def reload_history(self) -> None:
        """Rebuild the list from the resolver's latest judgements.

        Skipped entirely while the widget is hidden.
        """
        if self.is_visible:
            entries = self.list_view
            entries.clear()
            for edge in self.dedupe.resolver.get_judgements(HISTORY_LENGTH):
                entries.append(ListItem(HistoryItem(edge)))
            entries.scroll_home(animate=False)
209
+
210
+
211
class CompareWidget(DedupeAppWidget, can_focus=True):
    """Focusable widget showing the pair under review, or a status text."""

    def render(self) -> RenderableType:
        """Return, in order of precedence: the pending message, the
        comparison of the loaded pair, or a "No candidates." notice."""
        state = self.dedupe
        if state.message is not None:
            return Text(state.message, justify="center")
        if not (state.left and state.right):
            return Text("No candidates.", justify="center")
        return render_comparison(
            state.view,
            state.left,
            state.right,
            state.score,
            latinize=state.latinize,
            url_base=state.url_base,
        )
225
+
226
+
227
class DedupeWidget(Widget):
    """Container for the scrollable comparison and the history widget."""

    def compose(self) -> ComposeResult:
        """Yield the comparison (wrapped in a vertical scroll), then history."""
        comparison = VerticalScroll(CompareWidget())
        yield comparison
        history = HistoryWidget()
        yield history
231
+
232
+
233
class DedupeApp(App[int]):
    """Textual application for reviewing candidate pairs.

    ``dedupe`` must be assigned before the app composes its widgets, because
    ``compose`` loads the first candidate from it.
    """

    CSS_PATH = "app.tcss"
    dedupe: DedupeState

    BINDINGS = [
        ("x", "positive", "Match"),
        ("n", "negative", "No match"),
        ("u", "unsure", "Unsure"),
        ("l", "latinize", "Latinize"),
        ("h", "history", "Toggle History"),
        ("q", "exit_hard", "Quit"),
    ]

    def compose(self) -> ComposeResult:
        """Load the first candidate, then yield the main widget and footer."""
        self.dedupe.load()
        yield DedupeWidget()
        yield Footer()

    def force_render(self) -> None:
        """Refresh the comparison and reload plus refresh the history."""
        self.query_one(CompareWidget).refresh(layout=True)
        history = self.query_one(HistoryWidget)
        history.reload_history()
        history.refresh(layout=True)

    async def decide(self, judgement: Judgement) -> None:
        """Apply ``judgement`` to the current pair and redraw."""
        self.dedupe.decide(judgement)
        self.force_render()

    async def edit(self, edge: Edge, judgement: Judgement) -> None:
        """Open a confirmation dialog for changing ``edge`` to ``judgement``.

        On confirmation the edit is applied. Otherwise "Canceled edit." is
        shown for one second and then cleared.
        """

        async def handle_confirmation(confirmed: bool | None) -> None:
            if not confirmed:
                self.dedupe.message = "Canceled edit."
                self.force_render()
                await asyncio.sleep(1)
                self.dedupe.message = None
                self.force_render()
                return
            self.dedupe.edit(edge, judgement)
            self.force_render()

        dialog = ConfirmEditModal()
        dialog.edge = edge
        dialog.judgement = judgement
        self.app.push_screen(dialog, handle_confirmation)

    async def action_positive(self) -> None:
        """Binding "x": judge the current pair as a match."""
        await self.decide(Judgement.POSITIVE)

    async def action_negative(self) -> None:
        """Binding "n": judge the current pair as no match."""
        await self.decide(Judgement.NEGATIVE)

    async def action_unsure(self) -> None:
        """Binding "u": judge the current pair as unsure."""
        await self.decide(Judgement.UNSURE)

    async def action_latinize(self) -> None:
        """Binding "l": toggle the latinize display option and redraw."""
        self.dedupe.latinize = not self.dedupe.latinize
        self.force_render()

    async def action_history(self) -> None:
        """Binding "h": show or hide the history widget."""
        self.query_one(HistoryWidget).toggle_visible()

    async def action_exit_hard(self) -> None:
        """Binding "q": exit the app with result 0."""
        self.exit(0)
@@ -0,0 +1,52 @@
1
/* Confirmation dialog: centred on screen. */
ConfirmEditModal {
    align: center middle;
}

/* Two-column grid holding the question and the Yes/No buttons. */
#dialog {
    grid-size: 2;
    grid-gutter: 1 2;
    grid-rows: 1fr 3;
    padding: 0 1;
    width: 60;
    height: 11;
    border: thick $background 80%;
    background: $surface;
}

/* The question spans both grid columns. */
#question {
    column-span: 2;
    height: 1fr;
    width: 1fr;
    content-align: center middle;
}


HistoryItem {
    border: solid white;
}

/* Fixed-width history panel. */
HistoryWidget {
    width: 50;
    border: solid white;
}

HistoryWidget > .help {
    padding: 1;
}

/* Comparison and history are laid out side by side. */
DedupeWidget {
    layout: horizontal;
}

CompareWidget:focus {
    background: #222222;
}

/* The comparison takes the remaining width. */
CompareWidget {
    width: 1fr;
    height: auto;
}

DedupeApp {
    layout: vertical;
}
@@ -0,0 +1,81 @@
1
+ from typing import Optional, Union
2
+ from normality import latinize_text
3
+ from rich.table import Table
4
+ from rich.text import Text
5
+ from followthemoney import DS, registry, Property
6
+ from followthemoney import SE, StatementEntity as Entity
7
+
8
+ from nomenklatura.store import View
9
+ from nomenklatura.tui.util import comparison_props
10
+
11
+
12
def render_column(entity: Entity) -> Text:
    """Build a column header: the schema label in blue, then ``[id]``."""
    schema_part = (entity.schema.label, "blue")
    id_part = f" [{entity.id}]"
    return Text.assemble(schema_part, id_part, no_wrap=True)
16
+
17
+
18
def render_values(
    view: View[DS, SE], prop: Property, entity: SE, other: SE, latinize: bool
) -> Text:
    """Render the values of ``prop`` on ``entity``, coloured by similarity.

    Each value is compared against all values of the same property type on
    ``other``: a score above 0.95 is shown in green, above 0.7 in orange,
    anything else in the default style. Values are sorted and separated
    by " · ".

    Args:
        view: Used to look up captions for entity-typed values.
        prop: The property whose values are rendered.
        entity: The entity the values are read from.
        other: The entity the values are compared against.
        latinize: Whether captions are passed through ``latinize_text``.

    Returns:
        A ``Text`` with one styled segment per value that has a caption.
    """
    values = entity.get(prop, quiet=True)
    other_values = other.get_type_values(prop.type)
    is_entity = prop.type == registry.entity
    is_wikidata = prop.name == "wikidataId"
    text = Text()
    emitted = False
    for value in sorted(values):
        caption = prop.type.caption(value)
        if is_entity:
            sub = view.get_entity(value)
            if sub is not None:
                caption = sub.caption
        score = prop.type.compare_sets([value], other_values)
        if latinize:
            # Keep the original caption when latinization yields nothing.
            caption = latinize_text(caption) or caption
        if is_wikidata:
            caption = f"https://wikidata.org/wiki/{value}"

        if score > 0.95:
            style = "green1"
        elif score > 0.7:
            style = "orange1"
        else:
            style = "default"

        if caption is None:
            continue
        # Separate based on what was actually written, not on the loop index,
        # so a skipped first value does not produce a leading separator.
        if emitted:
            text.append(" · ", "gray")
        text.append(caption, style)
        emitted = True
    return text
45
+
46
+
47
def render_comparison(
    view: View[DS, SE],
    left: SE,
    right: SE,
    score: float,
    latinize: bool = False,
    url_base: Optional[str] = None,
) -> Union[Table, Text]:
    """Build a three-column table comparing ``left`` and ``right``.

    The header row carries the score and both entities; one row follows per
    property from ``comparison_props``, then a "Sources" row with the dataset
    names and, when ``url_base`` is given, a "URL" row made by formatting
    ``url_base`` with each entity ID. If either entity is ``None`` a
    centred notice is returned instead of a table.
    """
    if left is None or right is None:
        return Text("No candidates loaded.", justify="center")

    table = Table(expand=True)
    table.add_column("Score: %.3f" % score, justify="right", no_wrap=True, ratio=2)
    for entity in (left, right):
        table.add_column(render_column(entity), ratio=5)

    for prop in comparison_props(left, right):
        table.add_row(
            Text(prop.label, "white bold"),
            render_values(view, prop, left, right, latinize),
            render_values(view, prop, right, left, latinize),
        )

    table.add_row(
        Text("Sources", "grey bold"),
        Text(", ".join(left.datasets)),
        Text(", ".join(right.datasets)),
    )

    if url_base is not None:
        table.add_row(
            Text("URL", "grey bold"),
            Text(url_base % left.id),
            Text(url_base % right.id),
        )

    return table
@@ -0,0 +1,35 @@
1
+ from typing import Generator, Tuple
2
+ from followthemoney import registry, Property, SE
3
+
4
# Sort weight per property type; lower weights are listed first and types
# that are not mentioned here get a weight of 0.
TYPE_ORDER = {
    registry.name: -6,
    registry.identifier: -5,
    registry.date: -4,
    registry.country: -3,
    registry.string: -1,
    registry.text: 3,
}


def comparison_props(left: SE, right: SE) -> Generator[Property, None, None]:
    """Yield, in display order, the properties to show when comparing the
    two given entities.

    Properties are ordered by type weight (see ``TYPE_ORDER``), reduced by 10
    for each of the two schemata that features the property, and then by
    label. Hidden properties are skipped, as are properties of a matchable
    type that are themselves not matchable.
    """
    props = set(left.iterprops()) | set(right.iterprops())
    weights = {prop.name: TYPE_ORDER.get(prop.type, 0) for prop in props}

    schemata = (left.schema, right.schema)
    for prop in props:
        featured_count = sum(1 for schema in schemata if prop.name in schema.featured)
        weights[prop.name] -= 10 * featured_count

    ordered = sorted(props, key=lambda prop: (weights[prop.name], prop.label))
    for prop in ordered:
        if not prop.hidden and (not prop.type.matchable or prop.matchable):
            yield prop
nomenklatura/util.py ADDED
@@ -0,0 +1,26 @@
1
+ import re
2
+ import os
3
+ from pathlib import Path
4
+ from collections.abc import Mapping
5
+ from typing import Iterable, TypeVar, List, Union, Optional
6
+ from rigour.time import iso_datetime
7
+
8
# Generic element type for the helpers in this module.
T = TypeVar("T")
# Absolute path of the "data" directory located next to this module.
DATA_PATH = Path(os.path.dirname(__file__)).joinpath("data").resolve()
# Matches every run of characters other than upper-case ASCII letters and digits.
ID_CLEAN = re.compile(r"[^A-Z0-9]+", re.U)
# Optional mapping from header name to a str, bytes or None value.
HeadersType = Optional[Mapping[str, Union[str, bytes, None]]]
12
+
13
+
14
def iso_to_version(value: str) -> Optional[str]:
    """Convert an ISO date/time string into a ``YYYYMMDDHHMMSS`` string.

    Returns ``None`` when ``iso_datetime`` cannot parse ``value``.
    """
    ## Phase this out - it won't be used in new FtM metadata, is used by yente
    parsed = iso_datetime(value)
    return None if parsed is None else parsed.strftime("%Y%m%d%H%M%S")
20
+
21
+
22
def unroll(values: Iterable[Iterable[T]]) -> List[T]:
    """Flatten one level of nesting into a new list, keeping the order."""
    return [item for sub in values for item in sub]
@@ -0,0 +1,119 @@
1
+ import os
2
+ import json
3
+ import string
4
+ import random
5
+ from rigour.time import utc_now
6
+ from typing import Any, List, Iterator, Optional
7
+ from datetime import datetime
8
+
9
# Digits used to encode generated version tags (26 lower-case letters).
ALPHABET = string.ascii_lowercase


class Version(object):
    """A class to represent a dataset version, which consists of a timestamp
    and a string tag.

    The string form (see ``id``) is ``YYYYMMDDHHMMSS-tag``.
    """

    __slots__ = ["dt", "tag"]

    def __init__(self, dt: datetime, tag: str) -> None:
        self.dt: datetime = dt
        self.tag: str = tag

    @classmethod
    def new(cls, tag: Optional[str] = None) -> "Version":
        """Create a version for the current time.

        Args:
            tag: Optional tag; it is padded with "x" or cut to exactly three
                characters. When omitted, a tag is derived from the current
                millisecond plus one random digit.

        Returns:
            A version whose timestamp is naive and truncated to the second.
        """
        now = utc_now().replace(tzinfo=None)
        if tag is None:
            tag = cls._time_tag(now)
        tag = tag.ljust(3, "x")[:3]
        return cls(now.replace(microsecond=0), tag)

    @classmethod
    def _time_tag(cls, moment: datetime) -> str:
        """Derive a three-letter tag from the millisecond part of ``moment``.

        The encoded number is at most 9999, which fits three base-26 digits.
        Shorter encodings are left-padded with the zero digit so that tags
        sort in the same order as the numbers they encode.
        """
        number = (moment.microsecond // 1000) * 10 + random.randint(0, 9)
        return cls._tag_encode(number).rjust(3, ALPHABET[0])

    @classmethod
    def from_string(cls, id: str) -> "Version":
        """Parse a ``YYYYMMDDHHMMSS-tag`` string.

        Raises:
            ValueError: If ``id`` contains no "-" or the timestamp part does
                not match the expected format.
        """
        ts, separator, tag = id.partition("-")
        if not separator:
            raise ValueError(f"Invalid dataset version: {id}")
        # strptime without a zone directive already returns a naive datetime.
        return cls(datetime.strptime(ts, "%Y%m%d%H%M%S"), tag)

    @classmethod
    def _tag_encode(cls, number: int, alphabet: str = ALPHABET) -> str:
        """Convert a non-negative integer to a string, using the characters
        of ``alphabet`` as digits (base 26 with the default alphabet).

        Raises:
            ValueError: If ``number`` is negative.
        """
        # An explicit check rather than an assert: with assertions disabled a
        # negative number would never reach zero in the loop below.
        if number < 0:
            raise ValueError("number must be non-negative")
        base = len(alphabet)
        if number < base:
            return alphabet[number]

        encoded = ""
        while number != 0:
            number, digit = divmod(number, base)
            encoded = alphabet[digit] + encoded
        return encoded

    @classmethod
    def from_env(cls, name: str) -> "Version":
        """Read a version from the environment variable ``name``, or create
        a new one when the variable is not set."""
        id = os.environ.get(name)
        if id is None:
            return cls.new()
        return cls.from_string(id)

    @property
    def id(self) -> str:
        """The version as ``YYYYMMDDHHMMSS-tag``."""
        return f"{self.dt.strftime('%Y%m%d%H%M%S')}-{self.tag}"

    def __str__(self) -> str:
        return self.id

    def __repr__(self) -> str:
        return f"Version({self.id})"

    def __eq__(self, other: Any) -> bool:
        # Compares against the string form, so a version also equals its ID.
        return self.id == str(other)

    def __hash__(self) -> int:
        return hash(self.id)
80
+
81
+
82
class VersionHistory(object):
    """A class to represent a history of dataset versions.

    Items are kept oldest first. ``max_length`` caps how many of the most
    recent items survive an ``append`` and how many are serialized.
    """

    LENGTH = 100

    def __init__(self, items: List["Version"], max_length: int = LENGTH) -> None:
        self.items = items
        self.max_length = max_length

    def _tail(self, items: List["Version"]) -> List["Version"]:
        """Return the last ``max_length`` entries of ``items``."""
        # A plain ``items[-0:]`` would return the whole list, so a limit of
        # zero (or less) is handled explicitly.
        if self.max_length <= 0:
            return []
        return items[-self.max_length :]

    def append(self, version: "Version") -> "VersionHistory":
        """Creates a new history with the given version appended.

        The new history keeps this history's ``max_length``; this instance
        is left unchanged.
        """
        items = [*self.items, version]
        return VersionHistory(self._tail(items), max_length=self.max_length)

    @property
    def latest(self) -> Optional["Version"]:
        """The most recently appended version, or ``None`` if empty."""
        if not self.items:
            return None
        return self.items[-1]

    def to_json(self) -> str:
        """Return a JSON representation of the version history.

        At most ``max_length`` of the most recent items are included.
        """
        items = [str(run) for run in self._tail(self.items)]
        return json.dumps({"items": items})

    @classmethod
    def from_json(cls, data: str) -> "VersionHistory":
        """Create a version history from a JSON representation.

        A missing "items" key yields an empty history.
        """
        raw_items = json.loads(data).get("items", [])
        return cls([Version.from_string(item) for item in raw_items])

    def __iter__(self) -> Iterator["Version"]:
        return iter(self.items)

    def __len__(self) -> int:
        return len(self.items)
@@ -0,0 +1,14 @@
1
+ from nomenklatura.wikidata.client import WikidataClient
2
+ from nomenklatura.wikidata.lang import LangText
3
+ from nomenklatura.wikidata.model import Item, Claim
4
+ from nomenklatura.wikidata.query import SparqlBinding, SparqlResponse, SparqlValue
5
+
6
+ __all__ = [
7
+ "WikidataClient",
8
+ "LangText",
9
+ "Item",
10
+ "Claim",
11
+ "SparqlBinding",
12
+ "SparqlResponse",
13
+ "SparqlValue",
14
+ ]