personal_knowledge_library 3.1.2__py3-none-any.whl → 3.2.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of personal_knowledge_library might be problematic. Click here for more details.
- knowledge/__init__.py +1 -1
- knowledge/base/__init__.py +5 -1
- knowledge/base/language.py +2 -2
- knowledge/base/ontology.py +1 -1
- knowledge/base/response.py +361 -0
- knowledge/services/__init__.py +3 -1
- knowledge/services/asyncio/graph.py +116 -1
- knowledge/services/asyncio/search.py +2 -2
- knowledge/services/graph.py +226 -6
- knowledge/services/ontology.py +7 -1
- knowledge/services/search.py +6 -6
- knowledge/services/tenant.py +2 -1
- knowledge/services/users.py +2 -0
- knowledge/utils/diff.py +598 -0
- knowledge/utils/import_format.py +8 -3
- {personal_knowledge_library-3.1.2.dist-info → personal_knowledge_library-3.2.1.dist-info}/METADATA +1 -1
- {personal_knowledge_library-3.1.2.dist-info → personal_knowledge_library-3.2.1.dist-info}/RECORD +19 -17
- {personal_knowledge_library-3.1.2.dist-info → personal_knowledge_library-3.2.1.dist-info}/WHEEL +1 -1
- {personal_knowledge_library-3.1.2.dist-info → personal_knowledge_library-3.2.1.dist-info}/LICENSE +0 -0
knowledge/utils/diff.py
ADDED
|
@@ -0,0 +1,598 @@
|
|
|
1
|
+
# -*- coding: utf-8 -*-
|
|
2
|
+
# Copyright © 2025-present Wacom. All rights reserved.
|
|
3
|
+
from typing import Optional, Any, List, Dict, Tuple
|
|
4
|
+
|
|
5
|
+
from knowledge.base.entity import Label, Description
|
|
6
|
+
from knowledge.base.ontology import ThingObject
|
|
7
|
+
from knowledge.services.asyncio.graph import AsyncWacomKnowledgeService
|
|
8
|
+
from knowledge.services.graph import WacomKnowledgeService
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
def _diff_record(
    file_thing: ThingObject,
    kg_thing: ThingObject,
    diff_type: str,
    kg_value: Any,
    file_value: Any,
) -> Dict[str, Any]:
    """Build one difference record in the layout shared by all result lists."""
    return {
        "concept_type": file_thing.concept_type.name,
        "type": diff_type,
        "resource_id": kg_thing.default_source_reference_id(),
        "uri": kg_thing.uri,
        "kg": kg_value,
        "file": file_value,
    }


def _diff_local_content(
    file_thing: ThingObject, kg_thing: ThingObject
) -> Tuple[List[Dict[str, Any]], List[Dict[str, Any]]]:
    """Compare everything that needs no service call: texts, index flags and data properties."""
    differences: List[Dict[str, Any]] = []

    # Descriptions: number of entries first, then the content per language of the file entity.
    if len(file_thing.description) != len(kg_thing.description):
        differences.append(
            _diff_record(
                file_thing, kg_thing, "description", len(kg_thing.description), len(file_thing.description)
            )
        )
    for desc_file in file_thing.description:
        kg_desc: Optional[Description] = kg_thing.description_lang(desc_file.language_code)
        if kg_desc is None or desc_file.content != kg_desc.content:
            differences.append(
                _diff_record(
                    file_thing,
                    kg_thing,
                    "Description content" if kg_desc else "Missing description",
                    kg_desc.content if kg_desc else "",
                    desc_file.content,
                )
            )

    # Index flags
    if file_thing.use_vector_index != kg_thing.use_vector_index:
        differences.append(
            _diff_record(file_thing, kg_thing, "Vector index", kg_thing.use_vector_index, file_thing.use_vector_index)
        )
    if file_thing.use_for_nel != kg_thing.use_for_nel:
        differences.append(
            _diff_record(file_thing, kg_thing, "NEL index", kg_thing.use_for_nel, file_thing.use_for_nel)
        )

    # Labels: number of entries first, then the content per language of the file entity.
    if len(file_thing.label) != len(kg_thing.label):
        differences.append(
            _diff_record(file_thing, kg_thing, "Number of labels", len(kg_thing.label), len(file_thing.label))
        )
    for label_file in file_thing.label:
        label_kg_lang: Optional[Label] = kg_thing.label_lang(label_file.language_code)
        if label_kg_lang is None or label_file.content != label_kg_lang.content:
            differences.append(
                _diff_record(
                    file_thing,
                    kg_thing,
                    "Label content" if label_kg_lang else "Missing label",
                    label_kg_lang.content if label_kg_lang else "",
                    # Report the label that was actually compared; indexing kg_thing.label[0] here
                    # showed the wrong side and failed when the KG entity had no label at all.
                    label_file.content,
                )
            )

    # Aliases: number of entries first, then each file alias must exist in the same language.
    if len(file_thing.alias) != len(kg_thing.alias):
        differences.append(
            _diff_record(file_thing, kg_thing, "Number of aliases", len(kg_thing.alias), len(file_thing.alias))
        )
    for alias_file in file_thing.alias:
        kg_alias_contents: List[str] = [
            alias.content for alias in kg_thing.alias_lang(alias_file.language_code)
        ]
        if alias_file.content not in kg_alias_contents:
            differences.append(
                _diff_record(
                    file_thing, kg_thing, "Alias content", ", ".join(kg_alias_contents), alias_file.content
                )
            )

    difference_data_properties: List[Dict[str, Any]] = []
    if len(file_thing.data_properties) != len(kg_thing.data_properties):
        difference_data_properties.append(
            _diff_record(
                file_thing,
                kg_thing,
                "data properties",
                len(kg_thing.data_properties),
                len(file_thing.data_properties),
            )
        )
    for prop, file_values in file_thing.data_properties.items():
        if prop not in kg_thing.data_properties:
            difference_data_properties.append(
                _diff_record(file_thing, kg_thing, "missing data properties", None, prop)
            )
            continue
        kg_values = kg_thing.data_properties[prop]
        if len(file_values) != len(kg_values):
            difference_data_properties.append(
                _diff_record(
                    file_thing, kg_thing, "Number of data properties values", len(kg_values), len(file_values)
                )
            )
        kg_raw_values: List[Any] = [d.value for d in kg_values]
        for dp in file_values:
            if dp.value not in kg_raw_values:
                difference_data_properties.append(
                    _diff_record(
                        file_thing,
                        kg_thing,
                        "Different data property values",
                        # str() keeps the report working for non-string values (numbers, booleans, ...)
                        ", ".join(str(value) for value in kg_raw_values),
                        dp.value,
                    )
                )
    return differences, difference_data_properties


def _diff_relation_targets(
    file_thing: ThingObject,
    kg_thing: ThingObject,
    file_targets: Any,
    kg_targets: Any,
    kg_things: Dict[str, ThingObject],
) -> List[Dict[str, Any]]:
    """Report file relation targets that are unknown in ``kg_things`` or not linked in the graph."""
    if not file_targets:
        return []
    # Relation targets may be ThingObject instances or plain URIs; normalise once per relation.
    uris_kg: List[str] = [t.uri if isinstance(t, ThingObject) else t for t in kg_targets]
    found: List[Dict[str, Any]] = []
    for file_target in file_targets:
        ref_obj: Optional[ThingObject] = kg_things.get(file_target)
        if ref_obj is None:
            found.append(_diff_record(file_thing, kg_thing, "Object properties target missing", "", file_target))
        elif ref_obj.uri not in uris_kg:
            found.append(
                _diff_record(
                    file_thing,
                    kg_thing,
                    "Object properties target not linked",
                    "",
                    f"{ref_obj.uri} (reference id: {ref_obj.default_source_reference_id()})",
                )
            )
    return found


def _diff_object_properties(
    file_thing: ThingObject, kg_thing: ThingObject, kg_things: Dict[str, ThingObject]
) -> List[Dict[str, Any]]:
    """Compare the relations of the file entity with ``kg_thing.object_properties``."""
    difference_object_properties: List[Dict[str, Any]] = []
    for rel_type, file_relation in file_thing.object_properties.items():
        if rel_type not in kg_thing.object_properties:
            difference_object_properties.append(
                _diff_record(file_thing, kg_thing, "Object property missing", "", rel_type.iri)
            )
            continue
        kg_relation = kg_thing.object_properties[rel_type]
        # Same reporting order as before: incoming relations first, then outgoing ones.
        difference_object_properties.extend(
            _diff_relation_targets(
                file_thing, kg_thing, file_relation.incoming_relations, kg_relation.incoming_relations, kg_things
            )
        )
        difference_object_properties.extend(
            _diff_relation_targets(
                file_thing, kg_thing, file_relation.outgoing_relations, kg_relation.outgoing_relations, kg_things
            )
        )
    return difference_object_properties


def diff_entities(
    client: WacomKnowledgeService,
    file_thing: ThingObject,
    kg_thing: ThingObject,
    kg_things: Optional[Dict[str, ThingObject]] = None,
) -> Tuple[List[Dict[str, Any]], List[Dict[str, Any]], List[Dict[str, Any]]]:
    """
    Check the differences between the two entities.

    Every difference is a dictionary with the keys ``concept_type``, ``type``, ``resource_id``,
    ``uri``, ``kg`` (value found in the knowledge graph entity) and ``file`` (value found in the
    file entity).

    Parameters
    ----------
    client: WacomKnowledgeService
        The client to use. It is only called (``client.relations``) if ``kg_things`` is not empty.
    file_thing: ThingObject
        The thing to check.
    kg_thing: ThingObject
        The knowledge graph entity to check. If ``kg_things`` is not empty, its
        ``object_properties`` are overwritten with the relations loaded via the client.
    kg_things: Optional[Dict[str, ThingObject]]
        The entities in the knowledge graph, looked up by the relation targets of the file entity.
        If None or empty, object properties are not compared.

    Returns
    -------
    differences: List[Dict[str, Any]]
        The differences.
    difference_data_properties: List[Dict[str, Any]]
        The differences in the data properties.
    difference_object_properties: List[Dict[str, Any]]
        The differences in the object properties.
    """
    differences, difference_data_properties = _diff_local_content(file_thing, kg_thing)
    difference_object_properties: List[Dict[str, Any]] = []
    if kg_things:
        kg_thing.object_properties = client.relations(kg_thing.uri)
        difference_object_properties = _diff_object_properties(file_thing, kg_thing, kg_things)
    return differences, difference_data_properties, difference_object_properties


async def diff_entities_async(
    client: AsyncWacomKnowledgeService,
    file_thing: ThingObject,
    kg_thing: ThingObject,
    kg_things: Optional[Dict[str, ThingObject]] = None,
) -> Tuple[List[Dict[str, Any]], List[Dict[str, Any]], List[Dict[str, Any]]]:
    """
    Check the differences between the two entities (asynchronous variant).

    Every difference is a dictionary with the keys ``concept_type``, ``type``, ``resource_id``,
    ``uri``, ``kg`` (value found in the knowledge graph entity) and ``file`` (value found in the
    file entity).

    Parameters
    ----------
    client: AsyncWacomKnowledgeService
        The client to use. It is only awaited (``client.relations``) if ``kg_things`` is not empty.
    file_thing: ThingObject
        The thing to check.
    kg_thing: ThingObject
        The knowledge graph entity to check. If ``kg_things`` is not empty, its
        ``object_properties`` are overwritten with the relations loaded via the client.
    kg_things: Optional[Dict[str, ThingObject]]
        The entities in the knowledge graph, looked up by the relation targets of the file entity.
        If None or empty, object properties are not compared.

    Returns
    -------
    differences: List[Dict[str, Any]]
        The differences.
    difference_data_properties: List[Dict[str, Any]]
        The differences in the data properties.
    difference_object_properties: List[Dict[str, Any]]
        The differences in the object properties.
    """
    differences, difference_data_properties = _diff_local_content(file_thing, kg_thing)
    difference_object_properties: List[Dict[str, Any]] = []
    if kg_things:
        kg_thing.object_properties = await client.relations(kg_thing.uri)
        difference_object_properties = _diff_object_properties(file_thing, kg_thing, kg_things)
    return differences, difference_data_properties, difference_object_properties
|
|
553
|
+
|
|
554
|
+
|
|
555
|
+
def is_different(client: WacomKnowledgeService, thing_file: ThingObject, thing_kg: ThingObject) -> bool:
    """
    Tell whether the file entity and the knowledge graph entity differ.

    Only the first two results of ``diff_entities`` are considered; the object property
    differences are ignored.

    Parameters
    ----------
    client: WacomKnowledgeService
        The client to use.
    thing_file: ThingObject
        The thing from the file.
    thing_kg: ThingObject
        The thing from the knowledge graph.

    Returns
    -------
    is_different: bool
        True if the entities are different, False otherwise.
    """
    general_diff, data_diff, _ = diff_entities(client, thing_file, thing_kg)
    # A non-empty list in either position means the entities differ.
    return bool(general_diff or data_diff)
|
|
575
|
+
|
|
576
|
+
|
|
577
|
+
async def is_different_async(
    client: AsyncWacomKnowledgeService, thing_file: ThingObject, thing_kg: ThingObject
) -> bool:
    """
    Tell whether the file entity and the knowledge graph entity differ (asynchronous variant).

    Only the first two results of ``diff_entities_async`` are considered; the object property
    differences are ignored.

    Parameters
    ----------
    client: AsyncWacomKnowledgeService
        The client to use.
    thing_file: ThingObject
        The thing from the file.
    thing_kg: ThingObject
        The thing from the knowledge graph.

    Returns
    -------
    is_different: bool
        True if the entities are different, False otherwise.
    """
    general_diff, data_diff, _ = await diff_entities_async(client, thing_file, thing_kg)
    # A non-empty list in either position means the entities differ.
    return bool(general_diff or data_diff)
|
knowledge/utils/import_format.py
CHANGED
|
@@ -109,9 +109,14 @@ def load_import_format(file_path: Path) -> List[ThingObject]:
|
|
|
109
109
|
raise FileNotFoundError(f"Path {file_path} is not a file.")
|
|
110
110
|
cached_entities: List[ThingObject] = []
|
|
111
111
|
if file_path.suffix == ".gz":
|
|
112
|
-
with gzip.open(file_path, "
|
|
113
|
-
for line in f_gz
|
|
114
|
-
|
|
112
|
+
with gzip.open(file_path, "rt", encoding="utf-8") as f_gz:
|
|
113
|
+
for line_number, line in enumerate(f_gz):
|
|
114
|
+
stripped_line: str = line.strip()
|
|
115
|
+
if not stripped_line:
|
|
116
|
+
continue # Skip empty lines
|
|
117
|
+
if line_number == 0:
|
|
118
|
+
# Skip the first line (header)
|
|
119
|
+
continue
|
|
115
120
|
try:
|
|
116
121
|
cached_entities.append(__import_format_to_thing__(line))
|
|
117
122
|
except JSONDecodeError as e:
|