personal_knowledge_library 3.1.1__py3-none-any.whl → 3.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of personal_knowledge_library might be problematic. Click here for more details.

@@ -0,0 +1,598 @@
1
+ # -*- coding: utf-8 -*-
2
+ # Copyright © 2025-present Wacom. All rights reserved.
3
+ from typing import Optional, Any, List, Dict, Tuple
4
+
5
+ from knowledge.base.entity import Label, Description
6
+ from knowledge.base.ontology import ThingObject
7
+ from knowledge.services.asyncio.graph import AsyncWacomKnowledgeService
8
+ from knowledge.services.graph import WacomKnowledgeService
9
+
10
+
11
def _diff_record(
    file_thing: ThingObject, kg_thing: ThingObject, diff_type: str, kg_value: Any, file_value: Any
) -> Dict[str, Any]:
    """Build one difference record; `kg` always holds the graph side and `file` the file side."""
    return {
        "concept_type": file_thing.concept_type.name,
        "type": diff_type,
        "resource_id": kg_thing.default_source_reference_id(),
        "uri": kg_thing.uri,
        "kg": kg_value,
        "file": file_value,
    }


def _diff_attributes(file_thing: ThingObject, kg_thing: ThingObject) -> List[Dict[str, Any]]:
    """Compare descriptions, index flags, labels and aliases of the two entities."""
    differences: List[Dict[str, Any]] = []
    # Different number of descriptions
    if len(file_thing.description) != len(kg_thing.description):
        differences.append(
            _diff_record(
                file_thing, kg_thing, "description", len(kg_thing.description), len(file_thing.description)
            )
        )
    # Every description of the file must exist with the same content in the knowledge graph
    for desc_file in file_thing.description:
        kg_desc: Optional[Description] = kg_thing.description_lang(desc_file.language_code)
        if kg_desc is None or desc_file.content != kg_desc.content:
            differences.append(
                _diff_record(
                    file_thing,
                    kg_thing,
                    "Description content" if kg_desc else "Missing description",
                    kg_desc.content if kg_desc else "",
                    desc_file.content,
                )
            )
    # Difference in vector index
    if file_thing.use_vector_index != kg_thing.use_vector_index:
        differences.append(
            _diff_record(
                file_thing, kg_thing, "Vector index", kg_thing.use_vector_index, file_thing.use_vector_index
            )
        )
    # Difference in NEL index
    if file_thing.use_for_nel != kg_thing.use_for_nel:
        differences.append(
            _diff_record(file_thing, kg_thing, "NEL index", kg_thing.use_for_nel, file_thing.use_for_nel)
        )
    # Different number of labels
    if len(file_thing.label) != len(kg_thing.label):
        differences.append(
            _diff_record(file_thing, kg_thing, "Number of labels", len(kg_thing.label), len(file_thing.label))
        )
    # Every label of the file must exist with the same content in the knowledge graph
    for label_file in file_thing.label:
        label_kg_lang: Optional[Label] = kg_thing.label_lang(label_file.language_code)
        if label_kg_lang is None or label_file.content != label_kg_lang.content:
            differences.append(
                _diff_record(
                    file_thing,
                    kg_thing,
                    "Label content" if label_kg_lang else "Missing label",
                    label_kg_lang.content if label_kg_lang else "",
                    # Report the label of the file itself, not the first label of the graph entity
                    label_file.content,
                )
            )
    # Different number of aliases
    if len(file_thing.alias) != len(kg_thing.alias):
        differences.append(
            _diff_record(file_thing, kg_thing, "Number of aliases", len(kg_thing.alias), len(file_thing.alias))
        )
    # Every alias of the file must be among the graph aliases of the same language
    for alias_file in file_thing.alias:
        kg_alias_contents: List[str] = [alias.content for alias in kg_thing.alias_lang(alias_file.language_code)]
        if alias_file.content not in kg_alias_contents:
            differences.append(
                _diff_record(
                    file_thing, kg_thing, "Alias content", ", ".join(kg_alias_contents), alias_file.content
                )
            )
    return differences


def _diff_data_properties(file_thing: ThingObject, kg_thing: ThingObject) -> List[Dict[str, Any]]:
    """Compare the data properties of the file entity against the knowledge graph entity."""
    differences: List[Dict[str, Any]] = []
    # Different number of data property types
    if len(file_thing.data_properties) != len(kg_thing.data_properties):
        differences.append(
            _diff_record(
                file_thing,
                kg_thing,
                "data properties",
                len(kg_thing.data_properties),
                len(file_thing.data_properties),
            )
        )
    for prop, file_values in file_thing.data_properties.items():
        if prop not in kg_thing.data_properties:
            differences.append(_diff_record(file_thing, kg_thing, "missing data properties", None, prop))
            # Nothing to compare value by value if the property is absent in the graph
            continue
        kg_props = kg_thing.data_properties.get(prop, [])
        kg_values: List[Any] = [d.value for d in kg_props]
        if len(file_values) != len(kg_props):
            differences.append(
                _diff_record(
                    file_thing, kg_thing, "Number of data properties values", len(kg_props), len(file_values)
                )
            )
        for dp in file_values:
            if dp.value not in kg_values:
                differences.append(
                    _diff_record(
                        file_thing,
                        kg_thing,
                        "Different data property values",
                        # str() keeps the report from failing on non-string values
                        ", ".join(str(value) for value in kg_values),
                        dp.value,
                    )
                )
    return differences


def _diff_relation_targets(
    file_thing: ThingObject,
    kg_thing: ThingObject,
    kg_things: Dict[str, ThingObject],
    file_targets: List[Any],
    kg_targets: List[Any],
) -> List[Dict[str, Any]]:
    """Check that each relation target of the file is known in `kg_things` and linked in the graph."""
    # The graph targets do not depend on the file target, so the URIs are collected once
    uris_kg: List[str] = [t.uri if isinstance(t, ThingObject) else t for t in kg_targets]
    differences: List[Dict[str, Any]] = []
    for file_target in file_targets:
        ref_obj: Optional[ThingObject] = kg_things.get(file_target)
        if ref_obj is None:
            differences.append(
                _diff_record(file_thing, kg_thing, "Object properties target missing", "", file_target)
            )
        elif ref_obj.uri not in uris_kg:
            differences.append(
                _diff_record(
                    file_thing,
                    kg_thing,
                    "Object properties target not linked",
                    "",
                    f"{ref_obj.uri} (reference id: {ref_obj.default_source_reference_id()})",
                )
            )
    return differences


def _diff_object_properties(
    file_thing: ThingObject, kg_thing: ThingObject, kg_things: Dict[str, ThingObject]
) -> List[Dict[str, Any]]:
    """Compare the object properties of the file entity with `kg_thing.object_properties`."""
    differences: List[Dict[str, Any]] = []
    for rel_type, file_property in file_thing.object_properties.items():
        # Check if the object property is missing
        if rel_type not in kg_thing.object_properties:
            differences.append(_diff_record(file_thing, kg_thing, "Object property missing", "", rel_type.iri))
            continue
        kg_property = kg_thing.object_properties[rel_type]
        # Check if the target entity is different (incoming relations)
        differences.extend(
            _diff_relation_targets(
                file_thing, kg_thing, kg_things, file_property.incoming_relations, kg_property.incoming_relations
            )
        )
        # Check if the target entity is different (outgoing relations)
        differences.extend(
            _diff_relation_targets(
                file_thing, kg_thing, kg_things, file_property.outgoing_relations, kg_property.outgoing_relations
            )
        )
    return differences


def diff_entities(
    client: WacomKnowledgeService,
    file_thing: ThingObject,
    kg_thing: ThingObject,
    kg_things: Optional[Dict[str, ThingObject]] = None,
) -> Tuple[List[Dict[str, Any]], List[Dict[str, Any]], List[Dict[str, Any]]]:
    """
    Check the differences between the two entities.

    Each difference is a dictionary with the keys `concept_type`, `type`, `resource_id`, `uri`,
    `kg` (value found in the knowledge graph) and `file` (value found in the file).

    Parameters
    ----------
    client: WacomKnowledgeService
        The client to use. It is only called (``client.relations``) if `kg_things` is not empty.
    file_thing: ThingObject
        The thing to check.
    kg_thing: ThingObject
        The knowledge graph entity to check. If `kg_things` is not empty, its `object_properties`
        are overwritten with the relations fetched via the client.
    kg_things: Optional[Dict[str, ThingObject]]
        The entities in the knowledge graph, looked up by the relation targets of `file_thing`.
        If None or empty, the object properties are not compared.

    Returns
    -------
    differences: List[Dict[str, Any]]
        The differences.
    difference_data_properties: List[Dict[str, Any]]
        The differences in the data properties.
    difference_object_properties: List[Dict[str, Any]]
        The differences in the object properties.
    """
    differences: List[Dict[str, Any]] = _diff_attributes(file_thing, kg_thing)
    difference_data_properties: List[Dict[str, Any]] = _diff_data_properties(file_thing, kg_thing)
    difference_object_properties: List[Dict[str, Any]] = []
    if kg_things:
        kg_thing.object_properties = client.relations(kg_thing.uri)
        difference_object_properties = _diff_object_properties(file_thing, kg_thing, kg_things)
    return differences, difference_data_properties, difference_object_properties


async def diff_entities_async(
    client: AsyncWacomKnowledgeService,
    file_thing: ThingObject,
    kg_thing: ThingObject,
    kg_things: Optional[Dict[str, ThingObject]] = None,
) -> Tuple[List[Dict[str, Any]], List[Dict[str, Any]], List[Dict[str, Any]]]:
    """
    Check the differences between the two entities (asynchronous variant).

    Each difference is a dictionary with the keys `concept_type`, `type`, `resource_id`, `uri`,
    `kg` (value found in the knowledge graph) and `file` (value found in the file).

    Parameters
    ----------
    client: AsyncWacomKnowledgeService
        The client to use. It is only awaited (``client.relations``) if `kg_things` is not empty.
    file_thing: ThingObject
        The thing to check.
    kg_thing: ThingObject
        The knowledge graph entity to check. If `kg_things` is not empty, its `object_properties`
        are overwritten with the relations fetched via the client.
    kg_things: Optional[Dict[str, ThingObject]]
        The entities in the knowledge graph, looked up by the relation targets of `file_thing`.
        If None or empty, the object properties are not compared.

    Returns
    -------
    differences: List[Dict[str, Any]]
        The differences.
    difference_data_properties: List[Dict[str, Any]]
        The differences in the data properties.
    difference_object_properties: List[Dict[str, Any]]
        The differences in the object properties.
    """
    differences: List[Dict[str, Any]] = _diff_attributes(file_thing, kg_thing)
    difference_data_properties: List[Dict[str, Any]] = _diff_data_properties(file_thing, kg_thing)
    difference_object_properties: List[Dict[str, Any]] = []
    if kg_things:
        kg_thing.object_properties = await client.relations(kg_thing.uri)
        difference_object_properties = _diff_object_properties(file_thing, kg_thing, kg_things)
    return differences, difference_data_properties, difference_object_properties
553
+
554
+
555
def is_different(client: WacomKnowledgeService, thing_file: ThingObject, thing_kg: ThingObject) -> bool:
    """
    Tell whether the entity from the file deviates from its knowledge graph counterpart.

    Only the general differences and the data property differences reported by `diff_entities`
    are taken into account; the object property differences are discarded.

    Parameters
    ----------
    client: WacomKnowledgeService
        The client handed over to `diff_entities`.
    thing_file: ThingObject
        The thing from the file.
    thing_kg: ThingObject
        The thing from the knowledge graph.

    Returns
    -------
    is_different: bool
        True if at least one general or data property difference was found, False otherwise.
    """
    general_diffs, data_property_diffs, _ = diff_entities(client, thing_file, thing_kg)
    if general_diffs:
        return True
    return bool(data_property_diffs)
575
+
576
+
577
async def is_different_async(
    client: AsyncWacomKnowledgeService, thing_file: ThingObject, thing_kg: ThingObject
) -> bool:
    """
    Tell whether the entity from the file deviates from its knowledge graph counterpart.

    Only the general differences and the data property differences reported by
    `diff_entities_async` are taken into account; the object property differences are discarded.

    Parameters
    ----------
    client: AsyncWacomKnowledgeService
        The client handed over to `diff_entities_async`.
    thing_file: ThingObject
        The thing from the file.
    thing_kg: ThingObject
        The thing from the knowledge graph.

    Returns
    -------
    is_different: bool
        True if at least one general or data property difference was found, False otherwise.
    """
    general_diffs, data_property_diffs, _ = await diff_entities_async(client, thing_file, thing_kg)
    if general_diffs:
        return True
    return bool(data_property_diffs)