rara-tools 0.7.14__py3-none-any.whl → 0.7.16__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of rara-tools might be problematic.

@@ -11,7 +11,7 @@ class AuthoritiesRecordNormalizer(RecordNormalizer):
     """ Normalize authorities records """
 
     def __init__(self, linking_results: List[dict] = [], sierra_data: List[dict] = [],
-                 classified_fields: List[str] = [],
+                 classified_fields: List[List[dict]] = [],
                  ALLOW_EDIT_FIELDS: List[str] = ["008", "925"],
                  REPEATABLE_FIELDS: List[str] = ["024", "035", "400", "667"]):
 
@@ -34,7 +34,7 @@ class RecordNormalizer:
         entities: List of Full names (str). If included, will use NormLinker to match with normalized records on KATA elastic.
     """
 
-    def __init__(self, linking_results: List[dict] = [], sierra_data: List[dict] = [], classified_fields: List[str] = [],
+    def __init__(self, linking_results: List[dict] = [], sierra_data: List[dict] = [], classified_fields: List[List[dict]] = [],
                  ALLOW_EDIT_FIELDS: List[str] = ["925"], REPEATABLE_FIELDS: List[str] = ["667"]):
 
         # Include, if will replace existing field
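
With the signature change above, `classified_fields` is no longer a flat list of strings but one list of MARC field dicts per entry in `linking_results`. A minimal sketch of the expected shape, reusing the `670` example from the `_include_classified_fields` docstring further down (the entity names are hypothetical):

    # One inner list per linking result; an empty inner list means
    # "no classified fields for this entity".
    linking_results = [
        {"original_entity": "Tammsaare, A. H."},  # hypothetical, trimmed linking output
        {"original_entity": "Under, Marie"},
    ]
    classified_fields = [
        [{"670": {"ind1": " ", "ind2": "0",
                  "subfields": [{"a": "Päikesekiri, 2021"}]}}],  # fields for record 0
        [],                                                      # nothing for record 1
    ]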
@@ -44,17 +44,30 @@ class RecordNormalizer:
         # leader applied to new records
         self.DEFAULT_LEADER = "01682nz a2200349n 4500" # must be 24 digits
 
-    def _setup_records(self, linking_results: List[dict], sierra_data: List[dict], classified_fields: List[str] = []) -> JSONReader:
+    def _setup_records(self, linking_results: List[dict], sierra_data: List[dict], classified_fields: List[List[dict]] = []) -> JSONReader:
         """Setup initial MARC records and data.
 
         If no linked entities or more than one linked entity found, we create a new record.
         If one linked entity found, we create an updated record from the linked entity data.
         """
-
         linked_records = []
 
-        for linked in linking_results or []:
+        def handle_create_new_record(entity, idx):
+            logger.info(f"No linked entities found for {entity}, Creating new record.")
+            linked_records.append({
+                "leader": self.DEFAULT_LEADER,
+                "fields": []
+            })
+            self.records_extra_data.append({
+                "entity": entity,
+                "classified_fields": classified_fields[idx] if idx < len(classified_fields) else [],
+                "edited": False,
+            })
+
+        for idx, linked in enumerate(linking_results or []):
+
             if not isinstance(linked, dict):
+                logger.debug(f"Skipping invalid linked result: {linked}")
                 continue
 
             entity = linked.get("original_entity")
@@ -62,46 +75,32 @@ class RecordNormalizer:
 
             if not isinstance(linked_info, list) or not linked_info:
                 # No linked entities found, create new record
-                logger.info(
-                    f"No linked entities found for {entity}, Creating new record.")
-                linked_records.append({
-                    "leader": self.DEFAULT_LEADER,
-                    "fields": []
-                })
-                self.records_extra_data.append({
-                    "entity": entity,
-                    "classified_fields": classified_fields,
-                    "edited": False
-                })
+                handle_create_new_record(entity, idx)
                 continue
 
-            if len(linked_info) > 1:
+            elif len(linked_info) > 1:
                 # Multiple linked entities found, create new record
-                logger.info(
-                    f"Multiple linked entities found for {entity}. Creating new record.")
-                linked_records.append({
-                    "leader": self.DEFAULT_LEADER,
-                    "fields": []
-                })
-                self.records_extra_data.append({
-                    "entity": entity,
-                    "classified_fields": classified_fields,
-                    "edited": False
-                })
+                handle_create_new_record(entity, idx)
                 continue
 
             elif len(linked_info) == 1:
+                # one record match found, we update existing record
+
                 linked_item = linked_info[0]
                 if not isinstance(linked_item, dict):
                     continue
 
+                # handle case where we have linked an entity without a record
+                if not linked_item.get("json", None):
+                    handle_create_new_record(entity, idx)
+                    continue
+
                 linked_records.append(linked_item.get("json", {}))
 
                 self.records_extra_data.append({
                     "entity": entity,
                     "viaf": linked_item.get("viaf", {}),
-                    "classified_fields": classified_fields,
-                    "type": "linked",
+                    "classified_fields": classified_fields[idx] if idx < len(classified_fields) else [],
                     "edited": True
                 })
                 continue
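
All three "create a new record" branches now go through `handle_create_new_record`, which selects the per-entity field list by position and quietly falls back to an empty list when `classified_fields` is shorter than `linking_results`. A standalone sketch of that index guard (placeholder data, not the library's types):

    def fields_for(classified_fields, idx):
        # Same guard as handle_create_new_record: indexes past the end of
        # classified_fields degrade to an empty list instead of raising IndexError.
        return classified_fields[idx] if idx < len(classified_fields) else []

    classified_fields = [["field-dicts-for-entity-0"]]  # placeholder contents
    print(fields_for(classified_fields, 0))  # ['field-dicts-for-entity-0']
    print(fields_for(classified_fields, 3))  # [] - more entities than field lists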
@@ -109,7 +108,6 @@ class RecordNormalizer:
         self.records_extra_data.extend(
             {
                 "sierraID": obj.get("sierraID"),
-                "type": "sierra",
                 "edited": True
             }
             for obj in (sierra_data or [])
@@ -359,10 +357,12 @@ class RecordNormalizer:
     def _include_classified_fields(self, record: Record, classified_fields: list[dict]) -> None:
         """Include classified fields from core, if any.
         e.g. classified_fields=[{'670': {'ind1': ' ', 'ind2': '0', 'subfields': [{'a': 'Päikesekiri, 2021'}]}}]
+
+        For each record, we need a list of dicts, to handle repeatable fields.
         """
         if not classified_fields:
             return
-
+
         fields = [
             Field(
                 tag=str(tag),
@@ -373,7 +373,6 @@ class RecordNormalizer:
             for tag, v in field_dict.items()
         ]
 
-        logger.info(f"Adding classified fields: {[f.tag for f in fields]}")
         self._add_fields_to_record(record, fields)
 
     def _normalize_common(self, record: Record, is_editing_existing_record: bool, classified_fields: List[dict]) -> None:
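
The comprehension above builds one pymarc `Field` per `{tag: {...}}` entry. The exact constructor arguments are not fully visible in this hunk, so the following is only a plain-dict sketch of how a single entry unpacks, using the `ind1`/`ind2`/`subfields` keys from the docstring example:

    # Plain-dict sketch of one classified_fields entry; no pymarc objects are
    # constructed here, and the key names follow the docstring example above.
    field_dict = {"670": {"ind1": " ", "ind2": "0",
                          "subfields": [{"a": "Päikesekiri, 2021"}]}}

    for tag, v in field_dict.items():
        indicators = [v.get("ind1", " "), v.get("ind2", " ")]
        subfield_pairs = [(code, value)
                          for sf in v.get("subfields", [])
                          for code, value in sf.items()]
        print(tag, indicators, subfield_pairs)  # 670 [' ', '0'] [('a', 'Päikesekiri, 2021')]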
@@ -464,6 +463,8 @@ class RecordNormalizer:
                 verify=verify,
                 threshold=threshold
             )
+            if viaf_record:
+                logger.debug(f"VIAF {search_term}, linked to ID: {viaf_record.viaf_id}")
 
         except Exception as e:
             logger.error(
@@ -472,8 +473,16 @@ class RecordNormalizer:
         return viaf_record
 
     def _normalize_record(self, record: Record, sierraID: str,
-                          viaf_record: VIAFRecord, is_editing_existing_record: bool, original_entity: str) -> Record:
+                          viaf_record: VIAFRecord, is_editing_existing_record: bool,
+                          original_entity: str) -> Record:
         return record
+
+    def get_record(self, index: int) -> Record:
+        """Get normalized record by index."""
+        for idx, record in enumerate(self):
+            if idx == index:
+                return record
+        raise IndexError("Record index out of range.")
 
     @property
     def data(self) -> List[dict]:
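
The new `get_record` helper walks the normalizer's own iterator until it reaches the requested index, so each call re-runs normalization from the first record. A standalone sketch of the equivalent lookup over any iterable (a plain list stands in for a constructed normalizer):

    from itertools import islice

    # Equivalent of the get_record() loop above for any iterable; the values
    # here are placeholders, not Record objects.
    def nth_record(records, index):
        try:
            return next(islice(iter(records), index, None))
        except StopIteration:
            raise IndexError("Record index out of range.")

    records = ["record-0", "record-1", "record-2"]
    print(nth_record(records, 1))  # record-1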
@@ -490,21 +499,28 @@ class RecordNormalizer:
     @property
     def first(self) -> Record:
         return next(iter(self))
-
+
     def __iter__(self) -> Iterator:
-        viaf_id_path = "viaf.queryResult.records.record.0.recordData.VIAFCluster.viafID"
+        # viaf_id_path = "viaf.original.queryResult.viafID"
+        viaf_id_path = "viaf.parsed.viaf_id"
+
         sierra_id_path = "sierraID"
-
+
         for record, extra_data in zip(self.records, self.records_extra_data):
 
             sierra_id = glom(extra_data, sierra_id_path, default="")
             viaf_id = glom(extra_data, viaf_id_path, default=None)
-            classified_fields = extra_data.get("classified_fields", [])
 
+            classified_fields = extra_data.get("classified_fields", [])
             entity = extra_data.get("entity")
             is_editing_existing_record = extra_data.get("edited") == True
 
             viaf_record = self._get_viaf_record(record, viaf_id, entity)
+            if viaf_record:
+                logger.debug(
+                    f"linked VIAF record with ID {viaf_record.viaf_id} for entity '{entity}'"
+                )
+
             record = self._normalize_common(record, is_editing_existing_record, classified_fields)
 
             normalized_record = self._normalize_record(
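
`__iter__` now reads the VIAF identifier from the parsed section of each record's extra data via `glom`'s dotted-path lookup, falling back to the given default when the path is missing. A small sketch of that behaviour with a made-up `extra_data` dict:

    from glom import glom

    # Hypothetical extra_data entry shaped the way the new "viaf.parsed.viaf_id"
    # path expects; the id value is invented for illustration.
    extra_data = {"sierraID": "b1234567",
                  "viaf": {"parsed": {"viaf_id": "102333412"}}}

    print(glom(extra_data, "viaf.parsed.viaf_id", default=None))  # '102333412'
    print(glom(extra_data, "sierraID", default=""))               # 'b1234567'
    print(glom({}, "viaf.parsed.viaf_id", default=None))          # None (missing path -> default)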
@@ -12,7 +12,7 @@ class BibRecordNormalizer(RecordNormalizer):
     """ Normalize bib records. """
 
     def __init__(self, linking_results: List[dict] = [], sierra_data: List[dict] = [],
-                 classified_fields: List[str] = [],
+                 classified_fields: List[List[dict]] = [],
                  ALLOW_EDIT_FIELDS: List[str] = ["008", "925"],
                  REPEATABLE_FIELDS: List[str] = ["667"]):
         super().__init__(linking_results, sierra_data, classified_fields)
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: rara-tools
-Version: 0.7.14
+Version: 0.7.16
 Summary: Tools to support Kata's work.
 Classifier: Programming Language :: Python :: 3
 Classifier: Programming Language :: Python :: 3.10
@@ -20,9 +20,9 @@ rara_tools/core_formatters/formatted_keyword.py,sha256=hhi6wh4ErFionjBqYsEeKGbf1
 rara_tools/core_formatters/formatted_meta.py,sha256=WEnMs8K0YeTLGjXn_mxQTpshxcz5_9YjvqcbRFa3M1g,5605
 rara_tools/core_formatters/formatted_object.py,sha256=7a499ZmcZXOqtlwxDi6FWHWF5a6HdCsduS22wV3uHIE,5656
 rara_tools/normalizers/__init__.py,sha256=_NqpS5w710DhaURytHq9JpEt8HgYpSPfRDcOtOymJgE,193
-rara_tools/normalizers/authorities.py,sha256=U3IjQW3XaxAiIJ30Jlq6ON8fdXgrtgNwrvX8oy_iKL0,5304
-rara_tools/normalizers/base.py,sha256=SkbzmAOX5C6PbEFC9i_mpzK1u6LMAh7PavG52ULeO2Y,19744
-rara_tools/normalizers/bibs.py,sha256=DMjJj1mCVVWRpL2NuR6b7W1RWqkx50xix34X_tyxJig,3931
+rara_tools/normalizers/authorities.py,sha256=iW3cYOqqVJKy4CcnG9_T6dN-1bBT1e-0jtLYvco-MyQ,5311
+rara_tools/normalizers/base.py,sha256=DhMicY5p_N2SC_E3lbWUvSM77AOy_pBjQpbLSvYWDxM,20488
+rara_tools/normalizers/bibs.py,sha256=s8NGoieCjiftASUb--1YvYZ0VzW6uBt2ZidhLi_wP9A,3938
 rara_tools/normalizers/reader.py,sha256=GYCkAtnsNx135w5lD-_MqCZzdHQHHPDF-pDxYj839Vo,1595
 rara_tools/normalizers/viaf.py,sha256=C-NfbvL83ZcHVB9ICMw43wAMYKTqDTHU3ZT2mXKec00,24288
 rara_tools/parsers/marc_parsers/base_parser.py,sha256=Kdw4aivJf2FkWgIK7pJtHtVXF_G1pjHVQ7IcFItSqy8,1649
@@ -39,8 +39,8 @@ rara_tools/parsers/marc_records/title_record.py,sha256=XrtJ4gj7wzSaGxNaPtPuawmqq
 rara_tools/parsers/tools/entity_normalizers.py,sha256=VyCy_NowCLpOsL0luQ55IW-Qi-J5oBH0Ofzr7HRFBhM,8949
 rara_tools/parsers/tools/marc_converter.py,sha256=LgSHe-7n7aiDrw2bnsB53r3fXTRFjZXTwBYfTpL0pfs,415
 rara_tools/parsers/tools/russian_transliterator.py,sha256=5ZU66iTqAhr7pmfVqXPAI_cidF43VqqmuN4d7H4_JuA,9770
-rara_tools-0.7.14.dist-info/licenses/LICENSE.md,sha256=hkZVnIZll7e_KNEQzeY94Y9tlzVL8iVZBTMBvDykksU,35142
-rara_tools-0.7.14.dist-info/METADATA,sha256=GIybax1V5ZqALB6SH-oN-e0Cu9-0dbt7rRph7lsEY-Q,4080
-rara_tools-0.7.14.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
-rara_tools-0.7.14.dist-info/top_level.txt,sha256=JwfB5b8BAtW5OFKRln2AQ_WElTRyIBM4nO0FKN1cupY,11
-rara_tools-0.7.14.dist-info/RECORD,,
+rara_tools-0.7.16.dist-info/licenses/LICENSE.md,sha256=hkZVnIZll7e_KNEQzeY94Y9tlzVL8iVZBTMBvDykksU,35142
+rara_tools-0.7.16.dist-info/METADATA,sha256=yBmOUFVy7V6RyZdN_qBGkY7M8zH7H7h7v_QLQeZ3bAM,4080
+rara_tools-0.7.16.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+rara_tools-0.7.16.dist-info/top_level.txt,sha256=JwfB5b8BAtW5OFKRln2AQ_WElTRyIBM4nO0FKN1cupY,11
+rara_tools-0.7.16.dist-info/RECORD,,