rara-tools 0.7.10__tar.gz → 0.7.11__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of rara-tools might be problematic. (The link to further details was not preserved in this text capture.)

Files changed (68)
  1. {rara_tools-0.7.10/rara_tools.egg-info → rara_tools-0.7.11}/PKG-INFO +1 -1
  2. rara_tools-0.7.11/VERSION +1 -0
  3. {rara_tools-0.7.10 → rara_tools-0.7.11}/rara_tools/normalizers/authorities.py +3 -1
  4. {rara_tools-0.7.10 → rara_tools-0.7.11}/rara_tools/normalizers/base.py +7 -3
  5. rara_tools-0.7.11/rara_tools/normalizers/bibs.py +111 -0
  6. {rara_tools-0.7.10 → rara_tools-0.7.11/rara_tools.egg-info}/PKG-INFO +1 -1
  7. {rara_tools-0.7.10 → rara_tools-0.7.11}/tests/test_normalization.py +47 -23
  8. rara_tools-0.7.10/VERSION +0 -1
  9. rara_tools-0.7.10/rara_tools/normalizers/bibs.py +0 -63
  10. {rara_tools-0.7.10 → rara_tools-0.7.11}/LICENSE.md +0 -0
  11. {rara_tools-0.7.10 → rara_tools-0.7.11}/README.md +0 -0
  12. {rara_tools-0.7.10 → rara_tools-0.7.11}/pyproject.toml +0 -0
  13. {rara_tools-0.7.10 → rara_tools-0.7.11}/rara_tools/constants/__init__.py +0 -0
  14. {rara_tools-0.7.10 → rara_tools-0.7.11}/rara_tools/constants/digitizer.py +0 -0
  15. {rara_tools-0.7.10 → rara_tools-0.7.11}/rara_tools/constants/general.py +0 -0
  16. {rara_tools-0.7.10 → rara_tools-0.7.11}/rara_tools/constants/language_evaluator.py +0 -0
  17. {rara_tools-0.7.10 → rara_tools-0.7.11}/rara_tools/constants/linker.py +0 -0
  18. {rara_tools-0.7.10 → rara_tools-0.7.11}/rara_tools/constants/meta_extractor.py +0 -0
  19. {rara_tools-0.7.10 → rara_tools-0.7.11}/rara_tools/constants/normalizers.py +0 -0
  20. {rara_tools-0.7.10 → rara_tools-0.7.11}/rara_tools/constants/parsers.py +0 -0
  21. {rara_tools-0.7.10 → rara_tools-0.7.11}/rara_tools/constants/subject_indexer.py +0 -0
  22. {rara_tools-0.7.10 → rara_tools-0.7.11}/rara_tools/converters.py +0 -0
  23. {rara_tools-0.7.10 → rara_tools-0.7.11}/rara_tools/core_formatters/core_formatter.py +0 -0
  24. {rara_tools-0.7.10 → rara_tools-0.7.11}/rara_tools/core_formatters/formatted_keyword.py +0 -0
  25. {rara_tools-0.7.10 → rara_tools-0.7.11}/rara_tools/core_formatters/formatted_meta.py +0 -0
  26. {rara_tools-0.7.10 → rara_tools-0.7.11}/rara_tools/core_formatters/formatted_object.py +0 -0
  27. {rara_tools-0.7.10 → rara_tools-0.7.11}/rara_tools/decorators.py +0 -0
  28. {rara_tools-0.7.10 → rara_tools-0.7.11}/rara_tools/digar_schema_converter.py +0 -0
  29. {rara_tools-0.7.10 → rara_tools-0.7.11}/rara_tools/elastic.py +0 -0
  30. {rara_tools-0.7.10 → rara_tools-0.7.11}/rara_tools/exceptions.py +0 -0
  31. {rara_tools-0.7.10 → rara_tools-0.7.11}/rara_tools/normalizers/__init__.py +0 -0
  32. {rara_tools-0.7.10 → rara_tools-0.7.11}/rara_tools/normalizers/reader.py +0 -0
  33. {rara_tools-0.7.10 → rara_tools-0.7.11}/rara_tools/normalizers/viaf.py +0 -0
  34. {rara_tools-0.7.10 → rara_tools-0.7.11}/rara_tools/parsers/marc_parsers/base_parser.py +0 -0
  35. {rara_tools-0.7.10 → rara_tools-0.7.11}/rara_tools/parsers/marc_parsers/ems_parser.py +0 -0
  36. {rara_tools-0.7.10 → rara_tools-0.7.11}/rara_tools/parsers/marc_parsers/location_parser.py +0 -0
  37. {rara_tools-0.7.10 → rara_tools-0.7.11}/rara_tools/parsers/marc_parsers/organization_parser.py +0 -0
  38. {rara_tools-0.7.10 → rara_tools-0.7.11}/rara_tools/parsers/marc_parsers/person_parser.py +0 -0
  39. {rara_tools-0.7.10 → rara_tools-0.7.11}/rara_tools/parsers/marc_parsers/title_parser.py +0 -0
  40. {rara_tools-0.7.10 → rara_tools-0.7.11}/rara_tools/parsers/marc_records/base_record.py +0 -0
  41. {rara_tools-0.7.10 → rara_tools-0.7.11}/rara_tools/parsers/marc_records/ems_record.py +0 -0
  42. {rara_tools-0.7.10 → rara_tools-0.7.11}/rara_tools/parsers/marc_records/organization_record.py +0 -0
  43. {rara_tools-0.7.10 → rara_tools-0.7.11}/rara_tools/parsers/marc_records/person_record.py +0 -0
  44. {rara_tools-0.7.10 → rara_tools-0.7.11}/rara_tools/parsers/marc_records/title_record.py +0 -0
  45. {rara_tools-0.7.10 → rara_tools-0.7.11}/rara_tools/parsers/tools/entity_normalizers.py +0 -0
  46. {rara_tools-0.7.10 → rara_tools-0.7.11}/rara_tools/parsers/tools/marc_converter.py +0 -0
  47. {rara_tools-0.7.10 → rara_tools-0.7.11}/rara_tools/parsers/tools/russian_transliterator.py +0 -0
  48. {rara_tools-0.7.10 → rara_tools-0.7.11}/rara_tools/s3.py +0 -0
  49. {rara_tools-0.7.10 → rara_tools-0.7.11}/rara_tools/task_reporter.py +0 -0
  50. {rara_tools-0.7.10 → rara_tools-0.7.11}/rara_tools/utils.py +0 -0
  51. {rara_tools-0.7.10 → rara_tools-0.7.11}/rara_tools.egg-info/SOURCES.txt +0 -0
  52. {rara_tools-0.7.10 → rara_tools-0.7.11}/rara_tools.egg-info/dependency_links.txt +0 -0
  53. {rara_tools-0.7.10 → rara_tools-0.7.11}/rara_tools.egg-info/requires.txt +0 -0
  54. {rara_tools-0.7.10 → rara_tools-0.7.11}/rara_tools.egg-info/top_level.txt +0 -0
  55. {rara_tools-0.7.10 → rara_tools-0.7.11}/requirements.txt +0 -0
  56. {rara_tools-0.7.10 → rara_tools-0.7.11}/setup.cfg +0 -0
  57. {rara_tools-0.7.10 → rara_tools-0.7.11}/tests/test_digar_schema_converter.py +0 -0
  58. {rara_tools-0.7.10 → rara_tools-0.7.11}/tests/test_elastic.py +0 -0
  59. {rara_tools-0.7.10 → rara_tools-0.7.11}/tests/test_elastic_vector_and_search_operations.py +0 -0
  60. {rara_tools-0.7.10 → rara_tools-0.7.11}/tests/test_entity_normalizers.py +0 -0
  61. {rara_tools-0.7.10 → rara_tools-0.7.11}/tests/test_formatters.py +0 -0
  62. {rara_tools-0.7.10 → rara_tools-0.7.11}/tests/test_marc_parsers.py +0 -0
  63. {rara_tools-0.7.10 → rara_tools-0.7.11}/tests/test_s3_exceptions.py +0 -0
  64. {rara_tools-0.7.10 → rara_tools-0.7.11}/tests/test_s3_file_operations.py +0 -0
  65. {rara_tools-0.7.10 → rara_tools-0.7.11}/tests/test_sierra_converters.py +0 -0
  66. {rara_tools-0.7.10 → rara_tools-0.7.11}/tests/test_task_reporter.py +0 -0
  67. {rara_tools-0.7.10 → rara_tools-0.7.11}/tests/test_utils.py +0 -0
  68. {rara_tools-0.7.10 → rara_tools-0.7.11}/tests/test_viaf_client.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: rara-tools
3
- Version: 0.7.10
3
+ Version: 0.7.11
4
4
  Summary: Tools to support Kata's work.
5
5
  Classifier: Programming Language :: Python :: 3
6
6
  Classifier: Programming Language :: Python :: 3.10
@@ -0,0 +1 @@
1
+ 0.7.11
@@ -117,7 +117,9 @@ class AuthoritiesRecordNormalizer(RecordNormalizer):
117
117
  self._add_author(record, viaf_record)
118
118
 
119
119
  def _normalize_record(self, record: Record, sierraID: str,
120
- viaf_record: VIAFRecord, is_editing_existing_record: bool) -> Record:
120
+ viaf_record: VIAFRecord,
121
+ is_editing_existing_record: bool,
122
+ original_entity: str) -> Record:
121
123
 
122
124
  self._normalize_sierra(record, sierraID)
123
125
  self._normalize_viaf(record, viaf_record)
@@ -360,7 +360,7 @@ class RecordNormalizer:
360
360
  return viaf_record
361
361
 
362
362
  def _normalize_record(self, record: Record, sierraID: str,
363
- viaf_record: VIAFRecord, is_editing_existing_record: bool) -> Record:
363
+ viaf_record: VIAFRecord, is_editing_existing_record: bool, original_entity: str) -> Record:
364
364
  return record
365
365
 
366
366
  @property
@@ -374,7 +374,11 @@ class RecordNormalizer:
374
374
  logger.error(f"Failed to normalize record: {e}")
375
375
  continue
376
376
  return result
377
-
377
+
378
+ @property
379
+ def first(self) -> Record:
380
+ return next(iter(self))
381
+
378
382
  def __iter__(self) -> Iterator:
379
383
  viaf_id_path = "viaf.queryResult.records.record.0.recordData.VIAFCluster.viafID"
380
384
  sierra_id_path = "sierraID"
@@ -391,7 +395,7 @@ class RecordNormalizer:
391
395
  record = self._normalize_common(record, is_editing_existing_record)
392
396
 
393
397
  normalized_record = self._normalize_record(
394
- record, sierra_id, viaf_record, is_editing_existing_record)
398
+ record, sierra_id, viaf_record, is_editing_existing_record, original_entity=entity)
395
399
 
396
400
  normalized_record.fields.sort(key=lambda field: field.tag)
397
401
 
@@ -0,0 +1,111 @@
1
+ from pymarc import (Field, Subfield, Record)
2
+ from typing import List, Optional
3
+
4
+ from rara_tools.constants import EMPTY_INDICATORS
5
+ from rara_tools.normalizers.viaf import VIAFRecord
6
+ from rara_tools.normalizers import RecordNormalizer
7
+
8
+ from typing import List
9
+
10
+
11
+ class BibRecordNormalizer(RecordNormalizer):
12
+ """ Normalize bib records. """
13
+
14
+ def __init__(self, linking_results: List[dict] = [], sierra_data: List[dict] = [],
15
+ ALLOW_EDIT_FIELDS: List[str] = ["008", "925"],
16
+ REPEATABLE_FIELDS: List[str] = ["667"]):
17
+ super().__init__(linking_results, sierra_data)
18
+ self.DEFAULT_LEADER = "00399nz a2200145n 4500" # must be 24 digits
19
+ self.ALLOW_EDIT_FIELDS = ALLOW_EDIT_FIELDS
20
+ self.REPEATABLE_FIELDS = REPEATABLE_FIELDS
21
+
22
+ self.records_extra_data = []
23
+ self.sierra_data = sierra_data
24
+ self.records = self._setup_records(linking_results, sierra_data)
25
+
26
+ def _normalize_sierra(self, record: Record) -> Record:
27
+
28
+ suffix_008 = "|||aznnnaabn || ||| "
29
+
30
+ fields = [
31
+ Field(
32
+ tag="008",
33
+ data=f"{self.current_timestamp()}{suffix_008}"
34
+ ),
35
+ ]
36
+
37
+ self._add_fields_to_record(record, fields)
38
+
39
+ def _include_name_variations(self, record: Record, viaf_record: VIAFRecord) -> None:
40
+ """ Include name variations from VIAF record as 400|t fields """
41
+
42
+ if not viaf_record or not viaf_record.name_variations:
43
+ return
44
+
45
+ existing_name_variations = record.get_fields("400")
46
+ existing_variations = [sf.value for field in existing_name_variations for sf in field.get_subfields("t")]
47
+
48
+ fields = []
49
+
50
+ for variation in viaf_record.name_variations:
51
+ if variation not in existing_variations:
52
+ fields.append(
53
+ Field(
54
+ tag="400",
55
+ indicators=EMPTY_INDICATORS,
56
+ subfields=[
57
+ Subfield("t", variation)
58
+ ]
59
+ )
60
+ )
61
+
62
+ self._add_fields_to_record(record, fields)
63
+
64
+ def _add_author(self, record: Record, viaf_record: Optional[VIAFRecord], original_entity: str) -> Optional[Field]:
65
+ if record.get("100") or record.get("110") or record.get("111"):
66
+ return record
67
+
68
+ type_map = {
69
+ "Personal": "100",
70
+ "Corporate": "110",
71
+ "Collective": "111"
72
+ }
73
+
74
+ tag = type_map.get(getattr(viaf_record, "name_type", None), "100")
75
+ title = getattr(viaf_record, "name", None) or original_entity
76
+
77
+ fields = [Field(tag=tag, indicators=EMPTY_INDICATORS, subfields=[Subfield("t", title)])]
78
+
79
+ self._add_fields_to_record(record, fields)
80
+
81
+ if viaf_record:
82
+ self._include_name_variations(record, viaf_record)
83
+
84
+ def _normalize_viaf(self, record: Record, viaf_record: VIAFRecord, original_entity: str) -> None:
85
+
86
+ if not viaf_record:
87
+ # viaf record not found, include original entity as 100|t
88
+ self._add_author(record, viaf_record=None, original_entity=original_entity)
89
+ return record
90
+
91
+ viaf_id = viaf_record.viaf_id
92
+ fields = [
93
+ Field(
94
+ tag="035",
95
+ indicators=EMPTY_INDICATORS,
96
+ subfields=[
97
+ Subfield("a", viaf_id)
98
+ ]
99
+ )
100
+ ]
101
+
102
+ self._add_fields_to_record(record, fields)
103
+ self._add_author(record, viaf_record, original_entity=original_entity)
104
+
105
+ def _normalize_record(self, record: Record, sierraID: str,
106
+ viaf_record: VIAFRecord, is_editing_existing_record: bool, original_entity: str) -> Record:
107
+
108
+ self._normalize_sierra(record)
109
+ self._normalize_viaf(record, viaf_record, original_entity=original_entity)
110
+
111
+ return record
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: rara-tools
3
- Version: 0.7.10
3
+ Version: 0.7.11
4
4
  Summary: Tools to support Kata's work.
5
5
  Classifier: Programming Language :: Python :: 3
6
6
  Classifier: Programming Language :: Python :: 3.10
@@ -456,7 +456,7 @@ def test_add_nationality():
456
456
  # Case 4 - 043 field already exists - should not get edited (not in ALLOW_EDIT_FIELDS)
457
457
  linking_results = [{
458
458
  "original_entity": "Eduard Vilde",
459
- "entity_type": "PER",
459
+ "entity_type": EntityType.PER,
460
460
  "linked_info": [
461
461
  {
462
462
  "json": {
@@ -482,12 +482,7 @@ def test_add_nationality():
482
482
  normalizer = AuthoritiesRecordNormalizer(
483
483
  linking_results=linking_results,
484
484
  )
485
- records = JSONReader(
486
- json.dumps(normalizer.data, ensure_ascii=False)
487
- )
488
-
489
- record = next(iter(records))
490
-
485
+ record = normalizer.first
491
486
  # mock run add nationality with foreign VIAF record
492
487
  viaf_record = normalizer._get_viaf_record(
493
488
  record,
@@ -519,9 +514,7 @@ def test_create_new_normrecord():
519
514
  data = normalizer.data
520
515
  assert len(data) == 1
521
516
 
522
- record = next(iter(JSONReader(
523
- json.dumps(normalizer.data, ensure_ascii=False)
524
- )))
517
+ record = normalizer.first
525
518
  leader = str(record.leader)
526
519
  assert leader == "01682nz a2200349n 4500"
527
520
  assert len(leader) == 24
@@ -534,9 +527,7 @@ def test_create_new_normrecord():
534
527
  normalizer = AuthoritiesRecordNormalizer(linking_results=linking_results)
535
528
  data = normalizer.data
536
529
  assert len(data) == 1
537
- record = next(iter(JSONReader(
538
- json.dumps(normalizer.data, ensure_ascii=False)
539
- )))
530
+ record = normalizer.first
540
531
 
541
532
  _validate_new_record_notes(record)
542
533
  # validate leader
@@ -552,9 +543,7 @@ def test_create_new_normrecord():
552
543
  normalizer = BibRecordNormalizer(linking_results=linking_results)
553
544
  data = normalizer.data
554
545
  assert len(data) == 1
555
- record = next(iter(JSONReader(
556
- json.dumps(normalizer.data, ensure_ascii=False)
557
- )))
546
+ record = normalizer.first
558
547
 
559
548
  _validate_new_record_notes(record)
560
549
  # validate leader
@@ -571,9 +560,7 @@ def test_create_new_normrecord():
571
560
  normalizer = BibRecordNormalizer(linking_results=linking_results)
572
561
  data = normalizer.data
573
562
  assert len(data) == 1
574
- record = next(iter(JSONReader(
575
- json.dumps(normalizer.data, ensure_ascii=False)
576
- )))
563
+ record = normalizer.first
577
564
 
578
565
  def test_680_field_on_existing_record_moved_to_667():
579
566
  """ 680 Should not be added for new, if exists on existing record, should be moved to 667 """
@@ -604,9 +591,7 @@ def test_680_field_on_existing_record_moved_to_667():
604
591
  normalizer = AuthoritiesRecordNormalizer(
605
592
  linking_results=linking_results
606
593
  )
607
- record = next(iter(JSONReader(
608
- json.dumps(normalizer.data, ensure_ascii=False)
609
- )))
594
+ record = normalizer.first
610
595
 
611
596
  fields_680 = record.get_fields("680")
612
597
  assert len(fields_680) == 0
@@ -630,4 +615,43 @@ def test_date_formatting():
630
615
  # invalid date formats - should return empty string
631
616
  invalid_dates = ["abcd", "199A0101"]
632
617
  for date in invalid_dates:
633
- assert normalizer._format_date(date) == ""
618
+ assert normalizer._format_date(date) == ""
619
+
620
+ def test_new_bibrecord_title_included():
621
+ """ normrecord for bibs has to always have the 1XX|t field filled """
622
+
623
+ # Case 1 No linker response, & Viaf record found
624
+ linking_results = [{
625
+ "original_entity": "Lord of the Rings",
626
+ "entity_type": EntityType.TITLE,
627
+ "linked_info": []
628
+ }]
629
+
630
+ normalizer = BibRecordNormalizer(
631
+ linking_results=linking_results,
632
+ )
633
+ data = normalizer.data
634
+ assert len(data) == 1 # should enrich existing record
635
+ record = normalizer.first
636
+
637
+ _validate_new_record_notes(record)
638
+ fields_100 = record.get_fields("100")
639
+ assert len(fields_100) == 1
640
+ assert fields_100[0].get_subfields("t")[0] == "Lord of the rings"
641
+
642
+ # Case 2 - Viaf record not found - should use original entity
643
+ linking_results = [{
644
+ "original_entity": "Roolijoodiku katastroofiline jõulusõit",
645
+ "entity_type": EntityType.TITLE,
646
+ "linked_info": []
647
+ }]
648
+ normalizer = BibRecordNormalizer(
649
+ linking_results=linking_results,
650
+ )
651
+ record = normalizer.first
652
+ data = normalizer.data
653
+ assert len(data) == 1 # should enrich existing record
654
+
655
+ fields_100 = record.get_fields("100")
656
+ assert len(fields_100) == 1
657
+ assert fields_100[0].get_subfields("t")[0] == "Roolijoodiku katastroofiline jõulusõit"
rara_tools-0.7.10/VERSION DELETED
@@ -1 +0,0 @@
1
- 0.7.10
@@ -1,63 +0,0 @@
1
- from pymarc import (Field, Subfield, Record)
2
-
3
- from rara_tools.constants import EMPTY_INDICATORS
4
- from rara_tools.normalizers.viaf import VIAFRecord
5
- from rara_tools.normalizers import RecordNormalizer
6
-
7
- from typing import List
8
-
9
-
10
- class BibRecordNormalizer(RecordNormalizer):
11
- """ Normalize bib records. """
12
-
13
- def __init__(self, linking_results: List[dict] = [], sierra_data: List[dict] = [],
14
- ALLOW_EDIT_FIELDS: List[str] = ["008", "925"],
15
- REPEATABLE_FIELDS: List[str] = ["667"]):
16
- super().__init__(linking_results, sierra_data)
17
- self.DEFAULT_LEADER = "00399nz a2200145n 4500" # must be 24 digits
18
- self.ALLOW_EDIT_FIELDS = ALLOW_EDIT_FIELDS
19
- self.REPEATABLE_FIELDS = REPEATABLE_FIELDS
20
-
21
- self.records_extra_data = []
22
- self.sierra_data = sierra_data
23
- self.records = self._setup_records(linking_results, sierra_data)
24
-
25
- def _normalize_sierra(self, record: Record) -> Record:
26
-
27
- suffix_008 = "|||aznnnaabn || ||| "
28
-
29
- fields = [
30
- Field(
31
- tag="008",
32
- data=f"{self.current_timestamp()}{suffix_008}"
33
- ),
34
- ]
35
-
36
- self._add_fields_to_record(record, fields)
37
-
38
- def _normalize_viaf(self, record: Record, viaf_record: VIAFRecord) -> None:
39
-
40
- if not viaf_record:
41
- return record
42
-
43
- viaf_id = viaf_record.viaf_id
44
- fields = [
45
- Field(
46
- tag="035",
47
- indicators=EMPTY_INDICATORS,
48
- subfields=[
49
- Subfield("a", viaf_id)
50
- ]
51
- )
52
- ]
53
-
54
- self._add_fields_to_record(record, fields)
55
- self._add_author(record, viaf_record)
56
-
57
- def _normalize_record(self, record: Record, sierraID: str,
58
- viaf_record: VIAFRecord, is_editing_existing_record: bool) -> Record:
59
-
60
- self._normalize_sierra(record)
61
- self._normalize_viaf(record, viaf_record)
62
-
63
- return record
File without changes
File without changes
File without changes
File without changes