rara-tools 0.7.8__tar.gz → 0.7.10__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of rara-tools might be problematic. Click here for more details.

Files changed (68)
  1. {rara_tools-0.7.8/rara_tools.egg-info → rara_tools-0.7.10}/PKG-INFO +1 -1
  2. rara_tools-0.7.10/VERSION +1 -0
  3. {rara_tools-0.7.8 → rara_tools-0.7.10}/rara_tools/constants/normalizers.py +3 -1
  4. {rara_tools-0.7.8 → rara_tools-0.7.10}/rara_tools/normalizers/authorities.py +32 -27
  5. {rara_tools-0.7.8 → rara_tools-0.7.10}/rara_tools/normalizers/base.py +116 -88
  6. {rara_tools-0.7.8 → rara_tools-0.7.10}/rara_tools/normalizers/bibs.py +14 -27
  7. {rara_tools-0.7.8 → rara_tools-0.7.10}/rara_tools/normalizers/reader.py +1 -1
  8. {rara_tools-0.7.8 → rara_tools-0.7.10}/rara_tools/normalizers/viaf.py +5 -2
  9. {rara_tools-0.7.8 → rara_tools-0.7.10/rara_tools.egg-info}/PKG-INFO +1 -1
  10. rara_tools-0.7.10/tests/test_normalization.py +633 -0
  11. rara_tools-0.7.8/VERSION +0 -1
  12. rara_tools-0.7.8/tests/test_normalization.py +0 -350
  13. {rara_tools-0.7.8 → rara_tools-0.7.10}/LICENSE.md +0 -0
  14. {rara_tools-0.7.8 → rara_tools-0.7.10}/README.md +0 -0
  15. {rara_tools-0.7.8 → rara_tools-0.7.10}/pyproject.toml +0 -0
  16. {rara_tools-0.7.8 → rara_tools-0.7.10}/rara_tools/constants/__init__.py +0 -0
  17. {rara_tools-0.7.8 → rara_tools-0.7.10}/rara_tools/constants/digitizer.py +0 -0
  18. {rara_tools-0.7.8 → rara_tools-0.7.10}/rara_tools/constants/general.py +0 -0
  19. {rara_tools-0.7.8 → rara_tools-0.7.10}/rara_tools/constants/language_evaluator.py +0 -0
  20. {rara_tools-0.7.8 → rara_tools-0.7.10}/rara_tools/constants/linker.py +0 -0
  21. {rara_tools-0.7.8 → rara_tools-0.7.10}/rara_tools/constants/meta_extractor.py +0 -0
  22. {rara_tools-0.7.8 → rara_tools-0.7.10}/rara_tools/constants/parsers.py +0 -0
  23. {rara_tools-0.7.8 → rara_tools-0.7.10}/rara_tools/constants/subject_indexer.py +0 -0
  24. {rara_tools-0.7.8 → rara_tools-0.7.10}/rara_tools/converters.py +0 -0
  25. {rara_tools-0.7.8 → rara_tools-0.7.10}/rara_tools/core_formatters/core_formatter.py +0 -0
  26. {rara_tools-0.7.8 → rara_tools-0.7.10}/rara_tools/core_formatters/formatted_keyword.py +0 -0
  27. {rara_tools-0.7.8 → rara_tools-0.7.10}/rara_tools/core_formatters/formatted_meta.py +0 -0
  28. {rara_tools-0.7.8 → rara_tools-0.7.10}/rara_tools/core_formatters/formatted_object.py +0 -0
  29. {rara_tools-0.7.8 → rara_tools-0.7.10}/rara_tools/decorators.py +0 -0
  30. {rara_tools-0.7.8 → rara_tools-0.7.10}/rara_tools/digar_schema_converter.py +0 -0
  31. {rara_tools-0.7.8 → rara_tools-0.7.10}/rara_tools/elastic.py +0 -0
  32. {rara_tools-0.7.8 → rara_tools-0.7.10}/rara_tools/exceptions.py +0 -0
  33. {rara_tools-0.7.8 → rara_tools-0.7.10}/rara_tools/normalizers/__init__.py +0 -0
  34. {rara_tools-0.7.8 → rara_tools-0.7.10}/rara_tools/parsers/marc_parsers/base_parser.py +0 -0
  35. {rara_tools-0.7.8 → rara_tools-0.7.10}/rara_tools/parsers/marc_parsers/ems_parser.py +0 -0
  36. {rara_tools-0.7.8 → rara_tools-0.7.10}/rara_tools/parsers/marc_parsers/location_parser.py +0 -0
  37. {rara_tools-0.7.8 → rara_tools-0.7.10}/rara_tools/parsers/marc_parsers/organization_parser.py +0 -0
  38. {rara_tools-0.7.8 → rara_tools-0.7.10}/rara_tools/parsers/marc_parsers/person_parser.py +0 -0
  39. {rara_tools-0.7.8 → rara_tools-0.7.10}/rara_tools/parsers/marc_parsers/title_parser.py +0 -0
  40. {rara_tools-0.7.8 → rara_tools-0.7.10}/rara_tools/parsers/marc_records/base_record.py +0 -0
  41. {rara_tools-0.7.8 → rara_tools-0.7.10}/rara_tools/parsers/marc_records/ems_record.py +0 -0
  42. {rara_tools-0.7.8 → rara_tools-0.7.10}/rara_tools/parsers/marc_records/organization_record.py +0 -0
  43. {rara_tools-0.7.8 → rara_tools-0.7.10}/rara_tools/parsers/marc_records/person_record.py +0 -0
  44. {rara_tools-0.7.8 → rara_tools-0.7.10}/rara_tools/parsers/marc_records/title_record.py +0 -0
  45. {rara_tools-0.7.8 → rara_tools-0.7.10}/rara_tools/parsers/tools/entity_normalizers.py +0 -0
  46. {rara_tools-0.7.8 → rara_tools-0.7.10}/rara_tools/parsers/tools/marc_converter.py +0 -0
  47. {rara_tools-0.7.8 → rara_tools-0.7.10}/rara_tools/parsers/tools/russian_transliterator.py +0 -0
  48. {rara_tools-0.7.8 → rara_tools-0.7.10}/rara_tools/s3.py +0 -0
  49. {rara_tools-0.7.8 → rara_tools-0.7.10}/rara_tools/task_reporter.py +0 -0
  50. {rara_tools-0.7.8 → rara_tools-0.7.10}/rara_tools/utils.py +0 -0
  51. {rara_tools-0.7.8 → rara_tools-0.7.10}/rara_tools.egg-info/SOURCES.txt +0 -0
  52. {rara_tools-0.7.8 → rara_tools-0.7.10}/rara_tools.egg-info/dependency_links.txt +0 -0
  53. {rara_tools-0.7.8 → rara_tools-0.7.10}/rara_tools.egg-info/requires.txt +0 -0
  54. {rara_tools-0.7.8 → rara_tools-0.7.10}/rara_tools.egg-info/top_level.txt +0 -0
  55. {rara_tools-0.7.8 → rara_tools-0.7.10}/requirements.txt +0 -0
  56. {rara_tools-0.7.8 → rara_tools-0.7.10}/setup.cfg +0 -0
  57. {rara_tools-0.7.8 → rara_tools-0.7.10}/tests/test_digar_schema_converter.py +0 -0
  58. {rara_tools-0.7.8 → rara_tools-0.7.10}/tests/test_elastic.py +0 -0
  59. {rara_tools-0.7.8 → rara_tools-0.7.10}/tests/test_elastic_vector_and_search_operations.py +0 -0
  60. {rara_tools-0.7.8 → rara_tools-0.7.10}/tests/test_entity_normalizers.py +0 -0
  61. {rara_tools-0.7.8 → rara_tools-0.7.10}/tests/test_formatters.py +0 -0
  62. {rara_tools-0.7.8 → rara_tools-0.7.10}/tests/test_marc_parsers.py +0 -0
  63. {rara_tools-0.7.8 → rara_tools-0.7.10}/tests/test_s3_exceptions.py +0 -0
  64. {rara_tools-0.7.8 → rara_tools-0.7.10}/tests/test_s3_file_operations.py +0 -0
  65. {rara_tools-0.7.8 → rara_tools-0.7.10}/tests/test_sierra_converters.py +0 -0
  66. {rara_tools-0.7.8 → rara_tools-0.7.10}/tests/test_task_reporter.py +0 -0
  67. {rara_tools-0.7.8 → rara_tools-0.7.10}/tests/test_utils.py +0 -0
  68. {rara_tools-0.7.8 → rara_tools-0.7.10}/tests/test_viaf_client.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: rara-tools
3
- Version: 0.7.8
3
+ Version: 0.7.10
4
4
  Summary: Tools to support Kata's work.
5
5
  Classifier: Programming Language :: Python :: 3
6
6
  Classifier: Programming Language :: Python :: 3.10
@@ -0,0 +1 @@
1
+ 0.7.10
@@ -1,5 +1,7 @@
1
1
  from pymarc import Indicators
2
- import os
2
+
3
+ YYMMDD_FORMAT = "%y%m%d"
4
+ YY_DD_FORMAT = "%Y-%m"
3
5
 
4
6
  class EntityType:
5
7
  PER = "PER"
@@ -11,13 +11,15 @@ class AuthoritiesRecordNormalizer(RecordNormalizer):
11
11
  """ Normalize authorities records """
12
12
 
13
13
  def __init__(self, linking_results: List[dict] = [], sierra_data: List[dict] = [],
14
- ALLOW_EDIT_FIELDS: List[str] = [
15
- "667", "925", "043"],
16
- REPEATABLE_FIELDS: List[str] = ["024", "035", "400", "670"]):
14
+ ALLOW_EDIT_FIELDS: List[str] = ["008", "925"],
15
+ REPEATABLE_FIELDS: List[str] = ["024", "035", "400", "670", "667"]):
17
16
 
18
17
  super().__init__(linking_results, sierra_data)
19
18
  self.ALLOW_EDIT_FIELDS = ALLOW_EDIT_FIELDS
20
19
  self.REPEATABLE_FIELDS = REPEATABLE_FIELDS
20
+ self.records_extra_data = []
21
+ self.sierra_data = sierra_data
22
+ self.records = self._setup_records(linking_results, sierra_data)
21
23
 
22
24
  def _normalize_sierra(self, record: Record, sierraID: str) -> None:
23
25
 
@@ -26,7 +28,6 @@ class AuthoritiesRecordNormalizer(RecordNormalizer):
26
28
  fields = [
27
29
  Field(
28
30
  tag="008",
29
- indicators=EMPTY_INDICATORS,
30
31
  data=f"{self.current_timestamp()}{suffix_008}"
31
32
  ),
32
33
 
@@ -50,22 +51,24 @@ class AuthoritiesRecordNormalizer(RecordNormalizer):
50
51
  return record
51
52
 
52
53
  def _add_birth_and_death_dates(self, record: Record, viaf_record: VIAFRecord) -> None:
54
+
55
+ formatted_birth_date = self._format_date(viaf_record.birth_date)
56
+ formatted_death_date = self._format_date(viaf_record.death_date) if viaf_record.death_date != 0 else ""
57
+
58
+ birth_date = self.get_subfield(
59
+ record, "046", "f", formatted_birth_date)
60
+ death_date = self.get_subfield(
61
+ record, "046", "g", formatted_death_date)
62
+
53
63
  subfields_046 = [
54
- Subfield("f", self.get_subfield(
55
- record, "046", "f", viaf_record.birth_date)),
56
- Subfield("g", self.get_subfield(
57
- record, "046", "g", viaf_record.death_date)),
58
- Subfield("s", self.get_subfield(
59
- record, "046", "s", viaf_record.activity_start)),
60
- Subfield("t", self.get_subfield(
61
- record, "046", "t", viaf_record.activity_end)),
64
+ Subfield("f", birth_date),
65
+ Subfield("g", death_date),
62
66
  ]
63
67
 
64
68
  self._add_fields_to_record(
65
69
  record, [Field(tag="046", indicators=EMPTY_INDICATORS, subfields=subfields_046)])
66
70
 
67
- def _add_viaf_url_and_isni(self, record: Record, viaf_record: VIAFRecord) -> None:
68
- # TODO 024. will be used to store KRATT KATA ID. Just generate one?
71
+ def _add_viaf_url_or_isni(self, record: Record, viaf_record: VIAFRecord) -> None:
69
72
  viaf_url = f"https://viaf.org/viaf/{viaf_record.viaf_id}"
70
73
 
71
74
  subfields = [Subfield("0", self.get_subfield(
@@ -80,17 +83,20 @@ class AuthoritiesRecordNormalizer(RecordNormalizer):
80
83
  self._add_fields_to_record(record, [field])
81
84
 
82
85
  def _add_nationality(self, record: Record, viaf_record: VIAFRecord) -> None:
86
+ """ Non-repeatable field 043 - adds ee only if is estonian nationality and
87
+ the records does not have the field already."""
88
+
89
+ is_person_est = self._is_person_est_nationality(viaf_record)
90
+
91
+ if is_person_est:
92
+ fields = [
93
+ Field(
94
+ tag="043",
95
+ indicators=EMPTY_INDICATORS,
96
+ subfields=[Subfield("c", "ee")])
97
+ ]
83
98
 
84
- fields = [
85
- Field(
86
- tag="043",
87
- indicators=EMPTY_INDICATORS,
88
- subfields=[
89
- Subfield("c", "ee")
90
- ] if self._is_person_est_nationality(viaf_record) else []
91
- )]
92
-
93
- self._add_fields_to_record(record, fields)
99
+ self._add_fields_to_record(record, fields)
94
100
 
95
101
  def _normalize_viaf(self, record: Record, viaf_record: VIAFRecord) -> None:
96
102
  """"
@@ -102,18 +108,17 @@ class AuthoritiesRecordNormalizer(RecordNormalizer):
102
108
  100, 110, 111 - non-repeatable field, attempts to add author type, if missing.
103
109
 
104
110
  """
105
- # TODO: include KRATT KATA ID to 024 and remove on delete. Increment last elastic ID?
106
111
  if not viaf_record:
107
112
  return
108
113
 
109
114
  self._add_nationality(record, viaf_record)
110
- self._add_viaf_url_and_isni(record, viaf_record)
115
+ self._add_viaf_url_or_isni(record, viaf_record)
111
116
  self._add_birth_and_death_dates(record, viaf_record)
112
117
  self._add_author(record, viaf_record)
113
118
 
114
119
  def _normalize_record(self, record: Record, sierraID: str,
115
120
  viaf_record: VIAFRecord, is_editing_existing_record: bool) -> Record:
116
-
121
+
117
122
  self._normalize_sierra(record, sierraID)
118
123
  self._normalize_viaf(record, viaf_record)
119
124
 
@@ -7,9 +7,12 @@ from rara_tools.normalizers.viaf import VIAFRecord, VIAFClient
7
7
  from rara_tools.constants.normalizers import (
8
8
  DEFAULT_VIAF_FIELD, ALLOWED_VIAF_FIELDS, ALLOWED_VIAF_WIKILINK_LANGS,
9
9
  VIAF_SIMILARITY_THRESHOLD, VERIFY_VIAF_RECORD, MAX_VIAF_RECORDS_TO_VERIFY,
10
- EMPTY_INDICATORS
10
+ EMPTY_INDICATORS, YYMMDD_FORMAT, YY_DD_FORMAT
11
11
  )
12
12
  from glom import glom
13
+ from dateutil import parser
14
+ from datetime import date
15
+
13
16
  import logging
14
17
  import json
15
18
 
@@ -18,7 +21,7 @@ logger = logging.getLogger(__name__)
18
21
 
19
22
  class RecordNormalizer:
20
23
  """
21
- Base class. For normalizing different record types corresponding classes have been created.
24
+ Base class for normalizing different record types corresponding classes have been created.
22
25
  By default existing record fields will not be changed, unless included in ALLOW_EDIT_FIELDS. If a field
23
26
  included in the normalization is not present, it will be added to the record. If under REPEATABLE_FIELDS.
24
27
  a new record field is added.
@@ -30,72 +33,15 @@ class RecordNormalizer:
30
33
  """
31
34
 
32
35
  def __init__(self, linking_results: List[dict] = [], sierra_data: List[dict] = [],
33
- ALLOW_EDIT_FIELDS: List[str] = ["667", "925"], REPEATABLE_FIELDS: List[str] = []):
36
+ ALLOW_EDIT_FIELDS: List[str] = ["925"], REPEATABLE_FIELDS: List[str] = ["667"]):
34
37
 
35
38
  # Include, if will replace existing field
36
39
  self.ALLOW_EDIT_FIELDS = ALLOW_EDIT_FIELDS
37
40
  # include, if should be added alongside existing fields
38
41
  self.REPEATABLE_FIELDS = REPEATABLE_FIELDS
39
- self.records_extra_data = []
40
- self.records = self._setup_records(linking_results, sierra_data)
41
- self.sierra_data = sierra_data
42
-
43
- def _setup_records(self, linking_results: List[dict], sierra_data: List[dict]) -> JSONReader:
44
- """Setup initial MARC records and data.
45
-
46
- For linked entities:
47
- 1. Try to get single linked normalized record from KATA elastic. If more than one found, skip.
48
- 2. If 0 matches, search from VIAF and if 1 result found, create a new authority record from the data.
49
- 3. If none or more than one responses found, use only Classificator data (coming from Linker?).
50
-
51
- for SIERRA records: normalize.
52
- """
53
- linked_records = []
54
-
55
- for linked in linking_results:
56
- entity = linked.get("original_entity")
57
- try:
58
- linked_info = linked.get("linked_info", [])
59
- linked_num = len(linked_info)
60
-
61
- if not linked_info:
62
- # new record will be created
63
- logger.info(
64
- f"No linked entities found for {entity}")
65
- continue
66
-
67
- if linked_num == 1:
68
- linked = linked_info[0]
69
- linked_records.append(linked.get("json", {}))
70
- self.records_extra_data.append({
71
- "entity": entity,
72
- "viaf": linked.get("viaf", {}),
73
- "type": "linked",
74
- "edited": True
75
- })
76
- else:
77
- # new record will be created
78
- logger.info(
79
- f"Multiple linked entities found for {entity}")
80
-
81
- except Exception as e:
82
- logger.error(f"Error processing entity {entity}: {e}")
83
-
84
- self.records_extra_data.extend(
85
- {
86
- "sierraID": obj.get("sierraID"),
87
- "type": "sierra",
88
- "edited": True
89
- }
90
- for obj in (sierra_data or [])
91
- )
92
-
93
- all_records = linked_records + (sierra_data or [])
94
-
95
- return SafeJSONReader(
96
- json.dumps(all_records, ensure_ascii=False),
97
- )
98
-
42
+ # leader applied to new records
43
+ self.DEFAULT_LEADER = "01682nz a2200349n 4500" # must be 24 digits
44
+
99
45
  def _setup_records(self, linking_results: List[dict], sierra_data: List[dict]) -> JSONReader:
100
46
  """Setup initial MARC records and data.
101
47
 
@@ -114,9 +60,34 @@ class RecordNormalizer:
114
60
  linked_info = linked.get("linked_info", [])
115
61
 
116
62
  if not isinstance(linked_info, list) or not linked_info:
63
+ # No linked entities found, create new record
64
+ logger.info(
65
+ f"No linked entities found for {entity}, Creating new record.")
66
+ linked_records.append({
67
+ "leader": self.DEFAULT_LEADER,
68
+ "fields": []
69
+ })
70
+ self.records_extra_data.append({
71
+ "entity": entity,
72
+ "edited": False
73
+ })
117
74
  continue
75
+
76
+ if len(linked_info) > 1:
77
+ # Multiple linked entities found, create new record
78
+ logger.info(
79
+ f"Multiple linked entities found for {entity}. Creating new record.")
80
+ linked_records.append({
81
+ "leader": self.DEFAULT_LEADER,
82
+ "fields": []
83
+ })
84
+ self.records_extra_data.append({
85
+ "entity": entity,
86
+ "edited": False
87
+ })
88
+ continue
118
89
 
119
- if len(linked_info) == 1:
90
+ elif len(linked_info) == 1:
120
91
  linked_item = linked_info[0]
121
92
  if not isinstance(linked_item, dict):
122
93
  continue
@@ -128,7 +99,8 @@ class RecordNormalizer:
128
99
  "type": "linked",
129
100
  "edited": True
130
101
  })
131
-
102
+ continue
103
+
132
104
  self.records_extra_data.extend(
133
105
  {
134
106
  "sierraID": obj.get("sierraID"),
@@ -138,25 +110,25 @@ class RecordNormalizer:
138
110
  for obj in (sierra_data or [])
139
111
  if isinstance(obj, dict)
140
112
  )
141
-
113
+
142
114
  all_records = linked_records + (sierra_data or [])
143
-
115
+
144
116
  return SafeJSONReader(json.dumps(all_records, ensure_ascii=False))
145
117
 
146
118
  @staticmethod
147
119
  def current_timestamp():
148
- """6 digit timestamp."""
149
- return datetime.now().strftime("%H%M%S")
120
+ """6 digit timestamp, format YYMMDD"""
121
+ return datetime.now().strftime(YYMMDD_FORMAT)
150
122
 
151
123
  @staticmethod
152
124
  def current_yyyy_dd():
153
125
  """format of 2025-03"""
154
- return datetime.now().strftime("%Y-%m")
126
+ return datetime.now().strftime(YY_DD_FORMAT)
155
127
 
156
128
  @staticmethod
157
129
  def _is_person_est_nationality(viaf_record: VIAFRecord) -> bool:
158
- return viaf_record.nationality == "ee"
159
-
130
+ return hasattr(viaf_record, 'nationality') and viaf_record.nationality == "ee"
131
+
160
132
  def _is_nxx(self, field: Field, n: str):
161
133
  """ Check if fields tag is in nxx range. """
162
134
  return field.tag.startswith(n)
@@ -173,6 +145,27 @@ class RecordNormalizer:
173
145
  def _filter_equivalent_field_not_in_record(self, record: Record, fields: List[Field]) -> bool:
174
146
  """ filter out fields, that do not have an equivalent in the record. """
175
147
  return filter(lambda field: not self._field_in_record(field, record), fields)
148
+
149
+ def _format_date(self, value: str) -> str:
150
+
151
+ if not value:
152
+ return ""
153
+
154
+ if isinstance(value, (datetime, date)):
155
+ return value.strftime("%Y%m%d")
156
+
157
+ val = str(value).strip()
158
+
159
+ try:
160
+ dt = parser.parse(val, fuzzy=False, default=datetime(1, 1, 1))
161
+ except Exception:
162
+ return ""
163
+
164
+ if len(val) == 4 and val.isdigit():
165
+ return dt.strftime("%Y") # YYYY
166
+ if len(val) in (6, 7): # YYYYMM or YYYY-MM
167
+ return dt.strftime("%Y%m") # YYYYMM
168
+ return dt.strftime("%Y%m%d") # YYYYMMDD
176
169
 
177
170
  def get_subfield(self, record: Record, tag: str, subfield: str, default: str) -> str:
178
171
  """ get record existing subfield value or assign a fallback value. """
@@ -220,7 +213,10 @@ class RecordNormalizer:
220
213
  )
221
214
 
222
215
  def _add_fields_to_record(self, record: Record, fields: List[Field]) -> Record:
223
-
216
+ # filter out subfields that are empty, or 0, as VIAF returns 0 for unknown dates
217
+ for field in fields:
218
+ field.subfields = [sub for sub in field.subfields if sub.value and sub.value not in ["0", 0]]
219
+
224
220
  self._handle_repeatable_fields(record, *fields)
225
221
  self._handle_editable_fields(record, *fields)
226
222
  self._handle_default_fields(record, *fields)
@@ -247,31 +243,63 @@ class RecordNormalizer:
247
243
  indicators=EMPTY_INDICATORS,
248
244
  subfields=[
249
245
  Subfield("a", viaf_record.name),
250
- Subfield("b", viaf_record.name_type), # Is this correct??
251
- Subfield("c", viaf_record.name_type) # Is this correct??
252
246
  ]
253
247
  )
254
248
  ]
255
249
 
256
250
  self._add_fields_to_record(record, fields)
251
+
252
+ def _move680_fields_to_667(self, record: Record) -> None:
253
+ """ Move existing 680 fields to 667, if any. """
254
+ fields_680 = record.get_fields("680")
255
+ if not fields_680:
256
+ return
257
+
258
+ fields_667 = [
259
+ Field(
260
+ tag="667",
261
+ indicators=EMPTY_INDICATORS,
262
+ subfields=field.subfields
263
+ ) for field in fields_680
264
+ ]
257
265
 
258
- def _normalize_common(self, record: Record, is_editing_existing_record: bool) -> None:
259
- """Common logic for all normalizations. """
266
+ record.remove_fields("680")
267
+ self._add_fields_to_record(record, fields_667)
260
268
 
269
+ def _normalize_common(self, record: Record, is_editing_existing_record: bool) -> None:
270
+ """Common logic for all normalizations.
271
+ - Includes note about record being created/edited.
272
+ - include date note with a different subfield, depending on if record is new or edited.
273
+ - move existing 680 fields to 667
274
+ """
275
+ # before adding new notes
276
+ self._move680_fields_to_667(record)
277
+
261
278
  note = "Muudetud AI poolt" if is_editing_existing_record else "Loodud AI poolt"
262
279
  date_note = f"KRATT {self.current_yyyy_dd()}"
263
280
 
264
- fields = [
265
- Field(tag="667",
266
- indicators=EMPTY_INDICATORS,
267
- subfields=[Subfield("a", note)]),
268
- Field(tag="925",
269
- indicators=EMPTY_INDICATORS,
270
- subfields=[Subfield("t", self.get_subfield(record, "925", "t", date_note))
271
- ] + ([Subfield("p", self.get_subfield(record, "925", "p", date_note))]
272
- if is_editing_existing_record else []))
273
- ]
274
-
281
+ field_667 = Field(tag="667",
282
+ indicators=EMPTY_INDICATORS,
283
+ subfields=[Subfield("a", note)])
284
+
285
+ fields = [field_667]
286
+
287
+ if is_editing_existing_record:
288
+ field_925 = Field(tag="925",
289
+ indicators=EMPTY_INDICATORS,
290
+ subfields=[
291
+ Subfield("p", self.get_subfield(record, "925", "p", date_note))
292
+ ])
293
+ fields.append(field_925)
294
+
295
+ else:
296
+ field_925 = Field(tag="925",
297
+ indicators=EMPTY_INDICATORS,
298
+ subfields=[
299
+ Subfield("t", self.get_subfield(record, "925", "t", date_note))
300
+ ])
301
+ fields.append(field_925)
302
+
275
303
  self._add_fields_to_record(record, fields)
276
304
 
277
305
  return record
@@ -1,4 +1,4 @@
1
- from pymarc import (Field, Indicators, Subfield, Record)
1
+ from pymarc import (Field, Subfield, Record)
2
2
 
3
3
  from rara_tools.constants import EMPTY_INDICATORS
4
4
  from rara_tools.normalizers.viaf import VIAFRecord
@@ -11,32 +11,25 @@ class BibRecordNormalizer(RecordNormalizer):
11
11
  """ Normalize bib records. """
12
12
 
13
13
  def __init__(self, linking_results: List[dict] = [], sierra_data: List[dict] = [],
14
- ALLOW_EDIT_FIELDS: List[str] = ["667", "925"],
15
- REPEATABLE_FIELDS: List[str] = []):
14
+ ALLOW_EDIT_FIELDS: List[str] = ["008", "925"],
15
+ REPEATABLE_FIELDS: List[str] = ["667"]):
16
16
  super().__init__(linking_results, sierra_data)
17
+ self.DEFAULT_LEADER = "00399nz a2200145n 4500" # must be 24 digits
17
18
  self.ALLOW_EDIT_FIELDS = ALLOW_EDIT_FIELDS
18
19
  self.REPEATABLE_FIELDS = REPEATABLE_FIELDS
20
+
21
+ self.records_extra_data = []
22
+ self.sierra_data = sierra_data
23
+ self.records = self._setup_records(linking_results, sierra_data)
19
24
 
20
25
  def _normalize_sierra(self, record: Record) -> Record:
26
+
27
+ suffix_008 = "|||aznnnaabn || ||| "
28
+
21
29
  fields = [
22
30
  Field(
23
31
  tag="008",
24
- indicators=EMPTY_INDICATORS,
25
- data=f"{self.current_timestamp()} | | | aznnnaabn | | | | |"
26
- ),
27
- Field(
28
- tag="046",
29
- indicators=EMPTY_INDICATORS,
30
- subfields=[
31
- Subfield("k", "Pub date")
32
- ]
33
- ),
34
- Field(
35
- tag="245",
36
- indicators=Indicators("1", "0"),
37
- subfields=[
38
- Subfield("a", "Title")
39
- ]
32
+ data=f"{self.current_timestamp()}{suffix_008}"
40
33
  ),
41
34
  ]
42
35
 
@@ -55,14 +48,8 @@ class BibRecordNormalizer(RecordNormalizer):
55
48
  subfields=[
56
49
  Subfield("a", viaf_id)
57
50
  ]
58
- ),
59
- Field(
60
- tag="100",
61
- indicators=EMPTY_INDICATORS,
62
- subfields=[
63
- Subfield("a", "?")
64
- ]
65
- )]
51
+ )
52
+ ]
66
53
 
67
54
  self._add_fields_to_record(record, fields)
68
55
  self._add_author(record, viaf_record)
@@ -3,7 +3,7 @@ import logging
3
3
 
4
4
  logger = logging.getLogger(__name__)
5
5
 
6
- DEFAULT_LEADER = '01682nz a2200349n 4500'
6
+ DEFAULT_LEADER = "01682nz a2200349n 4500" # must be 24 digits
7
7
 
8
8
  class SafeJSONReader(JSONReader):
9
9
 
@@ -10,6 +10,7 @@ from rara_tools.constants.normalizers import (
10
10
  DEFAULT_VIAF_FIELD, ALLOWED_VIAF_FIELDS, ALLOWED_VIAF_WIKILINK_LANGS,
11
11
  VIAF_SIMILARITY_THRESHOLD, VIAF_ALLOWED_SOURCES
12
12
  )
13
+ from glom import glom
13
14
 
14
15
  import logging
15
16
  logger = logging.getLogger(__name__)
@@ -598,11 +599,13 @@ class VIAFClient:
598
599
  """
599
600
  logger.debug("Extracting VIAF IDs from VIAF search query results.")
600
601
  try:
601
- records = search_query_response.json()["queryResult"]["records"]["record"]
602
+ res_json = search_query_response.json()
603
+ records = glom(res_json, "queryResult.records.record", default=[])
604
+
602
605
  except Exception as e:
603
606
  logger.error(
604
607
  f"Parsing records from search query " \
605
- f"response failed with error: {e}."
608
+ f"failed with error: {e}."
606
609
  )
607
610
  records = []
608
611
  viaf_ids = []
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: rara-tools
3
- Version: 0.7.8
3
+ Version: 0.7.10
4
4
  Summary: Tools to support Kata's work.
5
5
  Classifier: Programming Language :: Python :: 3
6
6
  Classifier: Programming Language :: Python :: 3.10