rara-tools 0.4.3__py3-none-any.whl → 0.5.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of rara-tools might be problematic. Click here for more details.

@@ -35,6 +35,12 @@ class PersonMarcIDs:
35
35
  SOURCE = ["670"]
36
36
  DESCRIPTION = ["680"]
37
37
 
38
class TitleMarcIDs:
    """MARC field tags that title records are parsed from.

    Every constant is a list of MARC tag strings.
    """

    # Tag holding the title name.
    NAME = ["100"]
    # Tag holding alternative forms of the title name.
    NAME_VARIATIONS = ["400"]
    # Tag holding the year.
    YEAR = ["046"]
    # Tag holding the type.
    TYPE = ["075"]
43
+
38
44
  class KeywordType:
39
45
  LOC = "Kohamärksõnad"
40
46
  TIME = "Ajamärksõnad"
rara_tools/converters.py CHANGED
@@ -14,9 +14,46 @@ class SierraResponseConverter:
14
14
  def _map_control_fields(field: dict) -> dict:
15
15
  # for tags < 010, no subfields, instead one str value in "value"
16
16
  return {field["tag"]: field["value"]}
17
+
18
+ def _map_data_field_subfields(self, subfields: list) -> list:
19
+ """ Maps data field subfields to the standardised format, if necessary.
17
20
 
18
- @staticmethod
19
- def _map_data_fields(field: dict) -> dict:
21
+ Args:
22
+ subfields (list): List of subfields in format e.g
23
+ "subfields": [
24
+ {
25
+ "code": "a",
26
+ "data": "foo"
27
+ },
28
+ {
29
+ "code": "c",
30
+ "data": "by me."
31
+ }
32
+ ]
33
+
34
+ Returns:
35
+ list: standardised marc-in-json format. e.g
36
+ "subfields": [
37
+ { "a": "foo /" },
38
+ { "c": "by me." }
39
+ ]
40
+ """
41
+ result = []
42
+ for subfield in subfields:
43
+
44
+ code = subfield.get("code")
45
+ data = subfield.get("data")
46
+
47
+ if not code or not data:
48
+ # assume that the subfield is already in the correct format
49
+ return subfields
50
+ else:
51
+ result.append({code: data})
52
+
53
+ return result
54
+
55
+
56
+ def _map_data_fields(self, field: dict) -> dict:
20
57
  """ Maps marc fields > 010.
21
58
 
22
59
  Args:
@@ -27,12 +64,13 @@ class SierraResponseConverter:
27
64
  """
28
65
 
29
66
  data = field["data"]
67
+ subfields = data.get("subfields", [])
30
68
 
31
69
  # Order matters ind1, in2, subfields
32
70
  field_data = {
33
71
  "ind1": data.get("ind1", " "),
34
72
  "ind2": data.get("ind2", " "),
35
- "subfields": data.get("subfields", [])
73
+ "subfields": self._map_data_field_subfields(subfields)
36
74
  }
37
75
 
38
76
  return {field["tag"]: field_data}
@@ -41,7 +79,7 @@ class SierraResponseConverter:
41
79
  def _is_marc21structured(field: dict) -> bool:
42
80
  """Checks if the field is already structured according to MARC21 in JSON"""
43
81
  return any(key.isdigit() for key in field.keys())
44
-
82
+
45
83
  def _handle_field_type(self, field: dict) -> dict:
46
84
 
47
85
  if self._is_marc21structured(field):
@@ -1 +1,44 @@
1
- # Coming soon
1
+ from typing import NoReturn
2
+ from collections.abc import Iterator
3
+ from rara_tools.parsers.marc_parsers.base_parser import BaseMARCParser
4
+ from rara_tools.parsers.marc_records.title_record import TitleRecord
5
+ from rara_tools.constants.parsers import LOGGER
6
+
7
+
8
class TitlesMARCParser(BaseMARCParser):
    """ MARC parser for titles' .mrc files.
    """
    def __init__(self,
            marc_file_path: str,
            add_variations: bool = True
    ) -> None:
        """ Initializes TitlesMARCParser object.

        Parameters
        -----------
        marc_file_path: str
            Full path to .mrc file containing titles' data.
        add_variations: bool
            If enabled, constructs an additional variations field, which
            combines the content of multiple fields + adds some generated
            variations. If the output is uploaded into Elastic and used
            via rara-norm-linker, it is necessary to enable this.
        """
        super().__init__(
            marc_file_path=marc_file_path,
            add_variations=add_variations
        )

    def record_generator(self) -> Iterator[dict]:
        """ Generates simplified title records.

        Yields
        -----------
        dict
            The `full_record` of a TitleRecord built from each MARC
            record produced by `marc_record_generator`.
        """
        LOGGER.info(
            f"Generating title records from "
            f"MARC dump '{self.marc_file_path}'."
        )
        for record in self.marc_record_generator():
            title_record = TitleRecord(
                record=record,
                add_variations=self.add_variations
            )
            yield title_record.full_record
@@ -61,7 +61,7 @@ class BaseRecord:
61
61
  if isinstance(subfield_id, str):
62
62
  if _subfield_id == subfield_id:
63
63
  value = subfield[_subfield_id]
64
- values.append(value)
64
+ values.append(value.strip())
65
65
  elif isinstance(subfield_id, list):
66
66
  if _subfield_id in subfield_id:
67
67
  value = subfield[_subfield_id]
@@ -1 +1,196 @@
1
- # Coming soon
1
+ from typing import List, NoReturn
2
+ from pymarc.record import Record
3
+ from rara_tools.parsers.marc_records.base_record import BaseRecord
4
+ from rara_tools.constants.parsers import TitleMarcIDs, LOGGER
5
+ import regex as re
6
+ import json
7
+ import logging
8
+
9
+
10
class TitleRecord(BaseRecord):
    """ Generates a simplified title JSON record
    from a pymarc MARC record.
    """
    def __init__(self, record: Record, add_variations: bool = False) -> None:
        """ Initializes TitleRecord object.

        Parameters
        -----------
        record: Record
            pymarc.record.Record object.
        add_variations: bool
            If enabled, constructs an additional variations field, which
            combines the content of multiple fields + adds some generated
            variations. If the output is uploaded into Elastic and used
            via rara-norm-linker, it is necessary to enable this.
        """
        super().__init__(record=record, add_variations=add_variations)

        self.__name_field_id: List[str] = TitleMarcIDs.NAME
        self.__name_variations_field_id: List[str] = TitleMarcIDs.NAME_VARIATIONS
        self.__year_field_id: List[str] = TitleMarcIDs.YEAR
        self.__type_field_id: List[str] = TitleMarcIDs.TYPE

        # Value used when a year string cannot be parsed.
        self.__default_year: int | None = None

        # Lazily computed values; "" / {} / [] / -1 mean "not computed yet".
        self.__name: str = ""
        self.__author_original_name: dict = {}
        self.__author_name: str = ""
        self.__year: str | int | None = -1
        self.__type: str = ""

        self.__author_life_years: str = ""
        self.__author_birth_year: int | None = -1
        self.__author_death_year: int | None = -1
        # Set once the life years have been split, so a missing or
        # malformed value is parsed (and reported) a single time only.
        self.__life_years_parsed: bool = False
        self.__name_variations: List[str] = []
        self.__full_record: dict = {}

    def _parse_year(self, year: str) -> int | None:
        """ Extracts a year from the beginning of a string.

        Parameters
        -----------
        year: str
            Raw year string; surrounding whitespace is ignored.

        Returns
        -----------
        int | None
            The first four characters as an integer if they are all
            decimal digits, a three-digit string as an integer, and
            the default year (None) otherwise.
        """
        year = year.strip()
        # isdecimal() rather than isnumeric(): the latter also accepts
        # characters such as fractions that int() cannot convert.
        if len(year) >= 4:
            if year[:4].isdecimal():
                return int(year[:4])
        elif len(year) == 3 and year.isdecimal():
            return int(year)
        return self.__default_year

    def __parse_life_years(self) -> None:
        """ Splits the author's life years into birth and death year.

        Runs at most once per record. An empty life years value is not
        an error and leaves both years at -1; a value that does not
        split into exactly two parts on "-" is logged and also leaves
        both years at -1.
        """
        if self.__life_years_parsed:
            return
        self.__life_years_parsed = True

        life_years = self.author_life_years
        if not life_years:
            return
        # Best effort: a malformed value must not abort the whole dump.
        try:
            birth_year, death_year = life_years.split("-")
            self.__author_birth_year = self._parse_year(birth_year)
            self.__author_death_year = self._parse_year(death_year)
        except Exception as e:
            LOGGER.error(
                f"Failed extracting birth and/or death year "
                f"from '{life_years}' with the following "
                f"exception: '{e}'."
            )

    @property
    def name(self) -> str:
        """ Cleaned title name (subfield "t" of the name field), or ""
        if the record has none.
        """
        if not self.__name:
            values = self.get_values(
                marc_ids=self.__name_field_id,
                subfield_id="t"
            )
            if values:
                self.__name = self._clean_value(values[0])
        return self.__name

    @property
    def year(self) -> str | None:
        """ Cleaned value of subfield "k" of the year field, or None
        if the record has none.
        """
        if self.__year == -1:
            values = self.get_values(
                marc_ids=self.__year_field_id,
                subfield_id="k"
            )
            self.__year = self._clean_value(values[0]) if values else None
        return self.__year

    @property
    def type(self) -> str:
        """ Cleaned value of subfield "a" of the type field, or ""
        if the record has none.
        """
        if not self.__type:
            values = self.get_values(
                marc_ids=self.__type_field_id,
                subfield_id="a"
            )
            self.__type = self._clean_value(values[0]) if values else ""
        return self.__type

    @property
    def author_original_name(self) -> dict:
        """ Cleaned subfields "a" and "b" of the name field as a dict
        with keys "a" and "b", or an empty dict if the record has none.
        """
        if not self.__author_original_name:
            values = self.get_values(
                marc_ids=self.__name_field_id,
                subfield_id=["a", "b"]
            )
            if values:
                self.__author_original_name = {
                    "a": self._clean_value(values[0].get("a", "")),
                    "b": self._clean_value(values[0].get("b", ""))
                }
        return self.__author_original_name

    @property
    def author_name(self) -> str:
        """ Author name merged from subfields "a" and "b" of the
        original name.
        """
        if not self.__author_name:
            self.__author_name = self._merge_and_clean(
                self.author_original_name, ["a", "b"]
            )
        return self.__author_name

    @property
    def author_life_years(self) -> str:
        """ Cleaned value of subfield "d" of the name field, or ""
        if the record has none.
        """
        if not self.__author_life_years:
            values = self.get_values(
                marc_ids=self.__name_field_id,
                subfield_id="d"
            )
            self.__author_life_years = self._clean_value(values[0]) if values else ""
        return self.__author_life_years

    @property
    def author_birth_year(self) -> int | None:
        """ Birth year parsed from the life years; -1 if the life years
        are missing or malformed, None if the birth part is not a year.
        """
        self.__parse_life_years()
        return self.__author_birth_year

    @property
    def author_death_year(self) -> int | None:
        """ Death year parsed from the life years; -1 if the life years
        are missing or malformed, None if the death part is not a year.
        """
        self.__parse_life_years()
        return self.__author_death_year

    @property
    def name_variations(self) -> List[str]:
        """ Lower-cased, de-duplicated title name plus the cleaned
        values of subfield "t" of the name variations field.
        """
        if not self.__name_variations:
            values = self.get_values(
                marc_ids=self.__name_variations_field_id,
                subfield_id="t"
            )
            variations = [self.name]
            if values:
                variations.extend(
                    self._clean_value(value) for value in values
                )
            self.__name_variations = list({v.lower() for v in variations})
        return self.__name_variations

    @property
    def full_record(self) -> dict:
        """ The complete simplified title record as a dict.

        "link_variations" is added only if add_variations is enabled.
        """
        if not self.__full_record:
            self.__full_record = {
                "name": self.name,
                "author_name": self.author_name,
                "year": self.year,
                "type": self.type,
                "author_life_years": self.author_life_years,
                "author_birth_year": self.author_birth_year,
                "author_death_year": self.author_death_year,
                # Misspelled key emitted by earlier code; kept as an
                # alias so existing consumers keep working.
                "auhtor_death_year": self.author_death_year,
                "name_variations": self.name_variations,
                "full_record_marc": str(self.marc_record),
                "full_record_json": json.dumps(self.marc_json_record)
            }
            if self.add_variations:
                self.__full_record.update(
                    {"link_variations": self.name_variations}
                )

        return self.__full_record
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: rara-tools
3
- Version: 0.4.3
3
+ Version: 0.5.0
4
4
  Summary: Tools to support Kata's work.
5
5
  Classifier: Programming Language :: Python :: 3
6
6
  Classifier: Programming Language :: Python :: 3.10
@@ -1,4 +1,4 @@
1
- rara_tools/converters.py,sha256=_1ZRH4ACLOolI1G5b_aSssN68rWOvan-q2dTq7D7-j4,2794
1
+ rara_tools/converters.py,sha256=a1dEMa0TwcO9UmjuSBkiuc7LGmH0d_dB6wwoTLpdZhI,4040
2
2
  rara_tools/decorators.py,sha256=MjOyvZ5nTkwxwx2JLFEGpKKBysvecFw6EN6UDrSvZLU,2187
3
3
  rara_tools/digar_schema_converter.py,sha256=k95U2iRlEA3sh772-v6snhHW6fju6qSTMnvWJ6DpzZk,14254
4
4
  rara_tools/elastic.py,sha256=dw61Z6SxhItNqN35m_3UBy41ppRMhBQLqYriZRo6zGA,13513
@@ -11,7 +11,7 @@ rara_tools/constants/digitizer.py,sha256=MND0dUQySBAOVWzuUBxQGZWv_Ckdz2jCp25F2_o
11
11
  rara_tools/constants/general.py,sha256=aVUQTMss89atAkTDZKJXNdnsBHPX-RSrlBOtt-wdPGU,195
12
12
  rara_tools/constants/linker.py,sha256=XUI-fD1LfvpdMDeLmMU3siAsc0pleQ92m6Cdk3_OGmo,169
13
13
  rara_tools/constants/normalizers.py,sha256=GmWY89kYfX7_YJ8sdy1vb8ABJc_ABdw_zVVOxd9UZgY,171
14
- rara_tools/constants/parsers.py,sha256=stXOyA1dEOgxdCUT4Mp4pvvGLmdE7DAjTe8Jq71tcS4,5453
14
+ rara_tools/constants/parsers.py,sha256=L6nh1Itget9_9DMsliDkh6T25z78eMFPWVkbaU08DwU,5561
15
15
  rara_tools/normalizers/__init__.py,sha256=_NqpS5w710DhaURytHq9JpEt8HgYpSPfRDcOtOymJgE,193
16
16
  rara_tools/normalizers/authorities.py,sha256=IDtcm0yNZNhv1f-WcdqWFSRzZk_CoKuBFsk6hEPddWM,4513
17
17
  rara_tools/normalizers/base.py,sha256=taOboGURQF_ACPVWHX_wMsaDEo8gYdAkiOw0yT0zzR8,10910
@@ -22,17 +22,17 @@ rara_tools/parsers/marc_parsers/ems_parser.py,sha256=LFuhZcVwmHMcJknX9p4ZkO8RdjP
22
22
  rara_tools/parsers/marc_parsers/location_parser.py,sha256=dSU9dQoGV5z0ajhLI1bn3AAghkOr79qKIrX7sO0_4lA,1873
23
23
  rara_tools/parsers/marc_parsers/organization_parser.py,sha256=faqQEYsut_ZF3kX1QycTnbRIqC7W8sULxmG75ICfya8,1629
24
24
  rara_tools/parsers/marc_parsers/person_parser.py,sha256=iMycHSlgfvgB0axE_rneB5sImVlc920FcBnTsUsmVW4,1582
25
- rara_tools/parsers/marc_parsers/title_parser.py,sha256=0FnX1kl9InELlSqMGECjswEbhP-sKl55TuhV05RhWSw,14
26
- rara_tools/parsers/marc_records/base_record.py,sha256=oDp4yjPMEmSD3F_dWIdx7IRtZfKwD7ydMFUW9YXAhSQ,4322
25
+ rara_tools/parsers/marc_parsers/title_parser.py,sha256=uZiYb_aZWzv_xLEBSZmFt2vN6UIauNSFRCkNG_ZKL10,1570
26
+ rara_tools/parsers/marc_records/base_record.py,sha256=05XW1oQ5fCJWxBpmBFwGVGLChGE0P605HNUdvXGiif8,4330
27
27
  rara_tools/parsers/marc_records/ems_record.py,sha256=B2YZLEeDd-GmmYqxhczbMsSEB7-x6ZLjB8OeDnzOxww,9376
28
28
  rara_tools/parsers/marc_records/organization_record.py,sha256=HmDqAqAL_Tw7ppEsS5HfogrfNuQMNChCkrdPu6K-SUE,9141
29
29
  rara_tools/parsers/marc_records/person_record.py,sha256=BZrXqd7hCOqm-c-sjmsOfaAI4L7lLSjIUWtxHqPjhTs,7863
30
- rara_tools/parsers/marc_records/title_record.py,sha256=0FnX1kl9InELlSqMGECjswEbhP-sKl55TuhV05RhWSw,14
30
+ rara_tools/parsers/marc_records/title_record.py,sha256=NyrubWvouZEb46vaoy9NHLCznobz5avqaNbQCeIEuxI,6851
31
31
  rara_tools/parsers/tools/entity_normalizers.py,sha256=afOMqJoL4aeq0cfsohIuxkxzvqNdZ_ba7U32eyogbzk,8722
32
32
  rara_tools/parsers/tools/marc_converter.py,sha256=PUbggzJ_wHfke_bHTF2LOZyzX1t0wRM8qIFL36Dl3AI,414
33
33
  rara_tools/parsers/tools/russian_transliterator.py,sha256=5ZU66iTqAhr7pmfVqXPAI_cidF43VqqmuN4d7H4_JuA,9770
34
- rara_tools-0.4.3.dist-info/licenses/LICENSE.md,sha256=hkZVnIZll7e_KNEQzeY94Y9tlzVL8iVZBTMBvDykksU,35142
35
- rara_tools-0.4.3.dist-info/METADATA,sha256=5cvQf4-fk2OhU1uF7xYg-UhIWHYma9MfCB_7d8XwsJ0,4054
36
- rara_tools-0.4.3.dist-info/WHEEL,sha256=pxyMxgL8-pra_rKaQ4drOZAegBVuX-G_4nRHjjgWbmo,91
37
- rara_tools-0.4.3.dist-info/top_level.txt,sha256=JwfB5b8BAtW5OFKRln2AQ_WElTRyIBM4nO0FKN1cupY,11
38
- rara_tools-0.4.3.dist-info/RECORD,,
34
+ rara_tools-0.5.0.dist-info/licenses/LICENSE.md,sha256=hkZVnIZll7e_KNEQzeY94Y9tlzVL8iVZBTMBvDykksU,35142
35
+ rara_tools-0.5.0.dist-info/METADATA,sha256=BluQzPlq1P9dpwdskMNDEPYtZwbAsbLQbbYybU4YktI,4054
36
+ rara_tools-0.5.0.dist-info/WHEEL,sha256=0CuiUZ_p9E4cD6NyLD6UG80LBXYyiSYZOKDm5lp32xk,91
37
+ rara_tools-0.5.0.dist-info/top_level.txt,sha256=JwfB5b8BAtW5OFKRln2AQ_WElTRyIBM4nO0FKN1cupY,11
38
+ rara_tools-0.5.0.dist-info/RECORD,,
@@ -1,5 +1,5 @@
1
1
  Wheel-Version: 1.0
2
- Generator: setuptools (79.0.0)
2
+ Generator: setuptools (80.3.1)
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
5
5