rara-tools 0.7.16__py3-none-any.whl → 0.7.18__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of rara-tools might be problematic. Click here for more details.

@@ -77,7 +77,7 @@ class PageSchema:
77
77
  self.__schema = {
78
78
  "@type": "CreativeWork", # CONSTANT for pages
79
79
  "@id": self.page_id,
80
- "hasPart": []
80
+ "dcterms:hasPart": []
81
81
  }
82
82
  text_schemas = [
83
83
  TextPageSchema(page).schema
@@ -91,7 +91,7 @@ class PageSchema:
91
91
  page_schemas = text_schemas + image_schemas
92
92
  page_schemas_with_ids = self._add_segment_ids(page_schemas)
93
93
 
94
- self.__schema["hasPart"].extend(page_schemas_with_ids)
94
+ self.__schema["dcterms:hasPart"].extend(page_schemas_with_ids)
95
95
 
96
96
  return self.__schema
97
97
 
@@ -3,6 +3,8 @@ from pymarc import (Field, Subfield, JSONReader, Record)
3
3
  from typing import List, Optional, Iterator
4
4
  from rara_tools.normalizers.reader import SafeJSONReader
5
5
 
6
+ from rara_tools.parsers.tools.validators import filter_names
7
+
6
8
  from rara_tools.normalizers.viaf import VIAFRecord, VIAFClient
7
9
  from rara_tools.constants.normalizers import (
8
10
  DEFAULT_VIAF_FIELD, ALLOWED_VIAF_FIELDS, ALLOWED_VIAF_WIKILINK_LANGS,
@@ -311,25 +313,34 @@ class RecordNormalizer:
311
313
  if viaf_record:
312
314
  self._include_name_variations(record, viaf_record)
313
315
 
314
- def _include_name_variations(self, record: Record, viaf_record: VIAFRecord) -> None:
316
+ def _include_name_variations(self, record: Record, viaf_record: VIAFRecord, filter_variations=True) -> None:
315
317
  """ Include name variations from VIAF record as 400|t fields """
316
318
 
317
319
  if not viaf_record or not viaf_record.name_variations:
318
320
  return
319
321
 
320
322
  existing_name_variations = record.get_fields("400")
321
- existing_variations = [sf.value for field in existing_name_variations for sf in field.get_subfields("t")]
323
+ existing_variations = [sf.value for field in existing_name_variations for sf in field.get_subfields("a")]
324
+
325
+ if filter_variations:
326
+ allowed_variations = filter_names(viaf_record.name_variations)
327
+ logger.debug(
328
+ f"filtered out {len(viaf_record.name_variations) - len(allowed_variations)} name variations for '{viaf_record.name}'"
329
+ )
330
+
331
+ else:
332
+ allowed_variations = viaf_record.name_variations
322
333
 
323
334
  fields = []
324
335
 
325
- for variation in viaf_record.name_variations:
336
+ for variation in allowed_variations:
326
337
  if variation not in existing_variations:
327
338
  fields.append(
328
339
  Field(
329
340
  tag="400",
330
341
  indicators=EMPTY_INDICATORS,
331
342
  subfields=[
332
- Subfield("t", variation)
343
+ Subfield("a", variation)
333
344
  ]
334
345
  )
335
346
  )
@@ -73,26 +73,13 @@ class BibRecordNormalizer(RecordNormalizer):
73
73
 
74
74
 
75
75
  def _normalize_viaf(self, record: Record, viaf_record: VIAFRecord, original_entity: str) -> None:
76
-
77
76
  if not viaf_record:
78
77
  # viaf record not found, include original entity as 100|t
79
78
  self._add_author(record, viaf_record=None, original_entity=original_entity)
80
79
  return record
81
-
82
- viaf_id = viaf_record.viaf_id
83
- fields = [
84
- Field(
85
- tag="035",
86
- indicators=EMPTY_INDICATORS,
87
- subfields=[
88
- Subfield("a", viaf_id)
89
- ]
90
- )
91
- ]
92
-
93
- self._add_fields_to_record(record, fields)
80
+
94
81
  self._add_author(record, viaf_record, original_entity=original_entity)
95
-
82
+
96
83
  def _normalize_record(self, record: Record, sierraID: str,
97
84
  viaf_record: VIAFRecord, is_editing_existing_record: bool, original_entity: str) -> Record:
98
85
 
@@ -0,0 +1,54 @@
1
+ import regex as re
2
+ from typing import List
3
+
4
+ def has_valid_chars(entity: str, allow_cyrillic: bool = True) -> bool:
5
+ """ Checks if entity contains any valid characters in latin
6
+ or in cyrillic, if the latter is enabled
7
+
8
+ Parameters
9
+ ------------
10
+ entity: str
11
+ String to validate.
12
+ allow_cyrillic: bool
13
+ Allow strings in cyrillic?
14
+
15
+ Returns
16
+ ------------
17
+ bool
18
+ Boolean value indicating, if the string
19
+ contains any valid characters.
20
+
21
+ """
22
+ # Check for latin characters
23
+ is_valid = bool(re.search(r"[a-züõöäA-ZÜÕÖÄ]", entity))
24
+
25
+ if allow_cyrillic and not is_valid:
26
+ # If cyrillic characters are allowed,
27
+ # check for them as well
28
+ is_valid = bool(re.search(r"[а-яА-Я]", entity))
29
+
30
+ return is_valid
31
+
32
+
33
+ def filter_names(names: List[str], allow_cyrillic: bool = True) -> List[str]:
34
+ """ Filters out names not in allowed encodings (latin / cyrillic).
35
+
36
+ Parameters
37
+ ------------
38
+ names: List[str]
39
+ Names to filters.
40
+ allow_cyrillic: bool
41
+ Allow strings in cyrillic?
42
+
43
+ Returns
44
+ ------------
45
+ List[str]
46
+ List of filtered names.
47
+
48
+ """
49
+ filtered_names = [
50
+ name for name in names
51
+ if has_valid_chars(entity=name, allow_cyrillic=allow_cyrillic)
52
+ ]
53
+ return filtered_names
54
+
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: rara-tools
3
- Version: 0.7.16
3
+ Version: 0.7.18
4
4
  Summary: Tools to support Kata's work.
5
5
  Classifier: Programming Language :: Python :: 3
6
6
  Classifier: Programming Language :: Python :: 3.10
@@ -1,6 +1,6 @@
1
1
  rara_tools/converters.py,sha256=a1dEMa0TwcO9UmjuSBkiuc7LGmH0d_dB6wwoTLpdZhI,4040
2
2
  rara_tools/decorators.py,sha256=MjOyvZ5nTkwxwx2JLFEGpKKBysvecFw6EN6UDrSvZLU,2187
3
- rara_tools/digar_schema_converter.py,sha256=usrNwlbN63wTE5U56vbmyzT_SxGLXO6ZF4JwY3Lnkqg,15061
3
+ rara_tools/digar_schema_converter.py,sha256=wd6QeSxC1nfiH5tDogfNl0zO1VnS5IiPZ5Y2UIrjOL4,15077
4
4
  rara_tools/elastic.py,sha256=4D9yoyMy6AJIKwhSi2H1usffDHAh2A_IZfv5BtYnBKg,13992
5
5
  rara_tools/exceptions.py,sha256=YQyaueUbXeTkJYFDEuN6iWTXMI3eCv5l7PxGp87vg5I,550
6
6
  rara_tools/s3.py,sha256=9ziDXsLjBtFAvsjTPxFddhfvkpA8773rzPJqO7y1N5Q,6415
@@ -21,8 +21,8 @@ rara_tools/core_formatters/formatted_meta.py,sha256=WEnMs8K0YeTLGjXn_mxQTpshxcz5
21
21
  rara_tools/core_formatters/formatted_object.py,sha256=7a499ZmcZXOqtlwxDi6FWHWF5a6HdCsduS22wV3uHIE,5656
22
22
  rara_tools/normalizers/__init__.py,sha256=_NqpS5w710DhaURytHq9JpEt8HgYpSPfRDcOtOymJgE,193
23
23
  rara_tools/normalizers/authorities.py,sha256=iW3cYOqqVJKy4CcnG9_T6dN-1bBT1e-0jtLYvco-MyQ,5311
24
- rara_tools/normalizers/base.py,sha256=DhMicY5p_N2SC_E3lbWUvSM77AOy_pBjQpbLSvYWDxM,20488
25
- rara_tools/normalizers/bibs.py,sha256=s8NGoieCjiftASUb--1YvYZ0VzW6uBt2ZidhLi_wP9A,3938
24
+ rara_tools/normalizers/base.py,sha256=tw64ZK7KXg9O2IPMxICMogYHAG6il10qQqCd4fIjQL0,20941
25
+ rara_tools/normalizers/bibs.py,sha256=5pOw8RsQ4eDwbREbYySeI_b7dQyGlJnfMRSS-tWGJ9c,3632
26
26
  rara_tools/normalizers/reader.py,sha256=GYCkAtnsNx135w5lD-_MqCZzdHQHHPDF-pDxYj839Vo,1595
27
27
  rara_tools/normalizers/viaf.py,sha256=C-NfbvL83ZcHVB9ICMw43wAMYKTqDTHU3ZT2mXKec00,24288
28
28
  rara_tools/parsers/marc_parsers/base_parser.py,sha256=Kdw4aivJf2FkWgIK7pJtHtVXF_G1pjHVQ7IcFItSqy8,1649
@@ -39,8 +39,9 @@ rara_tools/parsers/marc_records/title_record.py,sha256=XrtJ4gj7wzSaGxNaPtPuawmqq
39
39
  rara_tools/parsers/tools/entity_normalizers.py,sha256=VyCy_NowCLpOsL0luQ55IW-Qi-J5oBH0Ofzr7HRFBhM,8949
40
40
  rara_tools/parsers/tools/marc_converter.py,sha256=LgSHe-7n7aiDrw2bnsB53r3fXTRFjZXTwBYfTpL0pfs,415
41
41
  rara_tools/parsers/tools/russian_transliterator.py,sha256=5ZU66iTqAhr7pmfVqXPAI_cidF43VqqmuN4d7H4_JuA,9770
42
- rara_tools-0.7.16.dist-info/licenses/LICENSE.md,sha256=hkZVnIZll7e_KNEQzeY94Y9tlzVL8iVZBTMBvDykksU,35142
43
- rara_tools-0.7.16.dist-info/METADATA,sha256=yBmOUFVy7V6RyZdN_qBGkY7M8zH7H7h7v_QLQeZ3bAM,4080
44
- rara_tools-0.7.16.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
45
- rara_tools-0.7.16.dist-info/top_level.txt,sha256=JwfB5b8BAtW5OFKRln2AQ_WElTRyIBM4nO0FKN1cupY,11
46
- rara_tools-0.7.16.dist-info/RECORD,,
42
+ rara_tools/parsers/tools/validators.py,sha256=JTGbfAWcLldlZrX0nb343P9RJ8QwSh3455fYap3UxxY,1335
43
+ rara_tools-0.7.18.dist-info/licenses/LICENSE.md,sha256=hkZVnIZll7e_KNEQzeY94Y9tlzVL8iVZBTMBvDykksU,35142
44
+ rara_tools-0.7.18.dist-info/METADATA,sha256=9AR_e8-yNVW_qp6Iaxp0IP2_HxV_NU87DE_I2GQOuJg,4080
45
+ rara_tools-0.7.18.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
46
+ rara_tools-0.7.18.dist-info/top_level.txt,sha256=JwfB5b8BAtW5OFKRln2AQ_WElTRyIBM4nO0FKN1cupY,11
47
+ rara_tools-0.7.18.dist-info/RECORD,,