rara-tools 0.6.16__tar.gz → 0.7.0__tar.gz

This diff shows the contents of publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.

Potentially problematic release.


This version of rara-tools (0.7.0) might be problematic.

Files changed (69)
  1. {rara_tools-0.6.16/rara_tools.egg-info → rara_tools-0.7.0}/PKG-INFO +1 -1
  2. rara_tools-0.7.0/VERSION +1 -0
  3. {rara_tools-0.6.16 → rara_tools-0.7.0}/pyproject.toml +2 -1
  4. rara_tools-0.7.0/rara_tools/constants/linker.py +136 -0
  5. rara_tools-0.7.0/rara_tools/core_formatters/core_formatter.py +86 -0
  6. rara_tools-0.7.0/rara_tools/core_formatters/formatted_keyword.py +229 -0
  7. rara_tools-0.7.0/rara_tools/core_formatters/formatted_meta.py +154 -0
  8. rara_tools-0.7.0/rara_tools/core_formatters/formatted_object.py +137 -0
  9. {rara_tools-0.6.16 → rara_tools-0.7.0}/rara_tools/normalizers/viaf.py +1 -0
  10. {rara_tools-0.6.16 → rara_tools-0.7.0}/rara_tools/utils.py +43 -21
  11. {rara_tools-0.6.16 → rara_tools-0.7.0/rara_tools.egg-info}/PKG-INFO +1 -1
  12. {rara_tools-0.6.16 → rara_tools-0.7.0}/rara_tools.egg-info/SOURCES.txt +4 -1
  13. rara_tools-0.7.0/tests/test_formatters.py +41 -0
  14. rara_tools-0.6.16/VERSION +0 -1
  15. rara_tools-0.6.16/rara_tools/constants/linker.py +0 -22
  16. rara_tools-0.6.16/rara_tools/formatters.py +0 -106
  17. rara_tools-0.6.16/tests/test_formatters.py +0 -15
  18. {rara_tools-0.6.16 → rara_tools-0.7.0}/LICENSE.md +0 -0
  19. {rara_tools-0.6.16 → rara_tools-0.7.0}/README.md +0 -0
  20. {rara_tools-0.6.16 → rara_tools-0.7.0}/rara_tools/constants/__init__.py +0 -0
  21. {rara_tools-0.6.16 → rara_tools-0.7.0}/rara_tools/constants/digitizer.py +0 -0
  22. {rara_tools-0.6.16 → rara_tools-0.7.0}/rara_tools/constants/general.py +0 -0
  23. {rara_tools-0.6.16 → rara_tools-0.7.0}/rara_tools/constants/language_evaluator.py +0 -0
  24. {rara_tools-0.6.16 → rara_tools-0.7.0}/rara_tools/constants/meta_extractor.py +0 -0
  25. {rara_tools-0.6.16 → rara_tools-0.7.0}/rara_tools/constants/normalizers.py +0 -0
  26. {rara_tools-0.6.16 → rara_tools-0.7.0}/rara_tools/constants/parsers.py +0 -0
  27. {rara_tools-0.6.16 → rara_tools-0.7.0}/rara_tools/constants/subject_indexer.py +0 -0
  28. {rara_tools-0.6.16 → rara_tools-0.7.0}/rara_tools/converters.py +0 -0
  29. {rara_tools-0.6.16 → rara_tools-0.7.0}/rara_tools/decorators.py +0 -0
  30. {rara_tools-0.6.16 → rara_tools-0.7.0}/rara_tools/digar_schema_converter.py +0 -0
  31. {rara_tools-0.6.16 → rara_tools-0.7.0}/rara_tools/elastic.py +0 -0
  32. {rara_tools-0.6.16 → rara_tools-0.7.0}/rara_tools/exceptions.py +0 -0
  33. {rara_tools-0.6.16 → rara_tools-0.7.0}/rara_tools/normalizers/__init__.py +0 -0
  34. {rara_tools-0.6.16 → rara_tools-0.7.0}/rara_tools/normalizers/authorities.py +0 -0
  35. {rara_tools-0.6.16 → rara_tools-0.7.0}/rara_tools/normalizers/base.py +0 -0
  36. {rara_tools-0.6.16 → rara_tools-0.7.0}/rara_tools/normalizers/bibs.py +0 -0
  37. {rara_tools-0.6.16 → rara_tools-0.7.0}/rara_tools/parsers/marc_parsers/base_parser.py +0 -0
  38. {rara_tools-0.6.16 → rara_tools-0.7.0}/rara_tools/parsers/marc_parsers/ems_parser.py +0 -0
  39. {rara_tools-0.6.16 → rara_tools-0.7.0}/rara_tools/parsers/marc_parsers/location_parser.py +0 -0
  40. {rara_tools-0.6.16 → rara_tools-0.7.0}/rara_tools/parsers/marc_parsers/organization_parser.py +0 -0
  41. {rara_tools-0.6.16 → rara_tools-0.7.0}/rara_tools/parsers/marc_parsers/person_parser.py +0 -0
  42. {rara_tools-0.6.16 → rara_tools-0.7.0}/rara_tools/parsers/marc_parsers/title_parser.py +0 -0
  43. {rara_tools-0.6.16 → rara_tools-0.7.0}/rara_tools/parsers/marc_records/base_record.py +0 -0
  44. {rara_tools-0.6.16 → rara_tools-0.7.0}/rara_tools/parsers/marc_records/ems_record.py +0 -0
  45. {rara_tools-0.6.16 → rara_tools-0.7.0}/rara_tools/parsers/marc_records/organization_record.py +0 -0
  46. {rara_tools-0.6.16 → rara_tools-0.7.0}/rara_tools/parsers/marc_records/person_record.py +0 -0
  47. {rara_tools-0.6.16 → rara_tools-0.7.0}/rara_tools/parsers/marc_records/title_record.py +0 -0
  48. {rara_tools-0.6.16 → rara_tools-0.7.0}/rara_tools/parsers/tools/entity_normalizers.py +0 -0
  49. {rara_tools-0.6.16 → rara_tools-0.7.0}/rara_tools/parsers/tools/marc_converter.py +0 -0
  50. {rara_tools-0.6.16 → rara_tools-0.7.0}/rara_tools/parsers/tools/russian_transliterator.py +0 -0
  51. {rara_tools-0.6.16 → rara_tools-0.7.0}/rara_tools/s3.py +0 -0
  52. {rara_tools-0.6.16 → rara_tools-0.7.0}/rara_tools/task_reporter.py +0 -0
  53. {rara_tools-0.6.16 → rara_tools-0.7.0}/rara_tools.egg-info/dependency_links.txt +0 -0
  54. {rara_tools-0.6.16 → rara_tools-0.7.0}/rara_tools.egg-info/requires.txt +0 -0
  55. {rara_tools-0.6.16 → rara_tools-0.7.0}/rara_tools.egg-info/top_level.txt +0 -0
  56. {rara_tools-0.6.16 → rara_tools-0.7.0}/requirements.txt +0 -0
  57. {rara_tools-0.6.16 → rara_tools-0.7.0}/setup.cfg +0 -0
  58. {rara_tools-0.6.16 → rara_tools-0.7.0}/tests/test_digar_schema_converter.py +0 -0
  59. {rara_tools-0.6.16 → rara_tools-0.7.0}/tests/test_elastic.py +0 -0
  60. {rara_tools-0.6.16 → rara_tools-0.7.0}/tests/test_elastic_vector_and_search_operations.py +0 -0
  61. {rara_tools-0.6.16 → rara_tools-0.7.0}/tests/test_entity_normalizers.py +0 -0
  62. {rara_tools-0.6.16 → rara_tools-0.7.0}/tests/test_marc_parsers.py +0 -0
  63. {rara_tools-0.6.16 → rara_tools-0.7.0}/tests/test_normalization.py +0 -0
  64. {rara_tools-0.6.16 → rara_tools-0.7.0}/tests/test_s3_exceptions.py +0 -0
  65. {rara_tools-0.6.16 → rara_tools-0.7.0}/tests/test_s3_file_operations.py +0 -0
  66. {rara_tools-0.6.16 → rara_tools-0.7.0}/tests/test_sierra_converters.py +0 -0
  67. {rara_tools-0.6.16 → rara_tools-0.7.0}/tests/test_task_reporter.py +0 -0
  68. {rara_tools-0.6.16 → rara_tools-0.7.0}/tests/test_utils.py +0 -0
  69. {rara_tools-0.6.16 → rara_tools-0.7.0}/tests/test_viaf_client.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: rara-tools
3
- Version: 0.6.16
3
+ Version: 0.7.0
4
4
  Summary: Tools to support Kata's work.
5
5
  Classifier: Programming Language :: Python :: 3
6
6
  Classifier: Programming Language :: Python :: 3.10
@@ -0,0 +1 @@
1
+ 0.7.0
@@ -14,7 +14,8 @@ include = [
14
14
  "rara_tools.parsers",
15
15
  "rara_tools.parsers.marc_parsers",
16
16
  "rara_tools.parsers.marc_records",
17
- "rara_tools.parsers.tools"
17
+ "rara_tools.parsers.tools",
18
+ "rara_tools.core_formatters"
18
19
  ]
19
20
 
20
21
  [project]
@@ -0,0 +1,136 @@
1
+ import logging
2
+ from rara_tools.constants.normalizers import EntityType, VIAF_ENTITY_MAP
3
+
4
+ COMPONENT_KEY = "linker"
5
+
6
+
7
+ class Tasks:
8
+ BASE = "base_linker_task"
9
+ VECTORIZE = "vectorize_text"
10
+ VECTORIZE_WITH_CORE = "vectorize_text_with_core_logic"
11
+ PIPELINE = "link_keywords_with_core_logic"
12
+
13
+ LINK_AND_NORMALIZE = "core_linker_with_normalization"
14
+ VECTORIZE_AND_INDEX = "core_vectorize_and_index"
15
+ RECEIVE_LINK_AND_NORMALIZE = "receive_link_and_normalize"
16
+
17
+
18
+ class Queue:
19
+ LINKER = "linker"
20
+ VECTORIZER = "vectorizer"
21
+
22
+
23
+ class StatusKeys:
24
+ VECTORIZE_CONTEXT = "vectorize_context"
25
+ LINK_KEYWORDS = "link_keywords"
26
+
27
+ class URLSource:
28
+ VIAF = "VIAF"
29
+ SIERRA = "Sierra"
30
+ EMS = "EMS"
31
+
32
+ class KeywordType:
33
+ LOC = "Kohamärksõnad"
34
+ TIME = "Ajamärksõnad"
35
+ TOPIC = "Teemamärksõnad"
36
+ GENRE = "Vormimärksõnad"
37
+ TITLE = "Teose pealkiri"
38
+ PER = "Isikunimi"
39
+ ORG = "Kollektiivi nimi"
40
+ EVENT = "Ajutine kollektiiv või sündmus"
41
+ CATEGORY = "Valdkonnamärksõnad"
42
+ UDC = "UDC Summary"
43
+ UDK = "UDK Rahvusbibliograafia"
44
+
45
+
46
+ class KeywordMARC:
47
+ PER = 600
48
+ ORG = 610
49
+ TOPIC = 650
50
+ GENRE = 655
51
+ TIME = 648
52
+ LOC = 651
53
+ EVENT = 611
54
+ TITLE = 630
55
+ TITLE_LINKED = 600
56
+
57
+ class KeywordSource:
58
+ EMS = "EMS"
59
+ SIERRA = "SIERRA"
60
+ VIAF = "VIAF"
61
+ AI = "AI"
62
+
63
+ class Filters:
64
+ AUTHOR = "author"
65
+ YEAR = "year"
66
+
67
+
68
+ UNLINKED_KEYWORD_MARC_FIELD = 693
69
+
70
+ ALLOWED_FILTERS_MAP = {
71
+ EntityType.PER: [Filters.YEAR],
72
+ EntityType.ORG: [Filters.YEAR],
73
+ EntityType.TITLE: [Filters.YEAR, Filters.AUTHOR],
74
+ EntityType.KEYWORD: [],
75
+ EntityType.LOC: []
76
+ }
77
+ KEYWORD_MARC_MAP = {
78
+ KeywordType.LOC: KeywordMARC.LOC,
79
+ KeywordType.TIME: KeywordMARC.TIME,
80
+ KeywordType.TOPIC: KeywordMARC.TOPIC,
81
+ KeywordType.GENRE: KeywordMARC.GENRE,
82
+ KeywordType.TITLE: KeywordMARC.TITLE,
83
+ KeywordType.ORG: KeywordMARC.ORG,
84
+ KeywordType.PER: KeywordMARC.PER,
85
+ KeywordType.EVENT: KeywordMARC.EVENT
86
+ }
87
+
88
+ URL_SOURCE_MAP = {
89
+ EntityType.PER: URLSource.VIAF,
90
+ EntityType.ORG: URLSource.VIAF,
91
+ EntityType.TITLE: URLSource.VIAF,
92
+ EntityType.KEYWORD: URLSource.EMS,
93
+ EntityType.LOC: URLSource.EMS
94
+ }
95
+
96
+ # Ignore these "keyword types" when linking the
97
+ # rara-subject-indexer results.
98
+ KEYWORD_TYPES_TO_IGNORE = [
99
+ KeywordType.CATEGORY,
100
+ KeywordType.UDC,
101
+ KeywordType.UDK
102
+ ]
103
+
104
+ ALLOWED_ENTITY_TYPES = [
105
+ EntityType.PER,
106
+ EntityType.ORG,
107
+ EntityType.KEYWORD,
108
+ EntityType.LOC,
109
+ EntityType.TITLE,
110
+ EntityType.UNK,
111
+ ]
112
+
113
+
114
+ KEYWORD_TYPE_MAP = {
115
+ KeywordType.TIME: EntityType.KEYWORD,
116
+ KeywordType.GENRE: EntityType.KEYWORD,
117
+ KeywordType.LOC: EntityType.LOC,
118
+ KeywordType.PER: EntityType.PER,
119
+ KeywordType.ORG: EntityType.ORG,
120
+ KeywordType.TOPIC: EntityType.KEYWORD,
121
+ KeywordType.TITLE: EntityType.TITLE,
122
+ KeywordType.EVENT: EntityType.ORG
123
+ }
124
+
125
+ EMS_ENTITY_TYPES = [EntityType.KEYWORD, EntityType.LOC]
126
+ SIERRA_ENTITY_TYPES = [EntityType.PER, EntityType.ORG, EntityType.TITLE]
127
+ VIAF_ENTITY_TYPES = [EntityType.PER, EntityType.ORG, EntityType.TITLE]
128
+
129
+ # Params for filters
130
+ MIN_AUTHOR_SIMILARITY = 0.95
131
+ YEAR_EXCEPTION_VALUE = True
132
+
133
+ LOGGER_NAME = "rara-tools-norm-linker"
134
+ LOGGER = logging.getLogger(LOGGER_NAME)
135
+
136
+ MAIN_TAXONOMY_LANG = "et"
@@ -0,0 +1,86 @@
1
+ from typing import List, Tuple, Any
2
+ from rara_tools.core_formatters.formatted_keyword import FormattedKeyword
3
+ from rara_tools.core_formatters.formatted_meta import FormattedAuthor
4
+ from rara_tools.constants.linker import MAIN_TAXONOMY_LANG, KEYWORD_TYPES_TO_IGNORE, EntityType
5
+
6
+ def get_primary_author(authors: List[dict]) -> str:
7
+ primary_author = ""
8
+ for author in authors:
9
+ if author.get("is_primary", False):
10
+ primary_author = author.get("name", "")
11
+ return primary_author
12
+
13
+ def format_authors(authors: List[dict]) -> List[dict]:
14
+ formatted_authors = []
15
+ for author in authors:
16
+ entity_type = author.get("type", EntityType.UNK)
17
+
18
+ formatted_author = FormattedAuthor(
19
+ object_dict=author,
20
+ linked_doc=None,
21
+ entity_type=entity_type
22
+ ).to_dict()
23
+ formatted_authors.append(formatted_author)
24
+ return formatted_authors
25
+
26
+ def format_sections(sections: List[dict]) -> List[dict]:
27
+ for section in sections:
28
+ authors = section.pop("authors", [])
29
+ titles = section.pop("titles", [])
30
+ primary_author = get_primary_author(authors)
31
+ if primary_author:
32
+ for title in titles:
33
+ title["author_from_title"] = primary_author
34
+ section["titles"] = titles
35
+
36
+ formatted_authors = format_authors(authors)
37
+ section["authors"] = formatted_authors
38
+
39
+ return sections
40
+
41
+ def format_meta(meta: dict) -> dict:
42
+ """ Formats unlinked meta for Kata CORE.
43
+ """
44
+
45
+ meta_to_format = meta.get("meta")
46
+
47
+ authors = meta_to_format.pop("authors", [])
48
+ sections = meta_to_format.pop("sections", [])
49
+
50
+ formatted_authors = format_authors(authors)
51
+ formatted_sections = format_sections(sections)
52
+
53
+ if sections and formatted_sections:
54
+ meta_to_format["sections"] = formatted_sections
55
+ if authors and formatted_authors:
56
+ meta_to_format["authors"] = formatted_authors
57
+
58
+ meta["meta"] = meta_to_format
59
+
60
+ return meta
61
+
62
+
63
+ def format_keywords(flat_keywords: List[dict]) -> List[dict]:
64
+ """ Formats unlinked keywords for Kata CORE.
65
+ """
66
+ ignored_keywords = []
67
+ filtered_keywords = []
68
+
69
+ for keyword_dict in flat_keywords:
70
+ keyword_type = keyword_dict.get("entity_type")
71
+ if keyword_type in KEYWORD_TYPES_TO_IGNORE:
72
+ ignored_keywords.append(keyword_dict)
73
+ else:
74
+ filtered_keywords.append(keyword_dict)
75
+
76
+ formatted_keywords = []
77
+
78
+ for keyword_dict in filtered_keywords:
79
+ formatted_keyword = FormattedKeyword(
80
+ object_dict=keyword_dict,
81
+ linked_doc=None,
82
+ main_taxnomy_lang=MAIN_TAXONOMY_LANG
83
+ ).to_dict()
84
+ formatted_keywords.append(formatted_keyword)
85
+
86
+ return formatted_keywords
@@ -0,0 +1,229 @@
1
+ from rara_tools.constants.linker import (
2
+ LOGGER, URLSource, KeywordSource, EntityType, KeywordType, KeywordMARC,
3
+ KEYWORD_MARC_MAP, KEYWORD_TYPES_TO_IGNORE, KEYWORD_TYPE_MAP,
4
+ EMS_ENTITY_TYPES, SIERRA_ENTITY_TYPES, UNLINKED_KEYWORD_MARC_FIELD,
5
+ URL_SOURCE_MAP
6
+ )
7
+ from rara_tools.core_formatters.formatted_object import FormattedObject
8
+ from typing import List, Dict, NoReturn, Tuple, Any
9
+
10
+ class FormattedKeyword(FormattedObject):
11
+ def __init__(self, object_dict: dict, linked_doc: Any,
12
+ main_taxnomy_lang: str, url_source_map: str = URL_SOURCE_MAP
13
+ ) -> NoReturn:
14
+ super().__init__(
15
+ object_dict=object_dict,
16
+ linked_doc=linked_doc,
17
+ original_entity_key="keyword"
18
+ )
19
+
20
+ self.main_taxnomy_lang: str = main_taxnomy_lang
21
+
22
+ self.original_keyword: str = self.original_entity
23
+ self.score: float = self.object_dict.get("score")
24
+ self.count: int = self.object_dict.get("count")
25
+ self.method: str = self.object_dict.get("method")
26
+ self.model_arch: str = self.object_dict.get("model_arch", self.method)
27
+ self.keyword_type: str = self.object_dict.get("entity_type")
28
+
29
+ self.entity_type: str = KEYWORD_TYPE_MAP.get(self.keyword_type, "")
30
+ self.url_source_map: dict = url_source_map
31
+
32
+ self.__keyword_source: str = ""
33
+ self.__indicator_1: str = ""
34
+ self.__indicator_2: str = ""
35
+ self.__url: str | None = None
36
+ self.__url_source: str | None = None
37
+ self.__marc_field: str = ""
38
+
39
+ self.__language: str = ""
40
+ self.__author: str | None = None
41
+
42
+ @property
43
+ def keyword(self) -> str:
44
+ return self.entity
45
+
46
+ @property
47
+ def keyword_source(self) -> str:
48
+ if not self.__keyword_source:
49
+ if not self.is_linked:
50
+ source = KeywordSource.AI
51
+ elif self.entity_type in EMS_ENTITY_TYPES:
52
+ source = KeywordSource.EMS
53
+ elif self.entity_type in SIERRA_ENTITY_TYPES:
54
+ if self.linked_doc and self.linked_doc.elastic:
55
+ source = KeywordSource.SIERRA
56
+ elif self.linked_doc and self.linked_doc.viaf:
57
+ source = KeywordSource.VIAF
58
+ else:
59
+ source = KeywordSource.AI
60
+ else:
61
+ source = KeywordSource.AI
62
+ self.__keyword_source = source
63
+ return self.__keyword_source
64
+
65
+ @property
66
+ def indicator1(self) -> str:
67
+ if not self.__indicator_1:
68
+ ind1, ind2 = self._get_indicators()
69
+ self.__indicator_1 = ind1
70
+ self.__indicator_2 = ind2
71
+ return self.__indicator_1
72
+
73
+ @property
74
+ def indicator2(self) -> str:
75
+ if not self.__indicator_2:
76
+ ind1, ind2 = self._get_indicators()
77
+ self.__indicator_1 = ind1
78
+ self.__indicator_2 = ind2
79
+ return self.__indicator_2
80
+
81
+ @property
82
+ def url(self) -> str:
83
+ if self.__url == None:
84
+ url_info = self._get_url_info()
85
+ self.__url = url_info.get("url")
86
+ self.__url_source = url_info.get("url_source")
87
+ return self.__url
88
+
89
+ @property
90
+ def url_source(self) -> str:
91
+ if self.__url_source == None:
92
+ url_info = self._get_url_info()
93
+ self.__url = url_info.get("url")
94
+ self.__url_source = url_info.get("url_source")
95
+ return self.__url_source
96
+
97
+ @property
98
+ def marc_field(self) -> int:
99
+ if not self.__marc_field:
100
+ # TODO: teoste + isikute loogika!!!!
101
+ if self.is_linked:
102
+ marc_field = KEYWORD_MARC_MAP.get(str(self.keyword_type), "")
103
+ else:
104
+ marc_field = UNLINKED_KEYWORD_MARC_FIELD
105
+
106
+ if self.entity_type == EntityType.TITLE:
107
+ if self.author:
108
+ marc_field = KeywordMARC.TITLE_LINKED
109
+ else:
110
+ marc_field = KeywordMARC.TITLE
111
+ self.__marc_field = marc_field
112
+ return self.__marc_field
113
+
114
+
115
+ @property
116
+ def persons_title(self) -> str:
117
+ return self.titles
118
+
119
+
120
+ @property
121
+ def language(self) -> str:
122
+ if not self.__language:
123
+ if self.is_linked:
124
+ self.__language = self.main_taxnomy_lang
125
+ else:
126
+ self.__language = self.object_dict.get("language", "")
127
+ return self.__language
128
+
129
+ @property
130
+ def author(self) -> str:
131
+ # Only relevant for titles!
132
+ if self.__author == None:
133
+ self.__author = ""
134
+ if self.entity_type == EntityType.TITLE:
135
+ if self.original_record:
136
+ self.__author = self.original_record.author_name
137
+ elif self.viaf_info:
138
+ pass
139
+ #self.__author = self.viaf_info.get
140
+ return self.__author
141
+
142
+ def _get_url_info(self) -> dict:
143
+ """ Finds URL identifier from LinkedDoc based on
144
+ given entity type.
145
+
146
+ Parameters
147
+ -----------
148
+ linked_doc: LinkedDoc | None
149
+ A LinkedDoc class instance.
150
+ entity_type: str
151
+ Entity type for detecting correct URL source.
152
+
153
+ Returns
154
+ ----------
155
+ dict:
156
+ Dictionary with keys `url` - URL identifier and
157
+ `url_source` - source of the URL (e.g. "EMS").
158
+
159
+ """
160
+ url_source = self.url_source_map.get(self.entity_type, "")
161
+ url = ""
162
+
163
+ if self.linked_doc:
164
+ if url_source == URLSource.EMS:
165
+ url = self.linked_doc.elastic.get("ems_url", "")
166
+ elif url_source == URLSource.VIAF:
167
+ url = self.viaf_info.get("viaf_url", "")
168
+ if not url:
169
+ url_source = ""
170
+
171
+ url_info = {"url": url, "url_source": url_source}
172
+
173
+ LOGGER.debug(
174
+ f"Detected URL info: {url_info}. Used entity_type = {self.entity_type}. " \
175
+ f"URL source map = {self.url_source_map}."
176
+ )
177
+ return url_info
178
+
179
+ def _get_indicators(self) -> Tuple[str, str]:
180
+ """ Find MARC indicators 1 and 2.
181
+ """
182
+ ind1 = " "
183
+ ind2 = " "
184
+ if self.entity_type in SIERRA_ENTITY_TYPES:
185
+ if self.entity_type == EntityType.PER:
186
+ if "," in self.keyword:
187
+ ind1 = "1"
188
+ else:
189
+ ind1 = "0"
190
+ elif self.entity_type == EntityType.ORG:
191
+ # 1 märksõna esimeseks elemendiks võimupiirkonna nimi, nt:
192
+ # (a) Eesti (b) Riigikogu - raske automaatselt määrata
193
+ # 2 märksõna esimeseks elemendiks nimi pärijärjestuses
194
+ ind1 = "2"
195
+ else:
196
+ ind1 = "0"
197
+
198
+ if not self.is_linked:
199
+ ind2 = "4"
200
+ elif self.entity_type in EMS_ENTITY_TYPES:
201
+ ind2 = "4"
202
+ return (ind1, ind2)
203
+
204
+
205
+ def to_dict(self) -> dict:
206
+ keyword_dict = {
207
+ "count": self.count,
208
+ "dates": self.dates,
209
+ "entity_type": self.keyword_type,
210
+ "indicator1": self.indicator1,
211
+ "indicator2": self.indicator2,
212
+ "is_linked": self.is_linked,
213
+ "keyword": self.keyword,
214
+ "keyword_source": self.keyword_source,
215
+ "lang": self.language,
216
+ "location": self.location,
217
+ "marc_field": self.marc_field,
218
+ "method": self.method,
219
+ "model_arch": self.model_arch,
220
+ "numeration": self.numeration,
221
+ "organisation_sub_unit": self.organisation_sub_unit,
222
+ "original_keyword": self.original_keyword,
223
+ "persons_title": self.persons_title,
224
+ "score": self.score,
225
+ "url": self.url,
226
+ "url_source": self.url_source,
227
+ "author": self.author
228
+ }
229
+ return keyword_dict
@@ -0,0 +1,154 @@
1
+ from rara_tools.constants.linker import (
2
+ LOGGER, EntityType
3
+ )
4
+ from rara_tools.core_formatters.formatted_object import FormattedObject
5
+ from typing import List, Dict, NoReturn, Tuple, Any
6
+
7
+
8
+ class FormattedTitle(FormattedObject):
9
+ # TODO: Kas seda on üldse vaja?
10
+ def __init__(self, object_dict: dict, linked_doc: Any):
11
+ super().__init__(
12
+ object_dict=object_dict,
13
+ linked_doc=linked_doc,
14
+ original_entity_key="name"
15
+ )
16
+
17
+
18
+ class FormattedAuthor(FormattedObject):
19
+ def __init__(self, object_dict: dict, linked_doc: Any, entity_type: str):
20
+ super().__init__(
21
+ object_dict=object_dict,
22
+ linked_doc=linked_doc,
23
+ original_entity_key="name"
24
+ )
25
+ self.entity_type: str = entity_type
26
+
27
+ self.is_linked: bool = True if self.linked_doc else False # NB! Lisada andmebaasi uus veerg!
28
+ self.original_name: str = self.original_entity # NB! Lisada andmebaasi uus veerg
29
+ self.author_role: str = self.object_dict.get("role")
30
+ self.is_primary: bool = self.object_dict.get("is_primary")
31
+
32
+ self.__primary_author_type: str = None
33
+
34
+ self.__name_order_type: str = ""
35
+ self.__event_sub_unit: str = ""
36
+ self.__order_number: str = ""
37
+ self.__sub_title: str = ""
38
+ self.__additional_info: str = ""
39
+ self.__publication_type: str = ""
40
+ self.__publication_language: str = ""
41
+ #self.__standardized_uri: str = ""
42
+ self.__viaf_id: str = ""
43
+
44
+
45
+ @property
46
+ def primary_author_type(self) -> str:
47
+ if self.__primary_author_type == None:
48
+ if self.is_primary:
49
+ if self.entity_type != EntityType.UNK:
50
+ self.__primary_author_type = self.entity_type
51
+ else:
52
+ self.__primary_author_type = EntityType.PER
53
+ else:
54
+ self.__primary_author_type = ""
55
+ return self.__primary_author_type
56
+
57
+
58
+ @property
59
+ def name(self) -> str:
60
+ return self.entity
61
+
62
+ @property
63
+ def name_order(self) -> str:
64
+ if not self.__name_order_type:
65
+ if self.entity_type == EntityType.PER or self.entity_type == EntityType.UNK:
66
+ if "," in self.name:
67
+ ind1 = "1"
68
+ else:
69
+ ind1 = "0"
70
+ elif self.entity_type == EntityType.ORG:
71
+ #LOGGER.debug(f"Entity type {self.entity_type} is not {EntityType.PER}.")
72
+ # 1 märksõna esimeseks elemendiks võimupiirkonna nimi, nt:
73
+ # (a) Eesti (b) Riigikogu - raske automaatselt määrata
74
+ # 2 märksõna esimeseks elemendiks nimi pärijärjestuses
75
+ ind1 = "2" #????????
76
+ else:
77
+ ind1 = "0"
78
+ self.__name_order_type = ind1
79
+ return self.__name_order_type
80
+
81
+ @property
82
+ def event_sub_unit(self) -> str:
83
+ if not self.__event_sub_unit:
84
+ self.__event_sub_unit = ""
85
+ return self.__event_sub_unit
86
+
87
+
88
+ @property
89
+ def order_number(self) -> str:
90
+ if not self.__order_number:
91
+ self.__order_number = ""
92
+ return self.__order_number
93
+
94
+ @property
95
+ def sub_title(self) -> str:
96
+ if not self.__sub_title:
97
+ self.__sub_title = ""
98
+ return self.__sub_title
99
+
100
+ @property
101
+ def additional_info(self) -> str:
102
+ if not self.__additional_info:
103
+ self.__additional_info = ""
104
+ return self.__additional_info
105
+
106
+ @property
107
+ def publication_type(self) -> str:
108
+ if not self.__publication_type:
109
+ self.__publication_type = ""
110
+ return self.__publication_type
111
+
112
+ @property
113
+ def publication_language(self) -> str:
114
+ if not self.__publication_language:
115
+ self.__publication_language = ""
116
+ return self.__publication_language
117
+
118
+ @property
119
+ def standardized_uri(self) -> str:
120
+ return self.identifier
121
+
122
+ @property
123
+ def viaf_id(self):
124
+ if not self.__viaf_id:
125
+ if self.viaf_info:
126
+ self.__viaf_id = self.viaf_info.get("viaf_url", "")
127
+ else:
128
+ self.__viaf_id = ""
129
+ return self.__viaf_id
130
+
131
+ def to_dict(self):
132
+ author_dict = {
133
+ "is_linked": self.is_linked,
134
+ "original_name": self.original_name,
135
+ "author_role": self.author_role,
136
+ "is_primary": self.is_primary,
137
+ "primary_author_type": self.primary_author_type,
138
+ "name": self.name,
139
+ "numeration": self.numeration,
140
+ "organisation_sub_unit": self.organisation_sub_unit,
141
+ "titles": self.titles,
142
+ "location": self.location,
143
+ "dates": self.dates,
144
+ "name_order_type": self.name_order,
145
+ "event_sub_unit": self.event_sub_unit,
146
+ "order_number": self.order_number,
147
+ "sub_title": self.sub_title,
148
+ "additional_info": self.additional_info,
149
+ "publication_type": self.publication_type,
150
+ "publication_language": self.publication_language,
151
+ "standardized_uri": self.standardized_uri,
152
+ "viaf_id": self.viaf_id
153
+ }
154
+ return author_dict