arekit 0.24.0__py3-none-any.whl → 0.25.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (160) hide show
  1. arekit/common/docs/entities_grouping.py +2 -1
  2. arekit/common/docs/parser.py +52 -20
  3. arekit/common/pipeline/base.py +12 -16
  4. arekit/common/pipeline/batching.py +28 -0
  5. arekit/common/pipeline/context.py +5 -1
  6. arekit/common/pipeline/items/base.py +38 -1
  7. arekit/common/pipeline/items/flatten.py +5 -1
  8. arekit/common/pipeline/items/handle.py +2 -1
  9. arekit/common/pipeline/items/iter.py +2 -1
  10. arekit/common/pipeline/items/map.py +2 -1
  11. arekit/common/pipeline/items/map_nested.py +4 -0
  12. arekit/common/pipeline/utils.py +32 -0
  13. arekit/common/service/sqlite.py +36 -0
  14. arekit/common/text/{partitioning/str.py → partitioning.py} +14 -9
  15. arekit/common/utils.py +0 -44
  16. arekit/contrib/utils/data/contents/opinions.py +13 -3
  17. arekit/contrib/utils/data/readers/sqlite.py +14 -0
  18. arekit/contrib/utils/data/storages/row_cache.py +6 -1
  19. arekit/contrib/utils/data/storages/sqlite_based.py +17 -0
  20. arekit/contrib/utils/data/writers/sqlite_native.py +4 -0
  21. arekit/contrib/utils/io_utils/utils.py +1 -18
  22. arekit/contrib/utils/pipelines/items/sampling/base.py +7 -12
  23. arekit/contrib/utils/pipelines/items/sampling/networks.py +3 -2
  24. arekit/contrib/utils/pipelines/items/text/entities_default.py +2 -2
  25. arekit/contrib/utils/pipelines/items/text/frames.py +2 -3
  26. arekit/contrib/utils/pipelines/items/text/frames_lemmatized.py +2 -2
  27. arekit/contrib/utils/pipelines/items/text/frames_negation.py +2 -1
  28. arekit/contrib/utils/pipelines/items/text/tokenizer.py +2 -4
  29. arekit/contrib/utils/pipelines/items/text/translator.py +2 -1
  30. arekit/contrib/utils/pipelines/text_opinion/extraction.py +6 -9
  31. arekit/contrib/utils/serializer.py +1 -2
  32. arekit-0.25.0.data/data/logo.png +0 -0
  33. arekit-0.25.0.dist-info/METADATA +82 -0
  34. {arekit-0.24.0.dist-info → arekit-0.25.0.dist-info}/RECORD +38 -153
  35. {arekit-0.24.0.dist-info → arekit-0.25.0.dist-info}/WHEEL +1 -1
  36. arekit/common/docs/objects_parser.py +0 -37
  37. arekit/common/text/parser.py +0 -12
  38. arekit/common/text/partitioning/base.py +0 -4
  39. arekit/common/text/partitioning/terms.py +0 -35
  40. arekit/contrib/source/__init__.py +0 -0
  41. arekit/contrib/source/brat/__init__.py +0 -0
  42. arekit/contrib/source/brat/annot.py +0 -84
  43. arekit/contrib/source/brat/doc.py +0 -28
  44. arekit/contrib/source/brat/entities/__init__.py +0 -0
  45. arekit/contrib/source/brat/entities/compound.py +0 -13
  46. arekit/contrib/source/brat/entities/entity.py +0 -42
  47. arekit/contrib/source/brat/entities/parser.py +0 -53
  48. arekit/contrib/source/brat/opinions/__init__.py +0 -0
  49. arekit/contrib/source/brat/opinions/converter.py +0 -19
  50. arekit/contrib/source/brat/relation.py +0 -32
  51. arekit/contrib/source/brat/sentence.py +0 -69
  52. arekit/contrib/source/brat/sentences_reader.py +0 -128
  53. arekit/contrib/source/download.py +0 -41
  54. arekit/contrib/source/nerel/__init__.py +0 -0
  55. arekit/contrib/source/nerel/entities.py +0 -55
  56. arekit/contrib/source/nerel/folding/__init__.py +0 -0
  57. arekit/contrib/source/nerel/folding/fixed.py +0 -74
  58. arekit/contrib/source/nerel/io_utils.py +0 -62
  59. arekit/contrib/source/nerel/labels.py +0 -241
  60. arekit/contrib/source/nerel/reader.py +0 -46
  61. arekit/contrib/source/nerel/utils.py +0 -24
  62. arekit/contrib/source/nerel/versions.py +0 -12
  63. arekit/contrib/source/nerelbio/__init__.py +0 -0
  64. arekit/contrib/source/nerelbio/io_utils.py +0 -62
  65. arekit/contrib/source/nerelbio/labels.py +0 -265
  66. arekit/contrib/source/nerelbio/reader.py +0 -8
  67. arekit/contrib/source/nerelbio/versions.py +0 -8
  68. arekit/contrib/source/ruattitudes/__init__.py +0 -0
  69. arekit/contrib/source/ruattitudes/collection.py +0 -36
  70. arekit/contrib/source/ruattitudes/doc.py +0 -51
  71. arekit/contrib/source/ruattitudes/doc_brat.py +0 -44
  72. arekit/contrib/source/ruattitudes/entity/__init__.py +0 -0
  73. arekit/contrib/source/ruattitudes/entity/parser.py +0 -7
  74. arekit/contrib/source/ruattitudes/io_utils.py +0 -56
  75. arekit/contrib/source/ruattitudes/labels_fmt.py +0 -12
  76. arekit/contrib/source/ruattitudes/opinions/__init__.py +0 -0
  77. arekit/contrib/source/ruattitudes/opinions/base.py +0 -28
  78. arekit/contrib/source/ruattitudes/opinions/converter.py +0 -37
  79. arekit/contrib/source/ruattitudes/reader.py +0 -268
  80. arekit/contrib/source/ruattitudes/sentence.py +0 -73
  81. arekit/contrib/source/ruattitudes/synonyms.py +0 -17
  82. arekit/contrib/source/ruattitudes/text_object.py +0 -59
  83. arekit/contrib/source/rusentiframes/__init__.py +0 -0
  84. arekit/contrib/source/rusentiframes/collection.py +0 -157
  85. arekit/contrib/source/rusentiframes/effect.py +0 -24
  86. arekit/contrib/source/rusentiframes/io_utils.py +0 -19
  87. arekit/contrib/source/rusentiframes/labels_fmt.py +0 -22
  88. arekit/contrib/source/rusentiframes/polarity.py +0 -35
  89. arekit/contrib/source/rusentiframes/role.py +0 -15
  90. arekit/contrib/source/rusentiframes/state.py +0 -24
  91. arekit/contrib/source/rusentiframes/types.py +0 -42
  92. arekit/contrib/source/rusentiframes/value.py +0 -2
  93. arekit/contrib/source/rusentrel/__init__.py +0 -0
  94. arekit/contrib/source/rusentrel/const.py +0 -3
  95. arekit/contrib/source/rusentrel/docs_reader.py +0 -51
  96. arekit/contrib/source/rusentrel/entities.py +0 -26
  97. arekit/contrib/source/rusentrel/io_utils.py +0 -125
  98. arekit/contrib/source/rusentrel/labels_fmt.py +0 -12
  99. arekit/contrib/source/rusentrel/opinions/__init__.py +0 -0
  100. arekit/contrib/source/rusentrel/opinions/collection.py +0 -30
  101. arekit/contrib/source/rusentrel/opinions/converter.py +0 -40
  102. arekit/contrib/source/rusentrel/opinions/provider.py +0 -54
  103. arekit/contrib/source/rusentrel/opinions/writer.py +0 -42
  104. arekit/contrib/source/rusentrel/synonyms.py +0 -17
  105. arekit/contrib/source/sentinerel/__init__.py +0 -0
  106. arekit/contrib/source/sentinerel/entities.py +0 -52
  107. arekit/contrib/source/sentinerel/folding/__init__.py +0 -0
  108. arekit/contrib/source/sentinerel/folding/factory.py +0 -31
  109. arekit/contrib/source/sentinerel/folding/fixed.py +0 -70
  110. arekit/contrib/source/sentinerel/io_utils.py +0 -87
  111. arekit/contrib/source/sentinerel/labels.py +0 -53
  112. arekit/contrib/source/sentinerel/labels_scaler.py +0 -30
  113. arekit/contrib/source/sentinerel/reader.py +0 -42
  114. arekit/contrib/source/synonyms/__init__.py +0 -0
  115. arekit/contrib/source/synonyms/utils.py +0 -19
  116. arekit/contrib/source/zip_utils.py +0 -47
  117. arekit/contrib/utils/connotations/__init__.py +0 -0
  118. arekit/contrib/utils/connotations/rusentiframes_sentiment.py +0 -23
  119. arekit/contrib/utils/download.py +0 -77
  120. arekit/contrib/utils/io_utils/opinions.py +0 -37
  121. arekit/contrib/utils/io_utils/samples.py +0 -79
  122. arekit/contrib/utils/lexicons/__init__.py +0 -0
  123. arekit/contrib/utils/lexicons/lexicon.py +0 -41
  124. arekit/contrib/utils/lexicons/relation.py +0 -42
  125. arekit/contrib/utils/lexicons/rusentilex.py +0 -37
  126. arekit/contrib/utils/nn/__init__.py +0 -0
  127. arekit/contrib/utils/nn/rows.py +0 -83
  128. arekit/contrib/utils/pipelines/items/text/terms_splitter.py +0 -10
  129. arekit/contrib/utils/pipelines/sources/__init__.py +0 -0
  130. arekit/contrib/utils/pipelines/sources/nerel/__init__.py +0 -0
  131. arekit/contrib/utils/pipelines/sources/nerel/doc_provider.py +0 -27
  132. arekit/contrib/utils/pipelines/sources/nerel/extract_text_relations.py +0 -65
  133. arekit/contrib/utils/pipelines/sources/nerel/labels_fmt.py +0 -60
  134. arekit/contrib/utils/pipelines/sources/nerel_bio/__init__.py +0 -0
  135. arekit/contrib/utils/pipelines/sources/nerel_bio/doc_provider.py +0 -29
  136. arekit/contrib/utils/pipelines/sources/nerel_bio/extrat_text_relations.py +0 -64
  137. arekit/contrib/utils/pipelines/sources/nerel_bio/labels_fmt.py +0 -79
  138. arekit/contrib/utils/pipelines/sources/ruattitudes/__init__.py +0 -0
  139. arekit/contrib/utils/pipelines/sources/ruattitudes/doc_provider.py +0 -56
  140. arekit/contrib/utils/pipelines/sources/ruattitudes/entity_filter.py +0 -20
  141. arekit/contrib/utils/pipelines/sources/ruattitudes/extract_text_opinions.py +0 -65
  142. arekit/contrib/utils/pipelines/sources/rusentrel/__init__.py +0 -0
  143. arekit/contrib/utils/pipelines/sources/rusentrel/doc_provider.py +0 -21
  144. arekit/contrib/utils/pipelines/sources/rusentrel/extract_text_opinions.py +0 -107
  145. arekit/contrib/utils/pipelines/sources/sentinerel/__init__.py +0 -0
  146. arekit/contrib/utils/pipelines/sources/sentinerel/doc_provider.py +0 -29
  147. arekit/contrib/utils/pipelines/sources/sentinerel/entity_filter.py +0 -62
  148. arekit/contrib/utils/pipelines/sources/sentinerel/extract_text_opinions.py +0 -180
  149. arekit/contrib/utils/pipelines/sources/sentinerel/labels_fmt.py +0 -50
  150. arekit/contrib/utils/pipelines/text_opinion/annot/predefined.py +0 -88
  151. arekit/contrib/utils/resources.py +0 -25
  152. arekit/contrib/utils/sources/__init__.py +0 -0
  153. arekit/contrib/utils/sources/sentinerel/__init__.py +0 -0
  154. arekit/contrib/utils/sources/sentinerel/text_opinion/__init__.py +0 -0
  155. arekit/contrib/utils/sources/sentinerel/text_opinion/prof_per_org_filter.py +0 -63
  156. arekit/download_data.py +0 -11
  157. arekit-0.24.0.dist-info/METADATA +0 -23
  158. /arekit/common/{text/partitioning → service}/__init__.py +0 -0
  159. {arekit-0.24.0.dist-info → arekit-0.25.0.dist-info}/LICENSE +0 -0
  160. {arekit-0.24.0.dist-info → arekit-0.25.0.dist-info}/top_level.txt +0 -0
@@ -1,268 +0,0 @@
1
- from arekit.common.utils import split_by_whitespaces
2
- from arekit.contrib.source.ruattitudes.doc import RuAttitudesDocument
3
- from arekit.contrib.source.ruattitudes.opinions.base import SentenceOpinion
4
- from arekit.contrib.source.ruattitudes.sentence import RuAttitudesSentence
5
- from arekit.contrib.source.ruattitudes.text_object import TextObject
6
-
7
-
8
- class RuAttitudesFormatReader(object):
9
-
10
- DOC_SEP_KEY = '--------'
11
- FILE_KEY = "File:"
12
- OBJ_KEY = "Object:"
13
- TITLE_KEY = "Title:"
14
- SINDEX_KEY = "Sentence:"
15
- OPINION_KEY = "Attitude:"
16
- STEXT_KEY = "Text:"
17
- TERMS_IN_TITLE = "TermsInTitle:"
18
- TERMS_IN_TEXT = "TermsInText:"
19
- FRAMEVAR_TITLE = "FrameVariant:"
20
-
21
- AUTH_LABEL = '<AUTH>'
22
-
23
- def __iter__(self):
24
- pass
25
-
26
- # region private methods
27
-
28
- @staticmethod
29
- def iter_docs_inds(input_file, get_doc_index_func):
30
- assert(callable(get_doc_index_func))
31
-
32
- title = None
33
- local_doc_ind = 0
34
- has_sentences = False
35
-
36
- for line in RuAttitudesFormatReader.__iter_lines(input_file):
37
-
38
- if RuAttitudesFormatReader.__check_is_title(line):
39
- # We use a placeholder, there is no need in actual value out there.
40
- title = "title"
41
- has_sentences = True
42
-
43
- if RuAttitudesFormatReader.__check_is_doc_sep(line=line, title=title):
44
- yield RuAttitudesFormatReader.__assign_doc_index(doc_index_func=get_doc_index_func,
45
- local_index=local_doc_ind)
46
- local_doc_ind += 1
47
- title = None
48
-
49
- if has_sentences:
50
- yield RuAttitudesFormatReader.__assign_doc_index(doc_index_func=get_doc_index_func,
51
- local_index=local_doc_ind)
52
-
53
- @staticmethod
54
- def iter_docs(input_file, get_doc_index_func):
55
- assert(callable(get_doc_index_func))
56
-
57
- reset = False
58
- title = None
59
- title_terms_count = None
60
- text_terms_count = None
61
- sentences = []
62
- opinions_list = []
63
- objects_list = []
64
- s_index = 0
65
- objects_in_prior_sentences_count = 0
66
- local_doc_ind = 0
67
-
68
- for line in RuAttitudesFormatReader.__iter_lines(input_file):
69
-
70
- if RuAttitudesFormatReader.FILE_KEY in line:
71
- pass
72
-
73
- if RuAttitudesFormatReader.OBJ_KEY in line:
74
- object = RuAttitudesFormatReader.__parse_object(line)
75
- objects_list.append(object)
76
-
77
- if RuAttitudesFormatReader.OPINION_KEY in line:
78
- sentence_opin = RuAttitudesFormatReader.__parse_sentence_opin(line)
79
- opinions_list.append(sentence_opin)
80
-
81
- if RuAttitudesFormatReader.FRAMEVAR_TITLE in line:
82
- # TODO. This information is ommited now.
83
- pass
84
-
85
- if RuAttitudesFormatReader.TERMS_IN_TITLE in line:
86
- title_terms_count = RuAttitudesFormatReader.__parse_terms_in_title_count(line)
87
-
88
- if RuAttitudesFormatReader.SINDEX_KEY in line:
89
- s_index = RuAttitudesFormatReader.__parse_sentence_index(line)
90
-
91
- if RuAttitudesFormatReader.__check_is_title(line):
92
- title = RuAttitudesSentence(is_title=True,
93
- text=RuAttitudesFormatReader.__parse_sentence(line, True),
94
- sentence_opins=opinions_list,
95
- objects_list=objects_list,
96
- sentence_index=-1)
97
- sentences.append(title)
98
- t_len = RuAttitudesFormatReader.__calculate_terms_in_line(line)
99
- assert(title_terms_count == t_len or title_terms_count is None)
100
- reset = True
101
-
102
- if RuAttitudesFormatReader.STEXT_KEY in line and line.index(RuAttitudesFormatReader.STEXT_KEY) == 0:
103
- sentence = RuAttitudesSentence(is_title=False,
104
- text=RuAttitudesFormatReader.__parse_sentence(line, False),
105
- sentence_opins=opinions_list,
106
- objects_list=objects_list,
107
- sentence_index=s_index)
108
- sentences.append(sentence)
109
- objects_in_prior_sentences_count += len(objects_list)
110
- t_len = RuAttitudesFormatReader.__calculate_terms_in_line(line)
111
- assert(text_terms_count == t_len or text_terms_count is None)
112
- reset = True
113
-
114
- if RuAttitudesFormatReader.__check_is_doc_sep(line=line, title=title):
115
- doc_index = RuAttitudesFormatReader.__assign_doc_index(doc_index_func=get_doc_index_func,
116
- local_index=local_doc_ind)
117
- yield RuAttitudesDocument(sentences=sentences,
118
- doc_index=doc_index)
119
- local_doc_ind += 1
120
- sentences = []
121
- reset = True
122
-
123
- if RuAttitudesFormatReader.TERMS_IN_TEXT in line:
124
- text_terms_count = RuAttitudesFormatReader.__parse_terms_in_text_count(line)
125
-
126
- if reset:
127
- opinions_list = []
128
- objects_list = []
129
- title_terms_count = None
130
- reset = False
131
-
132
- if len(sentences) > 0:
133
- doc_index = RuAttitudesFormatReader.__assign_doc_index(doc_index_func=get_doc_index_func,
134
- local_index=local_doc_ind)
135
- yield RuAttitudesDocument(sentences=sentences,
136
- doc_index=doc_index)
137
- sentences = []
138
-
139
- assert(len(sentences) == 0)
140
-
141
- @staticmethod
142
- def __assign_doc_index(doc_index_func, local_index):
143
- assert(callable(doc_index_func))
144
- return doc_index_func(local_index)
145
-
146
- @staticmethod
147
- def __check_is_doc_sep(line, title):
148
- return RuAttitudesFormatReader.DOC_SEP_KEY in line and title is not None
149
-
150
- @staticmethod
151
- def __check_is_title(line):
152
- return RuAttitudesFormatReader.TITLE_KEY in line
153
-
154
- @staticmethod
155
- def __iter_lines(input_file):
156
- for line in input_file.readlines():
157
- yield line.decode('utf-8')
158
-
159
- @staticmethod
160
- def __calculate_terms_in_line(line):
161
- assert(isinstance(line, str))
162
- return len(split_by_whitespaces(line))
163
-
164
- @staticmethod
165
- def __parse_sentence(line, is_title):
166
- assert(isinstance(is_title, bool))
167
-
168
- key = RuAttitudesFormatReader.STEXT_KEY if not is_title else RuAttitudesFormatReader.TITLE_KEY
169
- text = line[len(key):]
170
- return text.strip()
171
-
172
- @staticmethod
173
- def __parse_sentence_opin(line):
174
- line = line[len(RuAttitudesFormatReader.OPINION_KEY):]
175
-
176
- s_from = line.index('b:(')
177
- s_to = line.index(')', s_from)
178
- label = int(line[s_from + 3:s_to])
179
-
180
- o_from = line.index('oi:[')
181
- o_to = line.index(']', o_from)
182
- source_object_id_in_sentence, target_object_id_in_sentence = line[o_from + 4:o_to].split(',')
183
-
184
- source_object_id_in_sentence = int(source_object_id_in_sentence)
185
- target_object_id_in_sentence = int(target_object_id_in_sentence)
186
-
187
- s_from = line.index('si:{')
188
- s_to = line.index('}', s_from)
189
- opninion_key = line[s_from+4:s_to]
190
-
191
- sentence_opin = SentenceOpinion(source_id=source_object_id_in_sentence,
192
- target_id=target_object_id_in_sentence,
193
- label_int=label,
194
- tag=opninion_key)
195
-
196
- return sentence_opin
197
-
198
- @staticmethod
199
- def __parse_object(line):
200
- assert(isinstance(line, str))
201
-
202
- line = line[len(RuAttitudesFormatReader.OBJ_KEY):]
203
-
204
- obj_ind_begin = line.index('oi:[', 0)
205
- obj_ind_end = line.index(']', obj_ind_begin + 1)
206
-
207
- o_begin = line.index("'", 0)
208
- o_end = line.index("'", o_begin + 1)
209
-
210
- b_from = line.index('b:(')
211
- b_to = line.index(')', b_from)
212
-
213
- id_in_sentence = int(line[obj_ind_begin + 4:obj_ind_end])
214
- term_index, length = line[b_from+3:b_to].split(',')
215
- value = line[o_begin + 1:o_end]
216
-
217
- obj_type = RuAttitudesFormatReader.__try_get_type(line)
218
-
219
- sg_from = line.index('si:{')
220
- sg_to = line.index('}', sg_from)
221
- group_index = int(line[sg_from+4:sg_to])
222
-
223
- is_auth = '<AUTH>' in line
224
-
225
- text_object = TextObject(id_in_sentence=id_in_sentence,
226
- value=value,
227
- obj_type=obj_type,
228
- position=int(term_index),
229
- terms_count=int(length),
230
- syn_group_index=group_index,
231
- is_auth=is_auth)
232
-
233
- return text_object
234
-
235
- @staticmethod
236
- def __parse_terms_in_title_count(line):
237
- line = line[len(RuAttitudesFormatReader.TERMS_IN_TITLE):]
238
- return int(line)
239
-
240
- @staticmethod
241
- def __parse_terms_in_text_count(line):
242
- line = line[len(RuAttitudesFormatReader.TERMS_IN_TEXT):]
243
- return int(line)
244
-
245
- @staticmethod
246
- def __parse_sentence_index(line):
247
- line = line[len(RuAttitudesFormatReader.SINDEX_KEY):]
248
- return int(line)
249
-
250
- @staticmethod
251
- def __try_get_type(line):
252
-
253
- # Tag, utilized in RuAttitudes-2.0 format.
254
- template = 'type:'
255
- if template in line:
256
- is_auth = RuAttitudesFormatReader.AUTH_LABEL in line
257
- t_from = line.index(template)
258
- t_to = line.index(RuAttitudesFormatReader.AUTH_LABEL[0], t_from) if is_auth else len(line)
259
- return line[t_from + len(template):t_to].strip()
260
-
261
- # Tag, utilized in RuAttitudes-1.* format.
262
- template = 't:['
263
- if template in line:
264
- t_from = line.index(template)
265
- t_to = line.index(']', t_from)
266
- return line[t_from + len(template):t_to].strip()
267
-
268
- # endregion
@@ -1,73 +0,0 @@
1
- from arekit.common.docs.sentence import BaseDocumentSentence
2
- from arekit.contrib.source.ruattitudes.opinions.base import SentenceOpinion
3
-
4
-
5
- class RuAttitudesSentence(BaseDocumentSentence):
6
-
7
- def __init__(self, is_title, text, sentence_opins, objects_list, sentence_index):
8
- assert(isinstance(is_title, bool))
9
- assert(isinstance(sentence_opins, list))
10
- assert(isinstance(objects_list, list))
11
- assert(isinstance(sentence_index, int))
12
- super(RuAttitudesSentence, self).__init__(text)
13
-
14
- self.__is_title = is_title
15
- self.__sentence_opins = sentence_opins
16
- self.__objects = objects_list
17
- self.__sentence_index = sentence_index
18
- self.__owner = None
19
-
20
- # region properties
21
-
22
- @property
23
- def SentenceIndex(self):
24
- return self.__sentence_index
25
-
26
- @property
27
- def IsTitle(self):
28
- return self.__is_title
29
-
30
- @property
31
- def Owner(self):
32
- return self.__owner
33
-
34
- @property
35
- def ObjectsCount(self):
36
- return len(self.__objects)
37
-
38
- # endregion
39
-
40
- # region public methods
41
-
42
- def set_owner(self, owner):
43
- if self.__owner is not None:
44
- raise Exception("Owner is already declared")
45
- self.__owner = owner
46
-
47
- def get_objects(self, sentence_opin):
48
- assert(isinstance(sentence_opin, SentenceOpinion))
49
- source_obj = self.__objects[sentence_opin.SourceID]
50
- target_obj = self.__objects[sentence_opin.TargetID]
51
- return source_obj, target_obj
52
-
53
- def get_doc_level_text_object_id(self, text_object_ind):
54
- return text_object_ind + self.__owner.get_objects_declared_before(self.SentenceIndex)
55
-
56
- def iter_objects(self):
57
- for object in self.__objects:
58
- yield object
59
-
60
- def find_sentence_opin_by_key(self, key):
61
- assert(key is not None)
62
-
63
- for opinion in self.__sentence_opins:
64
- if opinion.Tag == key:
65
- return opinion
66
-
67
- return None
68
-
69
- def iter_sentence_opins(self):
70
- for opinion in self.__sentence_opins:
71
- yield opinion
72
-
73
- # endregion
@@ -1,17 +0,0 @@
1
- from arekit.contrib.source.ruattitudes.io_utils import RuAttitudesIOUtils
2
- from arekit.contrib.source.synonyms.utils import iter_synonym_groups
3
-
4
-
5
- class RuAttitudesSynonymsCollectionHelper(object):
6
-
7
- @staticmethod
8
- def iter_groups(version):
9
- it = RuAttitudesIOUtils.iter_from_zip(
10
- inner_path=RuAttitudesIOUtils.get_synonyms_innerpath(),
11
- process_func=lambda input_file: iter_synonym_groups(
12
- input_file,
13
- desc="Loading RuAttitudes SynonymsCollection"),
14
- version=version)
15
-
16
- for group in it:
17
- yield group
@@ -1,59 +0,0 @@
1
- from arekit.common.bound import Bound
2
- from arekit.contrib.source.brat.entities.entity import BratEntity
3
-
4
-
5
- class TextObject(object):
6
- """
7
- Considering any part of text, labeled by 'position', and 'type'
8
- The latter is used to emphasize the entity type.
9
- """
10
-
11
- def __init__(self, id_in_sentence, value, obj_type, position, terms_count, syn_group_index, is_auth):
12
- assert(isinstance(id_in_sentence, int))
13
- assert(isinstance(value, str))
14
- assert(isinstance(position, int))
15
- assert(isinstance(terms_count, int) and terms_count > 0)
16
- assert(isinstance(obj_type, str) or obj_type is None)
17
- assert(isinstance(syn_group_index, int))
18
- assert(isinstance(is_auth, bool))
19
- self.__value = value
20
- self.__type = obj_type
21
- self.__id_in_sentence = id_in_sentence
22
- self.__syn_group_index = syn_group_index
23
- self.__is_auth = is_auth
24
- self.__bound = Bound(pos=position, length=terms_count)
25
-
26
- def to_entity(self, to_doc_id_func):
27
- assert(callable(to_doc_id_func))
28
- return BratEntity(id_in_doc=to_doc_id_func(self.__id_in_sentence),
29
- value=self.__value if len(self.__value) > 0 else '[empty]',
30
- e_type=self.__type,
31
- index_begin=self.__bound.Position,
32
- index_end=self.__bound.Position + self.__bound.Length,
33
- group_index=self.__syn_group_index,
34
- # In the case of RuAttitudes collection we do not support childs.
35
- childs=None)
36
-
37
- # region properties
38
-
39
- @property
40
- def Value(self):
41
- return self.__value
42
-
43
- @property
44
- def Type(self):
45
- return self.__type
46
-
47
- @property
48
- def IdInSentence(self):
49
- return self.__id_in_sentence
50
-
51
- @property
52
- def Bound(self):
53
- return self.__bound
54
-
55
- @property
56
- def IsAuthorized(self):
57
- return self.__is_auth
58
-
59
- # endregion
File without changes
@@ -1,157 +0,0 @@
1
- import json
2
-
3
- from arekit.common.labels.str_fmt import StringLabelsFormatter
4
- from arekit.contrib.source.rusentiframes.effect import FrameEffect
5
- from arekit.contrib.source.rusentiframes.io_utils import RuSentiFramesIOUtils
6
- from arekit.contrib.source.rusentiframes.types import RuSentiFramesVersions
7
- from arekit.contrib.source.rusentiframes.labels_fmt import RuSentiFramesLabelsFormatter, \
8
- RuSentiFramesEffectLabelsFormatter
9
- from arekit.contrib.source.rusentiframes.polarity import RuSentiFramesFramePolarity
10
- from arekit.contrib.source.rusentiframes.role import FrameRole
11
- from arekit.contrib.source.rusentiframes.state import FrameState
12
-
13
-
14
- class RuSentiFramesCollection(object):
15
-
16
- __frames_key = "frames"
17
- __polarity_key = "polarity"
18
- __state_key = "state"
19
- __effect_key = "effect"
20
- __variants_key = "variants"
21
-
22
- def __init__(self, data, labels_fmt, effect_labels_fmt, lowercase_variants=True):
23
- """ data: dict
24
- Has the following structure of the frame contents:
25
- {
26
- "frame_id": [ ... variants string list ... ]
27
- ...
28
- }
29
- lowercase_variants: bool
30
- If 'True', forcely treat frame-variants as case-insensitive (lowercased)
31
- or avoiding lowercasing operation in case of 'False'.
32
- """
33
- assert(isinstance(data, dict))
34
- assert(isinstance(labels_fmt, StringLabelsFormatter))
35
- assert(isinstance(effect_labels_fmt, StringLabelsFormatter))
36
- self.__labels_fmt = labels_fmt
37
- self.__effect_labels_fmt = effect_labels_fmt
38
- self.__data = data
39
-
40
- if lowercase_variants:
41
- for frame_id, frame in self.__data.items():
42
- frame[self.__variants_key] = [variant.lower() for variant in frame[self.__variants_key]]
43
-
44
- # region classmethods
45
-
46
- @classmethod
47
- def read(cls, version, labels_fmt, effect_labels_fmt):
48
- assert(isinstance(version, RuSentiFramesVersions))
49
- assert(isinstance(labels_fmt, RuSentiFramesLabelsFormatter))
50
- assert(isinstance(effect_labels_fmt, RuSentiFramesEffectLabelsFormatter))
51
-
52
- return RuSentiFramesIOUtils.read_from_zip(
53
- inner_path=RuSentiFramesIOUtils.get_collection_filepath(),
54
- process_func=lambda input_file: cls.__from_json(
55
- input_file=input_file,
56
- labels_fmt=labels_fmt,
57
- effect_labels_fmt=effect_labels_fmt),
58
- version=version)
59
-
60
- @classmethod
61
- def __from_json(cls, input_file, labels_fmt, effect_labels_fmt):
62
- data = json.load(input_file)
63
- return cls(data=data,
64
- labels_fmt=labels_fmt,
65
- effect_labels_fmt=effect_labels_fmt)
66
-
67
- # endregion
68
-
69
- # region public 'try get' methods
70
-
71
- def try_get_frame_polarity(self, frame_id, role_src, role_dest):
72
- assert(isinstance(role_src, str))
73
- assert(isinstance(role_dest, str))
74
-
75
- if not self.__check_has_frame_polarity_key(frame_id):
76
- return None
77
-
78
- for args in self.__data[frame_id][self.__frames_key][self.__polarity_key]:
79
- if args[0] == role_src and args[1] == role_dest:
80
- return self.__frame_polarity_from_args(args)
81
- return None
82
-
83
- # endregion
84
-
85
- # region public 'get' methods
86
-
87
- def get_frame_roles(self, frame_id):
88
- assert(isinstance(frame_id, str))
89
- return [FrameRole(source=key, description=value)
90
- for key, value in self.__data[frame_id]["roles"].items()]
91
-
92
- def get_frame_polarities(self, frame_id):
93
- assert(isinstance(frame_id, str))
94
-
95
- if not self.__check_has_frame_polarity_key(frame_id):
96
- return []
97
-
98
- return [self.__frame_polarity_from_args(args)
99
- for args in self.__data[frame_id][self.__frames_key][self.__polarity_key]]
100
-
101
- def get_frame_states(self, frame_id):
102
- assert(isinstance(frame_id, str))
103
-
104
- if self.__state_key not in self.__data[frame_id][self.__frames_key]:
105
- return []
106
-
107
- return [FrameState(role=args[0], label=self.__labels_fmt.str_to_label(args[1]), prob=args[2])
108
- for args in self.__data[frame_id][self.__frames_key][self.__state_key]]
109
-
110
- def get_frame_titles(self, frame_id):
111
- assert(isinstance(frame_id, str))
112
- return self.__data[frame_id]["title"]
113
-
114
- def get_frame_variants(self, frame_id):
115
- return self.__data[frame_id][self.__variants_key]
116
-
117
- def get_frame_values(self, frame_id):
118
- assert(isinstance(frame_id, str))
119
- # TODO. Not implemented yet.
120
- pass
121
-
122
- def get_frame_effects(self, frame_id):
123
- assert(isinstance(frame_id, str))
124
-
125
- if self.__effect_key not in self.__data[frame_id][self.__frames_key]:
126
- return []
127
-
128
- return [FrameEffect(role=args[0], label=self.__effect_labels_fmt.str_to_label(args[1]), prob=args[2])
129
- for args in self.__data[frame_id][self.__frames_key][self.__effect_key]]
130
-
131
- # endregion
132
-
133
- # region public 'iter' methods
134
-
135
- def iter_frames_ids(self):
136
- for frame_id in self.__data.keys():
137
- yield frame_id
138
-
139
- def iter_frame_id_and_variants(self):
140
- for id, frame in self.__data.items():
141
- for variant in frame[self.__variants_key]:
142
- yield id, variant
143
-
144
- # endregion
145
-
146
- # region private methods
147
-
148
- def __check_has_frame_polarity_key(self, frame_id):
149
- return self.__polarity_key in self.__data[frame_id][self.__frames_key]
150
-
151
- def __frame_polarity_from_args(self, args):
152
- return RuSentiFramesFramePolarity(role_src=args[0],
153
- role_dest=args[1],
154
- label=self.__labels_fmt.str_to_label(args[2]),
155
- prob=args[3])
156
-
157
- # endregion
@@ -1,24 +0,0 @@
1
- from arekit.common.labels.base import Label
2
-
3
-
4
- class FrameEffect(object):
5
-
6
- def __init__(self, role, label, prob):
7
- assert(isinstance(role, str))
8
- assert(isinstance(label, Label))
9
- assert(isinstance(prob, float))
10
- self.__role = role
11
- self.__label = label
12
- self.__prob = prob
13
-
14
- @property
15
- def Role(self):
16
- return self.__role
17
-
18
- @property
19
- def Label(self):
20
- return self.__label
21
-
22
- @property
23
- def Prob(self):
24
- return self.__prob
@@ -1,19 +0,0 @@
1
- from os import path
2
-
3
- from arekit.contrib.source.zip_utils import ZipArchiveUtils
4
-
5
-
6
- class RuSentiFramesIOUtils(ZipArchiveUtils):
7
-
8
- # region internal methods
9
-
10
- @staticmethod
11
- def get_archive_filepath(version):
12
- assert(isinstance(version, str))
13
- return path.join(RuSentiFramesIOUtils.get_data_root(), "rusentiframes-{version}.zip".format(version=version))
14
-
15
- @staticmethod
16
- def get_collection_filepath():
17
- return "frames.json"
18
-
19
- # endregion
@@ -1,22 +0,0 @@
1
- from arekit.common.labels.base import Label
2
- from arekit.common.labels.str_fmt import StringLabelsFormatter
3
-
4
-
5
- class RuSentiFramesLabelsFormatter(StringLabelsFormatter):
6
-
7
- def __init__(self, pos_label_type, neg_label_type):
8
- assert(issubclass(pos_label_type, Label))
9
- assert(issubclass(neg_label_type, Label))
10
- stol = {'neg': neg_label_type, 'pos': pos_label_type}
11
- super(RuSentiFramesLabelsFormatter, self).__init__(stol=stol)
12
-
13
-
14
- class RuSentiFramesEffectLabelsFormatter(StringLabelsFormatter):
15
- """ Effect formatter utilizes '-' and '+' signs.
16
- """
17
-
18
- def __init__(self, pos_label_type, neg_label_type):
19
- assert(issubclass(pos_label_type, Label))
20
- assert(issubclass(neg_label_type, Label))
21
- stol = {'-': neg_label_type, '+': pos_label_type}
22
- super(RuSentiFramesEffectLabelsFormatter, self).__init__(stol=stol)
@@ -1,35 +0,0 @@
1
- from arekit.common.frames.connotations.descriptor import FrameConnotationDescriptor
2
- from arekit.common.labels.base import Label
3
-
4
-
5
- class RuSentiFramesFramePolarity(FrameConnotationDescriptor):
6
- """
7
- Polarity description between source (Agent) towards dest (Theme)
8
- The latter are related to roles of frame polarity.
9
- """
10
-
11
- def __init__(self, role_src, role_dest, label, prob):
12
- assert(isinstance(role_src, str))
13
- assert(isinstance(role_dest, str))
14
- assert(isinstance(label, Label))
15
- assert(isinstance(prob, float))
16
- self.__role_src = role_src
17
- self.__role_dest = role_dest
18
- self.__label = label
19
- self.__prob = prob
20
-
21
- @property
22
- def Source(self):
23
- return self.__role_src
24
-
25
- @property
26
- def Destination(self):
27
- return self.__role_dest
28
-
29
- @property
30
- def Label(self):
31
- return self.__label
32
-
33
- @property
34
- def Prob(self):
35
- return self.__prob