arekit 0.23.1__py3-none-any.whl → 0.25.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (267) hide show
  1. arekit/common/context/terms_mapper.py +2 -2
  2. arekit/common/data/const.py +5 -4
  3. arekit/common/{experiment/api/ops_doc.py → data/doc_provider.py} +1 -1
  4. arekit/common/data/input/providers/columns/sample.py +6 -1
  5. arekit/common/data/input/providers/instances/base.py +1 -1
  6. arekit/common/data/input/providers/rows/base.py +36 -13
  7. arekit/common/data/input/providers/rows/samples.py +57 -55
  8. arekit/common/data/input/providers/sample/cropped.py +2 -2
  9. arekit/common/data/input/sample.py +1 -1
  10. arekit/common/data/rows_fmt.py +82 -0
  11. arekit/common/data/rows_parser.py +43 -0
  12. arekit/common/data/storages/base.py +23 -18
  13. arekit/common/data/views/samples.py +2 -8
  14. arekit/common/{news → docs}/base.py +2 -2
  15. arekit/common/{news → docs}/entities_grouping.py +2 -1
  16. arekit/common/{news → docs}/entity.py +2 -1
  17. arekit/common/{news → docs}/parsed/base.py +5 -5
  18. arekit/common/docs/parsed/providers/base.py +68 -0
  19. arekit/common/{news → docs}/parsed/providers/base_pairs.py +2 -2
  20. arekit/common/{news → docs}/parsed/providers/entity_service.py +27 -22
  21. arekit/common/{news → docs}/parsed/providers/opinion_pairs.py +2 -2
  22. arekit/common/{news → docs}/parsed/providers/text_opinion_pairs.py +6 -6
  23. arekit/common/docs/parsed/service.py +31 -0
  24. arekit/common/docs/parser.py +66 -0
  25. arekit/common/{news → docs}/sentence.py +1 -1
  26. arekit/common/entities/base.py +11 -2
  27. arekit/common/experiment/api/base_samples_io.py +1 -1
  28. arekit/common/frames/variants/collection.py +2 -2
  29. arekit/common/linkage/base.py +2 -2
  30. arekit/common/linkage/meta.py +23 -0
  31. arekit/common/linkage/opinions.py +1 -1
  32. arekit/common/linkage/text_opinions.py +2 -2
  33. arekit/common/opinions/annot/algo/base.py +1 -1
  34. arekit/common/opinions/annot/algo/pair_based.py +15 -13
  35. arekit/common/opinions/annot/algo/predefined.py +4 -4
  36. arekit/common/opinions/annot/algo_based.py +5 -5
  37. arekit/common/opinions/annot/base.py +3 -3
  38. arekit/common/opinions/base.py +7 -7
  39. arekit/common/opinions/collection.py +3 -3
  40. arekit/common/pipeline/base.py +12 -16
  41. arekit/common/pipeline/batching.py +28 -0
  42. arekit/common/pipeline/context.py +5 -1
  43. arekit/common/pipeline/items/base.py +38 -1
  44. arekit/common/pipeline/items/flatten.py +5 -1
  45. arekit/common/pipeline/items/handle.py +2 -1
  46. arekit/common/pipeline/items/iter.py +2 -1
  47. arekit/common/pipeline/items/map.py +2 -1
  48. arekit/common/pipeline/items/map_nested.py +4 -0
  49. arekit/common/pipeline/utils.py +32 -0
  50. arekit/common/service/sqlite.py +36 -0
  51. arekit/common/synonyms/base.py +2 -2
  52. arekit/common/text/{partitioning/str.py → partitioning.py} +16 -11
  53. arekit/common/text_opinions/base.py +11 -11
  54. arekit/common/utils.py +33 -46
  55. arekit/contrib/networks/embedding.py +3 -3
  56. arekit/contrib/networks/embedding_io.py +5 -5
  57. arekit/contrib/networks/input/const.py +0 -2
  58. arekit/contrib/networks/input/providers/sample.py +15 -29
  59. arekit/contrib/networks/input/rows_parser.py +47 -134
  60. arekit/contrib/prompt/sample.py +18 -16
  61. arekit/contrib/utils/data/contents/opinions.py +17 -5
  62. arekit/contrib/utils/data/doc_provider/dict_based.py +13 -0
  63. arekit/contrib/utils/data/{doc_ops → doc_provider}/dir_based.py +7 -7
  64. arekit/contrib/utils/data/readers/base.py +3 -0
  65. arekit/contrib/utils/data/readers/csv_pd.py +10 -4
  66. arekit/contrib/utils/data/readers/jsonl.py +3 -0
  67. arekit/contrib/utils/data/readers/sqlite.py +14 -0
  68. arekit/contrib/utils/data/service/balance.py +0 -1
  69. arekit/contrib/utils/data/storages/pandas_based.py +3 -5
  70. arekit/contrib/utils/data/storages/row_cache.py +18 -6
  71. arekit/contrib/utils/data/storages/sqlite_based.py +17 -0
  72. arekit/contrib/utils/data/writers/base.py +5 -0
  73. arekit/contrib/utils/data/writers/csv_native.py +3 -0
  74. arekit/contrib/utils/data/writers/csv_pd.py +3 -0
  75. arekit/contrib/utils/data/writers/json_opennre.py +31 -13
  76. arekit/contrib/utils/data/writers/sqlite_native.py +114 -0
  77. arekit/contrib/utils/io_utils/embedding.py +25 -33
  78. arekit/contrib/utils/io_utils/utils.py +3 -24
  79. arekit/contrib/utils/pipelines/items/sampling/base.py +31 -26
  80. arekit/contrib/utils/pipelines/items/sampling/networks.py +7 -10
  81. arekit/contrib/utils/pipelines/items/text/entities_default.py +2 -2
  82. arekit/contrib/utils/pipelines/items/text/frames.py +2 -3
  83. arekit/contrib/utils/pipelines/items/text/frames_lemmatized.py +3 -3
  84. arekit/contrib/utils/pipelines/items/text/frames_negation.py +2 -1
  85. arekit/contrib/utils/pipelines/items/text/tokenizer.py +3 -5
  86. arekit/contrib/utils/pipelines/items/text/translator.py +136 -0
  87. arekit/contrib/utils/pipelines/opinion_collections.py +5 -5
  88. arekit/contrib/utils/pipelines/text_opinion/annot/algo_based.py +7 -7
  89. arekit/contrib/utils/pipelines/text_opinion/extraction.py +34 -22
  90. arekit/contrib/utils/pipelines/text_opinion/filters/base.py +1 -1
  91. arekit/contrib/utils/pipelines/text_opinion/filters/distance_based.py +1 -1
  92. arekit/contrib/utils/pipelines/text_opinion/filters/entity_based.py +3 -3
  93. arekit/contrib/utils/pipelines/text_opinion/filters/limitation.py +4 -4
  94. arekit/contrib/utils/serializer.py +4 -23
  95. arekit-0.25.0.data/data/logo.png +0 -0
  96. arekit-0.25.0.dist-info/METADATA +82 -0
  97. arekit-0.25.0.dist-info/RECORD +259 -0
  98. {arekit-0.23.1.dist-info → arekit-0.25.0.dist-info}/WHEEL +1 -1
  99. arekit/common/data/row_ids/base.py +0 -79
  100. arekit/common/data/row_ids/binary.py +0 -38
  101. arekit/common/data/row_ids/multiple.py +0 -14
  102. arekit/common/folding/base.py +0 -36
  103. arekit/common/folding/fixed.py +0 -42
  104. arekit/common/folding/nofold.py +0 -15
  105. arekit/common/folding/united.py +0 -46
  106. arekit/common/news/objects_parser.py +0 -37
  107. arekit/common/news/parsed/providers/base.py +0 -48
  108. arekit/common/news/parsed/service.py +0 -31
  109. arekit/common/news/parser.py +0 -34
  110. arekit/common/text/parser.py +0 -12
  111. arekit/common/text/partitioning/__init__.py +0 -0
  112. arekit/common/text/partitioning/base.py +0 -4
  113. arekit/common/text/partitioning/terms.py +0 -35
  114. arekit/contrib/source/__init__.py +0 -0
  115. arekit/contrib/source/brat/__init__.py +0 -0
  116. arekit/contrib/source/brat/annot.py +0 -83
  117. arekit/contrib/source/brat/entities/__init__.py +0 -0
  118. arekit/contrib/source/brat/entities/compound.py +0 -33
  119. arekit/contrib/source/brat/entities/entity.py +0 -42
  120. arekit/contrib/source/brat/entities/parser.py +0 -53
  121. arekit/contrib/source/brat/news.py +0 -28
  122. arekit/contrib/source/brat/opinions/__init__.py +0 -0
  123. arekit/contrib/source/brat/opinions/converter.py +0 -19
  124. arekit/contrib/source/brat/relation.py +0 -32
  125. arekit/contrib/source/brat/sentence.py +0 -69
  126. arekit/contrib/source/brat/sentences_reader.py +0 -128
  127. arekit/contrib/source/download.py +0 -41
  128. arekit/contrib/source/nerel/__init__.py +0 -0
  129. arekit/contrib/source/nerel/entities.py +0 -55
  130. arekit/contrib/source/nerel/folding/__init__.py +0 -0
  131. arekit/contrib/source/nerel/folding/fixed.py +0 -75
  132. arekit/contrib/source/nerel/io_utils.py +0 -62
  133. arekit/contrib/source/nerel/labels.py +0 -241
  134. arekit/contrib/source/nerel/reader.py +0 -46
  135. arekit/contrib/source/nerel/utils.py +0 -24
  136. arekit/contrib/source/nerel/versions.py +0 -12
  137. arekit/contrib/source/nerelbio/__init__.py +0 -0
  138. arekit/contrib/source/nerelbio/io_utils.py +0 -62
  139. arekit/contrib/source/nerelbio/labels.py +0 -265
  140. arekit/contrib/source/nerelbio/reader.py +0 -8
  141. arekit/contrib/source/nerelbio/versions.py +0 -8
  142. arekit/contrib/source/ruattitudes/__init__.py +0 -0
  143. arekit/contrib/source/ruattitudes/collection.py +0 -36
  144. arekit/contrib/source/ruattitudes/entity/__init__.py +0 -0
  145. arekit/contrib/source/ruattitudes/entity/parser.py +0 -7
  146. arekit/contrib/source/ruattitudes/io_utils.py +0 -56
  147. arekit/contrib/source/ruattitudes/labels_fmt.py +0 -12
  148. arekit/contrib/source/ruattitudes/news.py +0 -51
  149. arekit/contrib/source/ruattitudes/news_brat.py +0 -44
  150. arekit/contrib/source/ruattitudes/opinions/__init__.py +0 -0
  151. arekit/contrib/source/ruattitudes/opinions/base.py +0 -28
  152. arekit/contrib/source/ruattitudes/opinions/converter.py +0 -37
  153. arekit/contrib/source/ruattitudes/reader.py +0 -268
  154. arekit/contrib/source/ruattitudes/sentence.py +0 -73
  155. arekit/contrib/source/ruattitudes/synonyms.py +0 -17
  156. arekit/contrib/source/ruattitudes/text_object.py +0 -57
  157. arekit/contrib/source/rusentiframes/__init__.py +0 -0
  158. arekit/contrib/source/rusentiframes/collection.py +0 -157
  159. arekit/contrib/source/rusentiframes/effect.py +0 -24
  160. arekit/contrib/source/rusentiframes/io_utils.py +0 -19
  161. arekit/contrib/source/rusentiframes/labels_fmt.py +0 -22
  162. arekit/contrib/source/rusentiframes/polarity.py +0 -35
  163. arekit/contrib/source/rusentiframes/role.py +0 -15
  164. arekit/contrib/source/rusentiframes/state.py +0 -24
  165. arekit/contrib/source/rusentiframes/types.py +0 -42
  166. arekit/contrib/source/rusentiframes/value.py +0 -2
  167. arekit/contrib/source/rusentrel/__init__.py +0 -0
  168. arekit/contrib/source/rusentrel/const.py +0 -3
  169. arekit/contrib/source/rusentrel/entities.py +0 -26
  170. arekit/contrib/source/rusentrel/io_utils.py +0 -125
  171. arekit/contrib/source/rusentrel/labels_fmt.py +0 -12
  172. arekit/contrib/source/rusentrel/news_reader.py +0 -51
  173. arekit/contrib/source/rusentrel/opinions/__init__.py +0 -0
  174. arekit/contrib/source/rusentrel/opinions/collection.py +0 -30
  175. arekit/contrib/source/rusentrel/opinions/converter.py +0 -40
  176. arekit/contrib/source/rusentrel/opinions/provider.py +0 -54
  177. arekit/contrib/source/rusentrel/opinions/writer.py +0 -42
  178. arekit/contrib/source/rusentrel/synonyms.py +0 -17
  179. arekit/contrib/source/sentinerel/__init__.py +0 -0
  180. arekit/contrib/source/sentinerel/entities.py +0 -52
  181. arekit/contrib/source/sentinerel/folding/__init__.py +0 -0
  182. arekit/contrib/source/sentinerel/folding/factory.py +0 -32
  183. arekit/contrib/source/sentinerel/folding/fixed.py +0 -73
  184. arekit/contrib/source/sentinerel/io_utils.py +0 -87
  185. arekit/contrib/source/sentinerel/labels.py +0 -53
  186. arekit/contrib/source/sentinerel/labels_scaler.py +0 -30
  187. arekit/contrib/source/sentinerel/reader.py +0 -42
  188. arekit/contrib/source/synonyms/__init__.py +0 -0
  189. arekit/contrib/source/synonyms/utils.py +0 -19
  190. arekit/contrib/source/zip_utils.py +0 -47
  191. arekit/contrib/utils/bert/rows.py +0 -0
  192. arekit/contrib/utils/bert/text_b_rus.py +0 -18
  193. arekit/contrib/utils/connotations/__init__.py +0 -0
  194. arekit/contrib/utils/connotations/rusentiframes_sentiment.py +0 -23
  195. arekit/contrib/utils/cv/__init__.py +0 -0
  196. arekit/contrib/utils/cv/doc_stat/__init__.py +0 -0
  197. arekit/contrib/utils/cv/doc_stat/base.py +0 -37
  198. arekit/contrib/utils/cv/doc_stat/sentence.py +0 -12
  199. arekit/contrib/utils/cv/splitters/__init__.py +0 -0
  200. arekit/contrib/utils/cv/splitters/base.py +0 -4
  201. arekit/contrib/utils/cv/splitters/default.py +0 -53
  202. arekit/contrib/utils/cv/splitters/statistical.py +0 -57
  203. arekit/contrib/utils/cv/two_class.py +0 -77
  204. arekit/contrib/utils/data/doc_ops/__init__.py +0 -0
  205. arekit/contrib/utils/data/doc_ops/dict_based.py +0 -13
  206. arekit/contrib/utils/data/ext.py +0 -31
  207. arekit/contrib/utils/data/views/__init__.py +0 -0
  208. arekit/contrib/utils/data/views/linkages/__init__.py +0 -0
  209. arekit/contrib/utils/data/views/linkages/base.py +0 -58
  210. arekit/contrib/utils/data/views/linkages/multilabel.py +0 -48
  211. arekit/contrib/utils/data/views/linkages/utils.py +0 -24
  212. arekit/contrib/utils/data/views/opinions.py +0 -14
  213. arekit/contrib/utils/download.py +0 -78
  214. arekit/contrib/utils/entities/formatters/str_rus_cased_fmt.py +0 -78
  215. arekit/contrib/utils/entities/formatters/str_rus_nocased_fmt.py +0 -15
  216. arekit/contrib/utils/entities/formatters/str_simple_fmt.py +0 -24
  217. arekit/contrib/utils/entities/formatters/str_simple_uppercase_fmt.py +0 -21
  218. arekit/contrib/utils/io_utils/opinions.py +0 -39
  219. arekit/contrib/utils/io_utils/samples.py +0 -78
  220. arekit/contrib/utils/lexicons/__init__.py +0 -0
  221. arekit/contrib/utils/lexicons/lexicon.py +0 -43
  222. arekit/contrib/utils/lexicons/relation.py +0 -45
  223. arekit/contrib/utils/lexicons/rusentilex.py +0 -34
  224. arekit/contrib/utils/nn/__init__.py +0 -0
  225. arekit/contrib/utils/nn/rows.py +0 -83
  226. arekit/contrib/utils/pipelines/items/sampling/bert.py +0 -5
  227. arekit/contrib/utils/pipelines/items/text/terms_splitter.py +0 -10
  228. arekit/contrib/utils/pipelines/items/to_output.py +0 -101
  229. arekit/contrib/utils/pipelines/sources/__init__.py +0 -0
  230. arekit/contrib/utils/pipelines/sources/nerel/__init__.py +0 -0
  231. arekit/contrib/utils/pipelines/sources/nerel/doc_ops.py +0 -27
  232. arekit/contrib/utils/pipelines/sources/nerel/extract_text_relations.py +0 -59
  233. arekit/contrib/utils/pipelines/sources/nerel/labels_fmt.py +0 -60
  234. arekit/contrib/utils/pipelines/sources/nerel_bio/__init__.py +0 -0
  235. arekit/contrib/utils/pipelines/sources/nerel_bio/doc_ops.py +0 -29
  236. arekit/contrib/utils/pipelines/sources/nerel_bio/extrat_text_relations.py +0 -59
  237. arekit/contrib/utils/pipelines/sources/nerel_bio/labels_fmt.py +0 -79
  238. arekit/contrib/utils/pipelines/sources/ruattitudes/__init__.py +0 -0
  239. arekit/contrib/utils/pipelines/sources/ruattitudes/doc_ops.py +0 -56
  240. arekit/contrib/utils/pipelines/sources/ruattitudes/entity_filter.py +0 -19
  241. arekit/contrib/utils/pipelines/sources/ruattitudes/extract_text_opinions.py +0 -58
  242. arekit/contrib/utils/pipelines/sources/rusentrel/__init__.py +0 -0
  243. arekit/contrib/utils/pipelines/sources/rusentrel/doc_ops.py +0 -21
  244. arekit/contrib/utils/pipelines/sources/rusentrel/extract_text_opinions.py +0 -100
  245. arekit/contrib/utils/pipelines/sources/sentinerel/__init__.py +0 -0
  246. arekit/contrib/utils/pipelines/sources/sentinerel/doc_ops.py +0 -29
  247. arekit/contrib/utils/pipelines/sources/sentinerel/entity_filter.py +0 -62
  248. arekit/contrib/utils/pipelines/sources/sentinerel/extract_text_opinions.py +0 -175
  249. arekit/contrib/utils/pipelines/sources/sentinerel/labels_fmt.py +0 -50
  250. arekit/contrib/utils/pipelines/text_opinion/annot/predefined.py +0 -88
  251. arekit/contrib/utils/resources.py +0 -26
  252. arekit/contrib/utils/sources/__init__.py +0 -0
  253. arekit/contrib/utils/sources/sentinerel/__init__.py +0 -0
  254. arekit/contrib/utils/sources/sentinerel/text_opinion/__init__.py +0 -0
  255. arekit/contrib/utils/sources/sentinerel/text_opinion/prof_per_org_filter.py +0 -63
  256. arekit/contrib/utils/utils_folding.py +0 -19
  257. arekit/download_data.py +0 -11
  258. arekit-0.23.1.dist-info/METADATA +0 -23
  259. arekit-0.23.1.dist-info/RECORD +0 -403
  260. /arekit/common/{data/row_ids → docs}/__init__.py +0 -0
  261. /arekit/common/{folding → docs/parsed}/__init__.py +0 -0
  262. /arekit/common/{news → docs/parsed/providers}/__init__.py +0 -0
  263. /arekit/common/{news → docs}/parsed/term_position.py +0 -0
  264. /arekit/common/{news/parsed → service}/__init__.py +0 -0
  265. /arekit/{common/news/parsed/providers → contrib/utils/data/doc_provider}/__init__.py +0 -0
  266. {arekit-0.23.1.dist-info → arekit-0.25.0.dist-info}/LICENSE +0 -0
  267. {arekit-0.23.1.dist-info → arekit-0.25.0.dist-info}/top_level.txt +0 -0
@@ -1,157 +0,0 @@
1
- import json
2
-
3
- from arekit.common.labels.str_fmt import StringLabelsFormatter
4
- from arekit.contrib.source.rusentiframes.effect import FrameEffect
5
- from arekit.contrib.source.rusentiframes.io_utils import RuSentiFramesIOUtils
6
- from arekit.contrib.source.rusentiframes.types import RuSentiFramesVersions
7
- from arekit.contrib.source.rusentiframes.labels_fmt import RuSentiFramesLabelsFormatter, \
8
- RuSentiFramesEffectLabelsFormatter
9
- from arekit.contrib.source.rusentiframes.polarity import RuSentiFramesFramePolarity
10
- from arekit.contrib.source.rusentiframes.role import FrameRole
11
- from arekit.contrib.source.rusentiframes.state import FrameState
12
-
13
-
14
class RuSentiFramesCollection(object):
    """ Accessor over the RuSentiFrames lexicon contents.

        Wraps a dictionary of frames addressed by frame id and converts
        the raw entries (roles, polarities, states, effects) into the
        related objects on request.
    """

    __frames_key = "frames"
    __polarity_key = "polarity"
    __state_key = "state"
    __effect_key = "effect"
    __variants_key = "variants"

    def __init__(self, data, labels_fmt, effect_labels_fmt, lowercase_variants=True):
        """ data: dict
                Frames contents, where every frame is addressed by its id.
                The methods of this class read the "variants", "roles", "title"
                and "frames" entries of a frame.
            labels_fmt: StringLabelsFormatter
                Converts string labels of polarities and states.
            effect_labels_fmt: StringLabelsFormatter
                Converts string labels of effects.
            lowercase_variants: bool
                If 'True', every frame variant is replaced with its lowercased
                version; variants are kept untouched in case of 'False'.
        """
        assert isinstance(data, dict)
        assert isinstance(labels_fmt, StringLabelsFormatter)
        assert isinstance(effect_labels_fmt, StringLabelsFormatter)

        self.__labels_fmt = labels_fmt
        self.__effect_labels_fmt = effect_labels_fmt
        self.__data = data

        if not lowercase_variants:
            return

        # NOTE: the lists of variants are replaced within the passed dictionary.
        for frame in data.values():
            lowered = []
            for variant in frame[self.__variants_key]:
                lowered.append(variant.lower())
            frame[self.__variants_key] = lowered

    # region classmethods

    @classmethod
    def read(cls, version, labels_fmt, effect_labels_fmt):
        """ Reads the collection of the given version from the related archive.
        """
        assert isinstance(version, RuSentiFramesVersions)
        assert isinstance(labels_fmt, RuSentiFramesLabelsFormatter)
        assert isinstance(effect_labels_fmt, RuSentiFramesEffectLabelsFormatter)

        def parse_collection(input_file):
            return cls.__from_json(input_file=input_file,
                                   labels_fmt=labels_fmt,
                                   effect_labels_fmt=effect_labels_fmt)

        return RuSentiFramesIOUtils.read_from_zip(
            inner_path=RuSentiFramesIOUtils.get_collection_filepath(),
            process_func=parse_collection,
            version=version)

    @classmethod
    def __from_json(cls, input_file, labels_fmt, effect_labels_fmt):
        """ Composes the collection from the opened JSON file.
        """
        return cls(data=json.load(input_file),
                   labels_fmt=labels_fmt,
                   effect_labels_fmt=effect_labels_fmt)

    # endregion

    # region public 'try get' methods

    def try_get_frame_polarity(self, frame_id, role_src, role_dest):
        """ Returns the first polarity declared for the (role_src, role_dest)
            pair of the frame, or None when there is no such polarity.
        """
        assert isinstance(role_src, str)
        assert isinstance(role_dest, str)

        if self.__check_has_frame_polarity_key(frame_id):
            entries = self.__data[frame_id][self.__frames_key][self.__polarity_key]
            for entry in entries:
                if entry[0] != role_src or entry[1] != role_dest:
                    continue
                return self.__frame_polarity_from_args(entry)

        return None

    # endregion

    # region public 'get' methods

    def get_frame_roles(self, frame_id):
        """ Returns the list of roles declared for the frame.
        """
        assert isinstance(frame_id, str)
        roles = self.__data[frame_id]["roles"]
        return [FrameRole(source=name, description=text) for name, text in roles.items()]

    def get_frame_polarities(self, frame_id):
        """ Returns the list of all the frame polarities (empty when absent).
        """
        assert isinstance(frame_id, str)

        if not self.__check_has_frame_polarity_key(frame_id):
            return []

        polarities = []
        for entry in self.__data[frame_id][self.__frames_key][self.__polarity_key]:
            polarities.append(self.__frame_polarity_from_args(entry))
        return polarities

    def get_frame_states(self, frame_id):
        """ Returns the list of the frame states (empty when absent).
        """
        assert isinstance(frame_id, str)

        frame_contents = self.__data[frame_id][self.__frames_key]
        if self.__state_key not in frame_contents:
            return []

        states = []
        for entry in self.__data[frame_id][self.__frames_key][self.__state_key]:
            label = self.__labels_fmt.str_to_label(entry[1])
            states.append(FrameState(role=entry[0], label=label, prob=entry[2]))
        return states

    def get_frame_titles(self, frame_id):
        """ Returns the "title" entry of the frame.
        """
        assert isinstance(frame_id, str)
        frame = self.__data[frame_id]
        return frame["title"]

    def get_frame_variants(self, frame_id):
        """ Returns the list of variants of the frame.
        """
        frame = self.__data[frame_id]
        return frame[self.__variants_key]

    def get_frame_values(self, frame_id):
        """ Placeholder: always results in None.
        """
        assert isinstance(frame_id, str)
        # TODO. Not implemented yet.
        return None

    def get_frame_effects(self, frame_id):
        """ Returns the list of the frame effects (empty when absent).
        """
        assert isinstance(frame_id, str)

        frame_contents = self.__data[frame_id][self.__frames_key]
        if self.__effect_key not in frame_contents:
            return []

        effects = []
        for entry in self.__data[frame_id][self.__frames_key][self.__effect_key]:
            label = self.__effect_labels_fmt.str_to_label(entry[1])
            effects.append(FrameEffect(role=entry[0], label=label, prob=entry[2]))
        return effects

    # endregion

    # region public 'iter' methods

    def iter_frames_ids(self):
        """ Iterates over the ids of all the frames.
        """
        yield from self.__data.keys()

    def iter_frame_id_and_variants(self):
        """ Iterates over (frame id, variant) pairs of all the frames.
        """
        for frame_id, frame in self.__data.items():
            for variant in frame[self.__variants_key]:
                yield frame_id, variant

    # endregion

    # region private methods

    def __check_has_frame_polarity_key(self, frame_id):
        frame_contents = self.__data[frame_id][self.__frames_key]
        return self.__polarity_key in frame_contents

    def __frame_polarity_from_args(self, args):
        # args layout: [role_src, role_dest, label string, prob]
        label = self.__labels_fmt.str_to_label(args[2])
        return RuSentiFramesFramePolarity(role_src=args[0],
                                          role_dest=args[1],
                                          label=label,
                                          prob=args[3])

    # endregion
@@ -1,24 +0,0 @@
1
- from arekit.common.labels.base import Label
2
-
3
-
4
class FrameEffect(object):
    """ Read-only (role, label, prob) record of a frame effect.
    """

    def __init__(self, role, label, prob):
        """ role: str
            label: Label
            prob: float
        """
        assert isinstance(role, str)
        assert isinstance(label, Label)
        assert isinstance(prob, float)

        self.__role, self.__label, self.__prob = role, label, prob

    @property
    def Role(self):
        """ Role passed on initialization. """
        return self.__role

    @property
    def Label(self):
        """ Label passed on initialization. """
        return self.__label

    @property
    def Prob(self):
        """ Probability value passed on initialization. """
        return self.__prob
@@ -1,19 +0,0 @@
1
- from os import path
2
-
3
- from arekit.contrib.source.zip_utils import ZipArchiveUtils
4
-
5
-
6
class RuSentiFramesIOUtils(ZipArchiveUtils):
    """ Provides paths of the RuSentiFrames resources: the archive
        filepath and the path of the collection file inside of it.
    """

    # region internal methods

    @staticmethod
    def get_archive_filepath(version):
        """ version: str
                name of the version, which is a part of the archive filename.
        """
        assert isinstance(version, str)
        data_root = RuSentiFramesIOUtils.get_data_root()
        archive_name = "rusentiframes-{version}.zip".format(version=version)
        return path.join(data_root, archive_name)

    @staticmethod
    def get_collection_filepath():
        """ Path of the frames collection within the archive.
        """
        return "frames.json"

    # endregion
@@ -1,22 +0,0 @@
1
- from arekit.common.labels.base import Label
2
- from arekit.common.labels.str_fmt import StringLabelsFormatter
3
-
4
-
5
class RuSentiFramesLabelsFormatter(StringLabelsFormatter):
    """ Formatter which binds the 'neg' and 'pos' strings
        with the given label types.
    """

    def __init__(self, pos_label_type, neg_label_type):
        assert issubclass(pos_label_type, Label)
        assert issubclass(neg_label_type, Label)
        super().__init__(stol={'neg': neg_label_type, 'pos': pos_label_type})
-
13
-
14
class RuSentiFramesEffectLabelsFormatter(StringLabelsFormatter):
    """ Formatter of effects, which binds the '-' and '+' signs
        with the given label types.
    """

    def __init__(self, pos_label_type, neg_label_type):
        assert issubclass(pos_label_type, Label)
        assert issubclass(neg_label_type, Label)
        super().__init__(stol={'-': neg_label_type, '+': pos_label_type})
@@ -1,35 +0,0 @@
1
- from arekit.common.frames.connotations.descriptor import FrameConnotationDescriptor
2
- from arekit.common.labels.base import Label
3
-
4
-
5
class RuSentiFramesFramePolarity(FrameConnotationDescriptor):
    """ Polarity of the source role (Agent) towards the destination role (Theme),
        provided with a label and a probability value.
    """

    def __init__(self, role_src, role_dest, label, prob):
        """ role_src: str
            role_dest: str
            label: Label
            prob: float
        """
        assert isinstance(role_src, str)
        assert isinstance(role_dest, str)
        assert isinstance(label, Label)
        assert isinstance(prob, float)

        self.__role_src, self.__role_dest = role_src, role_dest
        self.__label, self.__prob = label, prob

    @property
    def Source(self):
        """ Source role passed on initialization. """
        return self.__role_src

    @property
    def Destination(self):
        """ Destination role passed on initialization. """
        return self.__role_dest

    @property
    def Label(self):
        """ Label passed on initialization. """
        return self.__label

    @property
    def Prob(self):
        """ Probability value passed on initialization. """
        return self.__prob
@@ -1,15 +0,0 @@
1
class FrameRole(object):
    """ Read-only (source, description) record of a frame role.
    """

    def __init__(self, source, description):
        """ source: str
            description: str
        """
        assert isinstance(source, str)
        assert isinstance(description, str)

        self.__source, self.__description = source, description

    @property
    def Source(self):
        """ Source value passed on initialization. """
        return self.__source

    @property
    def Description(self):
        """ Description passed on initialization. """
        return self.__description
@@ -1,24 +0,0 @@
1
- from arekit.common.labels.base import Label
2
-
3
-
4
class FrameState(object):
    """ Read-only (role, label, prob) record of a frame state.
    """

    def __init__(self, role, label, prob):
        """ role: str
            label: Label
            prob: float
        """
        assert isinstance(role, str)
        assert isinstance(label, Label)
        assert isinstance(prob, float)

        self.__role, self.__label, self.__prob = role, label, prob

    @property
    def Role(self):
        """ Role passed on initialization. """
        return self.__role

    @property
    def Label(self):
        """ Label passed on initialization. """
        return self.__label

    @property
    def Prob(self):
        """ Probability value passed on initialization. """
        return self.__prob
@@ -1,42 +0,0 @@
1
- from enum import Enum
2
-
3
-
4
class RuSentiFramesVersions(Enum):
    """ Versions of the RuSentiFrames lexicon; values are the version names.
    """

    # Papers for description:
    # Distant Supervision for Sentiment Attitude Extraction (RANLP-2019)
    # Nicolay Rusnachenko, Natalia Loukachevitch, Elena Tutubalina
    # https://www.aclweb.org/anthology/R19-1118/
    # https://github.com/nicolay-r/RuSentiFrames/tree/v1.0
    V10 = "v1_0"

    # Papers for description:
    # Sentiment Frames for Attitude Extraction in Russian (DIALOG-2020)
    # Natalia Loukachevitch, Nicolay Rusnachenko
    # https://github.com/nicolay-r/RuSentiFrames/tree/v2.0
    V20 = "v2_0"


class RuSentiFramesVersionsService:
    """ Conversions between the version types and their string names.
    """

    @staticmethod
    def __iter_supported_types():
        return iter(RuSentiFramesVersions)

    @staticmethod
    def get_name_by_type(version_type):
        """ Returns the string name of the given version type.
        """
        assert isinstance(version_type, RuSentiFramesVersions)
        return version_type.value

    @staticmethod
    def get_type_by_name(name):
        """ Returns the version type whose value equals `name`.

            Raises an Exception when there is no such version.
        """
        for version_type in RuSentiFramesVersionsService.__iter_supported_types():
            if version_type.value == name:
                return version_type

        # The message used to say "was hot found", which is a typo.
        raise Exception("RuSentiFrames version by name `{}` was not found!".format(name))

    @staticmethod
    def iter_supported_names():
        """ Iterates over the string names of all the versions.
        """
        for version_type in RuSentiFramesVersionsService.__iter_supported_types():
            yield version_type.value
@@ -1,2 +0,0 @@
1
class FrameValue:
    """ Placeholder type of a frame value; declares no members yet.
    """
File without changes
@@ -1,3 +0,0 @@
1
- # Default label formatting.
2
- POS_LABEL_STR = "pos"
3
- NEG_LABEL_STR = "neg"
@@ -1,26 +0,0 @@
1
- from arekit.common.entities.collection import EntityCollection
2
- from arekit.common.synonyms.base import SynonymsCollection
3
- from arekit.contrib.source.brat.annot import BratAnnotationParser
4
- from arekit.contrib.source.rusentrel.io_utils import RuSentRelVersions, RuSentRelIOUtils
5
-
6
-
7
class RuSentRelDocumentEntityCollection(EntityCollection):
    """ Collection of the document entities, sorted by `IndexBegin`.
    """

    def __init__(self, entities, value_to_group_id_func):
        super().__init__(entities=entities,
                         value_to_group_id_func=value_to_group_id_func)

        # Arrange entities by their starting position.
        self._sort_entities(key=lambda e: e.IndexBegin)

    @classmethod
    def read_collection(cls, doc_id, synonyms, version=RuSentRelVersions.V11):
        """ Reads the entities of the document from the annotation file,
            stored within the archive of the given version.

            doc_id: int
            synonyms: SynonymsCollection
                provides the group index for an entity value.
        """
        assert isinstance(synonyms, SynonymsCollection)
        assert isinstance(doc_id, int)

        def parse_entities(input_file):
            annotations = BratAnnotationParser.parse_annotations(input_file)
            return cls(entities=annotations["entities"],
                       value_to_group_id_func=synonyms.get_synonym_group_index)

        inner_path = RuSentRelIOUtils.get_entity_innerpath(index=doc_id, version=version)

        return RuSentRelIOUtils.read_from_zip(inner_path=inner_path,
                                              process_func=parse_entities,
                                              version=version)
@@ -1,125 +0,0 @@
1
- from os import path
2
-
3
- from enum import Enum
4
-
5
- from arekit.contrib.source.zip_utils import ZipArchiveUtils
6
-
7
-
8
class RuSentRelVersions(Enum):
    """ Versions of the RuSentRel collection.

        Original collection repository: https://github.com/nicolay-r/RuSentRel
        Paper: https://arxiv.org/abs/1808.08932
    """

    V11 = "v1_1"
13
-
14
-
15
class RuSentRelIOUtils(ZipArchiveUtils):
    """ Provides paths of the RuSentRel resources (the archive filepath and
        the inner paths of the documents) and iterates over document indices,
        which are extracted from the filenames of the archive.
    """

    TEST_FOLDER = "test"
    TRAIN_FOLDER = "train"
    ETALON_FOLDER = "etalon"

    @staticmethod
    def get_archive_filepath(version):
        """ version: str
                name of the version, which is a part of the archive filename.
        """
        # The original `assert(version, str)` checked a non-empty tuple,
        # which is always true, so the type was never validated.
        assert isinstance(version, str)
        return path.join(RuSentRelIOUtils.get_data_root(), "rusentrel-{}.zip".format(version))

    # region internal methods

    @staticmethod
    def get_sentiment_opin_filepath(index, version, prefix='art'):
        """ Inner path of the opinions file of the document.
            Documents of the test part are addressed within the etalon folder.
        """
        root = RuSentRelIOUtils.__get_root_by_index(index, version=version, keep_etalon=True)
        return path.join(root, "{prefix}{index}.opin.txt".format(prefix=prefix, index=index))

    @staticmethod
    def get_entity_innerpath(index, version):
        """ Inner path of the annotation (*.ann) file of the document.
        """
        assert isinstance(index, int)
        assert isinstance(version, RuSentRelVersions)
        inner_root = RuSentRelIOUtils.__get_root_by_index(doc_id=index, version=version)
        return path.join(inner_root, "art{}.ann".format(index))

    @staticmethod
    def get_news_innerpath(index, version):
        """ Inner path of the text (*.txt) file of the document.
        """
        assert isinstance(index, int)
        assert isinstance(version, RuSentRelVersions)
        inner_root = RuSentRelIOUtils.__get_root_by_index(doc_id=index, version=version)
        return path.join(inner_root, "art{}.txt".format(index))

    @staticmethod
    def get_synonyms_innerpath():
        """ Inner path of the synonyms file.
        """
        return "synonyms.txt"

    # endregion

    @staticmethod
    def __get_root_by_index(doc_id, version, keep_etalon=False):
        """ Returns the folder of the document: train folder for documents
            out of the test indices; otherwise test folder (or etalon folder
            when `keep_etalon` is set).
        """
        assert RuSentRelIOUtils.__is_supported(version)
        assert isinstance(version, RuSentRelVersions)
        assert isinstance(doc_id, int)
        other_dir = RuSentRelIOUtils.ETALON_FOLDER if keep_etalon else RuSentRelIOUtils.TEST_FOLDER
        # NOTE: the archive filenames are listed on every call.
        test_indices = set(RuSentRelIOUtils.__iter_indicies_from_dataset(version, RuSentRelIOUtils.TEST_FOLDER))
        return other_dir if doc_id in test_indices else RuSentRelIOUtils.TRAIN_FOLDER

    @staticmethod
    def __is_supported(version):
        assert isinstance(version, RuSentRelVersions)
        return version == RuSentRelVersions.V11

    @staticmethod
    def __number_from_string(s):
        """ Joins all the digits of the string into a single integer;
            results in None when the string has no digits.
        """
        digits = "".join(char for char in s if char.isdigit())
        return int(digits) if digits else None

    @staticmethod
    def __iter_indicies_from_dataset(version, folder_name):
        """ Iterates over the unique indices, extracted from the archive
            filenames which contain `folder_name` as a substring.
        """
        assert isinstance(folder_name, str)
        assert RuSentRelIOUtils.__is_supported(version)

        used = set()

        for filename in RuSentRelIOUtils.iter_filenames_from_zip(version):
            if folder_name not in filename:
                continue

            index = RuSentRelIOUtils.__number_from_string(filename)

            # Skip filenames without digits and already provided indices.
            if index is None or index in used:
                continue

            used.add(index)

            yield index

    # region public methods

    @staticmethod
    def iter_test_indices(version):
        """ Iterates over document indices of the test folder.
        """
        assert RuSentRelIOUtils.__is_supported(version)
        yield from RuSentRelIOUtils.__iter_indicies_from_dataset(
            version=version, folder_name="{}/".format(RuSentRelIOUtils.TEST_FOLDER))

    @staticmethod
    def iter_train_indices(version):
        """ Iterates over document indices of the train folder.
        """
        assert RuSentRelIOUtils.__is_supported(version)
        yield from RuSentRelIOUtils.__iter_indicies_from_dataset(
            version=version, folder_name="{}/".format(RuSentRelIOUtils.TRAIN_FOLDER))

    @staticmethod
    def iter_collection_indices(version):
        """ Iterates over train indices first, followed by test indices.
        """
        assert RuSentRelIOUtils.__is_supported(version)
        yield from RuSentRelIOUtils.iter_train_indices(version)
        yield from RuSentRelIOUtils.iter_test_indices(version)

    # endregion
@@ -1,12 +0,0 @@
1
- from arekit.common.labels.base import Label
2
- from arekit.common.labels.str_fmt import StringLabelsFormatter
3
- from arekit.contrib.source.rusentrel.const import NEG_LABEL_STR, POS_LABEL_STR
4
-
5
-
6
class RuSentRelLabelsFormatter(StringLabelsFormatter):
    """Labels formatter bound to the RuSentRel positive/negative strings."""

    def __init__(self, pos_label_type, neg_label_type):
        """Register the label classes for the two RuSentRel label strings.

        :param pos_label_type: ``Label`` subclass mapped from ``POS_LABEL_STR``.
        :param neg_label_type: ``Label`` subclass mapped from ``NEG_LABEL_STR``.
        """
        assert issubclass(pos_label_type, Label)
        assert issubclass(neg_label_type, Label)

        # Mapping handed to the base class as ``stol``.
        string_to_label = {
            NEG_LABEL_STR: neg_label_type,
            POS_LABEL_STR: pos_label_type,
        }
        super().__init__(stol=string_to_label)
@@ -1,51 +0,0 @@
1
- from arekit.common.synonyms.base import SynonymsCollection
2
- from arekit.contrib.source.brat.news import BratNews
3
- from arekit.contrib.source.brat.sentences_reader import BratDocumentSentencesReader
4
- from arekit.contrib.source.rusentrel.entities import RuSentRelDocumentEntityCollection
5
- from arekit.contrib.source.rusentrel.io_utils import RuSentRelVersions, RuSentRelIOUtils
6
-
7
-
8
class RuSentRelNewsReader(object):
    """Reads a RuSentRel document from the dataset archive as ``BratNews``."""

    # region class methods

    @staticmethod
    def hide_first_entry(line, entry, hide_with=" "):
        """Overwrite the first occurrence of ``entry`` in ``line``.

        The occurrence is replaced by ``hide_with`` repeated ``len(entry)``
        times; ``line`` is returned unchanged when ``entry`` is absent.
        """
        start = line.find(entry)

        if start < 0:
            return line

        end = start + len(entry)
        filler = hide_with * len(entry)
        return "".join([line[0:start], filler, line[end:]])

    @staticmethod
    def read_document(doc_id, synonyms, version=RuSentRelVersions.V11, target_doc_id=None):
        """Read document ``doc_id`` and wrap its sentences into ``BratNews``.

        :param doc_id: index of the document inside the collection.
        :param synonyms: ``SynonymsCollection`` used when reading entities.
        :param version: ``RuSentRelVersions`` member selecting the archive.
        :param target_doc_id: optional int; when given, it is used as the id
            of the resulting document instead of ``doc_id``.
        :return: whatever ``RuSentRelIOUtils.read_from_zip`` returns for the
            document-building callback.
        """
        assert isinstance(synonyms, SynonymsCollection)
        assert isinstance(version, RuSentRelVersions)
        assert isinstance(target_doc_id, int) or target_doc_id is None

        result_doc_id = doc_id if target_doc_id is None else target_doc_id

        entities = RuSentRelDocumentEntityCollection.read_collection(
            doc_id=doc_id,
            synonyms=synonyms,
            version=version)

        def file_to_doc(input_file):
            # The author placeholder is blanked out in the text and the
            # matching entities are skipped.
            sentences = BratDocumentSentencesReader.from_file(
                input_file=input_file,
                entities=entities,
                line_handler=lambda line: RuSentRelNewsReader.hide_first_entry(line, entry="{Author, Unknown}"),
                skip_entity_func=lambda entity: entity.Value in ['author', 'unknown'])

            return BratNews(doc_id=result_doc_id,
                            sentences=sentences,
                            text_relations=[])

        news_path = RuSentRelIOUtils.get_news_innerpath(index=doc_id, version=version)

        return RuSentRelIOUtils.read_from_zip(
            inner_path=news_path,
            process_func=file_to_doc,
            version=version)
File without changes
@@ -1,30 +0,0 @@
1
- from arekit.contrib.source.rusentrel.const import POS_LABEL_STR, NEG_LABEL_STR
2
- from arekit.contrib.source.rusentrel.io_utils import RuSentRelIOUtils, RuSentRelVersions
3
- from arekit.contrib.source.rusentrel.labels_fmt import RuSentRelLabelsFormatter
4
- from arekit.contrib.source.rusentrel.opinions.provider import RuSentRelOpinionCollectionProvider
5
-
6
-
7
class RuSentRelOpinions:
    """
    Collection of sentiment opinions between entities
    """

    @staticmethod
    def iter_from_doc(doc_id, labels_fmt, version=RuSentRelVersions.V11):
        """ Iterate the sentiment opinions stored for a single document.

            doc_id: index of the document inside the collection.
            labels_fmt: RuSentRelLabelsFormatter
                must support both POS_LABEL_STR and NEG_LABEL_STR.
            version: RuSentrelVersions enum
        """
        assert isinstance(version, RuSentRelVersions)
        assert isinstance(labels_fmt, RuSentRelLabelsFormatter)
        assert labels_fmt.supports_value(POS_LABEL_STR)
        assert labels_fmt.supports_value(NEG_LABEL_STR)

        opinions_path = RuSentRelIOUtils.get_sentiment_opin_filepath(
            index=doc_id, version=version)

        def read_opinions(input_file):
            return RuSentRelOpinionCollectionProvider._iter_opinions_from_file(
                input_file=input_file,
                labels_formatter=labels_fmt,
                error_on_non_supported=True)

        return RuSentRelIOUtils.iter_from_zip(
            inner_path=opinions_path,
            process_func=read_opinions,
            version=version)
@@ -1,40 +0,0 @@
1
- from arekit.common.labels.str_fmt import StringLabelsFormatter
2
- from arekit.common.opinions.base import Opinion
3
-
4
-
5
class OpinionConverter(object):
    """ Opinion type <-> string Converter.
    """

    @staticmethod
    def try_from_string(line, labels_formatter):
        """ Parse a "source, target, label[, ...]" line into an Opinion.

            Returns None when the label string is not supported by
            labels_formatter; fields past the third one are ignored.
        """
        assert isinstance(line, str)

        fields = line.strip().split(',')
        assert len(fields) >= 3

        source_value, target_value, str_label = [f.strip() for f in fields[:3]]

        if labels_formatter.supports_value(str_label):
            return Opinion(source_value=source_value,
                           target_value=target_value,
                           sentiment=labels_formatter.str_to_label(str_label))

        return None

    @staticmethod
    def try_to_string(opinion, labels_formatter):
        """ Format an Opinion as "source, target, label, current".

            Returns None when the opinion label is not supported by
            labels_formatter.
        """
        assert isinstance(opinion, Opinion)
        assert isinstance(labels_formatter, StringLabelsFormatter)

        if not labels_formatter.supports_label(opinion.Sentiment):
            return None

        source_value = opinion.SourceValue
        target_value = opinion.TargetValue
        str_label = labels_formatter.label_to_str(opinion.Sentiment)

        return "{}, {}, {}, current".format(source_value, target_value, str_label)