arekit 0.23.1__py3-none-any.whl → 0.25.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (267)
  1. arekit/common/context/terms_mapper.py +2 -2
  2. arekit/common/data/const.py +5 -4
  3. arekit/common/{experiment/api/ops_doc.py → data/doc_provider.py} +1 -1
  4. arekit/common/data/input/providers/columns/sample.py +6 -1
  5. arekit/common/data/input/providers/instances/base.py +1 -1
  6. arekit/common/data/input/providers/rows/base.py +36 -13
  7. arekit/common/data/input/providers/rows/samples.py +57 -55
  8. arekit/common/data/input/providers/sample/cropped.py +2 -2
  9. arekit/common/data/input/sample.py +1 -1
  10. arekit/common/data/rows_fmt.py +82 -0
  11. arekit/common/data/rows_parser.py +43 -0
  12. arekit/common/data/storages/base.py +23 -18
  13. arekit/common/data/views/samples.py +2 -8
  14. arekit/common/{news → docs}/base.py +2 -2
  15. arekit/common/{news → docs}/entities_grouping.py +2 -1
  16. arekit/common/{news → docs}/entity.py +2 -1
  17. arekit/common/{news → docs}/parsed/base.py +5 -5
  18. arekit/common/docs/parsed/providers/base.py +68 -0
  19. arekit/common/{news → docs}/parsed/providers/base_pairs.py +2 -2
  20. arekit/common/{news → docs}/parsed/providers/entity_service.py +27 -22
  21. arekit/common/{news → docs}/parsed/providers/opinion_pairs.py +2 -2
  22. arekit/common/{news → docs}/parsed/providers/text_opinion_pairs.py +6 -6
  23. arekit/common/docs/parsed/service.py +31 -0
  24. arekit/common/docs/parser.py +66 -0
  25. arekit/common/{news → docs}/sentence.py +1 -1
  26. arekit/common/entities/base.py +11 -2
  27. arekit/common/experiment/api/base_samples_io.py +1 -1
  28. arekit/common/frames/variants/collection.py +2 -2
  29. arekit/common/linkage/base.py +2 -2
  30. arekit/common/linkage/meta.py +23 -0
  31. arekit/common/linkage/opinions.py +1 -1
  32. arekit/common/linkage/text_opinions.py +2 -2
  33. arekit/common/opinions/annot/algo/base.py +1 -1
  34. arekit/common/opinions/annot/algo/pair_based.py +15 -13
  35. arekit/common/opinions/annot/algo/predefined.py +4 -4
  36. arekit/common/opinions/annot/algo_based.py +5 -5
  37. arekit/common/opinions/annot/base.py +3 -3
  38. arekit/common/opinions/base.py +7 -7
  39. arekit/common/opinions/collection.py +3 -3
  40. arekit/common/pipeline/base.py +12 -16
  41. arekit/common/pipeline/batching.py +28 -0
  42. arekit/common/pipeline/context.py +5 -1
  43. arekit/common/pipeline/items/base.py +38 -1
  44. arekit/common/pipeline/items/flatten.py +5 -1
  45. arekit/common/pipeline/items/handle.py +2 -1
  46. arekit/common/pipeline/items/iter.py +2 -1
  47. arekit/common/pipeline/items/map.py +2 -1
  48. arekit/common/pipeline/items/map_nested.py +4 -0
  49. arekit/common/pipeline/utils.py +32 -0
  50. arekit/common/service/sqlite.py +36 -0
  51. arekit/common/synonyms/base.py +2 -2
  52. arekit/common/text/{partitioning/str.py → partitioning.py} +16 -11
  53. arekit/common/text_opinions/base.py +11 -11
  54. arekit/common/utils.py +33 -46
  55. arekit/contrib/networks/embedding.py +3 -3
  56. arekit/contrib/networks/embedding_io.py +5 -5
  57. arekit/contrib/networks/input/const.py +0 -2
  58. arekit/contrib/networks/input/providers/sample.py +15 -29
  59. arekit/contrib/networks/input/rows_parser.py +47 -134
  60. arekit/contrib/prompt/sample.py +18 -16
  61. arekit/contrib/utils/data/contents/opinions.py +17 -5
  62. arekit/contrib/utils/data/doc_provider/dict_based.py +13 -0
  63. arekit/contrib/utils/data/{doc_ops → doc_provider}/dir_based.py +7 -7
  64. arekit/contrib/utils/data/readers/base.py +3 -0
  65. arekit/contrib/utils/data/readers/csv_pd.py +10 -4
  66. arekit/contrib/utils/data/readers/jsonl.py +3 -0
  67. arekit/contrib/utils/data/readers/sqlite.py +14 -0
  68. arekit/contrib/utils/data/service/balance.py +0 -1
  69. arekit/contrib/utils/data/storages/pandas_based.py +3 -5
  70. arekit/contrib/utils/data/storages/row_cache.py +18 -6
  71. arekit/contrib/utils/data/storages/sqlite_based.py +17 -0
  72. arekit/contrib/utils/data/writers/base.py +5 -0
  73. arekit/contrib/utils/data/writers/csv_native.py +3 -0
  74. arekit/contrib/utils/data/writers/csv_pd.py +3 -0
  75. arekit/contrib/utils/data/writers/json_opennre.py +31 -13
  76. arekit/contrib/utils/data/writers/sqlite_native.py +114 -0
  77. arekit/contrib/utils/io_utils/embedding.py +25 -33
  78. arekit/contrib/utils/io_utils/utils.py +3 -24
  79. arekit/contrib/utils/pipelines/items/sampling/base.py +31 -26
  80. arekit/contrib/utils/pipelines/items/sampling/networks.py +7 -10
  81. arekit/contrib/utils/pipelines/items/text/entities_default.py +2 -2
  82. arekit/contrib/utils/pipelines/items/text/frames.py +2 -3
  83. arekit/contrib/utils/pipelines/items/text/frames_lemmatized.py +3 -3
  84. arekit/contrib/utils/pipelines/items/text/frames_negation.py +2 -1
  85. arekit/contrib/utils/pipelines/items/text/tokenizer.py +3 -5
  86. arekit/contrib/utils/pipelines/items/text/translator.py +136 -0
  87. arekit/contrib/utils/pipelines/opinion_collections.py +5 -5
  88. arekit/contrib/utils/pipelines/text_opinion/annot/algo_based.py +7 -7
  89. arekit/contrib/utils/pipelines/text_opinion/extraction.py +34 -22
  90. arekit/contrib/utils/pipelines/text_opinion/filters/base.py +1 -1
  91. arekit/contrib/utils/pipelines/text_opinion/filters/distance_based.py +1 -1
  92. arekit/contrib/utils/pipelines/text_opinion/filters/entity_based.py +3 -3
  93. arekit/contrib/utils/pipelines/text_opinion/filters/limitation.py +4 -4
  94. arekit/contrib/utils/serializer.py +4 -23
  95. arekit-0.25.0.data/data/logo.png +0 -0
  96. arekit-0.25.0.dist-info/METADATA +82 -0
  97. arekit-0.25.0.dist-info/RECORD +259 -0
  98. {arekit-0.23.1.dist-info → arekit-0.25.0.dist-info}/WHEEL +1 -1
  99. arekit/common/data/row_ids/base.py +0 -79
  100. arekit/common/data/row_ids/binary.py +0 -38
  101. arekit/common/data/row_ids/multiple.py +0 -14
  102. arekit/common/folding/base.py +0 -36
  103. arekit/common/folding/fixed.py +0 -42
  104. arekit/common/folding/nofold.py +0 -15
  105. arekit/common/folding/united.py +0 -46
  106. arekit/common/news/objects_parser.py +0 -37
  107. arekit/common/news/parsed/providers/base.py +0 -48
  108. arekit/common/news/parsed/service.py +0 -31
  109. arekit/common/news/parser.py +0 -34
  110. arekit/common/text/parser.py +0 -12
  111. arekit/common/text/partitioning/__init__.py +0 -0
  112. arekit/common/text/partitioning/base.py +0 -4
  113. arekit/common/text/partitioning/terms.py +0 -35
  114. arekit/contrib/source/__init__.py +0 -0
  115. arekit/contrib/source/brat/__init__.py +0 -0
  116. arekit/contrib/source/brat/annot.py +0 -83
  117. arekit/contrib/source/brat/entities/__init__.py +0 -0
  118. arekit/contrib/source/brat/entities/compound.py +0 -33
  119. arekit/contrib/source/brat/entities/entity.py +0 -42
  120. arekit/contrib/source/brat/entities/parser.py +0 -53
  121. arekit/contrib/source/brat/news.py +0 -28
  122. arekit/contrib/source/brat/opinions/__init__.py +0 -0
  123. arekit/contrib/source/brat/opinions/converter.py +0 -19
  124. arekit/contrib/source/brat/relation.py +0 -32
  125. arekit/contrib/source/brat/sentence.py +0 -69
  126. arekit/contrib/source/brat/sentences_reader.py +0 -128
  127. arekit/contrib/source/download.py +0 -41
  128. arekit/contrib/source/nerel/__init__.py +0 -0
  129. arekit/contrib/source/nerel/entities.py +0 -55
  130. arekit/contrib/source/nerel/folding/__init__.py +0 -0
  131. arekit/contrib/source/nerel/folding/fixed.py +0 -75
  132. arekit/contrib/source/nerel/io_utils.py +0 -62
  133. arekit/contrib/source/nerel/labels.py +0 -241
  134. arekit/contrib/source/nerel/reader.py +0 -46
  135. arekit/contrib/source/nerel/utils.py +0 -24
  136. arekit/contrib/source/nerel/versions.py +0 -12
  137. arekit/contrib/source/nerelbio/__init__.py +0 -0
  138. arekit/contrib/source/nerelbio/io_utils.py +0 -62
  139. arekit/contrib/source/nerelbio/labels.py +0 -265
  140. arekit/contrib/source/nerelbio/reader.py +0 -8
  141. arekit/contrib/source/nerelbio/versions.py +0 -8
  142. arekit/contrib/source/ruattitudes/__init__.py +0 -0
  143. arekit/contrib/source/ruattitudes/collection.py +0 -36
  144. arekit/contrib/source/ruattitudes/entity/__init__.py +0 -0
  145. arekit/contrib/source/ruattitudes/entity/parser.py +0 -7
  146. arekit/contrib/source/ruattitudes/io_utils.py +0 -56
  147. arekit/contrib/source/ruattitudes/labels_fmt.py +0 -12
  148. arekit/contrib/source/ruattitudes/news.py +0 -51
  149. arekit/contrib/source/ruattitudes/news_brat.py +0 -44
  150. arekit/contrib/source/ruattitudes/opinions/__init__.py +0 -0
  151. arekit/contrib/source/ruattitudes/opinions/base.py +0 -28
  152. arekit/contrib/source/ruattitudes/opinions/converter.py +0 -37
  153. arekit/contrib/source/ruattitudes/reader.py +0 -268
  154. arekit/contrib/source/ruattitudes/sentence.py +0 -73
  155. arekit/contrib/source/ruattitudes/synonyms.py +0 -17
  156. arekit/contrib/source/ruattitudes/text_object.py +0 -57
  157. arekit/contrib/source/rusentiframes/__init__.py +0 -0
  158. arekit/contrib/source/rusentiframes/collection.py +0 -157
  159. arekit/contrib/source/rusentiframes/effect.py +0 -24
  160. arekit/contrib/source/rusentiframes/io_utils.py +0 -19
  161. arekit/contrib/source/rusentiframes/labels_fmt.py +0 -22
  162. arekit/contrib/source/rusentiframes/polarity.py +0 -35
  163. arekit/contrib/source/rusentiframes/role.py +0 -15
  164. arekit/contrib/source/rusentiframes/state.py +0 -24
  165. arekit/contrib/source/rusentiframes/types.py +0 -42
  166. arekit/contrib/source/rusentiframes/value.py +0 -2
  167. arekit/contrib/source/rusentrel/__init__.py +0 -0
  168. arekit/contrib/source/rusentrel/const.py +0 -3
  169. arekit/contrib/source/rusentrel/entities.py +0 -26
  170. arekit/contrib/source/rusentrel/io_utils.py +0 -125
  171. arekit/contrib/source/rusentrel/labels_fmt.py +0 -12
  172. arekit/contrib/source/rusentrel/news_reader.py +0 -51
  173. arekit/contrib/source/rusentrel/opinions/__init__.py +0 -0
  174. arekit/contrib/source/rusentrel/opinions/collection.py +0 -30
  175. arekit/contrib/source/rusentrel/opinions/converter.py +0 -40
  176. arekit/contrib/source/rusentrel/opinions/provider.py +0 -54
  177. arekit/contrib/source/rusentrel/opinions/writer.py +0 -42
  178. arekit/contrib/source/rusentrel/synonyms.py +0 -17
  179. arekit/contrib/source/sentinerel/__init__.py +0 -0
  180. arekit/contrib/source/sentinerel/entities.py +0 -52
  181. arekit/contrib/source/sentinerel/folding/__init__.py +0 -0
  182. arekit/contrib/source/sentinerel/folding/factory.py +0 -32
  183. arekit/contrib/source/sentinerel/folding/fixed.py +0 -73
  184. arekit/contrib/source/sentinerel/io_utils.py +0 -87
  185. arekit/contrib/source/sentinerel/labels.py +0 -53
  186. arekit/contrib/source/sentinerel/labels_scaler.py +0 -30
  187. arekit/contrib/source/sentinerel/reader.py +0 -42
  188. arekit/contrib/source/synonyms/__init__.py +0 -0
  189. arekit/contrib/source/synonyms/utils.py +0 -19
  190. arekit/contrib/source/zip_utils.py +0 -47
  191. arekit/contrib/utils/bert/rows.py +0 -0
  192. arekit/contrib/utils/bert/text_b_rus.py +0 -18
  193. arekit/contrib/utils/connotations/__init__.py +0 -0
  194. arekit/contrib/utils/connotations/rusentiframes_sentiment.py +0 -23
  195. arekit/contrib/utils/cv/__init__.py +0 -0
  196. arekit/contrib/utils/cv/doc_stat/__init__.py +0 -0
  197. arekit/contrib/utils/cv/doc_stat/base.py +0 -37
  198. arekit/contrib/utils/cv/doc_stat/sentence.py +0 -12
  199. arekit/contrib/utils/cv/splitters/__init__.py +0 -0
  200. arekit/contrib/utils/cv/splitters/base.py +0 -4
  201. arekit/contrib/utils/cv/splitters/default.py +0 -53
  202. arekit/contrib/utils/cv/splitters/statistical.py +0 -57
  203. arekit/contrib/utils/cv/two_class.py +0 -77
  204. arekit/contrib/utils/data/doc_ops/__init__.py +0 -0
  205. arekit/contrib/utils/data/doc_ops/dict_based.py +0 -13
  206. arekit/contrib/utils/data/ext.py +0 -31
  207. arekit/contrib/utils/data/views/__init__.py +0 -0
  208. arekit/contrib/utils/data/views/linkages/__init__.py +0 -0
  209. arekit/contrib/utils/data/views/linkages/base.py +0 -58
  210. arekit/contrib/utils/data/views/linkages/multilabel.py +0 -48
  211. arekit/contrib/utils/data/views/linkages/utils.py +0 -24
  212. arekit/contrib/utils/data/views/opinions.py +0 -14
  213. arekit/contrib/utils/download.py +0 -78
  214. arekit/contrib/utils/entities/formatters/str_rus_cased_fmt.py +0 -78
  215. arekit/contrib/utils/entities/formatters/str_rus_nocased_fmt.py +0 -15
  216. arekit/contrib/utils/entities/formatters/str_simple_fmt.py +0 -24
  217. arekit/contrib/utils/entities/formatters/str_simple_uppercase_fmt.py +0 -21
  218. arekit/contrib/utils/io_utils/opinions.py +0 -39
  219. arekit/contrib/utils/io_utils/samples.py +0 -78
  220. arekit/contrib/utils/lexicons/__init__.py +0 -0
  221. arekit/contrib/utils/lexicons/lexicon.py +0 -43
  222. arekit/contrib/utils/lexicons/relation.py +0 -45
  223. arekit/contrib/utils/lexicons/rusentilex.py +0 -34
  224. arekit/contrib/utils/nn/__init__.py +0 -0
  225. arekit/contrib/utils/nn/rows.py +0 -83
  226. arekit/contrib/utils/pipelines/items/sampling/bert.py +0 -5
  227. arekit/contrib/utils/pipelines/items/text/terms_splitter.py +0 -10
  228. arekit/contrib/utils/pipelines/items/to_output.py +0 -101
  229. arekit/contrib/utils/pipelines/sources/__init__.py +0 -0
  230. arekit/contrib/utils/pipelines/sources/nerel/__init__.py +0 -0
  231. arekit/contrib/utils/pipelines/sources/nerel/doc_ops.py +0 -27
  232. arekit/contrib/utils/pipelines/sources/nerel/extract_text_relations.py +0 -59
  233. arekit/contrib/utils/pipelines/sources/nerel/labels_fmt.py +0 -60
  234. arekit/contrib/utils/pipelines/sources/nerel_bio/__init__.py +0 -0
  235. arekit/contrib/utils/pipelines/sources/nerel_bio/doc_ops.py +0 -29
  236. arekit/contrib/utils/pipelines/sources/nerel_bio/extrat_text_relations.py +0 -59
  237. arekit/contrib/utils/pipelines/sources/nerel_bio/labels_fmt.py +0 -79
  238. arekit/contrib/utils/pipelines/sources/ruattitudes/__init__.py +0 -0
  239. arekit/contrib/utils/pipelines/sources/ruattitudes/doc_ops.py +0 -56
  240. arekit/contrib/utils/pipelines/sources/ruattitudes/entity_filter.py +0 -19
  241. arekit/contrib/utils/pipelines/sources/ruattitudes/extract_text_opinions.py +0 -58
  242. arekit/contrib/utils/pipelines/sources/rusentrel/__init__.py +0 -0
  243. arekit/contrib/utils/pipelines/sources/rusentrel/doc_ops.py +0 -21
  244. arekit/contrib/utils/pipelines/sources/rusentrel/extract_text_opinions.py +0 -100
  245. arekit/contrib/utils/pipelines/sources/sentinerel/__init__.py +0 -0
  246. arekit/contrib/utils/pipelines/sources/sentinerel/doc_ops.py +0 -29
  247. arekit/contrib/utils/pipelines/sources/sentinerel/entity_filter.py +0 -62
  248. arekit/contrib/utils/pipelines/sources/sentinerel/extract_text_opinions.py +0 -175
  249. arekit/contrib/utils/pipelines/sources/sentinerel/labels_fmt.py +0 -50
  250. arekit/contrib/utils/pipelines/text_opinion/annot/predefined.py +0 -88
  251. arekit/contrib/utils/resources.py +0 -26
  252. arekit/contrib/utils/sources/__init__.py +0 -0
  253. arekit/contrib/utils/sources/sentinerel/__init__.py +0 -0
  254. arekit/contrib/utils/sources/sentinerel/text_opinion/__init__.py +0 -0
  255. arekit/contrib/utils/sources/sentinerel/text_opinion/prof_per_org_filter.py +0 -63
  256. arekit/contrib/utils/utils_folding.py +0 -19
  257. arekit/download_data.py +0 -11
  258. arekit-0.23.1.dist-info/METADATA +0 -23
  259. arekit-0.23.1.dist-info/RECORD +0 -403
  260. /arekit/common/{data/row_ids → docs}/__init__.py +0 -0
  261. /arekit/common/{folding → docs/parsed}/__init__.py +0 -0
  262. /arekit/common/{news → docs/parsed/providers}/__init__.py +0 -0
  263. /arekit/common/{news → docs}/parsed/term_position.py +0 -0
  264. /arekit/common/{news/parsed → service}/__init__.py +0 -0
  265. /arekit/{common/news/parsed/providers → contrib/utils/data/doc_provider}/__init__.py +0 -0
  266. {arekit-0.23.1.dist-info → arekit-0.25.0.dist-info}/LICENSE +0 -0
  267. {arekit-0.23.1.dist-info → arekit-0.25.0.dist-info}/top_level.txt +0 -0
@@ -1,79 +0,0 @@
1
- from arekit.common.labels.str_fmt import StringLabelsFormatter
2
- from arekit.contrib.source.nerelbio import labels
3
-
4
-
5
- class NerelBioAnyLabelFormatter(StringLabelsFormatter):
6
-
7
- def __init__(self):
8
-
9
- stol = {
10
- "ABBREVIATION": labels.ABBREVIATION,
11
- "ALTERNATIVE_NAME": labels.ALTERNATIVE_NAME,
12
- "KNOWS": labels.KNOWS,
13
- "AGE_IS": labels.AGE_IS,
14
- "AGE_DIED_AT": labels.AGE_DIED_AT,
15
- "AWARDED_WITH": labels.AWARDED_WITH,
16
- "PLACE_OF_BIRTH": labels.PLACE_OF_BIRTH,
17
- "DATE_DEFUNCT_IN": labels.DATE_DEFUNCT_IN,
18
- "DATE_FOUNDED_IN": labels.DATE_FOUNDED_IN,
19
- "DATE_OF_BIRTH": labels.DATE_OF_BIRTH,
20
- "DATE_OF_CREATION": labels.DATE_OF_CREATION,
21
- "DATE_OF_DEATH": labels.DATE_OF_DEATH,
22
- "POINT_IN_TIME": labels.POINT_IN_TIME,
23
- "PLACE_OF_DEATH": labels.PLACE_OF_DEATH,
24
- "FOUNDED_BY": labels.FOUNDED_BY,
25
- "HEADQUARTERED_IN": labels.HEADQUARTERED_IN,
26
- "IDEOLOGY_OF": labels.IDEOLOGY_OF,
27
- "SPOUSE": labels.SPOUSE,
28
- "MEMBER_OF": labels.MEMBER_OF,
29
- "ORGANIZES": labels.ORGANIZES,
30
- "OWNER_OF": labels.OWNER_OF,
31
- "PARENT_OF": labels.PARENT_OF,
32
- "PARTICIPANT_IN": labels.PARTICIPANT_IN,
33
- "PLACE_RESIDES_IN": labels.PLACE_RESIDES_IN,
34
- "PRICE_OF": labels.PRICE_OF,
35
- "PRODUCES": labels.PRODUCES,
36
- "RELATIVE": labels.RELATIVE,
37
- "RELIGION_OF": labels.RELIGION_OF,
38
- "SCHOOLS_ATTENDED": labels.SCHOOLS_ATTENDED,
39
- "SIBLING": labels.SIBLING,
40
- "SUBEVENT_OF": labels.SUBEVENT_OF,
41
- "SUBORDINATE_OF": labels.SUBORDINATE_OF,
42
- "TAKES_PLACE_IN": labels.TAKES_PLACE_IN,
43
- "WORKPLACE": labels.WORKPLACE,
44
- "WORKS_AS": labels.WORKS_AS,
45
- "CONVICTED_OF": labels.CONVICTED_OF,
46
- "PENALIZED_AS": labels.PENALIZED_AS,
47
- "START_TIME": labels.START_TIME,
48
- "END_TIME": labels.END_TIME,
49
- "EXPENDITURE": labels.EXPENDITURE,
50
- "AGENT": labels.AGENT,
51
- "INANIMATE_INVOLVED": labels.INANIMATE_INVOLVED,
52
- "INCOME": labels.INCOME,
53
- "SUBCLASS_OF": labels.SUBCLASS_OF,
54
- "PART_OF": labels.PART_OF,
55
- "LOCATED_IN": labels.LOCATED_IN,
56
- "TREATED_USING": labels.TREATED_USING,
57
- "ORIGINS_FROM": labels.ORIGINS_FROM,
58
- "TO_DETECT_OR_STUDY": labels.TO_DETECT_OR_STUDY,
59
- "AFFECTS": labels.AFFECTS,
60
- "HAS_CAUSE": labels.HAS_CAUSE,
61
- "APPLIED_TO": labels.APPLIED_TO,
62
- "USED_IN": labels.USED_IN,
63
- "ASSOCIATED_WITH": labels.ASSOCIATED_WITH,
64
- "HAS_ADMINISTRATION_ROUTE": labels.HAS_ADMINISTRATION_ROUTE,
65
- "HAS_STRENGTH": labels.HAS_STRENGTH,
66
- "DURATION_OF": labels.DURATION_OF,
67
- "VALUE_IS": labels.VALUE_IS,
68
- "PHYSIOLOGY_OF": labels.PHYSIOLOGY_OF,
69
- "PROCEDURE_PERFORMED": labels.PROCEDURE_PERFORMED,
70
- "MENTAL_PROCESS_OF": labels.MENTAL_PROCESS_OF,
71
- "MEDICAL_CONDITION": labels.MEDICAL_CONDITION,
72
- "DOSE_IS": labels.DOSE_IS,
73
- "FINDING_OF": labels.FINDING_OF,
74
- "CAUSE_OF_DEATH": labels.CAUSE_OF_DEATH,
75
- "CONSUME": labels.CONSUME,
76
- }
77
-
78
- super(NerelBioAnyLabelFormatter, self).__init__(stol=stol)
79
-
@@ -1,56 +0,0 @@
1
- from arekit.common.utils import progress_bar_iter
2
- from arekit.contrib.source.ruattitudes.collection import RuAttitudesCollection
3
- from arekit.contrib.source.ruattitudes.io_utils import RuAttitudesVersions
4
- from arekit.contrib.source.ruattitudes.news import RuAttitudesNews
5
- from arekit.contrib.source.ruattitudes.news_brat import RuAttitudesNewsConverter
6
- from arekit.contrib.utils.data.doc_ops.dict_based import DictionaryBasedDocumentOperations
7
-
8
-
9
- class RuAttitudesDocumentOperations(DictionaryBasedDocumentOperations):
10
-
11
- def __init__(self, version, keep_doc_ids_only, doc_id_func, limit):
12
- d = self.read_ruattitudes_to_brat_in_memory(version=version,
13
- keep_doc_ids_only=keep_doc_ids_only,
14
- doc_id_func=doc_id_func,
15
- limit=limit)
16
- super(RuAttitudesDocumentOperations, self).__init__(d)
17
-
18
- @staticmethod
19
- def read_ruattitudes_to_brat_in_memory(version, keep_doc_ids_only, doc_id_func, limit=None):
20
- """ Performs reading of RuAttitude formatted documents and
21
- selection according to 'doc_ids_set' parameter.
22
- """
23
- assert (isinstance(version, RuAttitudesVersions))
24
- assert (isinstance(keep_doc_ids_only, bool))
25
- assert (callable(doc_id_func))
26
-
27
- it = RuAttitudesCollection.iter_news(version=version,
28
- get_news_index_func=doc_id_func,
29
- return_inds_only=keep_doc_ids_only)
30
-
31
- it_formatted_and_logged = progress_bar_iter(
32
- iterable=RuAttitudesDocumentOperations.__iter_id_with_news(
33
- docs_it=it, keep_doc_ids_only=keep_doc_ids_only),
34
- desc="Loading RuAttitudes Collection [{}]".format("doc ids only" if keep_doc_ids_only else "fully"),
35
- unit='docs')
36
-
37
- d = {}
38
- docs_read = 0
39
- for doc_id, news in it_formatted_and_logged:
40
- assert(isinstance(news, RuAttitudesNews) or news is None)
41
- d[doc_id] = RuAttitudesNewsConverter.to_brat_news(news) if news is not None else None
42
- docs_read += 1
43
- if limit is not None and docs_read >= limit:
44
- break
45
-
46
- return d
47
-
48
- @staticmethod
49
- def __iter_id_with_news(docs_it, keep_doc_ids_only):
50
- if keep_doc_ids_only:
51
- for doc_id in docs_it:
52
- yield doc_id, None
53
- else:
54
- for news in docs_it:
55
- assert (isinstance(news, RuAttitudesNews))
56
- yield news.ID, news
@@ -1,19 +0,0 @@
1
- from arekit.common.entities.types import OpinionEntityType
2
- from arekit.contrib.utils.entities.filter import EntityFilter
3
-
4
-
5
- class RuAttitudesEntityFilter(EntityFilter):
6
- """ Among all the entities proposed by the OntonotesV5,
7
- we consider only a short list related to sentiment attutde extraction task.
8
- """
9
-
10
- supported = ["GPE", "PERSON", "LOCAL", "GEO", "ORG"]
11
-
12
- def is_ignored(self, entity, e_type):
13
-
14
- if e_type == OpinionEntityType.Subject:
15
- return entity.Type not in RuAttitudesEntityFilter.supported
16
- if e_type == OpinionEntityType.Object:
17
- return entity.Type not in RuAttitudesEntityFilter.supported
18
- else:
19
- return True
@@ -1,58 +0,0 @@
1
- from arekit.common.labels.scaler.base import BaseLabelScaler
2
- from arekit.contrib.source.ruattitudes.io_utils import RuAttitudesVersions
3
- from arekit.contrib.source.ruattitudes.labels_fmt import RuAttitudesLabelFormatter
4
- from arekit.contrib.utils.pipelines.sources.ruattitudes.doc_ops import RuAttitudesDocumentOperations
5
- from arekit.contrib.utils.pipelines.sources.ruattitudes.entity_filter import RuAttitudesEntityFilter
6
- from arekit.contrib.utils.pipelines.text_opinion.annot.predefined import PredefinedTextOpinionAnnotator
7
- from arekit.contrib.utils.pipelines.text_opinion.extraction import text_opinion_extraction_pipeline
8
- from arekit.contrib.utils.pipelines.text_opinion.filters.distance_based import DistanceLimitedTextOpinionFilter
9
- from arekit.contrib.utils.pipelines.text_opinion.filters.entity_based import EntityBasedTextOpinionFilter
10
-
11
-
12
- def create_text_opinion_extraction_pipeline(text_parser,
13
- label_scaler,
14
- version=RuAttitudesVersions.V20Large,
15
- terms_per_context=50,
16
- entity_filter=RuAttitudesEntityFilter(),
17
- limit=None):
18
- """ Processing pipeline for RuAttitudes.
19
- This pipeline is based on the in-memory RuAttitudes storage.
20
-
21
- Original collection paper: www.aclweb.org/anthology/r19-1118/
22
- Github repository: https://github.com/nicolay-r/RuAttitudes
23
-
24
- version: enum
25
- Version of the RuAttitudes collection.
26
- NOTE: we consider to support a variations of the 2.0 versions.
27
- label_scaler:
28
- Scaler that allows to perform conversion from integer labels (RuAttitudes) to
29
- the actual `Label` instances, required in further for text_opinions instances.
30
- terms_per_context: int
31
- Amount of terms that we consider in between the Object and Subject.
32
- entity_filter:
33
- Entity filter
34
- limit: int or None
35
- Limit of documents to consider.
36
- """
37
- assert(isinstance(label_scaler, BaseLabelScaler))
38
- assert(isinstance(version, RuAttitudesVersions))
39
- assert(version in [RuAttitudesVersions.V20Large, RuAttitudesVersions.V20Base,
40
- RuAttitudesVersions.V20BaseNeut, RuAttitudesVersions.V20LargeNeut])
41
-
42
- doc_ops = RuAttitudesDocumentOperations(version=version,
43
- keep_doc_ids_only=False,
44
- doc_id_func=lambda doc_id: doc_id,
45
- limit=limit)
46
-
47
- pipeline = text_opinion_extraction_pipeline(
48
- annotators=[
49
- PredefinedTextOpinionAnnotator(doc_ops=doc_ops, label_formatter=RuAttitudesLabelFormatter(label_scaler))
50
- ],
51
- text_opinion_filters=[
52
- EntityBasedTextOpinionFilter(entity_filter=entity_filter),
53
- DistanceLimitedTextOpinionFilter(terms_per_context)
54
- ],
55
- get_doc_by_id_func=doc_ops.by_id,
56
- text_parser=text_parser)
57
-
58
- return pipeline
@@ -1,21 +0,0 @@
1
- from arekit.common.experiment.api.ops_doc import DocumentOperations
2
- from arekit.common.synonyms.base import SynonymsCollection
3
- from arekit.contrib.source.rusentrel.io_utils import RuSentRelVersions
4
- from arekit.contrib.source.rusentrel.news_reader import RuSentRelNewsReader
5
-
6
-
7
- class RuSentrelDocumentOperations(DocumentOperations):
8
- """ Limitations: Supported only train/test collections format
9
- """
10
-
11
- def __init__(self, version, synonyms):
12
- assert(isinstance(version, RuSentRelVersions))
13
- assert(isinstance(synonyms, SynonymsCollection))
14
- super(RuSentrelDocumentOperations, self).__init__()
15
- self.__version = version
16
- self.__synonyms = synonyms
17
-
18
- def by_id(self, doc_id):
19
- assert (isinstance(doc_id, int))
20
- return RuSentRelNewsReader.read_document(doc_id=doc_id, synonyms=self.__synonyms, version=self.__version)
21
-
@@ -1,100 +0,0 @@
1
- from arekit.common.labels.base import NoLabel
2
- from arekit.common.labels.provider.constant import ConstantLabelProvider
3
- from arekit.common.opinions.annot.algo.pair_based import PairBasedOpinionAnnotationAlgorithm
4
- from arekit.common.opinions.annot.algo.predefined import PredefinedOpinionAnnotationAlgorithm
5
- from arekit.common.opinions.collection import OpinionCollection
6
- from arekit.common.synonyms.grouping import SynonymsCollectionValuesGroupingProviders
7
- from arekit.contrib.source.rusentrel.labels_fmt import RuSentRelLabelsFormatter
8
- from arekit.contrib.source.rusentrel.opinions.collection import RuSentRelOpinions
9
- from arekit.contrib.source.rusentrel.synonyms import RuSentRelSynonymsCollectionHelper
10
- from arekit.contrib.utils.pipelines.sources.rusentrel.doc_ops import RuSentrelDocumentOperations
11
- from arekit.contrib.utils.pipelines.text_opinion.annot.algo_based import AlgorithmBasedTextOpinionAnnotator
12
- from arekit.contrib.utils.pipelines.text_opinion.extraction import text_opinion_extraction_pipeline
13
- from arekit.contrib.utils.pipelines.text_opinion.filters.distance_based import DistanceLimitedTextOpinionFilter
14
- from arekit.contrib.utils.pipelines.text_opinion.filters.entity_based import EntityBasedTextOpinionFilter
15
- from arekit.contrib.utils.processing.lemmatization.mystem import MystemWrapper
16
- from arekit.contrib.utils.synonyms.stemmer_based import StemmerBasedSynonymCollection
17
-
18
-
19
- def create_text_opinion_extraction_pipeline(rusentrel_version,
20
- text_parser,
21
- labels_fmt,
22
- entity_filter=None,
23
- no_label=NoLabel(),
24
- terms_per_context=50,
25
- dist_in_sentences=0):
26
- """ Processing pipeline for RuSentRel, which combines:
27
- - predefined document-level annotation (sentiment labels)
28
- - automatic annotation of opinions between mentioned named entities (no-label)
29
-
30
- Original collection paper: arxiv.org/abs/1808.08932
31
-
32
- version: enum
33
- Version of the RuSentRel collection.
34
- terms_per_context: int
35
- Amount of terms that we consider in between the Object and Subject.
36
- dist_in_sentences: int
37
- considering amount of sentences that could be in between Object and Subject.
38
- """
39
- assert(isinstance(labels_fmt, RuSentRelLabelsFormatter))
40
-
41
- synonyms = StemmerBasedSynonymCollection(
42
- iter_group_values_lists=RuSentRelSynonymsCollectionHelper.iter_groups(rusentrel_version),
43
- stemmer=MystemWrapper(),
44
- is_read_only=False)
45
-
46
- doc_ops = RuSentrelDocumentOperations(version=rusentrel_version, synonyms=synonyms)
47
-
48
- pipeline = text_opinion_extraction_pipeline(
49
- annotators=[
50
- predefined_annotator(synonyms=synonyms, labels_fmt=labels_fmt),
51
- nolabel_annotator(synonyms=synonyms, terms_per_context=terms_per_context,
52
- dist_in_sentences=dist_in_sentences, no_label=no_label)
53
- ],
54
- text_opinion_filters=[
55
- EntityBasedTextOpinionFilter(entity_filter=entity_filter),
56
- DistanceLimitedTextOpinionFilter(terms_per_context)
57
- ],
58
- get_doc_by_id_func=doc_ops.by_id,
59
- text_parser=text_parser)
60
-
61
- return pipeline
62
-
63
-
64
- def nolabel_annotator(synonyms, terms_per_context, dist_in_sentences=0, no_label=NoLabel()):
65
- """ This is a default annotator, utilized to annotate `neutral`-like attitudes.
66
- Neutral means that we adopt no-label parameter, and this label might be customized
67
- to the one required in your studies.
68
- """
69
- return AlgorithmBasedTextOpinionAnnotator(
70
- annot_algo=PairBasedOpinionAnnotationAlgorithm(dist_in_sents=dist_in_sentences,
71
- dist_in_terms_bound=terms_per_context,
72
- label_provider=ConstantLabelProvider(no_label)),
73
- create_empty_collection_func=lambda: OpinionCollection(
74
- synonyms=synonyms, error_on_duplicates=True, error_on_synonym_end_missed=False),
75
- value_to_group_id_func=lambda value:
76
- SynonymsCollectionValuesGroupingProviders.provide_existed_value(synonyms=synonyms, value=value))
77
-
78
-
79
- def predefined_annotator(synonyms, labels_fmt):
80
- """ This is a annotator-converter of the predefined Document-Level opinions onto text-level one
81
- """
82
- return AlgorithmBasedTextOpinionAnnotator(
83
- annot_algo=PredefinedOpinionAnnotationAlgorithm(
84
- lambda doc_id: __get_document_opinions(doc_id=doc_id, synonyms=synonyms, labels_fmt=labels_fmt)),
85
- create_empty_collection_func=lambda: OpinionCollection(
86
- synonyms=synonyms, error_on_duplicates=True, error_on_synonym_end_missed=False),
87
- value_to_group_id_func=lambda value:
88
- SynonymsCollectionValuesGroupingProviders.provide_existed_value(synonyms=synonyms, value=value))
89
-
90
-
91
- def __get_document_opinions(doc_id, synonyms, labels_fmt):
92
- """ RuSentRel provides a pre-defined list of Document-Level Opinions.
93
- Within this function we create the related OpinionCollection by a given doc_id.
94
- """
95
- return OpinionCollection(
96
- opinions=RuSentRelOpinions.iter_from_doc(
97
- doc_id=doc_id, labels_fmt=labels_fmt),
98
- synonyms=synonyms,
99
- error_on_synonym_end_missed=True,
100
- error_on_duplicates=True)
@@ -1,29 +0,0 @@
1
- from arekit.common.experiment.api.ops_doc import DocumentOperations
2
- from arekit.contrib.source.sentinerel.io_utils import SentiNerelVersions
3
- from arekit.contrib.source.sentinerel.reader import SentiNerelDocReader
4
-
5
-
6
- class SentiNERELDocOperation(DocumentOperations):
7
- """ Document reader for the collection of the RuSentNE competition 2023.
8
- For more details please follow the following repository:
9
- github: https://github.com/dialogue-evaluation/RuSentNE-evaluation
10
- """
11
-
12
- def __init__(self, filename_by_id, version):
13
- """ filename_ids: dict
14
- Dictionary of {id: filename}, where
15
- - id: int
16
- - filename: str
17
- version: SentiNerelVersions
18
- Specify the appropriate version of teh SentiNEREL collection.
19
- """
20
- assert(isinstance(filename_by_id, dict))
21
- assert(isinstance(version, SentiNerelVersions))
22
- super(SentiNERELDocOperation, self).__init__()
23
- self.__filename_by_id = filename_by_id
24
- self.__version = version
25
-
26
- def by_id(self, doc_id):
27
- return SentiNerelDocReader.read_document(doc_id=doc_id,
28
- version=self.__version,
29
- filename=self.__filename_by_id[doc_id])
@@ -1,62 +0,0 @@
1
- from arekit.common.entities.base import Entity
2
- from arekit.common.entities.types import OpinionEntityType
3
- from arekit.contrib.utils.entities.filter import EntityFilter
4
-
5
-
6
- class EntityHelper(object):
7
- """ Named Entities formatting in text.
8
- Based on OntoNotes5 collection tags:
9
- https://catalog.ldc.upenn.edu/LDC2013T19
10
- """
11
-
12
- AGE = "AGE"
13
- AWARD = "AWARD"
14
- CITY = "CITY"
15
- COUNTRY = "COUNTRY"
16
- CRIME = "CRIME"
17
- DATE = "DATE"
18
- DISEASE = "DISEASE"
19
- DISTRICT = "DISTRICT"
20
- EVENT = "EVENT"
21
- FACILITY = "FACILITY"
22
- FAMILY = "FAMILY"
23
- IDEOLOGY = "IDEOLOGY"
24
- LANGUAGE = "LANGUAGE"
25
- LAW = "LAW"
26
- LOCATION = "LOCATION"
27
- MONEY = "MONEY"
28
- NATIONALITY = "NATIONALITY"
29
- NUMBER = "NUMBER"
30
- ORDINAL = "ORDINAL"
31
- ORGANIZATION = "ORGANIZATION"
32
- PENALTY = "PENALTY"
33
- PERCENT = "PERCENT"
34
- PERSON = "PERSON"
35
- PRODUCT = "PRODUCT"
36
- PROFESSION = "PROFESSION"
37
- RELIGION = "RELIGION"
38
- STATE_OR_PROVINCE = "STATE_OR_PROVINCE"
39
- TIME = "TIME"
40
- WORK_OF_ART = "WORK_OF_ART"
41
-
42
-
43
- class SentiNerelEntityFilter(EntityFilter):
44
- """ Filter, oriented on sentiment related extraction task
45
- within SentiNEREL dataset.
46
- """
47
-
48
- def is_ignored(self, entity, e_type):
49
- """ Subject and Object could be one of the following object types:
50
- [PERSON, ORGANIZATION, COUNTRY, PROFESSION]
51
- """
52
- assert(isinstance(entity, Entity))
53
- assert(isinstance(e_type, OpinionEntityType))
54
-
55
- supported = [EntityHelper.PERSON, EntityHelper.ORGANIZATION, EntityHelper.COUNTRY, EntityHelper.PROFESSION]
56
-
57
- if e_type == OpinionEntityType.Subject:
58
- return entity.Type not in supported
59
- if e_type == OpinionEntityType.Object:
60
- return entity.Type not in supported
61
- else:
62
- return True
@@ -1,175 +0,0 @@
1
- from arekit.common.experiment.api.ops_doc import DocumentOperations
2
- from arekit.common.experiment.data_type import DataType
3
- from arekit.common.labels.base import NoLabel
4
- from arekit.common.labels.provider.constant import ConstantLabelProvider
5
- from arekit.common.opinions.annot.algo.pair_based import PairBasedOpinionAnnotationAlgorithm
6
- from arekit.common.opinions.collection import OpinionCollection
7
- from arekit.common.synonyms.base import SynonymsCollection
8
- from arekit.common.synonyms.grouping import SynonymsCollectionValuesGroupingProviders
9
- from arekit.contrib.source.sentinerel.io_utils import SentiNerelVersions, SentiNerelIOUtils
10
- from arekit.contrib.utils.pipelines.sources.sentinerel.doc_ops import SentiNERELDocOperation
11
- from arekit.contrib.utils.pipelines.sources.sentinerel.labels_fmt import SentiNERELSentimentLabelFormatter
12
- from arekit.contrib.utils.pipelines.text_opinion.annot.algo_based import AlgorithmBasedTextOpinionAnnotator
13
- from arekit.contrib.utils.pipelines.text_opinion.annot.predefined import PredefinedTextOpinionAnnotator
14
- from arekit.contrib.utils.pipelines.text_opinion.extraction import text_opinion_extraction_pipeline
15
- from arekit.contrib.utils.pipelines.text_opinion.filters.distance_based import DistanceLimitedTextOpinionFilter
16
- from arekit.contrib.utils.pipelines.text_opinion.filters.entity_based import EntityBasedTextOpinionFilter
17
- from arekit.contrib.utils.processing.lemmatization.mystem import MystemWrapper
18
- from arekit.contrib.utils.sources.sentinerel.text_opinion.prof_per_org_filter import \
19
- ProfessionAsCharacteristicSentimentTextOpinionFilter
20
- from arekit.contrib.utils.synonyms.stemmer_based import StemmerBasedSynonymCollection
21
-
22
-
23
- def create_text_opinion_extraction_pipeline(sentinerel_version,
24
- text_parser,
25
- label_formatter=SentiNERELSentimentLabelFormatter(),
26
- no_label=NoLabel(),
27
- terms_per_context=50,
28
- doc_ops=None,
29
- dist_in_sentences=0,
30
- docs_limit=None,
31
- entity_filter=None):
32
- """ This is the main pipeline, which generates the samples for SentiNEREL documents.
33
- SentiNEREL is a collection that becomes a part of the:
34
- 1. Attitude extraction studies (AREkit focused studies):
35
- https://github.com/nicolay-r/SentiNEREL-attitude-extraction
36
- 2. RuSentNE-2023 competitions under CODALAB platform (github page):
37
- https://github.com/dialogue-evaluation/RuSentNE-evaluation
38
-
39
- Parameters:
40
- sentinerel_version: enum
41
- Version of the SentiNEREL collection.
42
- text_parser: Defines the way in which the text is processed.
43
- doc_ops: DocumentOperations or None
44
- In case of None we consider the default initialization.
45
- label_formatter:
46
- Formatter for labels, which allows limiting the set of labels and performing their conversion from
47
- string to actual python type.
48
- terms_per_context: int
49
- Amount of terms that we consider in between the Object and Subject.
50
-
51
- Returns: dict, (data_folding) optional
52
- pipelines per every type.
53
- """
54
- assert(isinstance(sentinerel_version, SentiNerelVersions))
55
- assert(isinstance(doc_ops, DocumentOperations) or doc_ops is None)
56
-
57
- data_folding = None
58
-
59
- if doc_ops is None:
60
- # Default Initialization.
61
- filenames_by_ids, data_folding = SentiNerelIOUtils.read_dataset_split(version=sentinerel_version,
62
- docs_limit=docs_limit)
63
- doc_ops = SentiNERELDocOperation(filename_by_id=filenames_by_ids,
64
- version=sentinerel_version)
65
-
66
- train_neut_annot = create_nolabel_text_opinion_annotator(terms_per_context=terms_per_context,
67
- dist_in_sents=dist_in_sentences,
68
- no_label=no_label)
69
- test_neut_annot = create_nolabel_text_opinion_annotator(terms_per_context=terms_per_context,
70
- dist_in_sents=dist_in_sentences,
71
- no_label=no_label)
72
-
73
- text_opinion_filters = [
74
- EntityBasedTextOpinionFilter(entity_filter=entity_filter),
75
- ProfessionAsCharacteristicSentimentTextOpinionFilter(),
76
- DistanceLimitedTextOpinionFilter(terms_per_context)
77
- ]
78
-
79
- predefined_annot = PredefinedTextOpinionAnnotator(doc_ops, label_formatter)
80
-
81
- pipelines = {
82
- DataType.Train: create_main_pipeline(text_parser=text_parser,
83
- doc_ops=doc_ops,
84
- annotators=[
85
- predefined_annot,
86
- train_neut_annot
87
- ],
88
- text_opinion_filters=text_opinion_filters),
89
- DataType.Test: create_main_pipeline(text_parser=text_parser,
90
- doc_ops=doc_ops,
91
- annotators=[
92
- test_neut_annot
93
- ],
94
- text_opinion_filters=text_opinion_filters),
95
- DataType.Etalon: create_etalon_pipeline(text_parser=text_parser,
96
- doc_ops=doc_ops,
97
- predefined_annot=predefined_annot,
98
- text_opinion_filters=text_opinion_filters),
99
- DataType.Dev: create_etalon_with_no_label_pipeline(text_parser=text_parser,
100
- doc_ops=doc_ops,
101
- annotators=[
102
- predefined_annot,
103
- train_neut_annot
104
- ],
105
- text_opinion_filters=text_opinion_filters),
106
- }
107
-
108
- # In the case when we setup a default data-folding.
109
- # There is a need to provide it, since it is required further on.
110
- if data_folding is not None:
111
- return pipelines, data_folding
112
-
113
- return pipelines
114
-
115
-
116
- def create_nolabel_text_opinion_annotator(terms_per_context, no_label, dist_in_sents=0, synonyms=None):
117
- """ This is a core annotator, which provides all entity pairs
118
- that could be revealed from the document.
119
-
120
- Parameters:
121
- terms_per_context: int
122
- Amount of terms that we consider in between the Object and Subject.
123
- dist_in_sents: int
124
- Distance in sentences in between the objects.
125
- """
126
- assert(isinstance(terms_per_context, int))
127
- assert(isinstance(synonyms, SynonymsCollection) or synonyms is None)
128
- assert(isinstance(dist_in_sents, int))
129
-
130
- if synonyms is None:
131
- synonyms = StemmerBasedSynonymCollection(stemmer=MystemWrapper(), is_read_only=False)
132
-
133
- return AlgorithmBasedTextOpinionAnnotator(
134
- value_to_group_id_func=lambda value:
135
- SynonymsCollectionValuesGroupingProviders.provide_existed_or_register_missed_value(
136
- synonyms=synonyms, value=value),
137
- annot_algo=PairBasedOpinionAnnotationAlgorithm(
138
- dist_in_sents=dist_in_sents,
139
- dist_in_terms_bound=terms_per_context,
140
- label_provider=ConstantLabelProvider(no_label)),
141
- create_empty_collection_func=lambda: OpinionCollection(
142
- synonyms=synonyms,
143
- error_on_duplicates=True,
144
- error_on_synonym_end_missed=False))
145
-
146
-
147
- def create_main_pipeline(text_parser, doc_ops, annotators, text_opinion_filters):
148
- """ Train pipeline is based on the predefined annotations and
149
- automatic annotations of other pairs with a NoLabel.
150
- """
151
- return text_opinion_extraction_pipeline(
152
- get_doc_by_id_func=doc_ops.by_id,
153
- text_parser=text_parser,
154
- annotators=annotators,
155
- text_opinion_filters=text_opinion_filters)
156
-
157
-
158
- def create_etalon_pipeline(text_parser, doc_ops, predefined_annot, text_opinion_filters):
159
- """ We adopt exactly the same pipeline as for training data,
160
- but we do not perform "NoLabel" annotation.
161
- (we are interested only in sentiment attitudes).
162
- """
163
- return create_main_pipeline(text_parser=text_parser,
164
- doc_ops=doc_ops,
165
- annotators=[predefined_annot],
166
- text_opinion_filters=text_opinion_filters)
167
-
168
-
169
- def create_etalon_with_no_label_pipeline(annotators, text_parser, doc_ops, text_opinion_filters):
170
- """ We adopt exactly the same pipeline as for training data.
171
- """
172
- return create_main_pipeline(text_parser=text_parser,
173
- doc_ops=doc_ops,
174
- annotators=annotators,
175
- text_opinion_filters=text_opinion_filters)
@@ -1,50 +0,0 @@
1
- from arekit.common.labels.base import NoLabel
2
- from arekit.common.labels.str_fmt import StringLabelsFormatter
3
- from arekit.contrib.source.sentinerel import labels
4
-
5
-
6
- class SentiNERELAnyLabelFormatter(StringLabelsFormatter):
7
-
8
- def __init__(self):
9
-
10
- stol = {
11
- "OPINION_BELONGS_TO": labels.OpinionBelongsTo,
12
- "OPINION_RELATES_TO": labels.OpinionRelatesTo,
13
- "NEG_EFFECT_FROM": labels.NegEffectFrom,
14
- "POS_EFFECT_FROM": labels.PosEffectFrom,
15
- "NEG_STATE_FROM": labels.NegStateFrom,
16
- "POS_STATE_FROM": labels.PosStateFrom,
17
- "NEGATIVE_TO": labels.NegativeTo,
18
- "POSITIVE_TO": labels.PositiveTo,
19
- "STATE_BELONGS_TO": labels.StateBelongsTo,
20
- "POS_AUTHOR_FROM": labels.PosAuthorFrom,
21
- "NEG_AUTHOR_FROM": labels.NegAuthorFrom,
22
- "ALTERNATIVE_NAME": labels.AlternativeName,
23
- "ORIGINS_FROM": labels.OriginsFrom
24
- }
25
-
26
- super(SentiNERELAnyLabelFormatter, self).__init__(stol=stol)
27
-
28
-
29
- class SentiNERELSentimentLabelFormatter(StringLabelsFormatter):
30
-
31
- def __init__(self):
32
- stol = {
33
- "NEGATIVE_TO": labels.NegativeTo,
34
- "POSITIVE_TO": labels.PositiveTo,
35
- }
36
-
37
- super(SentiNERELSentimentLabelFormatter, self).__init__(stol=stol)
38
-
39
-
40
- class SentiNERELPosNegNeuRelationsLabelFormatter(StringLabelsFormatter):
41
-
42
- def __init__(self):
43
-
44
- stol = {
45
- "NEUTRAL": NoLabel,
46
- "NEGATIVE_TO": labels.NegativeTo,
47
- "POSITIVE_TO": labels.PositiveTo,
48
- }
49
-
50
- super(SentiNERELPosNegNeuRelationsLabelFormatter, self).__init__(stol=stol)