arekit 0.24.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (374) hide show
  1. arekit/__init__.py +0 -0
  2. arekit/common/__init__.py +0 -0
  3. arekit/common/bound.py +48 -0
  4. arekit/common/context/__init__.py +0 -0
  5. arekit/common/context/terms_mapper.py +51 -0
  6. arekit/common/context/token.py +16 -0
  7. arekit/common/data/__init__.py +0 -0
  8. arekit/common/data/const.py +21 -0
  9. arekit/common/data/doc_provider.py +6 -0
  10. arekit/common/data/input/__init__.py +0 -0
  11. arekit/common/data/input/providers/__init__.py +0 -0
  12. arekit/common/data/input/providers/columns/__init__.py +0 -0
  13. arekit/common/data/input/providers/columns/base.py +9 -0
  14. arekit/common/data/input/providers/columns/sample.py +59 -0
  15. arekit/common/data/input/providers/const.py +3 -0
  16. arekit/common/data/input/providers/contents.py +9 -0
  17. arekit/common/data/input/providers/instances/__init__.py +0 -0
  18. arekit/common/data/input/providers/instances/base.py +14 -0
  19. arekit/common/data/input/providers/instances/multiple.py +27 -0
  20. arekit/common/data/input/providers/instances/single.py +8 -0
  21. arekit/common/data/input/providers/label/__init__.py +0 -0
  22. arekit/common/data/input/providers/label/base.py +24 -0
  23. arekit/common/data/input/providers/label/binary.py +11 -0
  24. arekit/common/data/input/providers/label/multiple.py +15 -0
  25. arekit/common/data/input/providers/rows/__init__.py +0 -0
  26. arekit/common/data/input/providers/rows/base.py +64 -0
  27. arekit/common/data/input/providers/rows/samples.py +227 -0
  28. arekit/common/data/input/providers/sample/__init__.py +0 -0
  29. arekit/common/data/input/providers/sample/cropped.py +43 -0
  30. arekit/common/data/input/providers/text/__init__.py +0 -0
  31. arekit/common/data/input/providers/text/single.py +49 -0
  32. arekit/common/data/input/repositories/__init__.py +0 -0
  33. arekit/common/data/input/repositories/base.py +68 -0
  34. arekit/common/data/input/repositories/sample.py +22 -0
  35. arekit/common/data/input/sample.py +66 -0
  36. arekit/common/data/input/terms_mapper.py +88 -0
  37. arekit/common/data/rows_fmt.py +82 -0
  38. arekit/common/data/rows_parser.py +43 -0
  39. arekit/common/data/storages/__init__.py +0 -0
  40. arekit/common/data/storages/base.py +109 -0
  41. arekit/common/data/views/__init__.py +0 -0
  42. arekit/common/data/views/samples.py +26 -0
  43. arekit/common/docs/__init__.py +0 -0
  44. arekit/common/docs/base.py +30 -0
  45. arekit/common/docs/entities_grouping.py +16 -0
  46. arekit/common/docs/entity.py +18 -0
  47. arekit/common/docs/objects_parser.py +37 -0
  48. arekit/common/docs/parsed/__init__.py +0 -0
  49. arekit/common/docs/parsed/base.py +101 -0
  50. arekit/common/docs/parsed/providers/__init__.py +0 -0
  51. arekit/common/docs/parsed/providers/base.py +68 -0
  52. arekit/common/docs/parsed/providers/base_pairs.py +51 -0
  53. arekit/common/docs/parsed/providers/entity_service.py +175 -0
  54. arekit/common/docs/parsed/providers/opinion_pairs.py +20 -0
  55. arekit/common/docs/parsed/providers/text_opinion_pairs.py +78 -0
  56. arekit/common/docs/parsed/service.py +31 -0
  57. arekit/common/docs/parsed/term_position.py +42 -0
  58. arekit/common/docs/parser.py +34 -0
  59. arekit/common/docs/sentence.py +14 -0
  60. arekit/common/entities/__init__.py +0 -0
  61. arekit/common/entities/base.py +51 -0
  62. arekit/common/entities/collection.py +72 -0
  63. arekit/common/entities/str_fmt.py +8 -0
  64. arekit/common/entities/types.py +9 -0
  65. arekit/common/experiment/__init__.py +0 -0
  66. arekit/common/experiment/api/__init__.py +0 -0
  67. arekit/common/experiment/api/base_samples_io.py +20 -0
  68. arekit/common/experiment/data_type.py +17 -0
  69. arekit/common/frames/__init__.py +0 -0
  70. arekit/common/frames/connotations/__init__.py +0 -0
  71. arekit/common/frames/connotations/descriptor.py +17 -0
  72. arekit/common/frames/connotations/provider.py +4 -0
  73. arekit/common/frames/text_variant.py +43 -0
  74. arekit/common/frames/variants/__init__.py +0 -0
  75. arekit/common/frames/variants/base.py +21 -0
  76. arekit/common/frames/variants/collection.py +60 -0
  77. arekit/common/labels/__init__.py +0 -0
  78. arekit/common/labels/base.py +19 -0
  79. arekit/common/labels/provider/__init__.py +0 -0
  80. arekit/common/labels/provider/base.py +7 -0
  81. arekit/common/labels/provider/constant.py +14 -0
  82. arekit/common/labels/scaler/__init__.py +0 -0
  83. arekit/common/labels/scaler/base.py +85 -0
  84. arekit/common/labels/scaler/sentiment.py +7 -0
  85. arekit/common/labels/scaler/single.py +10 -0
  86. arekit/common/labels/str_fmt.py +55 -0
  87. arekit/common/linkage/__init__.py +0 -0
  88. arekit/common/linkage/base.py +44 -0
  89. arekit/common/linkage/meta.py +23 -0
  90. arekit/common/linkage/opinions.py +9 -0
  91. arekit/common/linkage/text_opinions.py +22 -0
  92. arekit/common/log_utils.py +29 -0
  93. arekit/common/model/__init__.py +0 -0
  94. arekit/common/model/labeling/__init__.py +0 -0
  95. arekit/common/model/labeling/base.py +24 -0
  96. arekit/common/model/labeling/modes.py +8 -0
  97. arekit/common/model/labeling/single.py +24 -0
  98. arekit/common/opinions/__init__.py +0 -0
  99. arekit/common/opinions/annot/__init__.py +0 -0
  100. arekit/common/opinions/annot/algo/__init__.py +0 -0
  101. arekit/common/opinions/annot/algo/base.py +4 -0
  102. arekit/common/opinions/annot/algo/pair_based.py +99 -0
  103. arekit/common/opinions/annot/algo/predefined.py +16 -0
  104. arekit/common/opinions/annot/algo_based.py +55 -0
  105. arekit/common/opinions/annot/base.py +15 -0
  106. arekit/common/opinions/base.py +74 -0
  107. arekit/common/opinions/collection.py +150 -0
  108. arekit/common/opinions/enums.py +6 -0
  109. arekit/common/opinions/provider.py +4 -0
  110. arekit/common/opinions/writer.py +4 -0
  111. arekit/common/pipeline/__init__.py +0 -0
  112. arekit/common/pipeline/base.py +25 -0
  113. arekit/common/pipeline/context.py +36 -0
  114. arekit/common/pipeline/conts.py +2 -0
  115. arekit/common/pipeline/items/__init__.py +0 -0
  116. arekit/common/pipeline/items/base.py +12 -0
  117. arekit/common/pipeline/items/flatten.py +14 -0
  118. arekit/common/pipeline/items/handle.py +17 -0
  119. arekit/common/pipeline/items/iter.py +11 -0
  120. arekit/common/pipeline/items/map.py +11 -0
  121. arekit/common/pipeline/items/map_nested.py +13 -0
  122. arekit/common/synonyms/__init__.py +0 -0
  123. arekit/common/synonyms/base.py +151 -0
  124. arekit/common/synonyms/grouping.py +21 -0
  125. arekit/common/text/__init__.py +0 -0
  126. arekit/common/text/enums.py +12 -0
  127. arekit/common/text/parsed.py +42 -0
  128. arekit/common/text/parser.py +12 -0
  129. arekit/common/text/partitioning/__init__.py +0 -0
  130. arekit/common/text/partitioning/base.py +4 -0
  131. arekit/common/text/partitioning/str.py +36 -0
  132. arekit/common/text/partitioning/terms.py +35 -0
  133. arekit/common/text/stemmer.py +16 -0
  134. arekit/common/text_opinions/__init__.py +0 -0
  135. arekit/common/text_opinions/base.py +105 -0
  136. arekit/common/utils.py +129 -0
  137. arekit/contrib/__init__.py +0 -0
  138. arekit/contrib/bert/__init__.py +0 -0
  139. arekit/contrib/bert/input/__init__.py +0 -0
  140. arekit/contrib/bert/input/providers/__init__.py +0 -0
  141. arekit/contrib/bert/input/providers/cropped_sample.py +17 -0
  142. arekit/contrib/bert/input/providers/text_pair.py +62 -0
  143. arekit/contrib/bert/terms/__init__.py +0 -0
  144. arekit/contrib/bert/terms/mapper.py +20 -0
  145. arekit/contrib/networks/__init__.py +0 -0
  146. arekit/contrib/networks/embedding.py +149 -0
  147. arekit/contrib/networks/embedding_io.py +18 -0
  148. arekit/contrib/networks/input/__init__.py +0 -0
  149. arekit/contrib/networks/input/const.py +6 -0
  150. arekit/contrib/networks/input/ctx_serialization.py +28 -0
  151. arekit/contrib/networks/input/embedding/__init__.py +0 -0
  152. arekit/contrib/networks/input/embedding/matrix.py +29 -0
  153. arekit/contrib/networks/input/embedding/offsets.py +55 -0
  154. arekit/contrib/networks/input/formatters/__init__.py +0 -0
  155. arekit/contrib/networks/input/formatters/pos_mapper.py +22 -0
  156. arekit/contrib/networks/input/providers/__init__.py +0 -0
  157. arekit/contrib/networks/input/providers/sample.py +129 -0
  158. arekit/contrib/networks/input/providers/term_connotation.py +23 -0
  159. arekit/contrib/networks/input/providers/text.py +24 -0
  160. arekit/contrib/networks/input/rows_parser.py +47 -0
  161. arekit/contrib/networks/input/term_types.py +13 -0
  162. arekit/contrib/networks/input/terms_mapping.py +60 -0
  163. arekit/contrib/networks/vectorizer.py +6 -0
  164. arekit/contrib/prompt/__init__.py +0 -0
  165. arekit/contrib/prompt/sample.py +61 -0
  166. arekit/contrib/source/__init__.py +0 -0
  167. arekit/contrib/source/brat/__init__.py +0 -0
  168. arekit/contrib/source/brat/annot.py +84 -0
  169. arekit/contrib/source/brat/doc.py +28 -0
  170. arekit/contrib/source/brat/entities/__init__.py +0 -0
  171. arekit/contrib/source/brat/entities/compound.py +13 -0
  172. arekit/contrib/source/brat/entities/entity.py +42 -0
  173. arekit/contrib/source/brat/entities/parser.py +53 -0
  174. arekit/contrib/source/brat/opinions/__init__.py +0 -0
  175. arekit/contrib/source/brat/opinions/converter.py +19 -0
  176. arekit/contrib/source/brat/relation.py +32 -0
  177. arekit/contrib/source/brat/sentence.py +69 -0
  178. arekit/contrib/source/brat/sentences_reader.py +128 -0
  179. arekit/contrib/source/download.py +41 -0
  180. arekit/contrib/source/nerel/__init__.py +0 -0
  181. arekit/contrib/source/nerel/entities.py +55 -0
  182. arekit/contrib/source/nerel/folding/__init__.py +0 -0
  183. arekit/contrib/source/nerel/folding/fixed.py +74 -0
  184. arekit/contrib/source/nerel/io_utils.py +62 -0
  185. arekit/contrib/source/nerel/labels.py +241 -0
  186. arekit/contrib/source/nerel/reader.py +46 -0
  187. arekit/contrib/source/nerel/utils.py +24 -0
  188. arekit/contrib/source/nerel/versions.py +12 -0
  189. arekit/contrib/source/nerelbio/__init__.py +0 -0
  190. arekit/contrib/source/nerelbio/io_utils.py +62 -0
  191. arekit/contrib/source/nerelbio/labels.py +265 -0
  192. arekit/contrib/source/nerelbio/reader.py +8 -0
  193. arekit/contrib/source/nerelbio/versions.py +8 -0
  194. arekit/contrib/source/ruattitudes/__init__.py +0 -0
  195. arekit/contrib/source/ruattitudes/collection.py +36 -0
  196. arekit/contrib/source/ruattitudes/doc.py +51 -0
  197. arekit/contrib/source/ruattitudes/doc_brat.py +44 -0
  198. arekit/contrib/source/ruattitudes/entity/__init__.py +0 -0
  199. arekit/contrib/source/ruattitudes/entity/parser.py +7 -0
  200. arekit/contrib/source/ruattitudes/io_utils.py +56 -0
  201. arekit/contrib/source/ruattitudes/labels_fmt.py +12 -0
  202. arekit/contrib/source/ruattitudes/opinions/__init__.py +0 -0
  203. arekit/contrib/source/ruattitudes/opinions/base.py +28 -0
  204. arekit/contrib/source/ruattitudes/opinions/converter.py +37 -0
  205. arekit/contrib/source/ruattitudes/reader.py +268 -0
  206. arekit/contrib/source/ruattitudes/sentence.py +73 -0
  207. arekit/contrib/source/ruattitudes/synonyms.py +17 -0
  208. arekit/contrib/source/ruattitudes/text_object.py +59 -0
  209. arekit/contrib/source/rusentiframes/__init__.py +0 -0
  210. arekit/contrib/source/rusentiframes/collection.py +157 -0
  211. arekit/contrib/source/rusentiframes/effect.py +24 -0
  212. arekit/contrib/source/rusentiframes/io_utils.py +19 -0
  213. arekit/contrib/source/rusentiframes/labels_fmt.py +22 -0
  214. arekit/contrib/source/rusentiframes/polarity.py +35 -0
  215. arekit/contrib/source/rusentiframes/role.py +15 -0
  216. arekit/contrib/source/rusentiframes/state.py +24 -0
  217. arekit/contrib/source/rusentiframes/types.py +42 -0
  218. arekit/contrib/source/rusentiframes/value.py +2 -0
  219. arekit/contrib/source/rusentrel/__init__.py +0 -0
  220. arekit/contrib/source/rusentrel/const.py +3 -0
  221. arekit/contrib/source/rusentrel/docs_reader.py +51 -0
  222. arekit/contrib/source/rusentrel/entities.py +26 -0
  223. arekit/contrib/source/rusentrel/io_utils.py +125 -0
  224. arekit/contrib/source/rusentrel/labels_fmt.py +12 -0
  225. arekit/contrib/source/rusentrel/opinions/__init__.py +0 -0
  226. arekit/contrib/source/rusentrel/opinions/collection.py +30 -0
  227. arekit/contrib/source/rusentrel/opinions/converter.py +40 -0
  228. arekit/contrib/source/rusentrel/opinions/provider.py +54 -0
  229. arekit/contrib/source/rusentrel/opinions/writer.py +42 -0
  230. arekit/contrib/source/rusentrel/synonyms.py +17 -0
  231. arekit/contrib/source/sentinerel/__init__.py +0 -0
  232. arekit/contrib/source/sentinerel/entities.py +52 -0
  233. arekit/contrib/source/sentinerel/folding/__init__.py +0 -0
  234. arekit/contrib/source/sentinerel/folding/factory.py +31 -0
  235. arekit/contrib/source/sentinerel/folding/fixed.py +70 -0
  236. arekit/contrib/source/sentinerel/io_utils.py +87 -0
  237. arekit/contrib/source/sentinerel/labels.py +53 -0
  238. arekit/contrib/source/sentinerel/labels_scaler.py +30 -0
  239. arekit/contrib/source/sentinerel/reader.py +42 -0
  240. arekit/contrib/source/synonyms/__init__.py +0 -0
  241. arekit/contrib/source/synonyms/utils.py +19 -0
  242. arekit/contrib/source/zip_utils.py +47 -0
  243. arekit/contrib/utils/__init__.py +0 -0
  244. arekit/contrib/utils/bert/__init__.py +0 -0
  245. arekit/contrib/utils/bert/samplers.py +17 -0
  246. arekit/contrib/utils/connotations/__init__.py +0 -0
  247. arekit/contrib/utils/connotations/rusentiframes_sentiment.py +23 -0
  248. arekit/contrib/utils/data/__init__.py +0 -0
  249. arekit/contrib/utils/data/contents/__init__.py +0 -0
  250. arekit/contrib/utils/data/contents/opinions.py +37 -0
  251. arekit/contrib/utils/data/doc_provider/__init__.py +0 -0
  252. arekit/contrib/utils/data/doc_provider/dict_based.py +13 -0
  253. arekit/contrib/utils/data/doc_provider/dir_based.py +53 -0
  254. arekit/contrib/utils/data/readers/__init__.py +0 -0
  255. arekit/contrib/utils/data/readers/base.py +7 -0
  256. arekit/contrib/utils/data/readers/csv_pd.py +38 -0
  257. arekit/contrib/utils/data/readers/jsonl.py +15 -0
  258. arekit/contrib/utils/data/service/__init__.py +0 -0
  259. arekit/contrib/utils/data/service/balance.py +50 -0
  260. arekit/contrib/utils/data/storages/__init__.py +0 -0
  261. arekit/contrib/utils/data/storages/jsonl_based.py +18 -0
  262. arekit/contrib/utils/data/storages/pandas_based.py +123 -0
  263. arekit/contrib/utils/data/storages/row_cache.py +48 -0
  264. arekit/contrib/utils/data/writers/__init__.py +0 -0
  265. arekit/contrib/utils/data/writers/base.py +27 -0
  266. arekit/contrib/utils/data/writers/csv_native.py +63 -0
  267. arekit/contrib/utils/data/writers/csv_pd.py +40 -0
  268. arekit/contrib/utils/data/writers/json_opennre.py +132 -0
  269. arekit/contrib/utils/data/writers/sqlite_native.py +110 -0
  270. arekit/contrib/utils/download.py +77 -0
  271. arekit/contrib/utils/embeddings/__init__.py +0 -0
  272. arekit/contrib/utils/embeddings/rusvectores.py +58 -0
  273. arekit/contrib/utils/embeddings/tokens.py +30 -0
  274. arekit/contrib/utils/entities/__init__.py +0 -0
  275. arekit/contrib/utils/entities/filter.py +7 -0
  276. arekit/contrib/utils/entities/formatters/__init__.py +0 -0
  277. arekit/contrib/utils/entities/formatters/str_display.py +11 -0
  278. arekit/contrib/utils/entities/formatters/str_simple_sharp_prefixed_fmt.py +15 -0
  279. arekit/contrib/utils/io_utils/__init__.py +0 -0
  280. arekit/contrib/utils/io_utils/embedding.py +72 -0
  281. arekit/contrib/utils/io_utils/opinions.py +37 -0
  282. arekit/contrib/utils/io_utils/samples.py +79 -0
  283. arekit/contrib/utils/io_utils/utils.py +39 -0
  284. arekit/contrib/utils/lexicons/__init__.py +0 -0
  285. arekit/contrib/utils/lexicons/lexicon.py +41 -0
  286. arekit/contrib/utils/lexicons/relation.py +42 -0
  287. arekit/contrib/utils/lexicons/rusentilex.py +37 -0
  288. arekit/contrib/utils/nn/__init__.py +0 -0
  289. arekit/contrib/utils/nn/rows.py +83 -0
  290. arekit/contrib/utils/np_utils/__init__.py +0 -0
  291. arekit/contrib/utils/np_utils/embedding.py +22 -0
  292. arekit/contrib/utils/np_utils/npz_utils.py +13 -0
  293. arekit/contrib/utils/np_utils/vocab.py +20 -0
  294. arekit/contrib/utils/pipelines/__init__.py +0 -0
  295. arekit/contrib/utils/pipelines/items/__init__.py +0 -0
  296. arekit/contrib/utils/pipelines/items/sampling/__init__.py +0 -0
  297. arekit/contrib/utils/pipelines/items/sampling/base.py +99 -0
  298. arekit/contrib/utils/pipelines/items/sampling/networks.py +54 -0
  299. arekit/contrib/utils/pipelines/items/text/__init__.py +0 -0
  300. arekit/contrib/utils/pipelines/items/text/entities_default.py +23 -0
  301. arekit/contrib/utils/pipelines/items/text/frames.py +86 -0
  302. arekit/contrib/utils/pipelines/items/text/frames_lemmatized.py +36 -0
  303. arekit/contrib/utils/pipelines/items/text/frames_negation.py +32 -0
  304. arekit/contrib/utils/pipelines/items/text/terms_splitter.py +10 -0
  305. arekit/contrib/utils/pipelines/items/text/tokenizer.py +107 -0
  306. arekit/contrib/utils/pipelines/items/text/translator.py +135 -0
  307. arekit/contrib/utils/pipelines/opinion_collections.py +85 -0
  308. arekit/contrib/utils/pipelines/sources/__init__.py +0 -0
  309. arekit/contrib/utils/pipelines/sources/nerel/__init__.py +0 -0
  310. arekit/contrib/utils/pipelines/sources/nerel/doc_provider.py +27 -0
  311. arekit/contrib/utils/pipelines/sources/nerel/extract_text_relations.py +65 -0
  312. arekit/contrib/utils/pipelines/sources/nerel/labels_fmt.py +60 -0
  313. arekit/contrib/utils/pipelines/sources/nerel_bio/__init__.py +0 -0
  314. arekit/contrib/utils/pipelines/sources/nerel_bio/doc_provider.py +29 -0
  315. arekit/contrib/utils/pipelines/sources/nerel_bio/extrat_text_relations.py +64 -0
  316. arekit/contrib/utils/pipelines/sources/nerel_bio/labels_fmt.py +79 -0
  317. arekit/contrib/utils/pipelines/sources/ruattitudes/__init__.py +0 -0
  318. arekit/contrib/utils/pipelines/sources/ruattitudes/doc_provider.py +56 -0
  319. arekit/contrib/utils/pipelines/sources/ruattitudes/entity_filter.py +20 -0
  320. arekit/contrib/utils/pipelines/sources/ruattitudes/extract_text_opinions.py +65 -0
  321. arekit/contrib/utils/pipelines/sources/rusentrel/__init__.py +0 -0
  322. arekit/contrib/utils/pipelines/sources/rusentrel/doc_provider.py +21 -0
  323. arekit/contrib/utils/pipelines/sources/rusentrel/extract_text_opinions.py +107 -0
  324. arekit/contrib/utils/pipelines/sources/sentinerel/__init__.py +0 -0
  325. arekit/contrib/utils/pipelines/sources/sentinerel/doc_provider.py +29 -0
  326. arekit/contrib/utils/pipelines/sources/sentinerel/entity_filter.py +62 -0
  327. arekit/contrib/utils/pipelines/sources/sentinerel/extract_text_opinions.py +180 -0
  328. arekit/contrib/utils/pipelines/sources/sentinerel/labels_fmt.py +50 -0
  329. arekit/contrib/utils/pipelines/text_opinion/__init__.py +0 -0
  330. arekit/contrib/utils/pipelines/text_opinion/annot/__init__.py +0 -0
  331. arekit/contrib/utils/pipelines/text_opinion/annot/algo_based.py +34 -0
  332. arekit/contrib/utils/pipelines/text_opinion/annot/predefined.py +88 -0
  333. arekit/contrib/utils/pipelines/text_opinion/extraction.py +93 -0
  334. arekit/contrib/utils/pipelines/text_opinion/filters/__init__.py +0 -0
  335. arekit/contrib/utils/pipelines/text_opinion/filters/base.py +4 -0
  336. arekit/contrib/utils/pipelines/text_opinion/filters/distance_based.py +16 -0
  337. arekit/contrib/utils/pipelines/text_opinion/filters/entity_based.py +29 -0
  338. arekit/contrib/utils/pipelines/text_opinion/filters/limitation.py +26 -0
  339. arekit/contrib/utils/processing/__init__.py +0 -0
  340. arekit/contrib/utils/processing/languages/__init__.py +0 -0
  341. arekit/contrib/utils/processing/languages/mods.py +12 -0
  342. arekit/contrib/utils/processing/languages/pos.py +23 -0
  343. arekit/contrib/utils/processing/languages/ru/__init__.py +0 -0
  344. arekit/contrib/utils/processing/languages/ru/cases.py +78 -0
  345. arekit/contrib/utils/processing/languages/ru/constants.py +6 -0
  346. arekit/contrib/utils/processing/languages/ru/mods.py +13 -0
  347. arekit/contrib/utils/processing/languages/ru/number.py +23 -0
  348. arekit/contrib/utils/processing/languages/ru/pos_service.py +36 -0
  349. arekit/contrib/utils/processing/lemmatization/__init__.py +0 -0
  350. arekit/contrib/utils/processing/lemmatization/mystem.py +51 -0
  351. arekit/contrib/utils/processing/pos/__init__.py +0 -0
  352. arekit/contrib/utils/processing/pos/base.py +12 -0
  353. arekit/contrib/utils/processing/pos/mystem_wrap.py +134 -0
  354. arekit/contrib/utils/processing/pos/russian.py +10 -0
  355. arekit/contrib/utils/processing/text/__init__.py +0 -0
  356. arekit/contrib/utils/processing/text/tokens.py +127 -0
  357. arekit/contrib/utils/resources.py +25 -0
  358. arekit/contrib/utils/serializer.py +43 -0
  359. arekit/contrib/utils/sources/__init__.py +0 -0
  360. arekit/contrib/utils/sources/sentinerel/__init__.py +0 -0
  361. arekit/contrib/utils/sources/sentinerel/text_opinion/__init__.py +0 -0
  362. arekit/contrib/utils/sources/sentinerel/text_opinion/prof_per_org_filter.py +63 -0
  363. arekit/contrib/utils/synonyms/__init__.py +0 -0
  364. arekit/contrib/utils/synonyms/simple.py +15 -0
  365. arekit/contrib/utils/synonyms/stemmer_based.py +38 -0
  366. arekit/contrib/utils/vectorizers/__init__.py +0 -0
  367. arekit/contrib/utils/vectorizers/bpe.py +93 -0
  368. arekit/contrib/utils/vectorizers/random_norm.py +39 -0
  369. arekit/download_data.py +11 -0
  370. arekit-0.24.0.dist-info/LICENSE +21 -0
  371. arekit-0.24.0.dist-info/METADATA +23 -0
  372. arekit-0.24.0.dist-info/RECORD +374 -0
  373. arekit-0.24.0.dist-info/WHEEL +5 -0
  374. arekit-0.24.0.dist-info/top_level.txt +1 -0
arekit/__init__.py ADDED
File without changes
File without changes
arekit/common/bound.py ADDED
@@ -0,0 +1,48 @@
1
class Bound:
    """ Half-open integer span described by a start position and a length,
        i.e. it covers [Position, Position + Length).
    """

    def __init__(self, pos, length):
        assert isinstance(pos, int)
        assert isinstance(length, int)
        self.__pos = pos
        self.__length = length

    # region properties

    @property
    def Position(self):
        """ Start position of the span. """
        return self.__pos

    @property
    def Length(self):
        """ Amount of positions covered by the span. """
        return self.__length

    # endregion

    def itersects_with(self, other):
        """ Check whether `other` overlaps with this span.

            The last position of `other` is treated inclusively,
            while the end of this span is treated exclusively.
        """
        lo = self.__pos
        hi = lo + self.__length
        o_first = other.Position
        o_last = other.Position + other.Length - 1

        # `other` starts somewhere inside of this span.
        starts_inside = lo <= o_first < hi
        # `other` ends somewhere inside of this span.
        ends_inside = lo <= o_last < hi
        # `other` starts before this span and lasts up to its end or further.
        envelops = o_first < lo and hi <= o_last

        return starts_inside or ends_inside or envelops

    def intersect(self, other):
        """ Compose a new Bound which starts at the smallest begin and
            finishes at the greatest end among this span and `other`.

            NOTE: despite the name, the result covers both spans
            rather than only their common part.
        """
        this_end = self.__pos + self.__length
        that_end = other.Position + other.Length
        start = min(self.__pos, other.Position)
        return Bound(pos=start, length=max(this_end, that_end) - start)

    def contains(self, other):
        """ Check whether `other` lies entirely within this span
            (matching borders are allowed).
        """
        starts_not_earlier = self.__pos <= other.Position
        ends_not_later = self.__pos + self.__length >= other.Position + other.Length
        return starts_not_earlier and ends_not_later
File without changes
@@ -0,0 +1,51 @@
1
+ from collections.abc import Iterable
2
+
3
+ from arekit.common.context.token import Token
4
+ from arekit.common.entities.base import Entity
5
+ from arekit.common.frames.text_variant import TextFrameVariant
6
+
7
+
8
class TextTermsMapper(object):
    """ Base mapper of text terms. Every term is dispatched by its type
        to the related `map_*` handler, which subclasses are expected
        to implement.
    """

    def iter_mapped(self, terms):
        """ Performs mapping operation of each term in a sequence.

            terms: Iterable
                sequence of terms; supported term types are: str, Token,
                TextFrameVariant and Entity (checked in this order).

            Yields the handler results, skipping those that are None.
            `_before_mapping` is called prior to the first term and
            `_after_mapping` once all the terms have been consumed.

            Raises Exception when a term of an unsupported type is met.
        """
        assert(isinstance(terms, Iterable))

        self._before_mapping()

        for i, term in enumerate(terms):

            if isinstance(term, str):
                m_term = self.map_word(i, term)
            elif isinstance(term, Token):
                m_term = self.map_token(i, term)
            elif isinstance(term, TextFrameVariant):
                m_term = self.map_text_frame_variant(i, term)
            elif isinstance(term, Entity):
                m_term = self.map_entity(i, term)
            else:
                # Report the actual type (and the value) of the term;
                # the message announces a type, so the type must be shown.
                raise Exception("Unsupported type {}: {!r}".format(
                    type(term).__name__, term))

            if m_term is not None:
                yield m_term

        self._after_mapping()

    def _before_mapping(self):
        """ Hook called once before the first term is mapped. """
        pass

    def _after_mapping(self):
        """ Hook called once after the last term is mapped. """
        pass

    def map_word(self, w_ind, word):
        """ Map a plain string term located at index `w_ind`. """
        raise NotImplementedError()

    def map_token(self, t_ind, token):
        """ Map a Token term located at index `t_ind`. """
        raise NotImplementedError()

    def map_text_frame_variant(self, fv_ind, text_frame_variant):
        """ Map a TextFrameVariant term located at index `fv_ind`. """
        raise NotImplementedError()

    def map_entity(self, e_ind, entity):
        """ Map an Entity term located at index `e_ind`. """
        raise NotImplementedError()
@@ -0,0 +1,16 @@
1
class Token:
    """
    Pair of the original term and the token value it has been turned into,
    i.e.: term=',', token_value='<[COMMA]>'
    """

    def __init__(self, term, token_value):
        # Both parts are expected to be strings.
        assert isinstance(term, str)
        assert isinstance(token_value, str)
        self.__token_value = token_value
        self.__meta_value = term

    def get_meta_value(self):
        """ Original term the token has been created from. """
        return self.__meta_value

    def get_token_value(self):
        """ Resulted token value. """
        return self.__token_value
File without changes
@@ -0,0 +1,21 @@
1
# Names of the common row columns.
ID = "id"
DOC_ID = "doc_id"
TEXT = "text_a"
LABEL_UINT = "label_uint"
LABEL_STR = "label_str"

# Global identifier of the opinion in the sampled data.
OPINION_ID = "opinion_id"
OPINION_LINKAGE_ID = "linkage_id"

# Fields with the ends of an attitude (indices, INT).
S_IND = "s_ind"
T_IND = "t_ind"

# Index of the sentence.
SENT_IND = "sent_ind"

# Entity-related parameters.
ENTITY_VALUES = "entity_values"
ENTITY_TYPES = "entity_types"
ENTITIES = "entities"
@@ -0,0 +1,6 @@
1
class DocumentProvider:
    """ Provides operations with documents
    """

    def by_id(self, doc_id):
        """ Access a document by `doc_id`; has to be implemented by subclasses.
        """
        raise NotImplementedError()
File without changes
File without changes
File without changes
@@ -0,0 +1,9 @@
1
+
2
class BaseColumnsProvider(object):
    """ Describes the columns every row is expected to have.
    """

    ROW_ID = 'row_id'

    def get_columns_list_with_types(self):
        """ Compose a new list of (column_name, type) pairs;
            the base implementation declares the row identifier only.
        """
        return [(BaseColumnsProvider.ROW_ID, 'int32')]
@@ -0,0 +1,59 @@
1
+ from arekit.common.data import const
2
+ from arekit.common.data.input.providers.columns.base import BaseColumnsProvider
3
+
4
+
5
class SampleColumnsProvider(BaseColumnsProvider):
    """
    [id, label, text_a] -- for train
    [id, text_a] -- for test
    """

    def __init__(self, store_labels):
        super().__init__()
        self.__store_labels = store_labels
        # Remains None until `set_text_column_names` is called.
        self.__text_column_names = None

    # region properties

    @property
    def StoreLabels(self):
        """ Whether label columns are a part of the output. """
        return self.__store_labels

    @property
    def TextColumnNames(self):
        """ Names of the text columns (None if they were not set yet). """
        return self.__text_column_names

    # endregion

    def get_columns_list_with_types(self):
        """
        Extends the base columns list with the sample-related ones, in order:
        id, doc_id, [label_uint, label_str], text columns, s_ind, t_ind,
        opinion_id, linkage_id.

        NOTE: text column names have to be provided beforehand
        via `set_text_column_names`, since they are iterated here.
        """
        columns = super().get_columns_list_with_types()

        columns += [(const.ID, str), (const.DOC_ID, str)]

        # labels are optional
        if self.__store_labels:
            columns += [(const.LABEL_UINT, 'int32'), (const.LABEL_STR, str)]

        # text columns
        columns.extend((name, str) for name in self.__text_column_names)

        # indices of the attitude ends,
        # followed by opinion-extraction task related fields
        columns += [(const.S_IND, 'int32'),
                    (const.T_IND, 'int32'),
                    (const.OPINION_ID, 'int32'),
                    (const.OPINION_LINKAGE_ID, 'int32')]

        return columns

    def set_text_column_names(self, text_column_names):
        """ Assign the list of the text column names. """
        assert isinstance(text_column_names, list)
        self.__text_column_names = text_column_names
@@ -0,0 +1,3 @@
1
+ # Idle mode is an iteration format used only to assess the expected
2
+ # amount of sampled data; the actual contents may be skipped entirely.
3
+ IDLE_MODE = "idle_mode"
@@ -0,0 +1,9 @@
1
+
2
class ContentsProvider(object):
    """ Main provider of the contents for the further sampled output.
    """

    def from_doc_ids(self, doc_ids, idle_mode=False):
        """ The main method, which is expected to be implemented by subclasses.
            The base implementation does nothing and results in None.
        """
        return None
@@ -0,0 +1,14 @@
1
+ from arekit.common.linkage.base import LinkedDataWrapper
2
+
3
+
4
class BaseLinkedDataInstancesProvider(object):
    """ Base provider of the instances composed from a linked data.
    """

    def iter_instances(self, linked_data):
        """ Iterate instances of `linked_data`; has to be implemented by subclasses.
        """
        raise NotImplementedError()

    @staticmethod
    def provide_label(linked_data):
        """ Label of the linkage, taken from its first element.
        """
        assert isinstance(linked_data, LinkedDataWrapper)
        first = linked_data.First
        return first.Label
@@ -0,0 +1,27 @@
1
+ from arekit.common.data.input.providers.instances.base import BaseLinkedDataInstancesProvider
2
+ from arekit.common.linkage.text_opinions import TextOpinionsLinkage
3
+ from arekit.common.text_opinions.base import TextOpinion
4
+
5
+
6
class MultipleInstancesLinkedTextOpinionsProvider(BaseLinkedDataInstancesProvider):
    """ Provides one linkage instance per every supported label.
    """

    def __init__(self, supported_labels):
        assert isinstance(supported_labels, list)
        self.__supported_labels = supported_labels

    def iter_instances(self, linked_data):
        """ Enumerate the linkage as many times as there are supported labels,
            every time with the first opinion relabeled accordingly.
        """
        relabel = self.__modify_first_and_copy_linked_wrap
        for supported_label in self.__supported_labels:
            yield relabel(linked_data, supported_label)

    @staticmethod
    def __modify_first_and_copy_linked_wrap(text_opinions_linkage, label):
        """ Compose a new linkage in which the first text opinion is replaced
            with its copy labeled by `label`; the rest elements are kept.
        """
        assert isinstance(text_opinions_linkage, TextOpinionsLinkage)

        opinions = list(text_opinions_linkage)

        # The original first opinion is left untouched: only its copy is relabeled.
        relabeled = TextOpinion.create_copy(other=opinions[0])
        relabeled.set_label(label=label)
        opinions[0] = relabeled

        return TextOpinionsLinkage(linked_data=opinions)
@@ -0,0 +1,8 @@
1
+ from arekit.common.data.input.providers.instances.base import BaseLinkedDataInstancesProvider
2
+
3
+
4
class SingleInstanceLinkedDataProvider(BaseLinkedDataInstancesProvider):
    """ Provides the given linked data as the only instance.
    """

    def iter_instances(self, linked_data):
        """ Yield `linked_data` itself exactly once.
        """
        yield linked_data
File without changes
@@ -0,0 +1,24 @@
1
+ from arekit.common.labels.scaler.base import BaseLabelScaler
2
+
3
+
4
class LabelProvider(object):
    """ Base provider of the output labels, built on top of a label scaler.
    """

    def __init__(self, label_scaler):
        assert isinstance(label_scaler, BaseLabelScaler)
        self.__label_scaler = label_scaler

    @property
    def LabelScaler(self):
        """ Scaler this provider has been created with. """
        return self.__label_scaler

    @property
    def SupportedLabels(self):
        """ Labels, as reported by `ordered_suppoted_labels()` of the scaler. """
        scaler = self.__label_scaler
        return scaler.ordered_suppoted_labels()

    @property
    def OutputLabelsUint(self):
        """ Has to be implemented by subclasses. """
        raise NotImplementedError()

    def calculate_output_uint_label(self, expected_uint_label, etalon_uint_label):
        """ Has to be implemented by subclasses. """
        raise NotImplementedError()
24
+
@@ -0,0 +1,11 @@
1
+ from arekit.common.data.input.providers.label.base import LabelProvider
2
+
3
+
4
class BinaryLabelProvider(LabelProvider):
    """ Label provider with two possible outputs: 0 and 1.
    """

    @property
    def OutputLabelsUint(self):
        """ All the possible output values. """
        return [0, 1]

    def calculate_output_uint_label(self, expected_uint_label, etalon_uint_label):
        """ Results in 1 when the expected label matches the etalon one, otherwise 0.
        """
        if expected_uint_label == etalon_uint_label:
            return 1
        return 0
@@ -0,0 +1,15 @@
1
+ from arekit.common.data.input.providers.label.base import LabelProvider
2
+
3
+
4
class MultipleLabelProvider(LabelProvider):
    """ Label provider whose output is the expected label itself.
    """

    def __init__(self, label_scaler):
        super(MultipleLabelProvider, self).__init__(label_scaler=label_scaler)

    @property
    def OutputLabelsUint(self):
        """ Supported labels converted by the scaler's `label_to_uint`, in the same order.
        """
        to_uint = self.LabelScaler.label_to_uint
        return [to_uint(label) for label in self.SupportedLabels]

    def calculate_output_uint_label(self, expected_uint_label, etalon_uint_label):
        """ Return the expected label unchanged; the etalon label is ignored.
        """
        return expected_uint_label
15
+
File without changes
@@ -0,0 +1,64 @@
1
+ from collections import Counter
2
+ from collections.abc import Iterable
3
+ import logging
4
+
5
+ from arekit.common.data.input.providers.contents import ContentsProvider
6
+ from arekit.common.linkage.base import LinkedDataWrapper
7
+ from arekit.common.docs.parsed.providers.entity_service import EntityServiceProvider
8
+ from arekit.common.linkage.meta import MetaEmptyLinkedDataWrapper
9
+
10
+ logger = logging.getLogger(__name__)
11
+
12
+
13
class BaseRowProvider(object):
    """ Base provider of the rows that are supposed to be filled into BaseRowsStorage.

        Subclasses implement `_provide_rows`, while `iter_by_rows` drives the
        iteration and owns the counter used by `_count_row`.
    """

    def __init__(self):
        # Assigned by `iter_by_rows` for the time of the iteration.
        self.__rows_counter = None

    # region protected methods

    # TODO. This might be also generalized.
    # TODO. Idle-mode is also an implementation and task specific parameter, i.e. might be removed from here.
    def _provide_rows(self, parsed_doc, entity_service, text_opinion_linkage, idle_mode):
        """ Provide the rows for the given linkage; not implemented in the base class.
        """
        raise NotImplementedError()

    def _count_row(self):
        """ Return the amount of rows counted so far and register one more row.

            Relies on the counter that `iter_by_rows` creates.
        """
        counter = self.__rows_counter
        current = counter["rows_iterated"]
        counter["rows_iterated"] = current + 1
        return current

    # endregion

    def __iter_rows(self, linked_data, idle_mode):
        """ Pass the document related data of `linked_data` to `_provide_rows`.
        """
        # The tag of the linked data gives access to the parsed document
        # and to the providers registered for it.
        doc_service = linked_data.Tag
        parsed_doc = doc_service.ParsedDocument
        entity_service = doc_service.get_provider(EntityServiceProvider.NAME)
        return self._provide_rows(parsed_doc=parsed_doc,
                                  entity_service=entity_service,
                                  text_opinion_linkage=linked_data,
                                  idle_mode=idle_mode)

    def iter_by_rows(self, contents_provider, doc_ids_iter, idle_mode):
        """ Iterate over pairs (related document id, row data).

            contents_provider: ContentsProvider
                source of the linked data for the requested documents.
            doc_ids_iter: Iterable
                identifiers of the documents to be considered.
            idle_mode:
                forwarded to the contents provider and to `_provide_rows`.

            The rows counter is created before the iteration and set back to
            None once the iteration has been exhausted.
        """
        assert isinstance(contents_provider, ContentsProvider)
        assert isinstance(doc_ids_iter, Iterable)

        self.__rows_counter = Counter()

        for linked_data in contents_provider.from_doc_ids(doc_ids=doc_ids_iter, idle_mode=idle_mode):
            assert isinstance(linked_data, LinkedDataWrapper)

            if not isinstance(linked_data, MetaEmptyLinkedDataWrapper):
                # Consider the actual rows of the related linked data.
                rows = self.__iter_rows(linked_data=linked_data, idle_mode=idle_mode)
            elif idle_mode:
                # In the case of the IDLE mode we do not consider the meta-data.
                rows = []
            else:
                # The meta-data instance is passed through as it is.
                rows = [linked_data]

            for row in rows:
                yield linked_data.RelatedDocID, row

        self.__rows_counter = None
@@ -0,0 +1,227 @@
1
+ from collections import OrderedDict
2
+
3
+ from arekit.common.data import const
4
+ from arekit.common.data.input.providers.instances.multiple import MultipleInstancesLinkedTextOpinionsProvider
5
+ from arekit.common.data.input.providers.instances.single import SingleInstanceLinkedDataProvider
6
+ from arekit.common.data.input.providers.label.base import LabelProvider
7
+ from arekit.common.data.input.providers.label.binary import BinaryLabelProvider
8
+ from arekit.common.data.input.providers.label.multiple import MultipleLabelProvider
9
+ from arekit.common.data.input.providers.rows.base import BaseRowProvider
10
+ from arekit.common.data.input.providers.text.single import BaseSingleTextProvider
11
+ from arekit.common.data.rows_fmt import create_base_column_fmt
12
+ from arekit.common.entities.base import Entity
13
+ from arekit.common.labels.base import Label
14
+
15
+ from arekit.common.linkage.text_opinions import TextOpinionsLinkage
16
+ from arekit.common.docs.parsed.base import ParsedDocument
17
+ from arekit.common.docs.parsed.providers.entity_service import EntityEndType, EntityServiceProvider
18
+ from arekit.common.docs.parsed.term_position import TermPositionTypes
19
+ from arekit.common.text_opinions.base import TextOpinion
20
+
21
+
22
+ # TODO. This is actually a text-opinion related sampler.
23
+ # TODO. Here we may expose all the text-opinion related params.
24
+ # TODO. With more generalized API in base class.
25
class BaseSampleRowProvider(BaseRowProvider):
    """ Provider of the rows for the samples storage.

        `set_store_labels` has to be called before the rows are filled,
        since `_fill_row_core` expects that flag to be a boolean.
    """

    def __init__(self, label_provider, text_provider):
        assert isinstance(label_provider, LabelProvider)
        assert isinstance(text_provider, BaseSingleTextProvider)
        super(BaseSampleRowProvider, self).__init__()

        self._label_provider = label_provider
        self.__text_provider = text_provider
        self.__instances_provider = self.__create_instances_provider(label_provider)
        # Remains None until `set_store_labels` is called.
        self.__store_labels = None
        self._val_fmt = create_base_column_fmt(fmt_type="writer")

    # region properties

    @property
    def LabelProvider(self):
        return self._label_provider

    @property
    def TextProvider(self):
        return self.__text_provider

    # endregion

    # region protected methods

    def _provide_sentence_terms(self, parsed_doc, sentence_ind, s_ind, t_ind):
        """ Return the list of the sentence terms together with the
            given source and target indices, the latter two as they are.
        """
        terms = list(parsed_doc.iter_sentence_terms(sentence_index=sentence_ind, return_id=False))
        return terms, s_ind, t_ind

    # TODO. This is a very task-specific description, too many data provided.
    # TODO. Switch this API to dict of params
    def _fill_row_core(self, row, text_opinion_linkage, index_in_linked, etalon_label,
                       parsed_doc, sentence_ind, s_ind, t_ind):
        """ Compose the column values of a single sample and write them into `row`.

            Label columns are filled only when label storing is enabled;
            columns whose value stays None are not written at all.
        """
        assert isinstance(self.__store_labels, bool)

        sentence_terms, actual_s_ind, actual_t_ind = self._provide_sentence_terms(
            parsed_doc=parsed_doc, sentence_ind=sentence_ind, s_ind=s_ind, t_ind=t_ind)

        # Entities mentioned in the related context.
        entities = [term for term in sentence_terms if isinstance(term, Entity)]

        # Values mapping.
        vm = {
            const.ID: self._count_row(),
            const.OPINION_ID: text_opinion_linkage.First.TextOpinionID,
            const.OPINION_LINKAGE_ID: index_in_linked,
            const.DOC_ID: text_opinion_linkage.First.DocID,
            const.SENT_IND: sentence_ind,
            const.ENTITY_VALUES: entities,
            const.ENTITY_TYPES: entities,
            # Positions of the entities among the sentence terms.
            const.ENTITIES: [str(term_ind) for term_ind, term in enumerate(sentence_terms)
                             if isinstance(term, Entity)],
            const.S_IND: actual_s_ind,
            const.T_IND: actual_t_ind,
            const.LABEL_UINT: None,
            const.LABEL_STR: None
        }

        # Callback through which the text provider stores its columns.
        def assign_value(column, value):
            vm[column] = value

        expected_label = text_opinion_linkage.get_linked_label()

        self.__text_provider.add_text_in_row(
            set_text_func=assign_value, sentence_terms=sentence_terms,
            s_ind=actual_s_ind, t_ind=actual_t_ind,
            expected_label=expected_label)

        if self.__store_labels:
            scaler = self._label_provider.LabelScaler
            uint_label = self._label_provider.calculate_output_uint_label(
                expected_uint_label=scaler.label_to_uint(expected_label),
                etalon_uint_label=scaler.label_to_uint(etalon_label))
            vm[const.LABEL_UINT] = uint_label
            # The label is stored by the name of its class.
            vm[const.LABEL_STR] = type(scaler.uint_to_label(uint_label)).__name__

        self._apply_row_data(row=row, vm=vm, val_fmt=self._val_fmt)

    @staticmethod
    def _apply_row_data(row, vm, val_fmt):
        """ Copy every non-None value of `vm` into `row`, passing it through
            the formatter of `val_fmt` when one is registered for the column.
        """
        for column, value in vm.items():
            if value is not None:
                row[column] = val_fmt[column](value) if column in val_fmt else value

    def _provide_rows(self, parsed_doc, entity_service, text_opinion_linkage, idle_mode):
        """ Iterate the rows for every position of the linkage.

            NOTE: a single OrderedDict instance is shared by all the yielded
            rows; it is cleared and refilled for every next row.
        """
        assert isinstance(idle_mode, bool)

        shared_row = OrderedDict()

        for position in range(len(text_opinion_linkage)):
            yield from self.__provide_rows(
                parsed_doc=parsed_doc,
                entity_service=entity_service,
                row_dict=shared_row,
                text_opinion_linkage=text_opinion_linkage,
                index_in_linked=position,
                idle_mode=idle_mode)

    # endregion

    # region private methods

    @staticmethod
    def __create_instances_provider(label_provider):
        """ Choose the instances provider by the type of the label provider.

            Returns None for a label provider of any other type.
        """
        # TODO. #473 related: these label providers are based on text opinion extraction task!
        if isinstance(label_provider, BinaryLabelProvider):
            return MultipleInstancesLinkedTextOpinionsProvider(label_provider.SupportedLabels)
        elif isinstance(label_provider, MultipleLabelProvider):
            return SingleInstanceLinkedDataProvider()
        return None

    def __provide_rows(self, row_dict, parsed_doc, entity_service,
                       text_opinion_linkage, index_in_linked, idle_mode):
        """ Yield a row for every instance that the instances provider
            derives from the given linkage.
        """
        assert isinstance(parsed_doc, ParsedDocument)
        assert isinstance(row_dict, OrderedDict)
        assert isinstance(text_opinion_linkage, TextOpinionsLinkage)

        instances_provider = self.__instances_provider
        etalon_label = instances_provider.provide_label(text_opinion_linkage)

        for linkage_instance in instances_provider.iter_instances(text_opinion_linkage):
            yield self.__create_row(row=row_dict,
                                    row_id=0,
                                    parsed_doc=parsed_doc,
                                    entity_service=entity_service,
                                    text_opinions_linkage=linkage_instance,
                                    index_in_linked=index_in_linked,
                                    # TODO. provide uint_label
                                    etalon_label=etalon_label,
                                    idle_mode=idle_mode)

    def __create_row(self, row, row_id, parsed_doc, entity_service, text_opinions_linkage,
                     index_in_linked, etalon_label, idle_mode):
        """ Clear `row` and fill it for the text opinion at `index_in_linked`.

            `row_id` is accepted but not used.

            returns: OrderedDict or None
                the same `row` object with the composed values,
                or None in the case of the idle mode.

            raises: Exception
                when the source and the target of the text opinion
                belong to different sentences.
        """
        assert isinstance(row, OrderedDict)
        assert isinstance(text_opinions_linkage, TextOpinionsLinkage)
        assert isinstance(index_in_linked, int)
        assert isinstance(etalon_label, Label)
        assert isinstance(idle_mode, bool)

        if idle_mode:
            return None

        text_opinion = text_opinions_linkage[index_in_linked]

        s_ind, t_ind = self.__get_opinion_end_indices(entity_service, text_opinion)

        row.clear()

        def sentence_index_of(end_type):
            return entity_service.extract_entity_position(
                text_opinion=text_opinion, end_type=end_type,
                position_type=TermPositionTypes.SentenceIndex)

        source_s_ind = sentence_index_of(EntityEndType.Source)
        target_s_ind = sentence_index_of(EntityEndType.Target)

        if target_s_ind != source_s_ind:
            raise Exception("Limitation: Multi-Sentence text_opinions are not supported.")

        self._fill_row_core(row=row,
                            parsed_doc=parsed_doc,
                            sentence_ind=source_s_ind,
                            text_opinion_linkage=text_opinions_linkage,
                            index_in_linked=index_in_linked,
                            etalon_label=etalon_label,
                            s_ind=s_ind,
                            t_ind=t_ind)
        return row

    @staticmethod
    def __get_opinion_end_indices(service, text_opinion):
        """ Return the in-sentence indices of the source and of the target
            entities of the text opinion, in that order.
        """
        assert isinstance(service, EntityServiceProvider)
        assert isinstance(text_opinion, TextOpinion)

        source_position = service.get_entity_position(text_opinion.SourceId)
        s_ind = source_position.get_index(position_type=TermPositionTypes.IndexInSentence)

        target_position = service.get_entity_position(text_opinion.TargetId)
        t_ind = target_position.get_index(position_type=TermPositionTypes.IndexInSentence)

        return s_ind, t_ind

    # endregion

    def set_store_labels(self, store_labels):
        """ Enable or disable filling of the label columns of the rows.
        """
        assert isinstance(store_labels, bool)
        self.__store_labels = store_labels
File without changes