arekit 0.24.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (374) hide show
  1. arekit/__init__.py +0 -0
  2. arekit/common/__init__.py +0 -0
  3. arekit/common/bound.py +48 -0
  4. arekit/common/context/__init__.py +0 -0
  5. arekit/common/context/terms_mapper.py +51 -0
  6. arekit/common/context/token.py +16 -0
  7. arekit/common/data/__init__.py +0 -0
  8. arekit/common/data/const.py +21 -0
  9. arekit/common/data/doc_provider.py +6 -0
  10. arekit/common/data/input/__init__.py +0 -0
  11. arekit/common/data/input/providers/__init__.py +0 -0
  12. arekit/common/data/input/providers/columns/__init__.py +0 -0
  13. arekit/common/data/input/providers/columns/base.py +9 -0
  14. arekit/common/data/input/providers/columns/sample.py +59 -0
  15. arekit/common/data/input/providers/const.py +3 -0
  16. arekit/common/data/input/providers/contents.py +9 -0
  17. arekit/common/data/input/providers/instances/__init__.py +0 -0
  18. arekit/common/data/input/providers/instances/base.py +14 -0
  19. arekit/common/data/input/providers/instances/multiple.py +27 -0
  20. arekit/common/data/input/providers/instances/single.py +8 -0
  21. arekit/common/data/input/providers/label/__init__.py +0 -0
  22. arekit/common/data/input/providers/label/base.py +24 -0
  23. arekit/common/data/input/providers/label/binary.py +11 -0
  24. arekit/common/data/input/providers/label/multiple.py +15 -0
  25. arekit/common/data/input/providers/rows/__init__.py +0 -0
  26. arekit/common/data/input/providers/rows/base.py +64 -0
  27. arekit/common/data/input/providers/rows/samples.py +227 -0
  28. arekit/common/data/input/providers/sample/__init__.py +0 -0
  29. arekit/common/data/input/providers/sample/cropped.py +43 -0
  30. arekit/common/data/input/providers/text/__init__.py +0 -0
  31. arekit/common/data/input/providers/text/single.py +49 -0
  32. arekit/common/data/input/repositories/__init__.py +0 -0
  33. arekit/common/data/input/repositories/base.py +68 -0
  34. arekit/common/data/input/repositories/sample.py +22 -0
  35. arekit/common/data/input/sample.py +66 -0
  36. arekit/common/data/input/terms_mapper.py +88 -0
  37. arekit/common/data/rows_fmt.py +82 -0
  38. arekit/common/data/rows_parser.py +43 -0
  39. arekit/common/data/storages/__init__.py +0 -0
  40. arekit/common/data/storages/base.py +109 -0
  41. arekit/common/data/views/__init__.py +0 -0
  42. arekit/common/data/views/samples.py +26 -0
  43. arekit/common/docs/__init__.py +0 -0
  44. arekit/common/docs/base.py +30 -0
  45. arekit/common/docs/entities_grouping.py +16 -0
  46. arekit/common/docs/entity.py +18 -0
  47. arekit/common/docs/objects_parser.py +37 -0
  48. arekit/common/docs/parsed/__init__.py +0 -0
  49. arekit/common/docs/parsed/base.py +101 -0
  50. arekit/common/docs/parsed/providers/__init__.py +0 -0
  51. arekit/common/docs/parsed/providers/base.py +68 -0
  52. arekit/common/docs/parsed/providers/base_pairs.py +51 -0
  53. arekit/common/docs/parsed/providers/entity_service.py +175 -0
  54. arekit/common/docs/parsed/providers/opinion_pairs.py +20 -0
  55. arekit/common/docs/parsed/providers/text_opinion_pairs.py +78 -0
  56. arekit/common/docs/parsed/service.py +31 -0
  57. arekit/common/docs/parsed/term_position.py +42 -0
  58. arekit/common/docs/parser.py +34 -0
  59. arekit/common/docs/sentence.py +14 -0
  60. arekit/common/entities/__init__.py +0 -0
  61. arekit/common/entities/base.py +51 -0
  62. arekit/common/entities/collection.py +72 -0
  63. arekit/common/entities/str_fmt.py +8 -0
  64. arekit/common/entities/types.py +9 -0
  65. arekit/common/experiment/__init__.py +0 -0
  66. arekit/common/experiment/api/__init__.py +0 -0
  67. arekit/common/experiment/api/base_samples_io.py +20 -0
  68. arekit/common/experiment/data_type.py +17 -0
  69. arekit/common/frames/__init__.py +0 -0
  70. arekit/common/frames/connotations/__init__.py +0 -0
  71. arekit/common/frames/connotations/descriptor.py +17 -0
  72. arekit/common/frames/connotations/provider.py +4 -0
  73. arekit/common/frames/text_variant.py +43 -0
  74. arekit/common/frames/variants/__init__.py +0 -0
  75. arekit/common/frames/variants/base.py +21 -0
  76. arekit/common/frames/variants/collection.py +60 -0
  77. arekit/common/labels/__init__.py +0 -0
  78. arekit/common/labels/base.py +19 -0
  79. arekit/common/labels/provider/__init__.py +0 -0
  80. arekit/common/labels/provider/base.py +7 -0
  81. arekit/common/labels/provider/constant.py +14 -0
  82. arekit/common/labels/scaler/__init__.py +0 -0
  83. arekit/common/labels/scaler/base.py +85 -0
  84. arekit/common/labels/scaler/sentiment.py +7 -0
  85. arekit/common/labels/scaler/single.py +10 -0
  86. arekit/common/labels/str_fmt.py +55 -0
  87. arekit/common/linkage/__init__.py +0 -0
  88. arekit/common/linkage/base.py +44 -0
  89. arekit/common/linkage/meta.py +23 -0
  90. arekit/common/linkage/opinions.py +9 -0
  91. arekit/common/linkage/text_opinions.py +22 -0
  92. arekit/common/log_utils.py +29 -0
  93. arekit/common/model/__init__.py +0 -0
  94. arekit/common/model/labeling/__init__.py +0 -0
  95. arekit/common/model/labeling/base.py +24 -0
  96. arekit/common/model/labeling/modes.py +8 -0
  97. arekit/common/model/labeling/single.py +24 -0
  98. arekit/common/opinions/__init__.py +0 -0
  99. arekit/common/opinions/annot/__init__.py +0 -0
  100. arekit/common/opinions/annot/algo/__init__.py +0 -0
  101. arekit/common/opinions/annot/algo/base.py +4 -0
  102. arekit/common/opinions/annot/algo/pair_based.py +99 -0
  103. arekit/common/opinions/annot/algo/predefined.py +16 -0
  104. arekit/common/opinions/annot/algo_based.py +55 -0
  105. arekit/common/opinions/annot/base.py +15 -0
  106. arekit/common/opinions/base.py +74 -0
  107. arekit/common/opinions/collection.py +150 -0
  108. arekit/common/opinions/enums.py +6 -0
  109. arekit/common/opinions/provider.py +4 -0
  110. arekit/common/opinions/writer.py +4 -0
  111. arekit/common/pipeline/__init__.py +0 -0
  112. arekit/common/pipeline/base.py +25 -0
  113. arekit/common/pipeline/context.py +36 -0
  114. arekit/common/pipeline/conts.py +2 -0
  115. arekit/common/pipeline/items/__init__.py +0 -0
  116. arekit/common/pipeline/items/base.py +12 -0
  117. arekit/common/pipeline/items/flatten.py +14 -0
  118. arekit/common/pipeline/items/handle.py +17 -0
  119. arekit/common/pipeline/items/iter.py +11 -0
  120. arekit/common/pipeline/items/map.py +11 -0
  121. arekit/common/pipeline/items/map_nested.py +13 -0
  122. arekit/common/synonyms/__init__.py +0 -0
  123. arekit/common/synonyms/base.py +151 -0
  124. arekit/common/synonyms/grouping.py +21 -0
  125. arekit/common/text/__init__.py +0 -0
  126. arekit/common/text/enums.py +12 -0
  127. arekit/common/text/parsed.py +42 -0
  128. arekit/common/text/parser.py +12 -0
  129. arekit/common/text/partitioning/__init__.py +0 -0
  130. arekit/common/text/partitioning/base.py +4 -0
  131. arekit/common/text/partitioning/str.py +36 -0
  132. arekit/common/text/partitioning/terms.py +35 -0
  133. arekit/common/text/stemmer.py +16 -0
  134. arekit/common/text_opinions/__init__.py +0 -0
  135. arekit/common/text_opinions/base.py +105 -0
  136. arekit/common/utils.py +129 -0
  137. arekit/contrib/__init__.py +0 -0
  138. arekit/contrib/bert/__init__.py +0 -0
  139. arekit/contrib/bert/input/__init__.py +0 -0
  140. arekit/contrib/bert/input/providers/__init__.py +0 -0
  141. arekit/contrib/bert/input/providers/cropped_sample.py +17 -0
  142. arekit/contrib/bert/input/providers/text_pair.py +62 -0
  143. arekit/contrib/bert/terms/__init__.py +0 -0
  144. arekit/contrib/bert/terms/mapper.py +20 -0
  145. arekit/contrib/networks/__init__.py +0 -0
  146. arekit/contrib/networks/embedding.py +149 -0
  147. arekit/contrib/networks/embedding_io.py +18 -0
  148. arekit/contrib/networks/input/__init__.py +0 -0
  149. arekit/contrib/networks/input/const.py +6 -0
  150. arekit/contrib/networks/input/ctx_serialization.py +28 -0
  151. arekit/contrib/networks/input/embedding/__init__.py +0 -0
  152. arekit/contrib/networks/input/embedding/matrix.py +29 -0
  153. arekit/contrib/networks/input/embedding/offsets.py +55 -0
  154. arekit/contrib/networks/input/formatters/__init__.py +0 -0
  155. arekit/contrib/networks/input/formatters/pos_mapper.py +22 -0
  156. arekit/contrib/networks/input/providers/__init__.py +0 -0
  157. arekit/contrib/networks/input/providers/sample.py +129 -0
  158. arekit/contrib/networks/input/providers/term_connotation.py +23 -0
  159. arekit/contrib/networks/input/providers/text.py +24 -0
  160. arekit/contrib/networks/input/rows_parser.py +47 -0
  161. arekit/contrib/networks/input/term_types.py +13 -0
  162. arekit/contrib/networks/input/terms_mapping.py +60 -0
  163. arekit/contrib/networks/vectorizer.py +6 -0
  164. arekit/contrib/prompt/__init__.py +0 -0
  165. arekit/contrib/prompt/sample.py +61 -0
  166. arekit/contrib/source/__init__.py +0 -0
  167. arekit/contrib/source/brat/__init__.py +0 -0
  168. arekit/contrib/source/brat/annot.py +84 -0
  169. arekit/contrib/source/brat/doc.py +28 -0
  170. arekit/contrib/source/brat/entities/__init__.py +0 -0
  171. arekit/contrib/source/brat/entities/compound.py +13 -0
  172. arekit/contrib/source/brat/entities/entity.py +42 -0
  173. arekit/contrib/source/brat/entities/parser.py +53 -0
  174. arekit/contrib/source/brat/opinions/__init__.py +0 -0
  175. arekit/contrib/source/brat/opinions/converter.py +19 -0
  176. arekit/contrib/source/brat/relation.py +32 -0
  177. arekit/contrib/source/brat/sentence.py +69 -0
  178. arekit/contrib/source/brat/sentences_reader.py +128 -0
  179. arekit/contrib/source/download.py +41 -0
  180. arekit/contrib/source/nerel/__init__.py +0 -0
  181. arekit/contrib/source/nerel/entities.py +55 -0
  182. arekit/contrib/source/nerel/folding/__init__.py +0 -0
  183. arekit/contrib/source/nerel/folding/fixed.py +74 -0
  184. arekit/contrib/source/nerel/io_utils.py +62 -0
  185. arekit/contrib/source/nerel/labels.py +241 -0
  186. arekit/contrib/source/nerel/reader.py +46 -0
  187. arekit/contrib/source/nerel/utils.py +24 -0
  188. arekit/contrib/source/nerel/versions.py +12 -0
  189. arekit/contrib/source/nerelbio/__init__.py +0 -0
  190. arekit/contrib/source/nerelbio/io_utils.py +62 -0
  191. arekit/contrib/source/nerelbio/labels.py +265 -0
  192. arekit/contrib/source/nerelbio/reader.py +8 -0
  193. arekit/contrib/source/nerelbio/versions.py +8 -0
  194. arekit/contrib/source/ruattitudes/__init__.py +0 -0
  195. arekit/contrib/source/ruattitudes/collection.py +36 -0
  196. arekit/contrib/source/ruattitudes/doc.py +51 -0
  197. arekit/contrib/source/ruattitudes/doc_brat.py +44 -0
  198. arekit/contrib/source/ruattitudes/entity/__init__.py +0 -0
  199. arekit/contrib/source/ruattitudes/entity/parser.py +7 -0
  200. arekit/contrib/source/ruattitudes/io_utils.py +56 -0
  201. arekit/contrib/source/ruattitudes/labels_fmt.py +12 -0
  202. arekit/contrib/source/ruattitudes/opinions/__init__.py +0 -0
  203. arekit/contrib/source/ruattitudes/opinions/base.py +28 -0
  204. arekit/contrib/source/ruattitudes/opinions/converter.py +37 -0
  205. arekit/contrib/source/ruattitudes/reader.py +268 -0
  206. arekit/contrib/source/ruattitudes/sentence.py +73 -0
  207. arekit/contrib/source/ruattitudes/synonyms.py +17 -0
  208. arekit/contrib/source/ruattitudes/text_object.py +59 -0
  209. arekit/contrib/source/rusentiframes/__init__.py +0 -0
  210. arekit/contrib/source/rusentiframes/collection.py +157 -0
  211. arekit/contrib/source/rusentiframes/effect.py +24 -0
  212. arekit/contrib/source/rusentiframes/io_utils.py +19 -0
  213. arekit/contrib/source/rusentiframes/labels_fmt.py +22 -0
  214. arekit/contrib/source/rusentiframes/polarity.py +35 -0
  215. arekit/contrib/source/rusentiframes/role.py +15 -0
  216. arekit/contrib/source/rusentiframes/state.py +24 -0
  217. arekit/contrib/source/rusentiframes/types.py +42 -0
  218. arekit/contrib/source/rusentiframes/value.py +2 -0
  219. arekit/contrib/source/rusentrel/__init__.py +0 -0
  220. arekit/contrib/source/rusentrel/const.py +3 -0
  221. arekit/contrib/source/rusentrel/docs_reader.py +51 -0
  222. arekit/contrib/source/rusentrel/entities.py +26 -0
  223. arekit/contrib/source/rusentrel/io_utils.py +125 -0
  224. arekit/contrib/source/rusentrel/labels_fmt.py +12 -0
  225. arekit/contrib/source/rusentrel/opinions/__init__.py +0 -0
  226. arekit/contrib/source/rusentrel/opinions/collection.py +30 -0
  227. arekit/contrib/source/rusentrel/opinions/converter.py +40 -0
  228. arekit/contrib/source/rusentrel/opinions/provider.py +54 -0
  229. arekit/contrib/source/rusentrel/opinions/writer.py +42 -0
  230. arekit/contrib/source/rusentrel/synonyms.py +17 -0
  231. arekit/contrib/source/sentinerel/__init__.py +0 -0
  232. arekit/contrib/source/sentinerel/entities.py +52 -0
  233. arekit/contrib/source/sentinerel/folding/__init__.py +0 -0
  234. arekit/contrib/source/sentinerel/folding/factory.py +31 -0
  235. arekit/contrib/source/sentinerel/folding/fixed.py +70 -0
  236. arekit/contrib/source/sentinerel/io_utils.py +87 -0
  237. arekit/contrib/source/sentinerel/labels.py +53 -0
  238. arekit/contrib/source/sentinerel/labels_scaler.py +30 -0
  239. arekit/contrib/source/sentinerel/reader.py +42 -0
  240. arekit/contrib/source/synonyms/__init__.py +0 -0
  241. arekit/contrib/source/synonyms/utils.py +19 -0
  242. arekit/contrib/source/zip_utils.py +47 -0
  243. arekit/contrib/utils/__init__.py +0 -0
  244. arekit/contrib/utils/bert/__init__.py +0 -0
  245. arekit/contrib/utils/bert/samplers.py +17 -0
  246. arekit/contrib/utils/connotations/__init__.py +0 -0
  247. arekit/contrib/utils/connotations/rusentiframes_sentiment.py +23 -0
  248. arekit/contrib/utils/data/__init__.py +0 -0
  249. arekit/contrib/utils/data/contents/__init__.py +0 -0
  250. arekit/contrib/utils/data/contents/opinions.py +37 -0
  251. arekit/contrib/utils/data/doc_provider/__init__.py +0 -0
  252. arekit/contrib/utils/data/doc_provider/dict_based.py +13 -0
  253. arekit/contrib/utils/data/doc_provider/dir_based.py +53 -0
  254. arekit/contrib/utils/data/readers/__init__.py +0 -0
  255. arekit/contrib/utils/data/readers/base.py +7 -0
  256. arekit/contrib/utils/data/readers/csv_pd.py +38 -0
  257. arekit/contrib/utils/data/readers/jsonl.py +15 -0
  258. arekit/contrib/utils/data/service/__init__.py +0 -0
  259. arekit/contrib/utils/data/service/balance.py +50 -0
  260. arekit/contrib/utils/data/storages/__init__.py +0 -0
  261. arekit/contrib/utils/data/storages/jsonl_based.py +18 -0
  262. arekit/contrib/utils/data/storages/pandas_based.py +123 -0
  263. arekit/contrib/utils/data/storages/row_cache.py +48 -0
  264. arekit/contrib/utils/data/writers/__init__.py +0 -0
  265. arekit/contrib/utils/data/writers/base.py +27 -0
  266. arekit/contrib/utils/data/writers/csv_native.py +63 -0
  267. arekit/contrib/utils/data/writers/csv_pd.py +40 -0
  268. arekit/contrib/utils/data/writers/json_opennre.py +132 -0
  269. arekit/contrib/utils/data/writers/sqlite_native.py +110 -0
  270. arekit/contrib/utils/download.py +77 -0
  271. arekit/contrib/utils/embeddings/__init__.py +0 -0
  272. arekit/contrib/utils/embeddings/rusvectores.py +58 -0
  273. arekit/contrib/utils/embeddings/tokens.py +30 -0
  274. arekit/contrib/utils/entities/__init__.py +0 -0
  275. arekit/contrib/utils/entities/filter.py +7 -0
  276. arekit/contrib/utils/entities/formatters/__init__.py +0 -0
  277. arekit/contrib/utils/entities/formatters/str_display.py +11 -0
  278. arekit/contrib/utils/entities/formatters/str_simple_sharp_prefixed_fmt.py +15 -0
  279. arekit/contrib/utils/io_utils/__init__.py +0 -0
  280. arekit/contrib/utils/io_utils/embedding.py +72 -0
  281. arekit/contrib/utils/io_utils/opinions.py +37 -0
  282. arekit/contrib/utils/io_utils/samples.py +79 -0
  283. arekit/contrib/utils/io_utils/utils.py +39 -0
  284. arekit/contrib/utils/lexicons/__init__.py +0 -0
  285. arekit/contrib/utils/lexicons/lexicon.py +41 -0
  286. arekit/contrib/utils/lexicons/relation.py +42 -0
  287. arekit/contrib/utils/lexicons/rusentilex.py +37 -0
  288. arekit/contrib/utils/nn/__init__.py +0 -0
  289. arekit/contrib/utils/nn/rows.py +83 -0
  290. arekit/contrib/utils/np_utils/__init__.py +0 -0
  291. arekit/contrib/utils/np_utils/embedding.py +22 -0
  292. arekit/contrib/utils/np_utils/npz_utils.py +13 -0
  293. arekit/contrib/utils/np_utils/vocab.py +20 -0
  294. arekit/contrib/utils/pipelines/__init__.py +0 -0
  295. arekit/contrib/utils/pipelines/items/__init__.py +0 -0
  296. arekit/contrib/utils/pipelines/items/sampling/__init__.py +0 -0
  297. arekit/contrib/utils/pipelines/items/sampling/base.py +99 -0
  298. arekit/contrib/utils/pipelines/items/sampling/networks.py +54 -0
  299. arekit/contrib/utils/pipelines/items/text/__init__.py +0 -0
  300. arekit/contrib/utils/pipelines/items/text/entities_default.py +23 -0
  301. arekit/contrib/utils/pipelines/items/text/frames.py +86 -0
  302. arekit/contrib/utils/pipelines/items/text/frames_lemmatized.py +36 -0
  303. arekit/contrib/utils/pipelines/items/text/frames_negation.py +32 -0
  304. arekit/contrib/utils/pipelines/items/text/terms_splitter.py +10 -0
  305. arekit/contrib/utils/pipelines/items/text/tokenizer.py +107 -0
  306. arekit/contrib/utils/pipelines/items/text/translator.py +135 -0
  307. arekit/contrib/utils/pipelines/opinion_collections.py +85 -0
  308. arekit/contrib/utils/pipelines/sources/__init__.py +0 -0
  309. arekit/contrib/utils/pipelines/sources/nerel/__init__.py +0 -0
  310. arekit/contrib/utils/pipelines/sources/nerel/doc_provider.py +27 -0
  311. arekit/contrib/utils/pipelines/sources/nerel/extract_text_relations.py +65 -0
  312. arekit/contrib/utils/pipelines/sources/nerel/labels_fmt.py +60 -0
  313. arekit/contrib/utils/pipelines/sources/nerel_bio/__init__.py +0 -0
  314. arekit/contrib/utils/pipelines/sources/nerel_bio/doc_provider.py +29 -0
  315. arekit/contrib/utils/pipelines/sources/nerel_bio/extrat_text_relations.py +64 -0
  316. arekit/contrib/utils/pipelines/sources/nerel_bio/labels_fmt.py +79 -0
  317. arekit/contrib/utils/pipelines/sources/ruattitudes/__init__.py +0 -0
  318. arekit/contrib/utils/pipelines/sources/ruattitudes/doc_provider.py +56 -0
  319. arekit/contrib/utils/pipelines/sources/ruattitudes/entity_filter.py +20 -0
  320. arekit/contrib/utils/pipelines/sources/ruattitudes/extract_text_opinions.py +65 -0
  321. arekit/contrib/utils/pipelines/sources/rusentrel/__init__.py +0 -0
  322. arekit/contrib/utils/pipelines/sources/rusentrel/doc_provider.py +21 -0
  323. arekit/contrib/utils/pipelines/sources/rusentrel/extract_text_opinions.py +107 -0
  324. arekit/contrib/utils/pipelines/sources/sentinerel/__init__.py +0 -0
  325. arekit/contrib/utils/pipelines/sources/sentinerel/doc_provider.py +29 -0
  326. arekit/contrib/utils/pipelines/sources/sentinerel/entity_filter.py +62 -0
  327. arekit/contrib/utils/pipelines/sources/sentinerel/extract_text_opinions.py +180 -0
  328. arekit/contrib/utils/pipelines/sources/sentinerel/labels_fmt.py +50 -0
  329. arekit/contrib/utils/pipelines/text_opinion/__init__.py +0 -0
  330. arekit/contrib/utils/pipelines/text_opinion/annot/__init__.py +0 -0
  331. arekit/contrib/utils/pipelines/text_opinion/annot/algo_based.py +34 -0
  332. arekit/contrib/utils/pipelines/text_opinion/annot/predefined.py +88 -0
  333. arekit/contrib/utils/pipelines/text_opinion/extraction.py +93 -0
  334. arekit/contrib/utils/pipelines/text_opinion/filters/__init__.py +0 -0
  335. arekit/contrib/utils/pipelines/text_opinion/filters/base.py +4 -0
  336. arekit/contrib/utils/pipelines/text_opinion/filters/distance_based.py +16 -0
  337. arekit/contrib/utils/pipelines/text_opinion/filters/entity_based.py +29 -0
  338. arekit/contrib/utils/pipelines/text_opinion/filters/limitation.py +26 -0
  339. arekit/contrib/utils/processing/__init__.py +0 -0
  340. arekit/contrib/utils/processing/languages/__init__.py +0 -0
  341. arekit/contrib/utils/processing/languages/mods.py +12 -0
  342. arekit/contrib/utils/processing/languages/pos.py +23 -0
  343. arekit/contrib/utils/processing/languages/ru/__init__.py +0 -0
  344. arekit/contrib/utils/processing/languages/ru/cases.py +78 -0
  345. arekit/contrib/utils/processing/languages/ru/constants.py +6 -0
  346. arekit/contrib/utils/processing/languages/ru/mods.py +13 -0
  347. arekit/contrib/utils/processing/languages/ru/number.py +23 -0
  348. arekit/contrib/utils/processing/languages/ru/pos_service.py +36 -0
  349. arekit/contrib/utils/processing/lemmatization/__init__.py +0 -0
  350. arekit/contrib/utils/processing/lemmatization/mystem.py +51 -0
  351. arekit/contrib/utils/processing/pos/__init__.py +0 -0
  352. arekit/contrib/utils/processing/pos/base.py +12 -0
  353. arekit/contrib/utils/processing/pos/mystem_wrap.py +134 -0
  354. arekit/contrib/utils/processing/pos/russian.py +10 -0
  355. arekit/contrib/utils/processing/text/__init__.py +0 -0
  356. arekit/contrib/utils/processing/text/tokens.py +127 -0
  357. arekit/contrib/utils/resources.py +25 -0
  358. arekit/contrib/utils/serializer.py +43 -0
  359. arekit/contrib/utils/sources/__init__.py +0 -0
  360. arekit/contrib/utils/sources/sentinerel/__init__.py +0 -0
  361. arekit/contrib/utils/sources/sentinerel/text_opinion/__init__.py +0 -0
  362. arekit/contrib/utils/sources/sentinerel/text_opinion/prof_per_org_filter.py +63 -0
  363. arekit/contrib/utils/synonyms/__init__.py +0 -0
  364. arekit/contrib/utils/synonyms/simple.py +15 -0
  365. arekit/contrib/utils/synonyms/stemmer_based.py +38 -0
  366. arekit/contrib/utils/vectorizers/__init__.py +0 -0
  367. arekit/contrib/utils/vectorizers/bpe.py +93 -0
  368. arekit/contrib/utils/vectorizers/random_norm.py +39 -0
  369. arekit/download_data.py +11 -0
  370. arekit-0.24.0.dist-info/LICENSE +21 -0
  371. arekit-0.24.0.dist-info/METADATA +23 -0
  372. arekit-0.24.0.dist-info/RECORD +374 -0
  373. arekit-0.24.0.dist-info/WHEEL +5 -0
  374. arekit-0.24.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,51 @@
1
class Entity(object):
    """ Annotated entity: a non-empty textual value with an optional type,
        an optional display caption, an optional group index and an
        optional list of nested (child) entities.
    """

    def __init__(self, value, e_type, childs=None, display_value=None, group_index=None):
        """ value: str
                non-empty original text of the entity.
            e_type: str or None
                type of the entity.
            childs: list or None
                nested entities, iterated by `iter_childs`.
            display_value: str or None
                caption returned by `DisplayValue` instead of `value`.
            group_index: int or None
                group index; may be assigned later via `set_group_index`.
        """
        assert(isinstance(value, str) and len(value) > 0)
        assert(isinstance(e_type, str) or e_type is None)
        assert(isinstance(display_value, str) or display_value is None)
        assert(isinstance(group_index, int) or group_index is None)
        assert(isinstance(childs, list) or childs is None)
        self.__value = value
        self.__type = e_type
        self.__display_value = display_value
        self.__group_index = group_index
        self.__childs = childs

    @property
    def GroupIndex(self):
        """ Group index, or None when it has not been assigned.
        """
        return self.__group_index

    @property
    def Value(self):
        """ Original value passed to the constructor.
        """
        return self.__value

    @property
    def DisplayValue(self):
        """ Now, we consider the default value in case
            of the undefined caption, and display_value otherwise.
        """
        return self.__value if self.__display_value is None else self.__display_value

    @property
    def Type(self):
        """ Entity type, or None when it was not provided.
        """
        return self.__type

    def set_display_value(self, caption):
        """ Caption allows to customize the original value.
            Required for optional value modification.
        """
        assert(isinstance(caption, str))
        self.__display_value = caption

    def set_group_index(self, value):
        """ Assigns the group index; allowed only while no index is set.

            value: int
                group index, expected to be >= -1.
        """
        assert(isinstance(value, int) and value >= -1)
        assert(self.__group_index is None)
        self.__group_index = value

    def iter_childs(self):
        """ Iterates over child entities; yields nothing when
            no children were provided.
        """
        # The former unreachable `yield` placed after `return` is dropped:
        # the function is a generator anyway because of the statement below.
        if self.__childs is not None:
            yield from self.__childs
@@ -0,0 +1,72 @@
1
class EntityCollection(object):
    """ Collection of annotated entities, indexed both by the entity value
        and by the group key computed from that value.
    """

    class KeyType:
        # Lookup by the key returned from `value_to_group_id_func`.
        BY_SYNONYMS = 0
        # Lookup by the exact entity value.
        BY_VALUE = 1

    def __init__(self, entities, value_to_group_id_func):
        """ entities: list
                entities to store; every item is expected to expose `Value`.
                The list is kept by reference (not copied).
            value_to_group_id_func: callable
                maps an entity value to the key of the synonyms-based index.
        """
        assert(isinstance(entities, list))
        assert(callable(value_to_group_id_func))

        self.__entities = entities
        self.__value_to_group_id_func = value_to_group_id_func

        self.__by_value = self.create_index(entities=entities,
                                            key_func=lambda e: e.Value)

        self.__by_synonyms = self.create_index(
            entities=entities,
            key_func=lambda e: value_to_group_id_func(e.Value))

    @staticmethod
    def __value_or_none(d, key):
        """ Returns d[key], or None when the key is absent.
        """
        return d.get(key)

    # region protected methods

    def _sort_entities(self, key):
        """ Sorts the stored list of entities in place using `key`.
            The value/synonym indices are not rebuilt.
        """
        assert(callable(key))
        self.__entities.sort(key=key)

    # endregion

    # region public methods

    @staticmethod
    def create_index(entities, key_func):
        """ Groups entities by key_func(entity).

            returns: dict
                key -> list of entities, in the order of their appearance.
        """
        index = {}
        for e in entities:
            index.setdefault(key_func(e), []).append(e)
        return index

    def get_entity_by_index(self, index):
        """ Returns the entity stored at the given position.
        """
        assert(isinstance(index, int))
        return self.__entities[index]

    def try_get_entities(self, value, group_key):
        """ Looks up entities related to `value`.

            value: str
            group_key: one of the KeyType constants.

            returns: list or None
                list of matched entities; None when nothing is found
                or when `group_key` is not a known KeyType constant.
        """
        assert(isinstance(value, str))

        if group_key == self.KeyType.BY_SYNONYMS:
            key = self.__value_to_group_id_func(value)
            return self.__value_or_none(self.__by_synonyms, key)
        if group_key == self.KeyType.BY_VALUE:
            return self.__value_or_none(self.__by_value, value)

        # Unknown key type: preserved behaviour, now stated explicitly.
        return None

    # endregion

    # region base methods

    def __len__(self):
        return len(self.__entities)

    def __iter__(self):
        yield from self.__entities

    # endregion
@@ -0,0 +1,8 @@
1
+ from arekit.common.entities.types import OpinionEntityType
2
+
3
+
4
class StringEntitiesFormatter(object):
    """ Base interface of a formatter that turns an entity
        into its string representation.
    """

    def to_string(self, original_value, entity_type):
        """ Expected to be overridden by a subclass.

            original_value: value to be formatted.
            entity_type: OpinionEntityType
        """
        assert isinstance(entity_type, OpinionEntityType)
        raise NotImplementedError()
@@ -0,0 +1,9 @@
1
+ from enum import Enum
2
+
3
+
4
class OpinionEntityType(Enum):
    """ Enumeration of the entity types in relation to an opinion.
    """

    Object = 1

    Subject = 2

    SynonymSubject = 3

    SynonymObject = 4

    Other = 5
File without changes
File without changes
@@ -0,0 +1,20 @@
1
class BaseSamplesIO(object):
    """ Base interface of the experiment input/output utilities for:
            samples -- data that is utilized in experiments;
            results -- evaluation of experiments.
        Every member is expected to be implemented by a subclass.
    """

    @property
    def Reader(self):
        """ For reading (not implemented in the base class).
        """
        raise NotImplementedError()

    @property
    def Writer(self):
        """ For serialization (not implemented in the base class).
        """
        raise NotImplementedError()

    def create_target(self, data_type):
        """ Path for reading/viewing (not implemented in the base class).
        """
        raise NotImplementedError()
@@ -0,0 +1,17 @@
1
+ from enum import Enum
2
+
3
+
4
class DataType(Enum):
    """ Describes the collection types that are supported by the
        current implementation and provided by collections.
    """

    Train = 1
    Test = 2
    Dev = 3
    Etalon = 4
17
+
File without changes
File without changes
@@ -0,0 +1,17 @@
1
class FrameConnotationDescriptor(object):
    """ Polarity description between source (Agent) towards dest (Theme).
        The latter is related to the roles of frame polarity.
        All properties are expected to be implemented by a subclass.
    """

    @property
    def Source(self):
        """ Not implemented in the base class.
        """
        raise NotImplementedError()

    @property
    def Destination(self):
        """ Not implemented in the base class.
        """
        raise NotImplementedError()

    @property
    def Label(self):
        """ Not implemented in the base class.
        """
        raise NotImplementedError()
@@ -0,0 +1,4 @@
1
class FrameConnotationProvider(object):
    """ Base interface of a provider that is queried by a frame identifier.
    """

    def try_provide(self, frame_id):
        """ Expected to be overridden by a subclass.
        """
        raise NotImplementedError()
@@ -0,0 +1,43 @@
1
+ from arekit.common.frames.variants.base import FrameVariant
2
+
3
+
4
class TextFrameVariant(object):
    """ Wrapper over a FrameVariant found in a text,
        extended with a mutable negation flag.
    """

    def __init__(self, variant):
        """ variant: FrameVariant
                the wrapped variant; the negation flag starts as False.
        """
        assert isinstance(variant, FrameVariant)
        self.__is_negated = False
        self.__variant = variant

    # region properties

    @property
    def Variant(self):
        """ The wrapped variant instance.
        """
        return self.__variant

    @property
    def IsNegated(self):
        """ Current state of the negation flag.
        """
        return self.__is_negated

    # endregion

    # region public methods

    def iter_terms(self):
        """ Yields terms of the wrapped variant.
        """
        yield from self.__variant.iter_terms()

    def set_is_negated(self, value):
        """ Updates the negation flag.

            value: bool
        """
        assert isinstance(value, bool)
        self.__is_negated = value

    # endregion

    # region overriden

    def __len__(self):
        """ Length of the wrapped variant.
        """
        return len(self.__variant)

    # endregion
File without changes
@@ -0,0 +1,21 @@
1
class FrameVariant(object):
    """ Sequence of terms associated with a frame identifier.
    """

    def __init__(self, terms, frame_id):
        """ terms: list
                terms of the variant.
            frame_id: str
                identifier of the related frame.
        """
        assert isinstance(terms, list)
        assert isinstance(frame_id, str)
        self.__frame_id = frame_id
        self.__terms = terms

    @property
    def FrameID(self):
        """ Identifier of the related frame.
        """
        return self.__frame_id

    def get_value(self):
        """ Terms joined by a single space.
        """
        separator = " "
        return separator.join(self.__terms)

    def iter_terms(self):
        """ Yields the terms one by one.
        """
        yield from self.__terms

    def __len__(self):
        """ Amount of terms.
        """
        return len(self.__terms)
@@ -0,0 +1,60 @@
1
+ from collections.abc import Iterable
2
+ from arekit.common.frames.variants.base import FrameVariant
3
+
4
+
5
class FrameVariantsCollection(object):
    """ Collection of frame variants addressed by their textual value,
        along with the list of frame identifiers in order of registration.
    """

    def __init__(self):
        # variant value (str) -> FrameVariant
        self.__variants = {}
        # frame identifiers in order of their first appearance
        self.__frames_list = []

    # region private methods

    @staticmethod
    def __register_frame(frames_dict, frames_list, frame_id):
        """ Registers `frame_id` if it is not known yet.

            returns: int
                index of the frame identifier in `frames_list`.
        """
        assert(isinstance(frame_id, str))
        if frame_id not in frames_dict:
            frames_dict[frame_id] = len(frames_list)
            frames_list.append(frame_id)
        return frames_dict[frame_id]

    # endregion

    # region public methods

    def fill_from_iterable(self, variants_with_id, overwrite_existed_variant, raise_error_on_existed_variant):
        """ Fills the (empty) collection.

            variants_with_id: Iterable
                pairs of (frame_id, variant), where variant is a string
                that is split by whitespace into terms.
            overwrite_existed_variant: bool
                whether a repeated variant replaces the stored one;
                otherwise the repeated variant is skipped.
            raise_error_on_existed_variant: bool
                whether a repeated variant raises an Exception; this check
                takes precedence over `overwrite_existed_variant`.
        """
        assert(isinstance(variants_with_id, Iterable))
        assert(isinstance(overwrite_existed_variant, bool))
        assert(isinstance(raise_error_on_existed_variant, bool))
        assert(len(self.__variants) == 0)
        assert(len(self.__frames_list) == 0)

        frames_dict = {}
        for frame_id, variant in variants_with_id:
            # The frame is registered even if the variant is skipped below.
            self.__register_frame(frames_dict, self.__frames_list, frame_id)

            if variant in self.__variants:
                if raise_error_on_existed_variant:
                    raise Exception("Variant '{variant}' already registered".format(variant=variant))
                if not overwrite_existed_variant:
                    continue

            self.__variants[variant] = FrameVariant(terms=variant.split(), frame_id=frame_id)

    def get_frame_by_index(self, index):
        """ Frame identifier registered at the given position.
        """
        return self.__frames_list[index]

    def get_variant_by_value(self, value):
        """ Stored variant for `value`, or None when it is absent.
        """
        return self.__variants.get(value)

    def has_variant(self, value):
        """ Whether a variant with the given value is stored.
        """
        return value in self.__variants

    def iter_variants(self):
        """ Yields (value, variant) pairs.
        """
        yield from self.__variants.items()

    # endregion

    def __len__(self):
        """ Amount of stored variants.
        """
        return len(self.__variants)
File without changes
@@ -0,0 +1,19 @@
1
class Label(object):
    """ Base label. Two labels are considered equal when they are
        instances of exactly the same class.
    """

    def __eq__(self, other):
        # Comparison with a non-label is delegated to Python (results in
        # False) instead of failing on an assertion; this keeps checks like
        # `label in mixed_list` or `label == None` safe and makes the
        # behaviour identical with and without the `-O` interpreter flag.
        if not isinstance(other, Label):
            return NotImplemented
        return type(self) == type(other)

    def __ne__(self, other):
        if not isinstance(other, Label):
            return NotImplemented
        return type(self) != type(other)

    def __hash__(self):
        # Consistent with __eq__: instances of the same class share the hash.
        return hash(self.to_class_str())

    def to_class_str(self):
        """ Name of the label class.
        """
        return self.__class__.__name__
16
+
17
+
18
class NoLabel(Label):
    """ Label subclass without any additional members.
    """
File without changes
@@ -0,0 +1,7 @@
1
class BasePairLabelProvider(object):
    """ Base interface of a label provider for a (source, target) pair.
    """

    def __init__(self):
        """ Nothing to initialize in the base class.
        """
        return None

    def provide(self, source, target):
        """ Expected to be overridden by a subclass.
        """
        raise NotImplementedError()
@@ -0,0 +1,14 @@
1
+ from arekit.common.labels.provider.base import BasePairLabelProvider
2
+
3
+
4
class ConstantLabelProvider(BasePairLabelProvider):
    """ Provides the predefined label instance regardless
        of the source and target entity parameters.
    """

    def __init__(self, label_instance):
        """ label_instance: the value returned by every `provide` call.
        """
        super().__init__()
        self.__label_instance = label_instance

    def provide(self, source, target):
        """ Returns the label instance passed to the constructor;
            both arguments are ignored.
        """
        return self.__label_instance
File without changes
@@ -0,0 +1,85 @@
1
+ from collections import OrderedDict
2
+
3
+ from arekit.common.labels.base import NoLabel, Label
4
+
5
+
6
class BaseLabelScaler(object):
    """ NOTE:
        Scaler -- sets up the conversion from int/uint values to labels
        and vice versa.
    """

    def __init__(self, uint_dict, int_dict):
        """ uint_dict: OrderedDict
                label -> uint value
            int_dict: OrderedDict
                label -> int value; must be of the same size as `uint_dict`
        """
        for mapping in (uint_dict, int_dict):
            assert(isinstance(mapping, OrderedDict))
        assert(len(uint_dict) == len(int_dict))

        self.__uint_dict = uint_dict
        self.__int_dict = int_dict

        # The order of labels follows the order of the uint dictionary keys.
        self.__ordered_labels = list(uint_dict)
        self.__no_label_instance = self.__find_no_label_instance(iter(uint_dict))

    @property
    def LabelsCount(self):
        return len(self.__uint_dict)

    def ordered_suppoted_labels(self):
        return self.__ordered_labels

    def get_no_label_instance(self):
        """ Return the NoLabel instance found among the supported labels;
            raise an Exception when there is none.
        """
        found = self.__no_label_instance
        if found is not None:
            return found

        raise Exception("NoLabel does no supported by this scaler")

    # region private methods

    @staticmethod
    def __find_no_label_instance(labels_it):
        # First NoLabel instance of the sequence, or None when absent.
        return next((candidate for candidate in labels_it if isinstance(candidate, NoLabel)), None)

    @staticmethod
    def __ltoi(label, d):
        assert(isinstance(label, Label))
        assert(isinstance(d, OrderedDict))
        return d[label]

    @staticmethod
    def __itol(value, d):
        assert(isinstance(value, int))
        assert(isinstance(d, OrderedDict))
        # First label mapped onto `value`; None when nothing matches.
        matched = (label for label, code in d.items() if code == value)
        return next(matched, None)

    @staticmethod
    def __has_value(value, d):
        assert(isinstance(value, int))
        assert(isinstance(d, OrderedDict))
        return any(code == value for code in d.values())

    # endregion

    def classes_count(self):
        return len(self.__uint_dict)

    def label_to_uint(self, label):
        return self.__ltoi(label, self.__uint_dict)

    def label_to_int(self, label):
        return self.__ltoi(label, self.__int_dict)

    def uint_to_label(self, value):
        return self.__itol(value, self.__uint_dict)

    def int_to_label(self, value):
        return self.__itol(value, self.__int_dict)

    def support_int_value(self, value):
        return self.__has_value(value, self.__int_dict)
@@ -0,0 +1,7 @@
1
+ from arekit.common.labels.scaler.base import BaseLabelScaler
2
+
3
+
4
class SentimentLabelScaler(BaseLabelScaler):
    """ Label scaler that additionally declares the label inversion operation.
    """

    def invert_label(self, label):
        """ Expected to be overridden by the inheritors.
        """
        raise NotImplementedError()
@@ -0,0 +1,10 @@
1
+ from collections import OrderedDict
2
+
3
+ from arekit.common.labels.scaler.base import BaseLabelScaler
4
+
5
+
6
class SingleLabelScaler(BaseLabelScaler):
    """ Scaler that supports a single label only.
    """

    def __init__(self, label, uint_label=0):
        """ label: the only supported label.
            uint_label: value assigned to that label.
        """
        # One and the same mapping object serves both the uint and int conversions.
        mapping = OrderedDict()
        mapping[label] = uint_label
        super().__init__(uint_dict=mapping, int_dict=mapping)
@@ -0,0 +1,55 @@
1
+ from arekit.common.labels.base import Label
2
+
3
+
4
+ # TODO. This should be moved into formatter.py
5
class StringLabelsFormatter(object):
    """ NOTE:
        Sets up the conversion between strings and label instances.
    """

    def __init__(self, stol):
        """ stol: dict
                string to label dictionary: string -> label_type,
                where every label_type is a subclass of Label.
        """
        assert(isinstance(stol, dict))

        # Perform the check of every dictionary entry.
        for text, label_type in stol.items():
            assert(isinstance(text, str))
            assert(issubclass(label_type, Label))

        self._stol = stol
        self.__supported_label_types = set(stol.values())

    def __is_label_type_supported(self, label):
        return label in self.__supported_label_types

    def str_to_label(self, value):
        """ Create a new label instance of the type registered for `value`;
            raise an Exception for an unknown string.
        """
        assert(isinstance(value, str))

        if value in self._stol:
            return self._stol[value]()

        raise Exception("Label value `{}` is not supported.".format(value))

    def label_to_str(self, label):
        """ Return the first string registered for the type of `label`;
            raise an Exception when the type is not supported.
        """
        assert(isinstance(label, Label))

        wanted_type = type(label)

        if not self.__is_label_type_supported(wanted_type):
            raise Exception("Label type {label} is not supported. Supported labels: [{values}]".format(
                label=wanted_type, values=self.__supported_label_types))

        matched = (text for text, candidate in self._stol.items() if candidate == wanted_type)
        return next(matched, None)

    def supports_label(self, label):
        return type(label) in self.__supported_label_types

    def supports_value(self, value):
        assert(isinstance(value, str))
        return value in self._stol
55
+
File without changes
@@ -0,0 +1,44 @@
1
+ from collections.abc import Iterable
2
+
3
+
4
class LinkedDataWrapper(object):
    """ Wrapper over a list of linked data items that supports indexing,
        iteration, length and an arbitrary tag value.
    """

    def __init__(self, linked_data):
        """ linked_data: Iterable
                items to be wrapped; they are copied into an internal list.
        """
        assert(isinstance(linked_data, Iterable))
        self.__tag = None
        self.__linked_data = list(linked_data)

    @property
    def RelatedDocID(self):
        """ Linked data is limited to the particular document.
        """
        raise NotImplementedError()

    @property
    def First(self):
        # Fails with IndexError when the wrapper is empty.
        return self[0]

    @property
    def Tag(self):
        return self.__tag

    def set_tag(self, value):
        self.__tag = value

    def _get_data_label(self, item):
        """ Expected to be overridden: provides the label of a single item.
        """
        raise NotImplementedError()

    def iter_labels(self):
        """ Lazily yield the label of every wrapped item.
        """
        yield from (self._get_data_label(entry) for entry in self.__linked_data)

    def __getitem__(self, item):
        assert(isinstance(item, int))
        return self.__linked_data[item]

    def __iter__(self):
        yield from self.__linked_data

    def __len__(self):
        return len(self.__linked_data)
@@ -0,0 +1,23 @@
1
+ from arekit.common.linkage.base import LinkedDataWrapper
2
+
3
+
4
class MetaEmptyLinkedDataWrapper(LinkedDataWrapper):
    """ Placeholder data-wrapper without any linked items; it is utilized for
        passing system information while iterating through the data pipelines.
    """

    def __init__(self, doc_id, meta_data=None):
        """ doc_id:
                value reported by the `RelatedDocID` property.
            meta_data:
                optional parameter which serves any information needed further.
        """
        # The wrapped sequence is always empty.
        super().__init__([])
        self.__meta_data = meta_data
        self.__doc_id = doc_id

    @property
    def RelatedDocID(self):
        return self.__doc_id

    @property
    def MetaData(self):
        return self.__meta_data
@@ -0,0 +1,9 @@
1
+ from arekit.common.linkage.base import LinkedDataWrapper
2
+ from arekit.common.opinions.base import Opinion
3
+
4
+
5
class OpinionsLinkage(LinkedDataWrapper):
    """ Linkage whose items are Opinion instances.
    """

    def _get_data_label(self, item):
        """ Label of the given opinion.
        """
        assert(isinstance(item, Opinion))
        opinion_label = item.Label
        return opinion_label
@@ -0,0 +1,22 @@
1
+ from arekit.common.linkage.base import LinkedDataWrapper
2
+ from arekit.common.text_opinions.base import TextOpinion
3
+
4
+
5
class TextOpinionsLinkage(LinkedDataWrapper):
    """ Linkage whose items are TextOpinion instances; the document id
        and the linked label are taken from the first item.
    """

    @property
    def First(self):
        head = super().First
        assert(isinstance(head, TextOpinion))
        return head

    @property
    def RelatedDocID(self):
        head = self.First
        return head.DocID

    def get_linked_label(self):
        """ Label of the first text opinion of the linkage.
        """
        head = self.First
        return head.Label

    def _get_data_label(self, item):
        assert(isinstance(item, TextOpinion))
        return item.Label