arekit 0.23.1__tar.gz → 0.25.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (435)
  1. arekit-0.25.0/PKG-INFO +82 -0
  2. arekit-0.25.0/README.md +60 -0
  3. {arekit-0.23.1 → arekit-0.25.0}/arekit/common/context/terms_mapper.py +2 -2
  4. {arekit-0.23.1 → arekit-0.25.0}/arekit/common/data/const.py +5 -4
  5. arekit-0.23.1/arekit/common/experiment/api/ops_doc.py → arekit-0.25.0/arekit/common/data/doc_provider.py +1 -1
  6. {arekit-0.23.1 → arekit-0.25.0}/arekit/common/data/input/providers/columns/sample.py +6 -1
  7. {arekit-0.23.1 → arekit-0.25.0}/arekit/common/data/input/providers/instances/base.py +1 -1
  8. arekit-0.25.0/arekit/common/data/input/providers/rows/base.py +64 -0
  9. {arekit-0.23.1 → arekit-0.25.0}/arekit/common/data/input/providers/rows/samples.py +57 -55
  10. {arekit-0.23.1 → arekit-0.25.0}/arekit/common/data/input/providers/sample/cropped.py +2 -2
  11. {arekit-0.23.1 → arekit-0.25.0}/arekit/common/data/input/sample.py +1 -1
  12. arekit-0.25.0/arekit/common/data/rows_fmt.py +82 -0
  13. arekit-0.25.0/arekit/common/data/rows_parser.py +43 -0
  14. {arekit-0.23.1 → arekit-0.25.0}/arekit/common/data/storages/base.py +23 -18
  15. {arekit-0.23.1 → arekit-0.25.0}/arekit/common/data/views/samples.py +2 -8
  16. {arekit-0.23.1/arekit/common/news → arekit-0.25.0/arekit/common/docs}/base.py +2 -2
  17. {arekit-0.23.1/arekit/common/news → arekit-0.25.0/arekit/common/docs}/entities_grouping.py +2 -1
  18. {arekit-0.23.1/arekit/common/news → arekit-0.25.0/arekit/common/docs}/entity.py +2 -1
  19. {arekit-0.23.1/arekit/common/news → arekit-0.25.0/arekit/common/docs}/parsed/base.py +5 -5
  20. arekit-0.25.0/arekit/common/docs/parsed/providers/base.py +68 -0
  21. {arekit-0.23.1/arekit/common/news → arekit-0.25.0/arekit/common/docs}/parsed/providers/base_pairs.py +2 -2
  22. {arekit-0.23.1/arekit/common/news → arekit-0.25.0/arekit/common/docs}/parsed/providers/entity_service.py +27 -22
  23. {arekit-0.23.1/arekit/common/news → arekit-0.25.0/arekit/common/docs}/parsed/providers/opinion_pairs.py +2 -2
  24. {arekit-0.23.1/arekit/common/news → arekit-0.25.0/arekit/common/docs}/parsed/providers/text_opinion_pairs.py +6 -6
  25. arekit-0.25.0/arekit/common/docs/parsed/service.py +31 -0
  26. arekit-0.25.0/arekit/common/docs/parser.py +66 -0
  27. {arekit-0.23.1/arekit/common/news → arekit-0.25.0/arekit/common/docs}/sentence.py +1 -1
  28. {arekit-0.23.1 → arekit-0.25.0}/arekit/common/entities/base.py +11 -2
  29. {arekit-0.23.1 → arekit-0.25.0}/arekit/common/experiment/api/base_samples_io.py +1 -1
  30. {arekit-0.23.1 → arekit-0.25.0}/arekit/common/frames/variants/collection.py +2 -2
  31. {arekit-0.23.1 → arekit-0.25.0}/arekit/common/linkage/base.py +2 -2
  32. arekit-0.25.0/arekit/common/linkage/meta.py +23 -0
  33. {arekit-0.23.1 → arekit-0.25.0}/arekit/common/linkage/opinions.py +1 -1
  34. {arekit-0.23.1 → arekit-0.25.0}/arekit/common/linkage/text_opinions.py +2 -2
  35. arekit-0.25.0/arekit/common/opinions/annot/algo/base.py +4 -0
  36. {arekit-0.23.1 → arekit-0.25.0}/arekit/common/opinions/annot/algo/pair_based.py +15 -13
  37. {arekit-0.23.1 → arekit-0.25.0}/arekit/common/opinions/annot/algo/predefined.py +4 -4
  38. {arekit-0.23.1 → arekit-0.25.0}/arekit/common/opinions/annot/algo_based.py +5 -5
  39. {arekit-0.23.1 → arekit-0.25.0}/arekit/common/opinions/annot/base.py +3 -3
  40. {arekit-0.23.1 → arekit-0.25.0}/arekit/common/opinions/base.py +7 -7
  41. {arekit-0.23.1 → arekit-0.25.0}/arekit/common/opinions/collection.py +3 -3
  42. arekit-0.25.0/arekit/common/pipeline/base.py +21 -0
  43. arekit-0.25.0/arekit/common/pipeline/batching.py +28 -0
  44. {arekit-0.23.1 → arekit-0.25.0}/arekit/common/pipeline/context.py +5 -1
  45. arekit-0.25.0/arekit/common/pipeline/items/base.py +49 -0
  46. {arekit-0.23.1 → arekit-0.25.0}/arekit/common/pipeline/items/flatten.py +5 -1
  47. {arekit-0.23.1 → arekit-0.25.0}/arekit/common/pipeline/items/handle.py +2 -1
  48. {arekit-0.23.1 → arekit-0.25.0}/arekit/common/pipeline/items/iter.py +2 -1
  49. {arekit-0.23.1 → arekit-0.25.0}/arekit/common/pipeline/items/map.py +2 -1
  50. {arekit-0.23.1 → arekit-0.25.0}/arekit/common/pipeline/items/map_nested.py +4 -0
  51. arekit-0.25.0/arekit/common/pipeline/utils.py +32 -0
  52. arekit-0.25.0/arekit/common/service/sqlite.py +36 -0
  53. {arekit-0.23.1 → arekit-0.25.0}/arekit/common/synonyms/base.py +2 -2
  54. arekit-0.23.1/arekit/common/text/partitioning/str.py → arekit-0.25.0/arekit/common/text/partitioning.py +16 -11
  55. {arekit-0.23.1 → arekit-0.25.0}/arekit/common/text_opinions/base.py +11 -11
  56. arekit-0.25.0/arekit/common/utils.py +85 -0
  57. {arekit-0.23.1 → arekit-0.25.0}/arekit/contrib/networks/embedding.py +3 -3
  58. {arekit-0.23.1 → arekit-0.25.0}/arekit/contrib/networks/embedding_io.py +5 -5
  59. {arekit-0.23.1 → arekit-0.25.0}/arekit/contrib/networks/input/const.py +0 -2
  60. {arekit-0.23.1 → arekit-0.25.0}/arekit/contrib/networks/input/providers/sample.py +15 -29
  61. arekit-0.25.0/arekit/contrib/networks/input/rows_parser.py +47 -0
  62. {arekit-0.23.1 → arekit-0.25.0}/arekit/contrib/prompt/sample.py +18 -16
  63. {arekit-0.23.1 → arekit-0.25.0}/arekit/contrib/utils/data/contents/opinions.py +17 -5
  64. arekit-0.25.0/arekit/contrib/utils/data/doc_provider/dict_based.py +13 -0
  65. {arekit-0.23.1/arekit/contrib/utils/data/doc_ops → arekit-0.25.0/arekit/contrib/utils/data/doc_provider}/dir_based.py +7 -7
  66. {arekit-0.23.1 → arekit-0.25.0}/arekit/contrib/utils/data/readers/base.py +3 -0
  67. {arekit-0.23.1 → arekit-0.25.0}/arekit/contrib/utils/data/readers/csv_pd.py +10 -4
  68. {arekit-0.23.1 → arekit-0.25.0}/arekit/contrib/utils/data/readers/jsonl.py +3 -0
  69. arekit-0.25.0/arekit/contrib/utils/data/readers/sqlite.py +14 -0
  70. {arekit-0.23.1 → arekit-0.25.0}/arekit/contrib/utils/data/service/balance.py +0 -1
  71. {arekit-0.23.1 → arekit-0.25.0}/arekit/contrib/utils/data/storages/pandas_based.py +3 -5
  72. {arekit-0.23.1 → arekit-0.25.0}/arekit/contrib/utils/data/storages/row_cache.py +18 -6
  73. arekit-0.25.0/arekit/contrib/utils/data/storages/sqlite_based.py +17 -0
  74. {arekit-0.23.1 → arekit-0.25.0}/arekit/contrib/utils/data/writers/base.py +5 -0
  75. {arekit-0.23.1 → arekit-0.25.0}/arekit/contrib/utils/data/writers/csv_native.py +3 -0
  76. {arekit-0.23.1 → arekit-0.25.0}/arekit/contrib/utils/data/writers/csv_pd.py +3 -0
  77. {arekit-0.23.1 → arekit-0.25.0}/arekit/contrib/utils/data/writers/json_opennre.py +31 -13
  78. arekit-0.25.0/arekit/contrib/utils/data/writers/sqlite_native.py +114 -0
  79. arekit-0.25.0/arekit/contrib/utils/io_utils/embedding.py +72 -0
  80. arekit-0.25.0/arekit/contrib/utils/io_utils/utils.py +22 -0
  81. {arekit-0.23.1 → arekit-0.25.0}/arekit/contrib/utils/pipelines/items/sampling/base.py +31 -26
  82. {arekit-0.23.1 → arekit-0.25.0}/arekit/contrib/utils/pipelines/items/sampling/networks.py +7 -10
  83. {arekit-0.23.1 → arekit-0.25.0}/arekit/contrib/utils/pipelines/items/text/entities_default.py +2 -2
  84. {arekit-0.23.1 → arekit-0.25.0}/arekit/contrib/utils/pipelines/items/text/frames.py +2 -3
  85. {arekit-0.23.1 → arekit-0.25.0}/arekit/contrib/utils/pipelines/items/text/frames_lemmatized.py +3 -3
  86. {arekit-0.23.1 → arekit-0.25.0}/arekit/contrib/utils/pipelines/items/text/frames_negation.py +2 -1
  87. {arekit-0.23.1 → arekit-0.25.0}/arekit/contrib/utils/pipelines/items/text/tokenizer.py +3 -5
  88. arekit-0.25.0/arekit/contrib/utils/pipelines/items/text/translator.py +136 -0
  89. {arekit-0.23.1 → arekit-0.25.0}/arekit/contrib/utils/pipelines/opinion_collections.py +5 -5
  90. {arekit-0.23.1 → arekit-0.25.0}/arekit/contrib/utils/pipelines/text_opinion/annot/algo_based.py +7 -7
  91. {arekit-0.23.1 → arekit-0.25.0}/arekit/contrib/utils/pipelines/text_opinion/extraction.py +34 -22
  92. arekit-0.25.0/arekit/contrib/utils/pipelines/text_opinion/filters/base.py +4 -0
  93. {arekit-0.23.1 → arekit-0.25.0}/arekit/contrib/utils/pipelines/text_opinion/filters/distance_based.py +1 -1
  94. {arekit-0.23.1 → arekit-0.25.0}/arekit/contrib/utils/pipelines/text_opinion/filters/entity_based.py +3 -3
  95. {arekit-0.23.1 → arekit-0.25.0}/arekit/contrib/utils/pipelines/text_opinion/filters/limitation.py +4 -4
  96. {arekit-0.23.1 → arekit-0.25.0}/arekit/contrib/utils/serializer.py +4 -23
  97. arekit-0.25.0/arekit.egg-info/PKG-INFO +82 -0
  98. {arekit-0.23.1 → arekit-0.25.0}/arekit.egg-info/SOURCES.txt +33 -177
  99. arekit-0.25.0/logo.png +0 -0
  100. {arekit-0.23.1 → arekit-0.25.0}/setup.py +9 -4
  101. arekit-0.23.1/PKG-INFO +0 -19
  102. arekit-0.23.1/README.md +0 -77
  103. arekit-0.23.1/arekit/common/data/input/providers/rows/base.py +0 -41
  104. arekit-0.23.1/arekit/common/data/row_ids/base.py +0 -79
  105. arekit-0.23.1/arekit/common/data/row_ids/binary.py +0 -38
  106. arekit-0.23.1/arekit/common/data/row_ids/multiple.py +0 -14
  107. arekit-0.23.1/arekit/common/folding/base.py +0 -36
  108. arekit-0.23.1/arekit/common/folding/fixed.py +0 -42
  109. arekit-0.23.1/arekit/common/folding/nofold.py +0 -15
  110. arekit-0.23.1/arekit/common/folding/united.py +0 -46
  111. arekit-0.23.1/arekit/common/news/objects_parser.py +0 -37
  112. arekit-0.23.1/arekit/common/news/parsed/providers/base.py +0 -48
  113. arekit-0.23.1/arekit/common/news/parsed/service.py +0 -31
  114. arekit-0.23.1/arekit/common/news/parser.py +0 -34
  115. arekit-0.23.1/arekit/common/opinions/annot/algo/base.py +0 -4
  116. arekit-0.23.1/arekit/common/pipeline/base.py +0 -25
  117. arekit-0.23.1/arekit/common/pipeline/items/base.py +0 -12
  118. arekit-0.23.1/arekit/common/text/parser.py +0 -12
  119. arekit-0.23.1/arekit/common/text/partitioning/base.py +0 -4
  120. arekit-0.23.1/arekit/common/text/partitioning/terms.py +0 -35
  121. arekit-0.23.1/arekit/common/utils.py +0 -98
  122. arekit-0.23.1/arekit/contrib/networks/input/rows_parser.py +0 -134
  123. arekit-0.23.1/arekit/contrib/source/brat/annot.py +0 -83
  124. arekit-0.23.1/arekit/contrib/source/brat/entities/compound.py +0 -33
  125. arekit-0.23.1/arekit/contrib/source/brat/entities/entity.py +0 -42
  126. arekit-0.23.1/arekit/contrib/source/brat/entities/parser.py +0 -53
  127. arekit-0.23.1/arekit/contrib/source/brat/news.py +0 -28
  128. arekit-0.23.1/arekit/contrib/source/brat/opinions/converter.py +0 -19
  129. arekit-0.23.1/arekit/contrib/source/brat/relation.py +0 -32
  130. arekit-0.23.1/arekit/contrib/source/brat/sentence.py +0 -69
  131. arekit-0.23.1/arekit/contrib/source/brat/sentences_reader.py +0 -128
  132. arekit-0.23.1/arekit/contrib/source/download.py +0 -41
  133. arekit-0.23.1/arekit/contrib/source/nerel/entities.py +0 -55
  134. arekit-0.23.1/arekit/contrib/source/nerel/folding/fixed.py +0 -75
  135. arekit-0.23.1/arekit/contrib/source/nerel/io_utils.py +0 -62
  136. arekit-0.23.1/arekit/contrib/source/nerel/labels.py +0 -241
  137. arekit-0.23.1/arekit/contrib/source/nerel/reader.py +0 -46
  138. arekit-0.23.1/arekit/contrib/source/nerel/utils.py +0 -24
  139. arekit-0.23.1/arekit/contrib/source/nerel/versions.py +0 -12
  140. arekit-0.23.1/arekit/contrib/source/nerelbio/io_utils.py +0 -62
  141. arekit-0.23.1/arekit/contrib/source/nerelbio/labels.py +0 -265
  142. arekit-0.23.1/arekit/contrib/source/nerelbio/reader.py +0 -8
  143. arekit-0.23.1/arekit/contrib/source/nerelbio/versions.py +0 -8
  144. arekit-0.23.1/arekit/contrib/source/ruattitudes/collection.py +0 -36
  145. arekit-0.23.1/arekit/contrib/source/ruattitudes/entity/parser.py +0 -7
  146. arekit-0.23.1/arekit/contrib/source/ruattitudes/io_utils.py +0 -56
  147. arekit-0.23.1/arekit/contrib/source/ruattitudes/labels_fmt.py +0 -12
  148. arekit-0.23.1/arekit/contrib/source/ruattitudes/news.py +0 -51
  149. arekit-0.23.1/arekit/contrib/source/ruattitudes/news_brat.py +0 -44
  150. arekit-0.23.1/arekit/contrib/source/ruattitudes/opinions/base.py +0 -28
  151. arekit-0.23.1/arekit/contrib/source/ruattitudes/opinions/converter.py +0 -37
  152. arekit-0.23.1/arekit/contrib/source/ruattitudes/reader.py +0 -268
  153. arekit-0.23.1/arekit/contrib/source/ruattitudes/sentence.py +0 -73
  154. arekit-0.23.1/arekit/contrib/source/ruattitudes/synonyms.py +0 -17
  155. arekit-0.23.1/arekit/contrib/source/ruattitudes/text_object.py +0 -57
  156. arekit-0.23.1/arekit/contrib/source/rusentiframes/collection.py +0 -157
  157. arekit-0.23.1/arekit/contrib/source/rusentiframes/effect.py +0 -24
  158. arekit-0.23.1/arekit/contrib/source/rusentiframes/io_utils.py +0 -19
  159. arekit-0.23.1/arekit/contrib/source/rusentiframes/labels_fmt.py +0 -22
  160. arekit-0.23.1/arekit/contrib/source/rusentiframes/polarity.py +0 -35
  161. arekit-0.23.1/arekit/contrib/source/rusentiframes/role.py +0 -15
  162. arekit-0.23.1/arekit/contrib/source/rusentiframes/state.py +0 -24
  163. arekit-0.23.1/arekit/contrib/source/rusentiframes/types.py +0 -42
  164. arekit-0.23.1/arekit/contrib/source/rusentiframes/value.py +0 -2
  165. arekit-0.23.1/arekit/contrib/source/rusentrel/const.py +0 -3
  166. arekit-0.23.1/arekit/contrib/source/rusentrel/entities.py +0 -26
  167. arekit-0.23.1/arekit/contrib/source/rusentrel/io_utils.py +0 -125
  168. arekit-0.23.1/arekit/contrib/source/rusentrel/labels_fmt.py +0 -12
  169. arekit-0.23.1/arekit/contrib/source/rusentrel/news_reader.py +0 -51
  170. arekit-0.23.1/arekit/contrib/source/rusentrel/opinions/collection.py +0 -30
  171. arekit-0.23.1/arekit/contrib/source/rusentrel/opinions/converter.py +0 -40
  172. arekit-0.23.1/arekit/contrib/source/rusentrel/opinions/provider.py +0 -54
  173. arekit-0.23.1/arekit/contrib/source/rusentrel/opinions/writer.py +0 -42
  174. arekit-0.23.1/arekit/contrib/source/rusentrel/synonyms.py +0 -17
  175. arekit-0.23.1/arekit/contrib/source/sentinerel/entities.py +0 -52
  176. arekit-0.23.1/arekit/contrib/source/sentinerel/folding/factory.py +0 -32
  177. arekit-0.23.1/arekit/contrib/source/sentinerel/folding/fixed.py +0 -73
  178. arekit-0.23.1/arekit/contrib/source/sentinerel/io_utils.py +0 -87
  179. arekit-0.23.1/arekit/contrib/source/sentinerel/labels.py +0 -53
  180. arekit-0.23.1/arekit/contrib/source/sentinerel/labels_scaler.py +0 -30
  181. arekit-0.23.1/arekit/contrib/source/sentinerel/reader.py +0 -42
  182. arekit-0.23.1/arekit/contrib/source/synonyms/utils.py +0 -19
  183. arekit-0.23.1/arekit/contrib/source/zip_utils.py +0 -47
  184. arekit-0.23.1/arekit/contrib/utils/bert/rows.py +0 -0
  185. arekit-0.23.1/arekit/contrib/utils/bert/text_b_rus.py +0 -18
  186. arekit-0.23.1/arekit/contrib/utils/connotations/rusentiframes_sentiment.py +0 -23
  187. arekit-0.23.1/arekit/contrib/utils/cv/doc_stat/base.py +0 -37
  188. arekit-0.23.1/arekit/contrib/utils/cv/doc_stat/sentence.py +0 -12
  189. arekit-0.23.1/arekit/contrib/utils/cv/splitters/base.py +0 -4
  190. arekit-0.23.1/arekit/contrib/utils/cv/splitters/default.py +0 -53
  191. arekit-0.23.1/arekit/contrib/utils/cv/splitters/statistical.py +0 -57
  192. arekit-0.23.1/arekit/contrib/utils/cv/two_class.py +0 -77
  193. arekit-0.23.1/arekit/contrib/utils/data/doc_ops/dict_based.py +0 -13
  194. arekit-0.23.1/arekit/contrib/utils/data/ext.py +0 -31
  195. arekit-0.23.1/arekit/contrib/utils/data/storages/__init__.py +0 -0
  196. arekit-0.23.1/arekit/contrib/utils/data/views/__init__.py +0 -0
  197. arekit-0.23.1/arekit/contrib/utils/data/views/linkages/__init__.py +0 -0
  198. arekit-0.23.1/arekit/contrib/utils/data/views/linkages/base.py +0 -58
  199. arekit-0.23.1/arekit/contrib/utils/data/views/linkages/multilabel.py +0 -48
  200. arekit-0.23.1/arekit/contrib/utils/data/views/linkages/utils.py +0 -24
  201. arekit-0.23.1/arekit/contrib/utils/data/views/opinions.py +0 -14
  202. arekit-0.23.1/arekit/contrib/utils/data/writers/__init__.py +0 -0
  203. arekit-0.23.1/arekit/contrib/utils/download.py +0 -78
  204. arekit-0.23.1/arekit/contrib/utils/embeddings/__init__.py +0 -0
  205. arekit-0.23.1/arekit/contrib/utils/entities/__init__.py +0 -0
  206. arekit-0.23.1/arekit/contrib/utils/entities/formatters/__init__.py +0 -0
  207. arekit-0.23.1/arekit/contrib/utils/entities/formatters/str_rus_cased_fmt.py +0 -78
  208. arekit-0.23.1/arekit/contrib/utils/entities/formatters/str_rus_nocased_fmt.py +0 -15
  209. arekit-0.23.1/arekit/contrib/utils/entities/formatters/str_simple_fmt.py +0 -24
  210. arekit-0.23.1/arekit/contrib/utils/entities/formatters/str_simple_uppercase_fmt.py +0 -21
  211. arekit-0.23.1/arekit/contrib/utils/io_utils/__init__.py +0 -0
  212. arekit-0.23.1/arekit/contrib/utils/io_utils/embedding.py +0 -80
  213. arekit-0.23.1/arekit/contrib/utils/io_utils/opinions.py +0 -39
  214. arekit-0.23.1/arekit/contrib/utils/io_utils/samples.py +0 -78
  215. arekit-0.23.1/arekit/contrib/utils/io_utils/utils.py +0 -43
  216. arekit-0.23.1/arekit/contrib/utils/lexicons/__init__.py +0 -0
  217. arekit-0.23.1/arekit/contrib/utils/lexicons/lexicon.py +0 -43
  218. arekit-0.23.1/arekit/contrib/utils/lexicons/relation.py +0 -45
  219. arekit-0.23.1/arekit/contrib/utils/lexicons/rusentilex.py +0 -34
  220. arekit-0.23.1/arekit/contrib/utils/nn/__init__.py +0 -0
  221. arekit-0.23.1/arekit/contrib/utils/nn/rows.py +0 -83
  222. arekit-0.23.1/arekit/contrib/utils/np_utils/__init__.py +0 -0
  223. arekit-0.23.1/arekit/contrib/utils/pipelines/__init__.py +0 -0
  224. arekit-0.23.1/arekit/contrib/utils/pipelines/items/__init__.py +0 -0
  225. arekit-0.23.1/arekit/contrib/utils/pipelines/items/sampling/__init__.py +0 -0
  226. arekit-0.23.1/arekit/contrib/utils/pipelines/items/sampling/bert.py +0 -5
  227. arekit-0.23.1/arekit/contrib/utils/pipelines/items/text/__init__.py +0 -0
  228. arekit-0.23.1/arekit/contrib/utils/pipelines/items/text/terms_splitter.py +0 -10
  229. arekit-0.23.1/arekit/contrib/utils/pipelines/items/to_output.py +0 -101
  230. arekit-0.23.1/arekit/contrib/utils/pipelines/sources/__init__.py +0 -0
  231. arekit-0.23.1/arekit/contrib/utils/pipelines/sources/nerel/__init__.py +0 -0
  232. arekit-0.23.1/arekit/contrib/utils/pipelines/sources/nerel/doc_ops.py +0 -27
  233. arekit-0.23.1/arekit/contrib/utils/pipelines/sources/nerel/extract_text_relations.py +0 -59
  234. arekit-0.23.1/arekit/contrib/utils/pipelines/sources/nerel/labels_fmt.py +0 -60
  235. arekit-0.23.1/arekit/contrib/utils/pipelines/sources/nerel_bio/__init__.py +0 -0
  236. arekit-0.23.1/arekit/contrib/utils/pipelines/sources/nerel_bio/doc_ops.py +0 -29
  237. arekit-0.23.1/arekit/contrib/utils/pipelines/sources/nerel_bio/extrat_text_relations.py +0 -59
  238. arekit-0.23.1/arekit/contrib/utils/pipelines/sources/nerel_bio/labels_fmt.py +0 -79
  239. arekit-0.23.1/arekit/contrib/utils/pipelines/sources/ruattitudes/__init__.py +0 -0
  240. arekit-0.23.1/arekit/contrib/utils/pipelines/sources/ruattitudes/doc_ops.py +0 -56
  241. arekit-0.23.1/arekit/contrib/utils/pipelines/sources/ruattitudes/entity_filter.py +0 -19
  242. arekit-0.23.1/arekit/contrib/utils/pipelines/sources/ruattitudes/extract_text_opinions.py +0 -58
  243. arekit-0.23.1/arekit/contrib/utils/pipelines/sources/rusentrel/__init__.py +0 -0
  244. arekit-0.23.1/arekit/contrib/utils/pipelines/sources/rusentrel/doc_ops.py +0 -21
  245. arekit-0.23.1/arekit/contrib/utils/pipelines/sources/rusentrel/extract_text_opinions.py +0 -100
  246. arekit-0.23.1/arekit/contrib/utils/pipelines/sources/sentinerel/__init__.py +0 -0
  247. arekit-0.23.1/arekit/contrib/utils/pipelines/sources/sentinerel/doc_ops.py +0 -29
  248. arekit-0.23.1/arekit/contrib/utils/pipelines/sources/sentinerel/entity_filter.py +0 -62
  249. arekit-0.23.1/arekit/contrib/utils/pipelines/sources/sentinerel/extract_text_opinions.py +0 -175
  250. arekit-0.23.1/arekit/contrib/utils/pipelines/sources/sentinerel/labels_fmt.py +0 -50
  251. arekit-0.23.1/arekit/contrib/utils/pipelines/text_opinion/__init__.py +0 -0
  252. arekit-0.23.1/arekit/contrib/utils/pipelines/text_opinion/annot/__init__.py +0 -0
  253. arekit-0.23.1/arekit/contrib/utils/pipelines/text_opinion/annot/predefined.py +0 -88
  254. arekit-0.23.1/arekit/contrib/utils/pipelines/text_opinion/filters/__init__.py +0 -0
  255. arekit-0.23.1/arekit/contrib/utils/pipelines/text_opinion/filters/base.py +0 -4
  256. arekit-0.23.1/arekit/contrib/utils/processing/__init__.py +0 -0
  257. arekit-0.23.1/arekit/contrib/utils/processing/languages/__init__.py +0 -0
  258. arekit-0.23.1/arekit/contrib/utils/processing/languages/ru/__init__.py +0 -0
  259. arekit-0.23.1/arekit/contrib/utils/processing/lemmatization/__init__.py +0 -0
  260. arekit-0.23.1/arekit/contrib/utils/processing/pos/__init__.py +0 -0
  261. arekit-0.23.1/arekit/contrib/utils/processing/text/__init__.py +0 -0
  262. arekit-0.23.1/arekit/contrib/utils/resources.py +0 -26
  263. arekit-0.23.1/arekit/contrib/utils/sources/__init__.py +0 -0
  264. arekit-0.23.1/arekit/contrib/utils/sources/sentinerel/__init__.py +0 -0
  265. arekit-0.23.1/arekit/contrib/utils/sources/sentinerel/text_opinion/__init__.py +0 -0
  266. arekit-0.23.1/arekit/contrib/utils/sources/sentinerel/text_opinion/prof_per_org_filter.py +0 -63
  267. arekit-0.23.1/arekit/contrib/utils/synonyms/__init__.py +0 -0
  268. arekit-0.23.1/arekit/contrib/utils/utils_folding.py +0 -19
  269. arekit-0.23.1/arekit/contrib/utils/vectorizers/__init__.py +0 -0
  270. arekit-0.23.1/arekit/download_data.py +0 -11
  271. arekit-0.23.1/arekit.egg-info/PKG-INFO +0 -19
  272. {arekit-0.23.1 → arekit-0.25.0}/LICENSE +0 -0
  273. {arekit-0.23.1 → arekit-0.25.0}/arekit/__init__.py +0 -0
  274. {arekit-0.23.1 → arekit-0.25.0}/arekit/common/__init__.py +0 -0
  275. {arekit-0.23.1 → arekit-0.25.0}/arekit/common/bound.py +0 -0
  276. {arekit-0.23.1 → arekit-0.25.0}/arekit/common/context/__init__.py +0 -0
  277. {arekit-0.23.1 → arekit-0.25.0}/arekit/common/context/token.py +0 -0
  278. {arekit-0.23.1 → arekit-0.25.0}/arekit/common/data/__init__.py +0 -0
  279. {arekit-0.23.1 → arekit-0.25.0}/arekit/common/data/input/__init__.py +0 -0
  280. {arekit-0.23.1 → arekit-0.25.0}/arekit/common/data/input/providers/__init__.py +0 -0
  281. {arekit-0.23.1 → arekit-0.25.0}/arekit/common/data/input/providers/columns/__init__.py +0 -0
  282. {arekit-0.23.1 → arekit-0.25.0}/arekit/common/data/input/providers/columns/base.py +0 -0
  283. {arekit-0.23.1 → arekit-0.25.0}/arekit/common/data/input/providers/const.py +0 -0
  284. {arekit-0.23.1 → arekit-0.25.0}/arekit/common/data/input/providers/contents.py +0 -0
  285. {arekit-0.23.1 → arekit-0.25.0}/arekit/common/data/input/providers/instances/__init__.py +0 -0
  286. {arekit-0.23.1 → arekit-0.25.0}/arekit/common/data/input/providers/instances/multiple.py +0 -0
  287. {arekit-0.23.1 → arekit-0.25.0}/arekit/common/data/input/providers/instances/single.py +0 -0
  288. {arekit-0.23.1 → arekit-0.25.0}/arekit/common/data/input/providers/label/__init__.py +0 -0
  289. {arekit-0.23.1 → arekit-0.25.0}/arekit/common/data/input/providers/label/base.py +0 -0
  290. {arekit-0.23.1 → arekit-0.25.0}/arekit/common/data/input/providers/label/binary.py +0 -0
  291. {arekit-0.23.1 → arekit-0.25.0}/arekit/common/data/input/providers/label/multiple.py +0 -0
  292. {arekit-0.23.1 → arekit-0.25.0}/arekit/common/data/input/providers/rows/__init__.py +0 -0
  293. {arekit-0.23.1 → arekit-0.25.0}/arekit/common/data/input/providers/sample/__init__.py +0 -0
  294. {arekit-0.23.1 → arekit-0.25.0}/arekit/common/data/input/providers/text/__init__.py +0 -0
  295. {arekit-0.23.1 → arekit-0.25.0}/arekit/common/data/input/providers/text/single.py +0 -0
  296. {arekit-0.23.1 → arekit-0.25.0}/arekit/common/data/input/repositories/__init__.py +0 -0
  297. {arekit-0.23.1 → arekit-0.25.0}/arekit/common/data/input/repositories/base.py +0 -0
  298. {arekit-0.23.1 → arekit-0.25.0}/arekit/common/data/input/repositories/sample.py +0 -0
  299. {arekit-0.23.1 → arekit-0.25.0}/arekit/common/data/input/terms_mapper.py +0 -0
  300. {arekit-0.23.1/arekit/common/data/row_ids → arekit-0.25.0/arekit/common/data/storages}/__init__.py +0 -0
  301. {arekit-0.23.1/arekit/common/data/storages → arekit-0.25.0/arekit/common/data/views}/__init__.py +0 -0
  302. {arekit-0.23.1/arekit/common/data/views → arekit-0.25.0/arekit/common/docs}/__init__.py +0 -0
  303. {arekit-0.23.1/arekit/common/entities → arekit-0.25.0/arekit/common/docs/parsed}/__init__.py +0 -0
  304. {arekit-0.23.1/arekit/common/experiment → arekit-0.25.0/arekit/common/docs/parsed/providers}/__init__.py +0 -0
  305. {arekit-0.23.1/arekit/common/news → arekit-0.25.0/arekit/common/docs}/parsed/term_position.py +0 -0
  306. {arekit-0.23.1/arekit/common/experiment/api → arekit-0.25.0/arekit/common/entities}/__init__.py +0 -0
  307. {arekit-0.23.1 → arekit-0.25.0}/arekit/common/entities/collection.py +0 -0
  308. {arekit-0.23.1 → arekit-0.25.0}/arekit/common/entities/str_fmt.py +0 -0
  309. {arekit-0.23.1 → arekit-0.25.0}/arekit/common/entities/types.py +0 -0
  310. {arekit-0.23.1/arekit/common/folding → arekit-0.25.0/arekit/common/experiment}/__init__.py +0 -0
  311. {arekit-0.23.1/arekit/common/frames → arekit-0.25.0/arekit/common/experiment/api}/__init__.py +0 -0
  312. {arekit-0.23.1 → arekit-0.25.0}/arekit/common/experiment/data_type.py +0 -0
  313. {arekit-0.23.1/arekit/common/frames/connotations → arekit-0.25.0/arekit/common/frames}/__init__.py +0 -0
  314. {arekit-0.23.1/arekit/common/frames/variants → arekit-0.25.0/arekit/common/frames/connotations}/__init__.py +0 -0
  315. {arekit-0.23.1 → arekit-0.25.0}/arekit/common/frames/connotations/descriptor.py +0 -0
  316. {arekit-0.23.1 → arekit-0.25.0}/arekit/common/frames/connotations/provider.py +0 -0
  317. {arekit-0.23.1 → arekit-0.25.0}/arekit/common/frames/text_variant.py +0 -0
  318. {arekit-0.23.1/arekit/common/labels → arekit-0.25.0/arekit/common/frames/variants}/__init__.py +0 -0
  319. {arekit-0.23.1 → arekit-0.25.0}/arekit/common/frames/variants/base.py +0 -0
  320. {arekit-0.23.1/arekit/common/labels/provider → arekit-0.25.0/arekit/common/labels}/__init__.py +0 -0
  321. {arekit-0.23.1 → arekit-0.25.0}/arekit/common/labels/base.py +0 -0
  322. {arekit-0.23.1/arekit/common/labels/scaler → arekit-0.25.0/arekit/common/labels/provider}/__init__.py +0 -0
  323. {arekit-0.23.1 → arekit-0.25.0}/arekit/common/labels/provider/base.py +0 -0
  324. {arekit-0.23.1 → arekit-0.25.0}/arekit/common/labels/provider/constant.py +0 -0
  325. {arekit-0.23.1/arekit/common/linkage → arekit-0.25.0/arekit/common/labels/scaler}/__init__.py +0 -0
  326. {arekit-0.23.1 → arekit-0.25.0}/arekit/common/labels/scaler/base.py +0 -0
  327. {arekit-0.23.1 → arekit-0.25.0}/arekit/common/labels/scaler/sentiment.py +0 -0
  328. {arekit-0.23.1 → arekit-0.25.0}/arekit/common/labels/scaler/single.py +0 -0
  329. {arekit-0.23.1 → arekit-0.25.0}/arekit/common/labels/str_fmt.py +0 -0
  330. {arekit-0.23.1/arekit/common/model → arekit-0.25.0/arekit/common/linkage}/__init__.py +0 -0
  331. {arekit-0.23.1 → arekit-0.25.0}/arekit/common/log_utils.py +0 -0
  332. {arekit-0.23.1/arekit/common/model/labeling → arekit-0.25.0/arekit/common/model}/__init__.py +0 -0
  333. {arekit-0.23.1/arekit/common/news → arekit-0.25.0/arekit/common/model/labeling}/__init__.py +0 -0
  334. {arekit-0.23.1 → arekit-0.25.0}/arekit/common/model/labeling/base.py +0 -0
  335. {arekit-0.23.1 → arekit-0.25.0}/arekit/common/model/labeling/modes.py +0 -0
  336. {arekit-0.23.1 → arekit-0.25.0}/arekit/common/model/labeling/single.py +0 -0
  337. {arekit-0.23.1/arekit/common/news/parsed → arekit-0.25.0/arekit/common/opinions}/__init__.py +0 -0
  338. {arekit-0.23.1/arekit/common/news/parsed/providers → arekit-0.25.0/arekit/common/opinions/annot}/__init__.py +0 -0
  339. {arekit-0.23.1/arekit/common/opinions → arekit-0.25.0/arekit/common/opinions/annot/algo}/__init__.py +0 -0
  340. {arekit-0.23.1 → arekit-0.25.0}/arekit/common/opinions/enums.py +0 -0
  341. {arekit-0.23.1 → arekit-0.25.0}/arekit/common/opinions/provider.py +0 -0
  342. {arekit-0.23.1 → arekit-0.25.0}/arekit/common/opinions/writer.py +0 -0
  343. {arekit-0.23.1/arekit/common/opinions/annot → arekit-0.25.0/arekit/common/pipeline}/__init__.py +0 -0
  344. {arekit-0.23.1 → arekit-0.25.0}/arekit/common/pipeline/conts.py +0 -0
  345. {arekit-0.23.1/arekit/common/opinions/annot/algo → arekit-0.25.0/arekit/common/pipeline/items}/__init__.py +0 -0
  346. {arekit-0.23.1/arekit/common/pipeline → arekit-0.25.0/arekit/common/service}/__init__.py +0 -0
  347. {arekit-0.23.1/arekit/common/pipeline/items → arekit-0.25.0/arekit/common/synonyms}/__init__.py +0 -0
  348. {arekit-0.23.1 → arekit-0.25.0}/arekit/common/synonyms/grouping.py +0 -0
  349. {arekit-0.23.1/arekit/common/synonyms → arekit-0.25.0/arekit/common/text}/__init__.py +0 -0
  350. {arekit-0.23.1 → arekit-0.25.0}/arekit/common/text/enums.py +0 -0
  351. {arekit-0.23.1 → arekit-0.25.0}/arekit/common/text/parsed.py +0 -0
  352. {arekit-0.23.1 → arekit-0.25.0}/arekit/common/text/stemmer.py +0 -0
  353. {arekit-0.23.1/arekit/common/text → arekit-0.25.0/arekit/common/text_opinions}/__init__.py +0 -0
  354. {arekit-0.23.1/arekit/common/text/partitioning → arekit-0.25.0/arekit/contrib}/__init__.py +0 -0
  355. {arekit-0.23.1/arekit/common/text_opinions → arekit-0.25.0/arekit/contrib/bert}/__init__.py +0 -0
  356. {arekit-0.23.1/arekit/contrib → arekit-0.25.0/arekit/contrib/bert/input}/__init__.py +0 -0
  357. {arekit-0.23.1/arekit/contrib/bert → arekit-0.25.0/arekit/contrib/bert/input/providers}/__init__.py +0 -0
  358. {arekit-0.23.1 → arekit-0.25.0}/arekit/contrib/bert/input/providers/cropped_sample.py +0 -0
  359. {arekit-0.23.1 → arekit-0.25.0}/arekit/contrib/bert/input/providers/text_pair.py +0 -0
  360. {arekit-0.23.1/arekit/contrib/bert/input → arekit-0.25.0/arekit/contrib/bert/terms}/__init__.py +0 -0
  361. {arekit-0.23.1 → arekit-0.25.0}/arekit/contrib/bert/terms/mapper.py +0 -0
  362. {arekit-0.23.1/arekit/contrib/bert/input/providers → arekit-0.25.0/arekit/contrib/networks}/__init__.py +0 -0
  363. {arekit-0.23.1/arekit/contrib/bert/terms → arekit-0.25.0/arekit/contrib/networks/input}/__init__.py +0 -0
  364. {arekit-0.23.1 → arekit-0.25.0}/arekit/contrib/networks/input/ctx_serialization.py +0 -0
  365. {arekit-0.23.1/arekit/contrib/networks → arekit-0.25.0/arekit/contrib/networks/input/embedding}/__init__.py +0 -0
  366. {arekit-0.23.1 → arekit-0.25.0}/arekit/contrib/networks/input/embedding/matrix.py +0 -0
  367. {arekit-0.23.1 → arekit-0.25.0}/arekit/contrib/networks/input/embedding/offsets.py +0 -0
  368. {arekit-0.23.1/arekit/contrib/networks/input → arekit-0.25.0/arekit/contrib/networks/input/formatters}/__init__.py +0 -0
  369. {arekit-0.23.1 → arekit-0.25.0}/arekit/contrib/networks/input/formatters/pos_mapper.py +0 -0
  370. {arekit-0.23.1/arekit/contrib/networks/input/embedding → arekit-0.25.0/arekit/contrib/networks/input/providers}/__init__.py +0 -0
  371. {arekit-0.23.1 → arekit-0.25.0}/arekit/contrib/networks/input/providers/term_connotation.py +0 -0
  372. {arekit-0.23.1 → arekit-0.25.0}/arekit/contrib/networks/input/providers/text.py +0 -0
  373. {arekit-0.23.1 → arekit-0.25.0}/arekit/contrib/networks/input/term_types.py +0 -0
  374. {arekit-0.23.1 → arekit-0.25.0}/arekit/contrib/networks/input/terms_mapping.py +0 -0
  375. {arekit-0.23.1 → arekit-0.25.0}/arekit/contrib/networks/vectorizer.py +0 -0
  376. {arekit-0.23.1/arekit/contrib/networks/input/formatters → arekit-0.25.0/arekit/contrib/prompt}/__init__.py +0 -0
  377. {arekit-0.23.1/arekit/contrib/networks/input/providers → arekit-0.25.0/arekit/contrib/utils}/__init__.py +0 -0
  378. {arekit-0.23.1/arekit/contrib/prompt → arekit-0.25.0/arekit/contrib/utils/bert}/__init__.py +0 -0
  379. {arekit-0.23.1 → arekit-0.25.0}/arekit/contrib/utils/bert/samplers.py +0 -0
  380. {arekit-0.23.1/arekit/contrib/source → arekit-0.25.0/arekit/contrib/utils/data}/__init__.py +0 -0
  381. {arekit-0.23.1/arekit/contrib/source/brat → arekit-0.25.0/arekit/contrib/utils/data/contents}/__init__.py +0 -0
  382. {arekit-0.23.1/arekit/contrib/source/brat/entities → arekit-0.25.0/arekit/contrib/utils/data/doc_provider}/__init__.py +0 -0
  383. {arekit-0.23.1/arekit/contrib/source/brat/opinions → arekit-0.25.0/arekit/contrib/utils/data/readers}/__init__.py +0 -0
  384. {arekit-0.23.1/arekit/contrib/source/nerel → arekit-0.25.0/arekit/contrib/utils/data/service}/__init__.py +0 -0
  385. {arekit-0.23.1/arekit/contrib/source/nerel/folding → arekit-0.25.0/arekit/contrib/utils/data/storages}/__init__.py +0 -0
  386. {arekit-0.23.1 → arekit-0.25.0}/arekit/contrib/utils/data/storages/jsonl_based.py +0 -0
  387. {arekit-0.23.1/arekit/contrib/source/nerelbio → arekit-0.25.0/arekit/contrib/utils/data/writers}/__init__.py +0 -0
  388. {arekit-0.23.1/arekit/contrib/source/ruattitudes → arekit-0.25.0/arekit/contrib/utils/embeddings}/__init__.py +0 -0
  389. {arekit-0.23.1 → arekit-0.25.0}/arekit/contrib/utils/embeddings/rusvectores.py +0 -0
  390. {arekit-0.23.1 → arekit-0.25.0}/arekit/contrib/utils/embeddings/tokens.py +0 -0
  391. {arekit-0.23.1/arekit/contrib/source/ruattitudes/entity → arekit-0.25.0/arekit/contrib/utils/entities}/__init__.py +0 -0
  392. {arekit-0.23.1 → arekit-0.25.0}/arekit/contrib/utils/entities/filter.py +0 -0
  393. {arekit-0.23.1/arekit/contrib/source/ruattitudes/opinions → arekit-0.25.0/arekit/contrib/utils/entities/formatters}/__init__.py +0 -0
  394. {arekit-0.23.1 → arekit-0.25.0}/arekit/contrib/utils/entities/formatters/str_display.py +0 -0
  395. {arekit-0.23.1 → arekit-0.25.0}/arekit/contrib/utils/entities/formatters/str_simple_sharp_prefixed_fmt.py +0 -0
  396. {arekit-0.23.1/arekit/contrib/source/rusentiframes → arekit-0.25.0/arekit/contrib/utils/io_utils}/__init__.py +0 -0
  397. {arekit-0.23.1/arekit/contrib/source/rusentrel → arekit-0.25.0/arekit/contrib/utils/np_utils}/__init__.py +0 -0
  398. {arekit-0.23.1 → arekit-0.25.0}/arekit/contrib/utils/np_utils/embedding.py +0 -0
  399. {arekit-0.23.1 → arekit-0.25.0}/arekit/contrib/utils/np_utils/npz_utils.py +0 -0
  400. {arekit-0.23.1 → arekit-0.25.0}/arekit/contrib/utils/np_utils/vocab.py +0 -0
  401. {arekit-0.23.1/arekit/contrib/source/rusentrel/opinions → arekit-0.25.0/arekit/contrib/utils/pipelines}/__init__.py +0 -0
  402. {arekit-0.23.1/arekit/contrib/source/sentinerel → arekit-0.25.0/arekit/contrib/utils/pipelines/items}/__init__.py +0 -0
  403. {arekit-0.23.1/arekit/contrib/source/sentinerel/folding → arekit-0.25.0/arekit/contrib/utils/pipelines/items/sampling}/__init__.py +0 -0
  404. {arekit-0.23.1/arekit/contrib/source/synonyms → arekit-0.25.0/arekit/contrib/utils/pipelines/items/text}/__init__.py +0 -0
  405. {arekit-0.23.1/arekit/contrib/utils → arekit-0.25.0/arekit/contrib/utils/pipelines/text_opinion}/__init__.py +0 -0
  406. {arekit-0.23.1/arekit/contrib/utils/bert → arekit-0.25.0/arekit/contrib/utils/pipelines/text_opinion/annot}/__init__.py +0 -0
  407. {arekit-0.23.1/arekit/contrib/utils/connotations → arekit-0.25.0/arekit/contrib/utils/pipelines/text_opinion/filters}/__init__.py +0 -0
  408. {arekit-0.23.1/arekit/contrib/utils/cv → arekit-0.25.0/arekit/contrib/utils/processing}/__init__.py +0 -0
  409. {arekit-0.23.1/arekit/contrib/utils/cv/doc_stat → arekit-0.25.0/arekit/contrib/utils/processing/languages}/__init__.py +0 -0
  410. {arekit-0.23.1 → arekit-0.25.0}/arekit/contrib/utils/processing/languages/mods.py +0 -0
  411. {arekit-0.23.1 → arekit-0.25.0}/arekit/contrib/utils/processing/languages/pos.py +0 -0
  412. {arekit-0.23.1/arekit/contrib/utils/cv/splitters → arekit-0.25.0/arekit/contrib/utils/processing/languages/ru}/__init__.py +0 -0
  413. {arekit-0.23.1 → arekit-0.25.0}/arekit/contrib/utils/processing/languages/ru/cases.py +0 -0
  414. {arekit-0.23.1 → arekit-0.25.0}/arekit/contrib/utils/processing/languages/ru/constants.py +0 -0
  415. {arekit-0.23.1 → arekit-0.25.0}/arekit/contrib/utils/processing/languages/ru/mods.py +0 -0
  416. {arekit-0.23.1 → arekit-0.25.0}/arekit/contrib/utils/processing/languages/ru/number.py +0 -0
  417. {arekit-0.23.1 → arekit-0.25.0}/arekit/contrib/utils/processing/languages/ru/pos_service.py +0 -0
  418. {arekit-0.23.1/arekit/contrib/utils/data → arekit-0.25.0/arekit/contrib/utils/processing/lemmatization}/__init__.py +0 -0
  419. {arekit-0.23.1 → arekit-0.25.0}/arekit/contrib/utils/processing/lemmatization/mystem.py +0 -0
  420. {arekit-0.23.1/arekit/contrib/utils/data/contents → arekit-0.25.0/arekit/contrib/utils/processing/pos}/__init__.py +0 -0
  421. {arekit-0.23.1 → arekit-0.25.0}/arekit/contrib/utils/processing/pos/base.py +0 -0
  422. {arekit-0.23.1 → arekit-0.25.0}/arekit/contrib/utils/processing/pos/mystem_wrap.py +0 -0
  423. {arekit-0.23.1 → arekit-0.25.0}/arekit/contrib/utils/processing/pos/russian.py +0 -0
  424. {arekit-0.23.1/arekit/contrib/utils/data/doc_ops → arekit-0.25.0/arekit/contrib/utils/processing/text}/__init__.py +0 -0
  425. {arekit-0.23.1 → arekit-0.25.0}/arekit/contrib/utils/processing/text/tokens.py +0 -0
  426. {arekit-0.23.1/arekit/contrib/utils/data/readers → arekit-0.25.0/arekit/contrib/utils/synonyms}/__init__.py +0 -0
  427. {arekit-0.23.1 → arekit-0.25.0}/arekit/contrib/utils/synonyms/simple.py +0 -0
  428. {arekit-0.23.1 → arekit-0.25.0}/arekit/contrib/utils/synonyms/stemmer_based.py +0 -0
  429. {arekit-0.23.1/arekit/contrib/utils/data/service → arekit-0.25.0/arekit/contrib/utils/vectorizers}/__init__.py +0 -0
  430. {arekit-0.23.1 → arekit-0.25.0}/arekit/contrib/utils/vectorizers/bpe.py +0 -0
  431. {arekit-0.23.1 → arekit-0.25.0}/arekit/contrib/utils/vectorizers/random_norm.py +0 -0
  432. {arekit-0.23.1 → arekit-0.25.0}/arekit.egg-info/dependency_links.txt +0 -0
  433. {arekit-0.23.1 → arekit-0.25.0}/arekit.egg-info/requires.txt +0 -0
  434. {arekit-0.23.1 → arekit-0.25.0}/arekit.egg-info/top_level.txt +0 -0
  435. {arekit-0.23.1 → arekit-0.25.0}/setup.cfg +0 -0
arekit-0.25.0/PKG-INFO ADDED
@@ -0,0 +1,82 @@
1
+ Metadata-Version: 2.1
2
+ Name: arekit
3
+ Version: 0.25.0
4
+ Summary: Document level Attitude and Relation Extraction toolkit (AREkit) for sampling and prompting mass-media news into datasets for ML-model training
5
+ Home-page: https://github.com/nicolay-r/AREkit
6
+ Author: Nicolay Rusnachenko
7
+ Author-email: rusnicolay@gmail.com
8
+ License: MIT License
9
+ Keywords: natural language processing,relation extraction,sentiment analysis
10
+ Classifier: Programming Language :: Python
11
+ Classifier: Programming Language :: Python :: 3.6
12
+ Classifier: Topic :: Software Development :: Libraries :: Python Modules
13
+ Classifier: Topic :: Scientific/Engineering :: Information Analysis
14
+ Classifier: Topic :: Text Processing :: Linguistic
15
+ Requires-Python: >=3.6
16
+ Description-Content-Type: text/markdown
17
+ License-File: LICENSE
18
+ Requires-Dist: tqdm
19
+ Requires-Dist: enum34==1.1.10
20
+ Requires-Dist: numpy>=1.14.5
21
+ Requires-Dist: pymystem3==0.2.0
22
+
23
+ # AREkit 0.25.0
24
+
25
+ ![](https://img.shields.io/badge/Python-3.9+-brightgreen.svg)
26
+
27
+ <p align="center">
28
+ <img src="logo.png"/>
29
+ </p>
30
+
31
+ **AREkit** (Attitude and Relation Extraction Toolkit) --
32
+ is a python toolkit, devoted to document level Attitude and Relation Extraction between text objects from mass-media news.
33
+
34
+ ## Description
35
+
36
+
37
+ This toolkit aims at memory-effective data processing in Relation Extraction (RE) related tasks.
38
+
39
+ <p align="center">
40
+ <img src="docs/arekit-pipeline-concept.png"/>
41
+ </p>
42
+
43
+ > Figure: AREkit pipelines design. More on
44
+ > **[ARElight: Context Sampling of Large Texts for Deep Learning Relation Extraction](https://link.springer.com/chapter/10.1007/978-3-031-56069-9_23)** paper
45
+
46
+ In particular, this framework serves the following features:
47
+ * ➿ [pipelines](https://github.com/nicolay-r/AREkit/wiki/Pipelines:-Text-Opinion-Annotation) and iterators for handling large-scale collections serialization without out-of-memory issues.
48
+ * 🔗 EL (entity-linking) API support for objects,
49
+ * ➰ avoidance of cyclic connections,
50
+ * :straight_ruler: distance consideration between relation participants (in `terms` or `sentences`),
51
+ * 📑 relations annotations and filtering rules,
52
+ * *️⃣ entities formatting or masking, and more.
53
+
54
+ The core functionality includes:
55
+ * API for document presentation with EL (Entity Linking, i.e. Object Synonymy) support
56
+ for sentence level relations preparation (dubbed as contexts);
57
+ * API for contexts extraction;
58
+ * Relations transferring from sentence-level onto document-level, and more.
59
+
60
+ ## Installation
61
+
62
+ ```bash
63
+ pip install git+https://github.com/nicolay-r/AREkit.git@0.25.0-rc
64
+ ```
65
+
66
+ ## Usage
67
+
68
+ Please follow the **[tutorial section on project Wiki](https://github.com/nicolay-r/AREkit/wiki/Tutorials)** for more details.
69
+
70
+ ## How to cite
71
+ Great research is also accompanied by a faithful reference.
72
+ If you use or extend our work, please cite as follows:
73
+
74
+ ```bibtex
75
+ @inproceedings{rusnachenko2024arelight,
76
+ title={ARElight: Context Sampling of Large Texts for Deep Learning Relation Extraction},
77
+ author={Rusnachenko, Nicolay and Liang, Huizhi and Kolomeets, Maxim and Shi, Lei},
78
+ booktitle={European Conference on Information Retrieval},
79
+ year={2024},
80
+ organization={Springer}
81
+ }
82
+ ```
@@ -0,0 +1,60 @@
1
+ # AREkit 0.25.0
2
+
3
+ ![](https://img.shields.io/badge/Python-3.9+-brightgreen.svg)
4
+
5
+ <p align="center">
6
+ <img src="logo.png"/>
7
+ </p>
8
+
9
+ **AREkit** (Attitude and Relation Extraction Toolkit) --
10
+ is a python toolkit, devoted to document level Attitude and Relation Extraction between text objects from mass-media news.
11
+
12
+ ## Description
13
+
14
+
15
+ This toolkit aims at memory-effective data processing in Relation Extraction (RE) related tasks.
16
+
17
+ <p align="center">
18
+ <img src="docs/arekit-pipeline-concept.png"/>
19
+ </p>
20
+
21
+ > Figure: AREkit pipelines design. More on
22
+ > **[ARElight: Context Sampling of Large Texts for Deep Learning Relation Extraction](https://link.springer.com/chapter/10.1007/978-3-031-56069-9_23)** paper
23
+
24
+ In particular, this framework serves the following features:
25
+ * ➿ [pipelines](https://github.com/nicolay-r/AREkit/wiki/Pipelines:-Text-Opinion-Annotation) and iterators for handling large-scale collections serialization without out-of-memory issues.
26
+ * 🔗 EL (entity-linking) API support for objects,
27
+ * ➰ avoidance of cyclic connections,
28
+ * :straight_ruler: distance consideration between relation participants (in `terms` or `sentences`),
29
+ * 📑 relations annotations and filtering rules,
30
+ * *️⃣ entities formatting or masking, and more.
31
+
32
+ The core functionality includes:
33
+ * API for document presentation with EL (Entity Linking, i.e. Object Synonymy) support
34
+ for sentence level relations preparation (dubbed as contexts);
35
+ * API for contexts extraction;
36
+ * Relations transferring from sentence-level onto document-level, and more.
37
+
38
+ ## Installation
39
+
40
+ ```bash
41
+ pip install git+https://github.com/nicolay-r/AREkit.git@0.25.0-rc
42
+ ```
43
+
44
+ ## Usage
45
+
46
+ Please follow the **[tutorial section on project Wiki](https://github.com/nicolay-r/AREkit/wiki/Tutorials)** for more details.
47
+
48
+ ## How to cite
49
+ Great research is also accompanied by a faithful reference.
50
+ If you use or extend our work, please cite as follows:
51
+
52
+ ```bibtex
53
+ @inproceedings{rusnachenko2024arelight,
54
+ title={ARElight: Context Sampling of Large Texts for Deep Learning Relation Extraction},
55
+ author={Rusnachenko, Nicolay and Liang, Huizhi and Kolomeets, Maxim and Shi, Lei},
56
+ booktitle={European Conference on Information Retrieval},
57
+ year={2024},
58
+ organization={Springer}
59
+ }
60
+ ```
@@ -1,4 +1,4 @@
1
- import collections
1
+ from collections.abc import Iterable
2
2
 
3
3
  from arekit.common.context.token import Token
4
4
  from arekit.common.entities.base import Entity
@@ -10,7 +10,7 @@ class TextTermsMapper(object):
10
10
  def iter_mapped(self, terms):
11
11
  """ Performs mapping operation of each terms in a sequence
12
12
  """
13
- assert(isinstance(terms, collections.Iterable))
13
+ assert(isinstance(terms, Iterable))
14
14
 
15
15
  self._before_mapping()
16
16
 
@@ -1,11 +1,12 @@
1
1
  ID = 'id'
2
2
  DOC_ID = 'doc_id'
3
3
  TEXT = 'text_a'
4
- LABEL = 'label'
4
+ LABEL_UINT = 'label_uint'
5
+ LABEL_STR = 'label_str'
5
6
 
6
- # Corresponds to fields with attitude ends. (values, STRING)
7
- SOURCE = 'source'
8
- TARGET = 'target'
7
+ # Global identifier of the opinion in the sampled data.
8
+ OPINION_ID = "opinion_id"
9
+ OPINION_LINKAGE_ID = "linkage_id"
9
10
 
10
11
  # Corresponds to fields with attitude ends. (indices, INT)
11
12
  S_IND = 's_ind'
@@ -1,4 +1,4 @@
1
- class DocumentOperations(object):
1
+ class DocumentProvider(object):
2
2
  """ Provides operations with documents
3
3
  """
4
4
 
@@ -37,7 +37,8 @@ class SampleColumnsProvider(BaseColumnsProvider):
37
37
 
38
38
  # insert labels
39
39
  if self.__store_labels:
40
- dtypes_list.append((const.LABEL, 'int32'))
40
+ dtypes_list.append((const.LABEL_UINT, 'int32'))
41
+ dtypes_list.append((const.LABEL_STR, str))
41
42
 
42
43
  # insert text columns
43
44
  for col_name in self.__text_column_names:
@@ -47,6 +48,10 @@ class SampleColumnsProvider(BaseColumnsProvider):
47
48
  dtypes_list.append((const.S_IND, 'int32'))
48
49
  dtypes_list.append((const.T_IND, 'int32'))
49
50
 
51
+ # opinion-extraction task related fields
52
+ dtypes_list.append((const.OPINION_ID, 'int32'))
53
+ dtypes_list.append((const.OPINION_LINKAGE_ID, 'int32'))
54
+
50
55
  return dtypes_list
51
56
 
52
57
  def set_text_column_names(self, text_column_names):
@@ -11,4 +11,4 @@ class BaseLinkedDataInstancesProvider(object):
11
11
  """ Implementation based on the first element of the linkage.
12
12
  """
13
13
  assert(isinstance(linked_data, LinkedDataWrapper))
14
- return linked_data.First.Sentiment
14
+ return linked_data.First.Label
@@ -0,0 +1,64 @@
1
+ from collections import Counter
2
+ from collections.abc import Iterable
3
+ import logging
4
+
5
+ from arekit.common.data.input.providers.contents import ContentsProvider
6
+ from arekit.common.linkage.base import LinkedDataWrapper
7
+ from arekit.common.docs.parsed.providers.entity_service import EntityServiceProvider
8
+ from arekit.common.linkage.meta import MetaEmptyLinkedDataWrapper
9
+
10
+ logger = logging.getLogger(__name__)
11
+
12
+
13
+ class BaseRowProvider(object):
14
+ """ Base provider for rows that suppose to be filled into BaseRowsStorage.
15
+ """
16
+
17
+ def __init__(self):
18
+ self.__rows_counter = None
19
+
20
+ # region protected methods
21
+
22
+ # TODO. This might be also generalized.
23
+ # TODO. Idle-mode is also an implementation- and task-specific parameter, i.e. might be removed from here.
24
+ def _provide_rows(self, parsed_doc, entity_service, text_opinion_linkage, idle_mode):
25
+ raise NotImplementedError()
26
+
27
+ def _count_row(self):
28
+ index = self.__rows_counter["rows_iterated"]
29
+ self.__rows_counter["rows_iterated"] += 1
30
+ return index
31
+
32
+ # endregion
33
+
34
+ def __iter_rows(self, linked_data, idle_mode):
35
+ parsed_doc_service = linked_data.Tag
36
+ return self._provide_rows(parsed_doc=parsed_doc_service.ParsedDocument,
37
+ entity_service=parsed_doc_service.get_provider(EntityServiceProvider.NAME),
38
+ text_opinion_linkage=linked_data,
39
+ idle_mode=idle_mode)
40
+
41
+ def iter_by_rows(self, contents_provider, doc_ids_iter, idle_mode):
42
+ assert(isinstance(contents_provider, ContentsProvider))
43
+ assert(isinstance(doc_ids_iter, Iterable))
44
+
45
+ self.__rows_counter = Counter()
46
+
47
+ for linked_data in contents_provider.from_doc_ids(doc_ids=doc_ids_iter, idle_mode=idle_mode):
48
+ assert(isinstance(linked_data, LinkedDataWrapper))
49
+
50
+ if isinstance(linked_data, MetaEmptyLinkedDataWrapper):
51
+ if idle_mode:
52
+ # In the case of the IDLE mode we do not consider the meta-data.
53
+ data_it = []
54
+ else:
55
+ # Consider the actual linked data instance.
56
+ data_it = [linked_data]
57
+ else:
58
+ # Consider the actual rows of the related linked data.
59
+ data_it = self.__iter_rows(linked_data=linked_data, idle_mode=idle_mode)
60
+
61
+ for data in data_it:
62
+ yield linked_data.RelatedDocID, data
63
+
64
+ self.__rows_counter = None
@@ -8,15 +8,14 @@ from arekit.common.data.input.providers.label.binary import BinaryLabelProvider
8
8
  from arekit.common.data.input.providers.label.multiple import MultipleLabelProvider
9
9
  from arekit.common.data.input.providers.rows.base import BaseRowProvider
10
10
  from arekit.common.data.input.providers.text.single import BaseSingleTextProvider
11
- from arekit.common.data.row_ids.binary import BinaryIDProvider
12
- from arekit.common.data.row_ids.multiple import MultipleIDProvider
11
+ from arekit.common.data.rows_fmt import create_base_column_fmt
13
12
  from arekit.common.entities.base import Entity
14
13
  from arekit.common.labels.base import Label
15
14
 
16
15
  from arekit.common.linkage.text_opinions import TextOpinionsLinkage
17
- from arekit.common.news.parsed.base import ParsedNews
18
- from arekit.common.news.parsed.providers.entity_service import EntityEndType, EntityServiceProvider
19
- from arekit.common.news.parsed.term_position import TermPositionTypes
16
+ from arekit.common.docs.parsed.base import ParsedDocument
17
+ from arekit.common.docs.parsed.providers.entity_service import EntityEndType, EntityServiceProvider
18
+ from arekit.common.docs.parsed.term_position import TermPositionTypes
20
19
  from arekit.common.text_opinions.base import TextOpinion
21
20
 
22
21
 
@@ -34,9 +33,9 @@ class BaseSampleRowProvider(BaseRowProvider):
34
33
 
35
34
  self._label_provider = label_provider
36
35
  self.__text_provider = text_provider
37
- self.__row_ids_provider = self.__create_row_ids_provider(label_provider)
38
36
  self.__instances_provider = self.__create_instances_provider(label_provider)
39
37
  self.__store_labels = None
38
+ self._val_fmt = create_base_column_fmt(fmt_type="writer")
40
39
 
41
40
  # region properties
42
41
 
@@ -52,56 +51,67 @@ class BaseSampleRowProvider(BaseRowProvider):
52
51
 
53
52
  # region protected methods
54
53
 
55
- def _provide_sentence_terms(self, parsed_news, sentence_ind, s_ind, t_ind):
56
- terms_iter = parsed_news.iter_sentence_terms(sentence_index=sentence_ind, return_id=False)
54
+ def _provide_sentence_terms(self, parsed_doc, sentence_ind, s_ind, t_ind):
55
+ terms_iter = parsed_doc.iter_sentence_terms(sentence_index=sentence_ind, return_id=False)
57
56
  return list(terms_iter), s_ind, t_ind
58
57
 
59
58
  # TODO. This is a very task-specific description, too many data provided.
60
59
  # TODO. Switch this API to dict of params
61
60
  def _fill_row_core(self, row, text_opinion_linkage, index_in_linked, etalon_label,
62
- parsed_news, sentence_ind, s_ind, t_ind):
61
+ parsed_doc, sentence_ind, s_ind, t_ind):
63
62
  assert(isinstance(self.__store_labels, bool))
64
63
 
65
- def __assign_value(column, value):
66
- row[column] = value
67
-
68
- row[const.ID] = self.__row_ids_provider.create_sample_id(
69
- linked_opinions=text_opinion_linkage,
70
- index_in_linked=index_in_linked,
71
- label_scaler=self._label_provider.LabelScaler)
64
+ sentence_terms, actual_s_ind, actual_t_ind = self._provide_sentence_terms(
65
+ parsed_doc=parsed_doc, sentence_ind=sentence_ind, s_ind=s_ind, t_ind=t_ind)
72
66
 
73
- row[const.DOC_ID] = text_opinion_linkage.First.DocID
67
+ # Entity indices from the related context.
68
+ entities = list(filter(lambda term: isinstance(term, Entity), sentence_terms))
74
69
 
75
- row[const.SENT_IND] = sentence_ind
70
+ # Values mapping.
71
+ vm = {
72
+ const.ID: self._count_row(),
73
+ const.OPINION_ID: text_opinion_linkage.First.TextOpinionID,
74
+ const.OPINION_LINKAGE_ID: index_in_linked,
75
+ const.DOC_ID: text_opinion_linkage.First.DocID,
76
+ const.SENT_IND: sentence_ind,
77
+ const.ENTITY_VALUES: entities,
78
+ const.ENTITY_TYPES: entities,
79
+ const.ENTITIES: [str(i) for i, t in enumerate(sentence_terms) if isinstance(t, Entity)],
80
+ const.S_IND: actual_s_ind,
81
+ const.T_IND: actual_t_ind,
82
+ const.LABEL_UINT: None,
83
+ const.LABEL_STR: None
84
+ }
85
+
86
+ # Compose text value.
87
+ def __assign_value(column, value):
88
+ vm[column] = value
76
89
 
77
90
  expected_label = text_opinion_linkage.get_linked_label()
78
91
 
79
- if self.__store_labels:
80
- row[const.LABEL] = self._label_provider.calculate_output_uint_label(
81
- expected_uint_label=self._label_provider.LabelScaler.label_to_uint(expected_label),
82
- etalon_uint_label=self._label_provider.LabelScaler.label_to_uint(etalon_label))
83
-
84
- sentence_terms, actual_s_ind, actual_t_ind = self._provide_sentence_terms(
85
- parsed_news=parsed_news, sentence_ind=sentence_ind, s_ind=s_ind, t_ind=t_ind)
86
-
87
92
  self.__text_provider.add_text_in_row(
88
- set_text_func=lambda column, value: __assign_value(column, value),
89
- sentence_terms=sentence_terms,
90
- s_ind=actual_s_ind,
91
- t_ind=actual_t_ind,
93
+ set_text_func=__assign_value, sentence_terms=sentence_terms,
94
+ s_ind=actual_s_ind, t_ind=actual_t_ind,
92
95
  expected_label=expected_label)
93
96
 
94
- # Entity indicies from the related context.
95
- entities = list(filter(lambda term: isinstance(term, Entity), sentence_terms))
96
- entity_inds = [str(i) for i, t in enumerate(sentence_terms) if isinstance(t, Entity)]
97
- row[const.ENTITY_VALUES] = ",".join([e.DisplayValue.replace(',', '') for e in entities])
98
- row[const.ENTITY_TYPES] = ",".join([e.Type.replace(',', '') for e in entities])
99
- row[const.ENTITIES] = ",".join(entity_inds)
97
+ if self.__store_labels:
98
+ l2i = self._label_provider.LabelScaler.label_to_uint
99
+ ui2l = self._label_provider.LabelScaler.uint_to_label
100
+ uint_label = self._label_provider.calculate_output_uint_label(
101
+ expected_uint_label=l2i(expected_label), etalon_uint_label=l2i(etalon_label))
102
+ vm[const.LABEL_UINT] = uint_label
103
+ vm[const.LABEL_STR] = type(ui2l(uint_label)).__name__
100
104
 
101
- row[const.S_IND] = actual_s_ind
102
- row[const.T_IND] = actual_t_ind
105
+ self._apply_row_data(row=row, vm=vm, val_fmt=self._val_fmt)
106
+
107
+ @staticmethod
108
+ def _apply_row_data(row, vm, val_fmt):
109
+ for k, v in vm.items():
110
+ if v is None:
111
+ continue
112
+ row[k] = v if k not in val_fmt else val_fmt[k](v)
103
113
 
104
- def _provide_rows(self, parsed_news, entity_service, text_opinion_linkage, idle_mode):
114
+ def _provide_rows(self, parsed_doc, entity_service, text_opinion_linkage, idle_mode):
105
115
  assert(isinstance(idle_mode, bool))
106
116
 
107
117
  row_dict = OrderedDict()
@@ -109,7 +119,7 @@ class BaseSampleRowProvider(BaseRowProvider):
109
119
  for index_in_linked in range(len(text_opinion_linkage)):
110
120
 
111
121
  rows_it = self.__provide_rows(
112
- parsed_news=parsed_news,
122
+ parsed_doc=parsed_doc,
113
123
  entity_service=entity_service,
114
124
  row_dict=row_dict,
115
125
  text_opinion_linkage=text_opinion_linkage,
@@ -123,36 +133,28 @@ class BaseSampleRowProvider(BaseRowProvider):
123
133
 
124
134
  # region private methods
125
135
 
126
- @staticmethod
127
- def __create_row_ids_provider(label_provider):
128
- # TODO. #376 related. This should be removed after refactoring, because
129
- # TODO. we consider an ordinary IDs, that not based on the other data.
130
- if isinstance(label_provider, BinaryLabelProvider):
131
- return BinaryIDProvider()
132
- if isinstance(label_provider, MultipleLabelProvider):
133
- return MultipleIDProvider()
134
-
135
136
  @staticmethod
136
137
  def __create_instances_provider(label_provider):
137
- # TODO. #473 related: thiese label providers are based on text opinion extraction task!
138
+ # TODO. #473 related: these label providers are based on text opinion extraction task!
138
139
  if isinstance(label_provider, BinaryLabelProvider):
139
140
  return MultipleInstancesLinkedTextOpinionsProvider(label_provider.SupportedLabels)
140
141
  if isinstance(label_provider, MultipleLabelProvider):
141
142
  return SingleInstanceLinkedDataProvider()
142
143
 
143
- def __provide_rows(self, row_dict, parsed_news, entity_service,
144
+ def __provide_rows(self, row_dict, parsed_doc, entity_service,
144
145
  text_opinion_linkage, index_in_linked, idle_mode):
145
146
  """
146
147
  Providing Rows depending on row_id_formatter type
147
148
  """
148
- assert(isinstance(parsed_news, ParsedNews))
149
+ assert(isinstance(parsed_doc, ParsedDocument))
149
150
  assert(isinstance(row_dict, OrderedDict))
150
151
  assert(isinstance(text_opinion_linkage, TextOpinionsLinkage))
151
152
 
152
153
  etalon_label = self.__instances_provider.provide_label(text_opinion_linkage)
153
154
  for instance in self.__instances_provider.iter_instances(text_opinion_linkage):
154
155
  yield self.__create_row(row=row_dict,
155
- parsed_news=parsed_news,
156
+ row_id=0,
157
+ parsed_doc=parsed_doc,
156
158
  entity_service=entity_service,
157
159
  text_opinions_linkage=instance,
158
160
  index_in_linked=index_in_linked,
@@ -160,7 +162,7 @@ class BaseSampleRowProvider(BaseRowProvider):
160
162
  etalon_label=etalon_label,
161
163
  idle_mode=idle_mode)
162
164
 
163
- def __create_row(self, row, parsed_news, entity_service, text_opinions_linkage,
165
+ def __create_row(self, row, row_id, parsed_doc, entity_service, text_opinions_linkage,
164
166
  index_in_linked, etalon_label, idle_mode):
165
167
  """
166
168
  Composing row in following format:
@@ -196,7 +198,7 @@ class BaseSampleRowProvider(BaseRowProvider):
196
198
  raise Exception("Limitation: Multi-Sentence text_opinions are not supported.")
197
199
 
198
200
  self._fill_row_core(row=row,
199
- parsed_news=parsed_news,
201
+ parsed_doc=parsed_doc,
200
202
  sentence_ind=source_s_ind,
201
203
  text_opinion_linkage=text_opinions_linkage,
202
204
  index_in_linked=index_in_linked,
@@ -34,9 +34,9 @@ class CroppedSampleRowProvider(BaseSampleRowProvider):
34
34
 
35
35
  return _from, _to
36
36
 
37
- def _provide_sentence_terms(self, parsed_news, sentence_ind, s_ind, t_ind):
37
+ def _provide_sentence_terms(self, parsed_doc, sentence_ind, s_ind, t_ind):
38
38
  terms_iter, src_ind, tgt_ind = super(CroppedSampleRowProvider, self)._provide_sentence_terms(
39
- parsed_news=parsed_news, sentence_ind=sentence_ind, s_ind=s_ind, t_ind=t_ind)
39
+ parsed_doc=parsed_doc, sentence_ind=sentence_ind, s_ind=s_ind, t_ind=t_ind)
40
40
  terms = list(terms_iter)
41
41
  _from, _to = self.__calc_window_bounds(window_size=self.__crop_window_size,
42
42
  s_ind=s_ind, t_ind=t_ind, input_length=len(terms))
@@ -1,6 +1,6 @@
1
1
  from collections import OrderedDict
2
2
 
3
- from arekit.common.news.parsed.providers.entity_service import EntityServiceProvider, DistanceType
3
+ from arekit.common.docs.parsed.providers.entity_service import EntityServiceProvider, DistanceType
4
4
  from arekit.common.text_opinions.base import TextOpinion
5
5
 
6
6
 
@@ -0,0 +1,82 @@
1
+ from arekit.common.data import const
2
+ from arekit.common.utils import filter_whitespaces, split_by_whitespaces
3
+
4
+
5
+ def process_values_list(value, args_sep):
6
+ return value.split(args_sep)
7
+
8
+
9
+ def process_indices_list(value, no_value_func, args_sep):
10
+ return no_value_func() if not value else [int(v) for v in str(value).split(args_sep)]
11
+
12
+
13
+ def process_text(value):
14
+ """ The core method of the input text processing.
15
+ """
16
+ assert(isinstance(value, str) or isinstance(value, list))
17
+ return filter_whitespaces([term for term in split_by_whitespaces(value)]
18
+ if isinstance(value, str) else value)
19
+
20
+
21
+ def create_base_column_value_fmt(no_value_func=lambda: None, args_sep=","):
22
+
23
+ self_func = lambda value: value
24
+
25
+ return {
26
+ const.ID: {
27
+ "writer": self_func,
28
+ "parser": self_func
29
+ },
30
+ const.DOC_ID: {
31
+ "writer": self_func,
32
+ "parser": self_func,
33
+ },
34
+ const.S_IND: {
35
+ "writer": self_func,
36
+ "parser": lambda value: int(value)
37
+ },
38
+ const.T_IND: {
39
+ "writer": self_func,
40
+ "parser": lambda value: int(value)
41
+ },
42
+ const.SENT_IND: {
43
+ "writer": self_func,
44
+ "parser": lambda value: int(value)
45
+ },
46
+ const.OPINION_ID: {
47
+ "writer": self_func,
48
+ "parser": lambda value: int(value)
49
+ },
50
+ const.OPINION_LINKAGE_ID: {
51
+ "writer": self_func,
52
+ "parser": lambda value: int(value)
53
+ },
54
+ const.ENTITY_VALUES: {
55
+ "writer": lambda entities: args_sep.join([e.DisplayValue.replace(args_sep, '') for e in entities]),
56
+ "parser": lambda value: process_values_list(value, args_sep=args_sep),
57
+ },
58
+ const.ENTITY_TYPES: {
59
+ "writer": lambda entities: args_sep.join([e.Type.replace(args_sep, '') for e in entities]),
60
+ "parser": lambda value: process_values_list(value, args_sep=args_sep)
61
+ },
62
+ const.ENTITIES: {
63
+ "writer": lambda entity_inds: args_sep.join(entity_inds),
64
+ "parser": lambda value: process_indices_list(value, no_value_func=no_value_func, args_sep=args_sep)
65
+ },
66
+ const.TEXT: {
67
+ "writer": self_func,
68
+ "parser": lambda value: process_text(value)
69
+ },
70
+ const.LABEL_UINT: {
71
+ "writer": self_func,
72
+ "parser": lambda value: int(value)
73
+ }
74
+ }
75
+
76
+
77
+ def create_base_column_fmt(fmt_type, args_sep=","):
78
+ assert(isinstance(fmt_type, str))
79
+ d = create_base_column_value_fmt(args_sep=args_sep)
80
+ for k, v in d.items():
81
+ d[k] = v[fmt_type]
82
+ return d
@@ -0,0 +1,43 @@
1
+ class ParsedSampleRow(object):
2
+ """ Provides a parsed information for a sample row.
3
+ """
4
+
5
+ def __init__(self, row, columns_fmts, no_value_func):
6
+ """ row: dict
7
+ dict of the pairs ("field_name", value)
8
+ columns_fmts: list
9
+ list of the formatters, where every formatter represent a dictionary.
10
+ no_value_func: func
11
+ the default value the conveys the absence of the parameter-value.
12
+ """
13
+ assert(isinstance(row, dict))
14
+ assert(isinstance(columns_fmts, list))
15
+ assert(callable(no_value_func))
16
+
17
+ self.__uint_label = None
18
+ self.__params = {}
19
+ self.__no_value = no_value_func
20
+
21
+ for key, value in row.items():
22
+
23
+ for columns_fmt in columns_fmts:
24
+ assert(isinstance(columns_fmt, dict))
25
+
26
+ if key not in columns_fmt:
27
+ continue
28
+
29
+ self.__params[key] = columns_fmt[key](value)
30
+ break
31
+
32
+ def __value_or_none(self, key):
33
+ return self.__params[key] if key in self.__params else self.__no_value()
34
+
35
+ def __getitem__(self, item):
36
+ assert (isinstance(item, str) or item is None)
37
+ if item not in self.__params:
38
+ return self.__no_value()
39
+ return self.__params[item] if item is not None else self.__no_value()
40
+
41
+ @classmethod
42
+ def parse(cls, row, columns_fmts, no_value_func):
43
+ return cls(row=row, columns_fmts=columns_fmts, no_value_func=no_value_func)