arekit 0.24.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (374) hide show
  1. arekit/__init__.py +0 -0
  2. arekit/common/__init__.py +0 -0
  3. arekit/common/bound.py +48 -0
  4. arekit/common/context/__init__.py +0 -0
  5. arekit/common/context/terms_mapper.py +51 -0
  6. arekit/common/context/token.py +16 -0
  7. arekit/common/data/__init__.py +0 -0
  8. arekit/common/data/const.py +21 -0
  9. arekit/common/data/doc_provider.py +6 -0
  10. arekit/common/data/input/__init__.py +0 -0
  11. arekit/common/data/input/providers/__init__.py +0 -0
  12. arekit/common/data/input/providers/columns/__init__.py +0 -0
  13. arekit/common/data/input/providers/columns/base.py +9 -0
  14. arekit/common/data/input/providers/columns/sample.py +59 -0
  15. arekit/common/data/input/providers/const.py +3 -0
  16. arekit/common/data/input/providers/contents.py +9 -0
  17. arekit/common/data/input/providers/instances/__init__.py +0 -0
  18. arekit/common/data/input/providers/instances/base.py +14 -0
  19. arekit/common/data/input/providers/instances/multiple.py +27 -0
  20. arekit/common/data/input/providers/instances/single.py +8 -0
  21. arekit/common/data/input/providers/label/__init__.py +0 -0
  22. arekit/common/data/input/providers/label/base.py +24 -0
  23. arekit/common/data/input/providers/label/binary.py +11 -0
  24. arekit/common/data/input/providers/label/multiple.py +15 -0
  25. arekit/common/data/input/providers/rows/__init__.py +0 -0
  26. arekit/common/data/input/providers/rows/base.py +64 -0
  27. arekit/common/data/input/providers/rows/samples.py +227 -0
  28. arekit/common/data/input/providers/sample/__init__.py +0 -0
  29. arekit/common/data/input/providers/sample/cropped.py +43 -0
  30. arekit/common/data/input/providers/text/__init__.py +0 -0
  31. arekit/common/data/input/providers/text/single.py +49 -0
  32. arekit/common/data/input/repositories/__init__.py +0 -0
  33. arekit/common/data/input/repositories/base.py +68 -0
  34. arekit/common/data/input/repositories/sample.py +22 -0
  35. arekit/common/data/input/sample.py +66 -0
  36. arekit/common/data/input/terms_mapper.py +88 -0
  37. arekit/common/data/rows_fmt.py +82 -0
  38. arekit/common/data/rows_parser.py +43 -0
  39. arekit/common/data/storages/__init__.py +0 -0
  40. arekit/common/data/storages/base.py +109 -0
  41. arekit/common/data/views/__init__.py +0 -0
  42. arekit/common/data/views/samples.py +26 -0
  43. arekit/common/docs/__init__.py +0 -0
  44. arekit/common/docs/base.py +30 -0
  45. arekit/common/docs/entities_grouping.py +16 -0
  46. arekit/common/docs/entity.py +18 -0
  47. arekit/common/docs/objects_parser.py +37 -0
  48. arekit/common/docs/parsed/__init__.py +0 -0
  49. arekit/common/docs/parsed/base.py +101 -0
  50. arekit/common/docs/parsed/providers/__init__.py +0 -0
  51. arekit/common/docs/parsed/providers/base.py +68 -0
  52. arekit/common/docs/parsed/providers/base_pairs.py +51 -0
  53. arekit/common/docs/parsed/providers/entity_service.py +175 -0
  54. arekit/common/docs/parsed/providers/opinion_pairs.py +20 -0
  55. arekit/common/docs/parsed/providers/text_opinion_pairs.py +78 -0
  56. arekit/common/docs/parsed/service.py +31 -0
  57. arekit/common/docs/parsed/term_position.py +42 -0
  58. arekit/common/docs/parser.py +34 -0
  59. arekit/common/docs/sentence.py +14 -0
  60. arekit/common/entities/__init__.py +0 -0
  61. arekit/common/entities/base.py +51 -0
  62. arekit/common/entities/collection.py +72 -0
  63. arekit/common/entities/str_fmt.py +8 -0
  64. arekit/common/entities/types.py +9 -0
  65. arekit/common/experiment/__init__.py +0 -0
  66. arekit/common/experiment/api/__init__.py +0 -0
  67. arekit/common/experiment/api/base_samples_io.py +20 -0
  68. arekit/common/experiment/data_type.py +17 -0
  69. arekit/common/frames/__init__.py +0 -0
  70. arekit/common/frames/connotations/__init__.py +0 -0
  71. arekit/common/frames/connotations/descriptor.py +17 -0
  72. arekit/common/frames/connotations/provider.py +4 -0
  73. arekit/common/frames/text_variant.py +43 -0
  74. arekit/common/frames/variants/__init__.py +0 -0
  75. arekit/common/frames/variants/base.py +21 -0
  76. arekit/common/frames/variants/collection.py +60 -0
  77. arekit/common/labels/__init__.py +0 -0
  78. arekit/common/labels/base.py +19 -0
  79. arekit/common/labels/provider/__init__.py +0 -0
  80. arekit/common/labels/provider/base.py +7 -0
  81. arekit/common/labels/provider/constant.py +14 -0
  82. arekit/common/labels/scaler/__init__.py +0 -0
  83. arekit/common/labels/scaler/base.py +85 -0
  84. arekit/common/labels/scaler/sentiment.py +7 -0
  85. arekit/common/labels/scaler/single.py +10 -0
  86. arekit/common/labels/str_fmt.py +55 -0
  87. arekit/common/linkage/__init__.py +0 -0
  88. arekit/common/linkage/base.py +44 -0
  89. arekit/common/linkage/meta.py +23 -0
  90. arekit/common/linkage/opinions.py +9 -0
  91. arekit/common/linkage/text_opinions.py +22 -0
  92. arekit/common/log_utils.py +29 -0
  93. arekit/common/model/__init__.py +0 -0
  94. arekit/common/model/labeling/__init__.py +0 -0
  95. arekit/common/model/labeling/base.py +24 -0
  96. arekit/common/model/labeling/modes.py +8 -0
  97. arekit/common/model/labeling/single.py +24 -0
  98. arekit/common/opinions/__init__.py +0 -0
  99. arekit/common/opinions/annot/__init__.py +0 -0
  100. arekit/common/opinions/annot/algo/__init__.py +0 -0
  101. arekit/common/opinions/annot/algo/base.py +4 -0
  102. arekit/common/opinions/annot/algo/pair_based.py +99 -0
  103. arekit/common/opinions/annot/algo/predefined.py +16 -0
  104. arekit/common/opinions/annot/algo_based.py +55 -0
  105. arekit/common/opinions/annot/base.py +15 -0
  106. arekit/common/opinions/base.py +74 -0
  107. arekit/common/opinions/collection.py +150 -0
  108. arekit/common/opinions/enums.py +6 -0
  109. arekit/common/opinions/provider.py +4 -0
  110. arekit/common/opinions/writer.py +4 -0
  111. arekit/common/pipeline/__init__.py +0 -0
  112. arekit/common/pipeline/base.py +25 -0
  113. arekit/common/pipeline/context.py +36 -0
  114. arekit/common/pipeline/conts.py +2 -0
  115. arekit/common/pipeline/items/__init__.py +0 -0
  116. arekit/common/pipeline/items/base.py +12 -0
  117. arekit/common/pipeline/items/flatten.py +14 -0
  118. arekit/common/pipeline/items/handle.py +17 -0
  119. arekit/common/pipeline/items/iter.py +11 -0
  120. arekit/common/pipeline/items/map.py +11 -0
  121. arekit/common/pipeline/items/map_nested.py +13 -0
  122. arekit/common/synonyms/__init__.py +0 -0
  123. arekit/common/synonyms/base.py +151 -0
  124. arekit/common/synonyms/grouping.py +21 -0
  125. arekit/common/text/__init__.py +0 -0
  126. arekit/common/text/enums.py +12 -0
  127. arekit/common/text/parsed.py +42 -0
  128. arekit/common/text/parser.py +12 -0
  129. arekit/common/text/partitioning/__init__.py +0 -0
  130. arekit/common/text/partitioning/base.py +4 -0
  131. arekit/common/text/partitioning/str.py +36 -0
  132. arekit/common/text/partitioning/terms.py +35 -0
  133. arekit/common/text/stemmer.py +16 -0
  134. arekit/common/text_opinions/__init__.py +0 -0
  135. arekit/common/text_opinions/base.py +105 -0
  136. arekit/common/utils.py +129 -0
  137. arekit/contrib/__init__.py +0 -0
  138. arekit/contrib/bert/__init__.py +0 -0
  139. arekit/contrib/bert/input/__init__.py +0 -0
  140. arekit/contrib/bert/input/providers/__init__.py +0 -0
  141. arekit/contrib/bert/input/providers/cropped_sample.py +17 -0
  142. arekit/contrib/bert/input/providers/text_pair.py +62 -0
  143. arekit/contrib/bert/terms/__init__.py +0 -0
  144. arekit/contrib/bert/terms/mapper.py +20 -0
  145. arekit/contrib/networks/__init__.py +0 -0
  146. arekit/contrib/networks/embedding.py +149 -0
  147. arekit/contrib/networks/embedding_io.py +18 -0
  148. arekit/contrib/networks/input/__init__.py +0 -0
  149. arekit/contrib/networks/input/const.py +6 -0
  150. arekit/contrib/networks/input/ctx_serialization.py +28 -0
  151. arekit/contrib/networks/input/embedding/__init__.py +0 -0
  152. arekit/contrib/networks/input/embedding/matrix.py +29 -0
  153. arekit/contrib/networks/input/embedding/offsets.py +55 -0
  154. arekit/contrib/networks/input/formatters/__init__.py +0 -0
  155. arekit/contrib/networks/input/formatters/pos_mapper.py +22 -0
  156. arekit/contrib/networks/input/providers/__init__.py +0 -0
  157. arekit/contrib/networks/input/providers/sample.py +129 -0
  158. arekit/contrib/networks/input/providers/term_connotation.py +23 -0
  159. arekit/contrib/networks/input/providers/text.py +24 -0
  160. arekit/contrib/networks/input/rows_parser.py +47 -0
  161. arekit/contrib/networks/input/term_types.py +13 -0
  162. arekit/contrib/networks/input/terms_mapping.py +60 -0
  163. arekit/contrib/networks/vectorizer.py +6 -0
  164. arekit/contrib/prompt/__init__.py +0 -0
  165. arekit/contrib/prompt/sample.py +61 -0
  166. arekit/contrib/source/__init__.py +0 -0
  167. arekit/contrib/source/brat/__init__.py +0 -0
  168. arekit/contrib/source/brat/annot.py +84 -0
  169. arekit/contrib/source/brat/doc.py +28 -0
  170. arekit/contrib/source/brat/entities/__init__.py +0 -0
  171. arekit/contrib/source/brat/entities/compound.py +13 -0
  172. arekit/contrib/source/brat/entities/entity.py +42 -0
  173. arekit/contrib/source/brat/entities/parser.py +53 -0
  174. arekit/contrib/source/brat/opinions/__init__.py +0 -0
  175. arekit/contrib/source/brat/opinions/converter.py +19 -0
  176. arekit/contrib/source/brat/relation.py +32 -0
  177. arekit/contrib/source/brat/sentence.py +69 -0
  178. arekit/contrib/source/brat/sentences_reader.py +128 -0
  179. arekit/contrib/source/download.py +41 -0
  180. arekit/contrib/source/nerel/__init__.py +0 -0
  181. arekit/contrib/source/nerel/entities.py +55 -0
  182. arekit/contrib/source/nerel/folding/__init__.py +0 -0
  183. arekit/contrib/source/nerel/folding/fixed.py +74 -0
  184. arekit/contrib/source/nerel/io_utils.py +62 -0
  185. arekit/contrib/source/nerel/labels.py +241 -0
  186. arekit/contrib/source/nerel/reader.py +46 -0
  187. arekit/contrib/source/nerel/utils.py +24 -0
  188. arekit/contrib/source/nerel/versions.py +12 -0
  189. arekit/contrib/source/nerelbio/__init__.py +0 -0
  190. arekit/contrib/source/nerelbio/io_utils.py +62 -0
  191. arekit/contrib/source/nerelbio/labels.py +265 -0
  192. arekit/contrib/source/nerelbio/reader.py +8 -0
  193. arekit/contrib/source/nerelbio/versions.py +8 -0
  194. arekit/contrib/source/ruattitudes/__init__.py +0 -0
  195. arekit/contrib/source/ruattitudes/collection.py +36 -0
  196. arekit/contrib/source/ruattitudes/doc.py +51 -0
  197. arekit/contrib/source/ruattitudes/doc_brat.py +44 -0
  198. arekit/contrib/source/ruattitudes/entity/__init__.py +0 -0
  199. arekit/contrib/source/ruattitudes/entity/parser.py +7 -0
  200. arekit/contrib/source/ruattitudes/io_utils.py +56 -0
  201. arekit/contrib/source/ruattitudes/labels_fmt.py +12 -0
  202. arekit/contrib/source/ruattitudes/opinions/__init__.py +0 -0
  203. arekit/contrib/source/ruattitudes/opinions/base.py +28 -0
  204. arekit/contrib/source/ruattitudes/opinions/converter.py +37 -0
  205. arekit/contrib/source/ruattitudes/reader.py +268 -0
  206. arekit/contrib/source/ruattitudes/sentence.py +73 -0
  207. arekit/contrib/source/ruattitudes/synonyms.py +17 -0
  208. arekit/contrib/source/ruattitudes/text_object.py +59 -0
  209. arekit/contrib/source/rusentiframes/__init__.py +0 -0
  210. arekit/contrib/source/rusentiframes/collection.py +157 -0
  211. arekit/contrib/source/rusentiframes/effect.py +24 -0
  212. arekit/contrib/source/rusentiframes/io_utils.py +19 -0
  213. arekit/contrib/source/rusentiframes/labels_fmt.py +22 -0
  214. arekit/contrib/source/rusentiframes/polarity.py +35 -0
  215. arekit/contrib/source/rusentiframes/role.py +15 -0
  216. arekit/contrib/source/rusentiframes/state.py +24 -0
  217. arekit/contrib/source/rusentiframes/types.py +42 -0
  218. arekit/contrib/source/rusentiframes/value.py +2 -0
  219. arekit/contrib/source/rusentrel/__init__.py +0 -0
  220. arekit/contrib/source/rusentrel/const.py +3 -0
  221. arekit/contrib/source/rusentrel/docs_reader.py +51 -0
  222. arekit/contrib/source/rusentrel/entities.py +26 -0
  223. arekit/contrib/source/rusentrel/io_utils.py +125 -0
  224. arekit/contrib/source/rusentrel/labels_fmt.py +12 -0
  225. arekit/contrib/source/rusentrel/opinions/__init__.py +0 -0
  226. arekit/contrib/source/rusentrel/opinions/collection.py +30 -0
  227. arekit/contrib/source/rusentrel/opinions/converter.py +40 -0
  228. arekit/contrib/source/rusentrel/opinions/provider.py +54 -0
  229. arekit/contrib/source/rusentrel/opinions/writer.py +42 -0
  230. arekit/contrib/source/rusentrel/synonyms.py +17 -0
  231. arekit/contrib/source/sentinerel/__init__.py +0 -0
  232. arekit/contrib/source/sentinerel/entities.py +52 -0
  233. arekit/contrib/source/sentinerel/folding/__init__.py +0 -0
  234. arekit/contrib/source/sentinerel/folding/factory.py +31 -0
  235. arekit/contrib/source/sentinerel/folding/fixed.py +70 -0
  236. arekit/contrib/source/sentinerel/io_utils.py +87 -0
  237. arekit/contrib/source/sentinerel/labels.py +53 -0
  238. arekit/contrib/source/sentinerel/labels_scaler.py +30 -0
  239. arekit/contrib/source/sentinerel/reader.py +42 -0
  240. arekit/contrib/source/synonyms/__init__.py +0 -0
  241. arekit/contrib/source/synonyms/utils.py +19 -0
  242. arekit/contrib/source/zip_utils.py +47 -0
  243. arekit/contrib/utils/__init__.py +0 -0
  244. arekit/contrib/utils/bert/__init__.py +0 -0
  245. arekit/contrib/utils/bert/samplers.py +17 -0
  246. arekit/contrib/utils/connotations/__init__.py +0 -0
  247. arekit/contrib/utils/connotations/rusentiframes_sentiment.py +23 -0
  248. arekit/contrib/utils/data/__init__.py +0 -0
  249. arekit/contrib/utils/data/contents/__init__.py +0 -0
  250. arekit/contrib/utils/data/contents/opinions.py +37 -0
  251. arekit/contrib/utils/data/doc_provider/__init__.py +0 -0
  252. arekit/contrib/utils/data/doc_provider/dict_based.py +13 -0
  253. arekit/contrib/utils/data/doc_provider/dir_based.py +53 -0
  254. arekit/contrib/utils/data/readers/__init__.py +0 -0
  255. arekit/contrib/utils/data/readers/base.py +7 -0
  256. arekit/contrib/utils/data/readers/csv_pd.py +38 -0
  257. arekit/contrib/utils/data/readers/jsonl.py +15 -0
  258. arekit/contrib/utils/data/service/__init__.py +0 -0
  259. arekit/contrib/utils/data/service/balance.py +50 -0
  260. arekit/contrib/utils/data/storages/__init__.py +0 -0
  261. arekit/contrib/utils/data/storages/jsonl_based.py +18 -0
  262. arekit/contrib/utils/data/storages/pandas_based.py +123 -0
  263. arekit/contrib/utils/data/storages/row_cache.py +48 -0
  264. arekit/contrib/utils/data/writers/__init__.py +0 -0
  265. arekit/contrib/utils/data/writers/base.py +27 -0
  266. arekit/contrib/utils/data/writers/csv_native.py +63 -0
  267. arekit/contrib/utils/data/writers/csv_pd.py +40 -0
  268. arekit/contrib/utils/data/writers/json_opennre.py +132 -0
  269. arekit/contrib/utils/data/writers/sqlite_native.py +110 -0
  270. arekit/contrib/utils/download.py +77 -0
  271. arekit/contrib/utils/embeddings/__init__.py +0 -0
  272. arekit/contrib/utils/embeddings/rusvectores.py +58 -0
  273. arekit/contrib/utils/embeddings/tokens.py +30 -0
  274. arekit/contrib/utils/entities/__init__.py +0 -0
  275. arekit/contrib/utils/entities/filter.py +7 -0
  276. arekit/contrib/utils/entities/formatters/__init__.py +0 -0
  277. arekit/contrib/utils/entities/formatters/str_display.py +11 -0
  278. arekit/contrib/utils/entities/formatters/str_simple_sharp_prefixed_fmt.py +15 -0
  279. arekit/contrib/utils/io_utils/__init__.py +0 -0
  280. arekit/contrib/utils/io_utils/embedding.py +72 -0
  281. arekit/contrib/utils/io_utils/opinions.py +37 -0
  282. arekit/contrib/utils/io_utils/samples.py +79 -0
  283. arekit/contrib/utils/io_utils/utils.py +39 -0
  284. arekit/contrib/utils/lexicons/__init__.py +0 -0
  285. arekit/contrib/utils/lexicons/lexicon.py +41 -0
  286. arekit/contrib/utils/lexicons/relation.py +42 -0
  287. arekit/contrib/utils/lexicons/rusentilex.py +37 -0
  288. arekit/contrib/utils/nn/__init__.py +0 -0
  289. arekit/contrib/utils/nn/rows.py +83 -0
  290. arekit/contrib/utils/np_utils/__init__.py +0 -0
  291. arekit/contrib/utils/np_utils/embedding.py +22 -0
  292. arekit/contrib/utils/np_utils/npz_utils.py +13 -0
  293. arekit/contrib/utils/np_utils/vocab.py +20 -0
  294. arekit/contrib/utils/pipelines/__init__.py +0 -0
  295. arekit/contrib/utils/pipelines/items/__init__.py +0 -0
  296. arekit/contrib/utils/pipelines/items/sampling/__init__.py +0 -0
  297. arekit/contrib/utils/pipelines/items/sampling/base.py +99 -0
  298. arekit/contrib/utils/pipelines/items/sampling/networks.py +54 -0
  299. arekit/contrib/utils/pipelines/items/text/__init__.py +0 -0
  300. arekit/contrib/utils/pipelines/items/text/entities_default.py +23 -0
  301. arekit/contrib/utils/pipelines/items/text/frames.py +86 -0
  302. arekit/contrib/utils/pipelines/items/text/frames_lemmatized.py +36 -0
  303. arekit/contrib/utils/pipelines/items/text/frames_negation.py +32 -0
  304. arekit/contrib/utils/pipelines/items/text/terms_splitter.py +10 -0
  305. arekit/contrib/utils/pipelines/items/text/tokenizer.py +107 -0
  306. arekit/contrib/utils/pipelines/items/text/translator.py +135 -0
  307. arekit/contrib/utils/pipelines/opinion_collections.py +85 -0
  308. arekit/contrib/utils/pipelines/sources/__init__.py +0 -0
  309. arekit/contrib/utils/pipelines/sources/nerel/__init__.py +0 -0
  310. arekit/contrib/utils/pipelines/sources/nerel/doc_provider.py +27 -0
  311. arekit/contrib/utils/pipelines/sources/nerel/extract_text_relations.py +65 -0
  312. arekit/contrib/utils/pipelines/sources/nerel/labels_fmt.py +60 -0
  313. arekit/contrib/utils/pipelines/sources/nerel_bio/__init__.py +0 -0
  314. arekit/contrib/utils/pipelines/sources/nerel_bio/doc_provider.py +29 -0
  315. arekit/contrib/utils/pipelines/sources/nerel_bio/extrat_text_relations.py +64 -0
  316. arekit/contrib/utils/pipelines/sources/nerel_bio/labels_fmt.py +79 -0
  317. arekit/contrib/utils/pipelines/sources/ruattitudes/__init__.py +0 -0
  318. arekit/contrib/utils/pipelines/sources/ruattitudes/doc_provider.py +56 -0
  319. arekit/contrib/utils/pipelines/sources/ruattitudes/entity_filter.py +20 -0
  320. arekit/contrib/utils/pipelines/sources/ruattitudes/extract_text_opinions.py +65 -0
  321. arekit/contrib/utils/pipelines/sources/rusentrel/__init__.py +0 -0
  322. arekit/contrib/utils/pipelines/sources/rusentrel/doc_provider.py +21 -0
  323. arekit/contrib/utils/pipelines/sources/rusentrel/extract_text_opinions.py +107 -0
  324. arekit/contrib/utils/pipelines/sources/sentinerel/__init__.py +0 -0
  325. arekit/contrib/utils/pipelines/sources/sentinerel/doc_provider.py +29 -0
  326. arekit/contrib/utils/pipelines/sources/sentinerel/entity_filter.py +62 -0
  327. arekit/contrib/utils/pipelines/sources/sentinerel/extract_text_opinions.py +180 -0
  328. arekit/contrib/utils/pipelines/sources/sentinerel/labels_fmt.py +50 -0
  329. arekit/contrib/utils/pipelines/text_opinion/__init__.py +0 -0
  330. arekit/contrib/utils/pipelines/text_opinion/annot/__init__.py +0 -0
  331. arekit/contrib/utils/pipelines/text_opinion/annot/algo_based.py +34 -0
  332. arekit/contrib/utils/pipelines/text_opinion/annot/predefined.py +88 -0
  333. arekit/contrib/utils/pipelines/text_opinion/extraction.py +93 -0
  334. arekit/contrib/utils/pipelines/text_opinion/filters/__init__.py +0 -0
  335. arekit/contrib/utils/pipelines/text_opinion/filters/base.py +4 -0
  336. arekit/contrib/utils/pipelines/text_opinion/filters/distance_based.py +16 -0
  337. arekit/contrib/utils/pipelines/text_opinion/filters/entity_based.py +29 -0
  338. arekit/contrib/utils/pipelines/text_opinion/filters/limitation.py +26 -0
  339. arekit/contrib/utils/processing/__init__.py +0 -0
  340. arekit/contrib/utils/processing/languages/__init__.py +0 -0
  341. arekit/contrib/utils/processing/languages/mods.py +12 -0
  342. arekit/contrib/utils/processing/languages/pos.py +23 -0
  343. arekit/contrib/utils/processing/languages/ru/__init__.py +0 -0
  344. arekit/contrib/utils/processing/languages/ru/cases.py +78 -0
  345. arekit/contrib/utils/processing/languages/ru/constants.py +6 -0
  346. arekit/contrib/utils/processing/languages/ru/mods.py +13 -0
  347. arekit/contrib/utils/processing/languages/ru/number.py +23 -0
  348. arekit/contrib/utils/processing/languages/ru/pos_service.py +36 -0
  349. arekit/contrib/utils/processing/lemmatization/__init__.py +0 -0
  350. arekit/contrib/utils/processing/lemmatization/mystem.py +51 -0
  351. arekit/contrib/utils/processing/pos/__init__.py +0 -0
  352. arekit/contrib/utils/processing/pos/base.py +12 -0
  353. arekit/contrib/utils/processing/pos/mystem_wrap.py +134 -0
  354. arekit/contrib/utils/processing/pos/russian.py +10 -0
  355. arekit/contrib/utils/processing/text/__init__.py +0 -0
  356. arekit/contrib/utils/processing/text/tokens.py +127 -0
  357. arekit/contrib/utils/resources.py +25 -0
  358. arekit/contrib/utils/serializer.py +43 -0
  359. arekit/contrib/utils/sources/__init__.py +0 -0
  360. arekit/contrib/utils/sources/sentinerel/__init__.py +0 -0
  361. arekit/contrib/utils/sources/sentinerel/text_opinion/__init__.py +0 -0
  362. arekit/contrib/utils/sources/sentinerel/text_opinion/prof_per_org_filter.py +63 -0
  363. arekit/contrib/utils/synonyms/__init__.py +0 -0
  364. arekit/contrib/utils/synonyms/simple.py +15 -0
  365. arekit/contrib/utils/synonyms/stemmer_based.py +38 -0
  366. arekit/contrib/utils/vectorizers/__init__.py +0 -0
  367. arekit/contrib/utils/vectorizers/bpe.py +93 -0
  368. arekit/contrib/utils/vectorizers/random_norm.py +39 -0
  369. arekit/download_data.py +11 -0
  370. arekit-0.24.0.dist-info/LICENSE +21 -0
  371. arekit-0.24.0.dist-info/METADATA +23 -0
  372. arekit-0.24.0.dist-info/RECORD +374 -0
  373. arekit-0.24.0.dist-info/WHEEL +5 -0
  374. arekit-0.24.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,29 @@
1
+ import logging
2
+
3
+ logger = logging.getLogger(__name__)
4
+ logging.basicConfig(level=logging.DEBUG)
5
+
6
+
7
def log_synonym_existed(value):
    """ Report that a synonym value was skipped because the collection already has it.

        value: str
            the value that is already present in the collection.
    """
    # The value is formatted as text. Encoding it to bytes first makes
    # Python 3 render it as "b'...'" inside the log message.
    logger.info("Collection already has a value '{}'. Skipped".format(value))
9
+
10
+
11
def log_synonym_for_entity_does_not_exist(entity_value, end_type, raise_exception):
    """ Report that an entity value is missing in a read-only SynonymsCollection.

        entity_value:
            the value that was looked up; it is placed into the message as-is.
        end_type:
            the opinion end the value belongs to; it is placed into the message as-is.
        raise_exception: bool
            when true, the report is raised as an Exception;
            otherwise it is written to the module logger at INFO level.
    """
    text = "'{s}' for end {e} does not exist in read-only SynonymsCollection".format(
        s=entity_value, e=end_type)

    if not raise_exception:
        logger.info(text)
        return

    raise Exception(text)
20
+
21
+
22
def log_opinion_already_exist(opinion, raise_exception, display_log):
    """ Report that an opinion is already present in a collection.

        opinion:
            object that exposes `SourceValue` and `TargetValue`.
        raise_exception: bool
            when true, the report is raised as an Exception.
        display_log: bool
            consulted only when `raise_exception` is false; when true, the report
            (with a ' [REJECTED]' suffix) is written to the module logger at INFO level.
    """
    # The message stays a text string. Encoding it to bytes made the
    # concatenation with ' [REJECTED]' below fail with TypeError on Python 3
    # and made the raised exception carry bytes instead of text.
    message = "'{s}->{t}' already exists in collection".format(s=opinion.SourceValue,
                                                               t=opinion.TargetValue)

    if raise_exception:
        raise Exception(message)
    elif display_log:
        logger.info(message + ' [REJECTED]')
File without changes
File without changes
@@ -0,0 +1,24 @@
1
+ from arekit.common.labels.scaler.base import BaseLabelScaler
2
+
3
+
4
class LabelsHelper(object):
    """ Base helper that forwards label conversions to a label scaler.

        Aggregation of labels and composing of opinions are left to subclasses.
    """

    def __init__(self, label_scaler):
        """ label_scaler: BaseLabelScaler
                scaler that performs the actual label <-> uint conversions.
        """
        assert isinstance(label_scaler, BaseLabelScaler)
        self._label_scaler = label_scaler

    def label_from_uint(self, value):
        """ Convert `value` into a label by means of the scaler.
        """
        scaler = self._label_scaler
        return scaler.uint_to_label(value=value)

    def label_to_uint(self, label):
        """ Convert `label` into its uint representation by means of the scaler.
        """
        scaler = self._label_scaler
        return scaler.label_to_uint(label=label)

    def get_classes_count(self):
        """ Amount of labels reported by the scaler as supported.
        """
        supported_labels = self._label_scaler.ordered_suppoted_labels()
        return len(supported_labels)

    def aggregate_labels(self, labels_list, label_calc_mode):
        """ Expected to be provided by a subclass.
        """
        raise NotImplementedError()

    @staticmethod
    def compose_opinion(text_opinion, label):
        """ Expected to be provided by a subclass.
        """
        raise NotImplementedError()
@@ -0,0 +1,8 @@
1
+ from enum import Enum
2
+
3
+
4
class LabelCalculationMode(Enum):
    """ Modes of reducing a list of labels into a single label.
    """

    # Take the label that goes first in the list.
    FIRST_APPEARED = 'take_first_appeared'
    # Derive the label from all the labels of the list.
    AVERAGE = 'average'
@@ -0,0 +1,24 @@
1
+ import numpy as np
2
+
3
+ from arekit.common.model.labeling.base import LabelsHelper
4
+ from arekit.common.model.labeling.modes import LabelCalculationMode
5
+
6
+
7
class SingleLabelsHelper(LabelsHelper):
    """ Labels helper that reduces a list of labels into a single label.
    """

    def aggregate_labels(self, labels_list, label_calc_mode):
        """ labels_list: list
                labels to be reduced.
            label_calc_mode: LabelCalculationMode
                FIRST_APPEARED -- the first element of the list is returned;
                AVERAGE -- every label is converted to int by the scaler, the ints
                are summed up, and the sign of the sum is converted back to a label.

            returns: the resulting label, or None for any other mode.
        """
        assert isinstance(labels_list, list)
        assert isinstance(label_calc_mode, LabelCalculationMode)

        if label_calc_mode == LabelCalculationMode.FIRST_APPEARED:
            return labels_list[0]

        if label_calc_mode == LabelCalculationMode.AVERAGE:
            scaler = self._label_scaler
            total = sum(scaler.label_to_int(item) for item in labels_list)
            return scaler.int_to_label(int(np.sign(total)))

        return None
24
+
File without changes
File without changes
File without changes
@@ -0,0 +1,4 @@
1
class BaseOpinionAnnotationAlgorithm(object):
    """ Base class of the opinion annotation algorithms.
    """

    def iter_opinions(self, parsed_doc, existed_opinions=None):
        """ The base implementation does nothing and returns None.
        """
        return None
@@ -0,0 +1,99 @@
1
+ from arekit.common.entities.types import OpinionEntityType
2
+ from arekit.common.labels.provider.base import BasePairLabelProvider
3
+ from arekit.common.docs.entity import DocumentEntity
4
+ from arekit.common.docs.parsed.base import ParsedDocument
5
+ from arekit.common.docs.parsed.providers.entity_service import EntityServiceProvider, DistanceType
6
+ from arekit.common.docs.parsed.providers.opinion_pairs import OpinionPairsProvider
7
+ from arekit.common.opinions.annot.algo.base import BaseOpinionAnnotationAlgorithm
8
+ from arekit.common.opinions.base import Opinion
9
+
10
+
11
class PairBasedOpinionAnnotationAlgorithm(BaseOpinionAnnotationAlgorithm):
    """ Pair-based annotation algorithm: opinions are composed from the
        source-target pairs of document entities that pass the filtering.
        This is a default annotator which found its application
        in Sentiment Attitude Extraction task [1].

        References:
            [1] Extracting Sentiment Attitudes from Analytical Texts https://arxiv.org/pdf/1808.08932.pdf
    """

    def __init__(self, dist_in_terms_bound, label_provider, entity_index_func, dist_in_sents=0,
                 is_entity_ignored_func=None):
        """ dist_in_terms_bound: int or None
                max allowed distance in terms between the entities of a pair;
                a pair is rejected only when its distance exceeds this value.
                None disables the check.
            label_provider: BasePairLabelProvider
                provides a label for a (source, target) pair.
            entity_index_func: callable
                passed as-is to the providers created in `iter_opinions`.
            dist_in_sents: int
                max allowed distance in sentences between the entities of a pair;
                a pair is rejected only when its distance exceeds this value.
            is_entity_ignored_func: func or None
                entity, type -> bool
        """
        assert dist_in_terms_bound is None or isinstance(dist_in_terms_bound, int)
        assert isinstance(label_provider, BasePairLabelProvider)
        assert callable(entity_index_func)
        assert isinstance(dist_in_sents, int)
        assert is_entity_ignored_func is None or callable(is_entity_ignored_func)

        self.__entity_index_func = entity_index_func
        self.__is_entity_ignored_func = is_entity_ignored_func
        self.__dist_in_sents = dist_in_sents
        self.__dist_in_terms_bound = dist_in_terms_bound
        self.__label_provider = label_provider

    # region private methods

    @staticmethod
    def __create_key_by_entity_pair(e1, e2):
        """ Compose a string key from the in-document identifiers of both entities.
        """
        assert isinstance(e1, DocumentEntity)
        assert isinstance(e2, DocumentEntity)
        return "{}_{}".format(e1.IdInDocument, e2.IdInDocument)

    def __try_create_pair_key(self, entity_service, e1, e2, existed_opinions):
        """ Returns a key of the (e1, e2) pair, or None when the pair is rejected.
        """
        assert isinstance(entity_service, EntityServiceProvider)
        assert isinstance(e1, DocumentEntity)
        assert isinstance(e2, DocumentEntity)

        # A pair of an entity with itself is not considered.
        if e1.IdInDocument == e2.IdInDocument:
            return None

        # Optional filtering: e1 is checked as a subject, e2 as an object.
        is_ignored = self.__is_entity_ignored_func
        if is_ignored is not None:
            if is_ignored(e1, OpinionEntityType.Subject) or is_ignored(e2, OpinionEntityType.Object):
                return None

        dist_sents = entity_service.calc_dist_between_entities(
            e1=e1, e2=e2, distance_type=DistanceType.InSentences)
        if dist_sents > self.__dist_in_sents:
            return None

        dist_terms = entity_service.calc_dist_between_entities(
            e1=e1, e2=e2, distance_type=DistanceType.InTerms)
        terms_bound = self.__dist_in_terms_bound
        if terms_bound is not None and dist_terms > terms_bound:
            return None

        # Reject pairs that already have a synonymous opinion among the existed ones.
        if existed_opinions is not None:
            candidate = Opinion(source_value=e1.Value,
                                target_value=e2.Value,
                                label=self.__label_provider.provide(source=e1, target=e2))
            if existed_opinions.has_synonymous_opinion(opinion=candidate):
                return None

        return self.__create_key_by_entity_pair(e1=e1, e2=e2)

    # endregion

    def iter_opinions(self, parsed_doc, existed_opinions=None):
        """ parsed_doc: ParsedDocument
                document both providers are initialized with.
            existed_opinions: collection or None
                when given, pairs with a synonymous opinion in it are filtered out.

            returns: result of `OpinionPairsProvider.iter_from_all` for the accepted pairs.
        """
        assert isinstance(parsed_doc, ParsedDocument)

        # Initialize providers.
        pairs_provider = OpinionPairsProvider(entity_index_func=self.__entity_index_func)
        entity_service = EntityServiceProvider(entity_index_func=self.__entity_index_func)
        pairs_provider.init_parsed_doc(parsed_doc)
        entity_service.init_parsed_doc(parsed_doc)

        def is_pair_accepted(e1, e2):
            # A pair is accepted when a key could be created for it.
            pair_key = self.__try_create_pair_key(entity_service=entity_service,
                                                  e1=e1, e2=e2,
                                                  existed_opinions=existed_opinions)
            return pair_key is not None

        return pairs_provider.iter_from_all(label_provider=self.__label_provider,
                                            filter_func=is_pair_accepted)
@@ -0,0 +1,16 @@
1
+ from arekit.common.docs.parsed.base import ParsedDocument
2
+ from arekit.common.opinions.annot.algo.base import BaseOpinionAnnotationAlgorithm
3
+
4
+
5
class PredefinedOpinionAnnotationAlgorithm(BaseOpinionAnnotationAlgorithm):
    """ A placeholder algorithm that returns predefined opinions,
        obtained by the identifier of the related document.
    """

    def __init__(self, get_opinions_by_doc_id_func):
        """ get_opinions_by_doc_id_func: callable
                doc_id -> opinions of the related document.
        """
        assert callable(get_opinions_by_doc_id_func)
        self.__get_opinions_by_doc_id_func = get_opinions_by_doc_id_func

    def iter_opinions(self, parsed_doc, existed_opinions=None):
        """ Returns whatever the provided function gives for `parsed_doc.RelatedDocID`;
            `existed_opinions` is not used.
        """
        assert isinstance(parsed_doc, ParsedDocument)
        doc_id = parsed_doc.RelatedDocID
        return self.__get_opinions_by_doc_id_func(doc_id)
@@ -0,0 +1,55 @@
1
+ import logging
2
+
3
+ from arekit.common.docs.parsed.base import ParsedDocument
4
+ from arekit.common.opinions.annot.algo.base import BaseOpinionAnnotationAlgorithm
5
+ from arekit.common.opinions.annot.base import BaseOpinionAnnotator
6
+ from arekit.common.opinions.collection import OpinionCollection
7
+
8
+ logger = logging.getLogger(__name__)
9
+ logging.basicConfig(level=logging.INFO)
10
+
11
+
12
class AlgorithmBasedOpinionAnnotator(BaseOpinionAnnotator):
    """ Annotator that collects the opinions produced by an annotation algorithm.
    """

    def __init__(self, annot_algo, create_empty_collection_func, get_doc_existed_opinions_func=None):
        """ annot_algo: BaseOpinionAnnotationAlgorithm
                algorithm that provides opinions for a parsed document.
            create_empty_collection_func: func
                function that creates an empty OpinionCollection
            get_doc_existed_opinions_func: func or None
                function that provides existed opinions for a document;
                if None, then we consider an absence of the existed document-level opinions.
        """
        assert isinstance(annot_algo, BaseOpinionAnnotationAlgorithm)
        assert get_doc_existed_opinions_func is None or callable(get_doc_existed_opinions_func)
        super(AlgorithmBasedOpinionAnnotator, self).__init__()

        def no_existed_opinions(_):
            return None

        if get_doc_existed_opinions_func is None:
            existed_opinions_func = no_existed_opinions
        else:
            existed_opinions_func = get_doc_existed_opinions_func

        self.__annot_algo = annot_algo
        self.__create_empty_collection_func = create_empty_collection_func
        self.__get_existed_opinions_func = existed_opinions_func

    # region private methods

    def _annot_collection_core(self, parsed_doc):
        """ Returns a new OpinionCollection filled with the opinions
            the algorithm provides for `parsed_doc`.
        """
        assert isinstance(parsed_doc, ParsedDocument)

        existed = self.__get_existed_opinions_func(parsed_doc.RelatedDocID)
        assert existed is None or isinstance(existed, OpinionCollection)

        annotated = self.__annot_algo.iter_opinions(parsed_doc=parsed_doc,
                                                    existed_opinions=existed)

        result = self.__create_empty_collection_func()
        assert isinstance(result, OpinionCollection)

        # Filling. An opinion is added only when the collection
        # has no synonymous opinion yet.
        for candidate in annotated:
            if not result.has_synonymous_opinion(candidate):
                result.add_opinion(candidate)

        return result

    # endregion
55
+
@@ -0,0 +1,15 @@
1
class BaseOpinionAnnotator(object):
    """
    Base class of the opinion annotators.
    The public `annotate_collection` forwards to `_annot_collection_core`,
    which every concrete annotator has to implement.
    """

    def _annot_collection_core(self, parsed_doc):
        """ Annotation implementation; not provided by the base class.
        """
        raise NotImplementedError()

    # region public methods

    def annotate_collection(self, parsed_doc):
        """ Annotates the given parsed document and returns
            whatever `_annot_collection_core` produces for it.
        """
        annotated = self._annot_collection_core(parsed_doc=parsed_doc)
        return annotated

    # endregion
@@ -0,0 +1,74 @@
1
+ from arekit.common.labels.base import Label
2
+ from arekit.common.opinions.enums import OpinionEndTypes
3
+ from arekit.common.synonyms.base import SynonymsCollection
4
+
5
+
6
class Opinion(object):
    """ Labeled opinion described by a source value, a target value,
        and an optional tag (None until `set_tag` is called).
    """

    def __init__(self, source_value, target_value, label):
        """ source_value: str
            target_value: str
            label: Label
        """
        assert(isinstance(source_value, str))
        assert(isinstance(target_value, str))
        assert(isinstance(label, Label))
        self.__source_value = source_value
        self.__target_value = target_value
        self.__label = label
        self.__tag = None

    # region properties

    @property
    def SourceValue(self):
        return self.__source_value

    @property
    def TargetValue(self):
        return self.__target_value

    @property
    def Label(self):
        return self.__label

    @property
    def Tag(self):
        return self.__tag

    # endregion

    def __end_group_indices(self, synonyms):
        """ Provides the pair (source, target) of synonym group indices.
        """
        source_ind = synonyms.get_synonym_group_index(self.__source_value)
        target_ind = synonyms.get_synonym_group_index(self.__target_value)
        return source_ind, target_ind

    # region public methods

    def get_value(self, end_type):
        """ Provides the source or the target value,
            depending on the requested OpinionEndTypes member.
        """
        assert(isinstance(end_type, OpinionEndTypes))

        if end_type == OpinionEndTypes.Target:
            return self.TargetValue

        if end_type == OpinionEndTypes.Source:
            return self.SourceValue

        raise Exception("Unknown end_type='{e_type}'".format(e_type=end_type))

    def set_tag(self, value):
        self.__tag = value

    def is_loop(self, synonyms):
        """ True when both ends belong to the same synonym group.
        """
        indices = self.__end_group_indices(synonyms)
        return indices[0] == indices[1]

    def create_synonym_id(self, synonyms):
        """ Composes the string identifier "<source group index>_<target group index>".
        """
        assert(isinstance(synonyms, SynonymsCollection))
        return "{}_{}".format(*self.__end_group_indices(synonyms))

    def has_synonym_for_end(self, synonyms, end_type):
        """ Checks whether the value of the requested end is known to `synonyms`.
        """
        assert(isinstance(synonyms, SynonymsCollection))
        assert(isinstance(end_type, OpinionEndTypes))
        end_value = self.get_value(end_type)
        return synonyms.contains_synonym_value(end_value)

    # endregion
@@ -0,0 +1,150 @@
1
+ from collections.abc import Iterable
2
+
3
+ from arekit.common import log_utils
4
+ from arekit.common.labels.base import Label
5
+ from arekit.common.opinions.base import Opinion
6
+ from arekit.common.opinions.enums import OpinionEndTypes
7
+ from arekit.common.synonyms.base import SynonymsCollection
8
+
9
+
10
class OpinionCollection(object):
    """
    Document-level collection of labeled opinions.
    Opinions are stored by their synonym id (see `Opinion.create_synonym_id`)
    and iterated in the order of registration.
    """

    def __init__(self, synonyms,
                 opinions=None,
                 error_on_duplicates=True,
                 error_on_synonym_end_missed=True):
        """
        synonyms: SynonymsCollection
            synonyms utilized to compose opinion ids; new values are added into it
            during registration when the collection is not read-only.
        opinions: iterable or None
            opinions to be registered initially.
        error_on_duplicates: bool
            forwarded as `raise_exception` to the duplicate-opinion logging
            while registering the initial opinions.
        error_on_synonym_end_missed: bool
            forwarded as `raise_exception` to the missed-synonym logging
            while registering the initial opinions.
        """
        assert(opinions is None or isinstance(opinions, Iterable))
        assert(isinstance(synonyms, SynonymsCollection))
        assert(isinstance(error_on_duplicates, bool))
        assert(isinstance(error_on_synonym_end_missed, bool))

        self.__by_synonyms = {}
        self.__ordered_opinion_keys = []
        self.__synonyms = synonyms
        self.__error_on_duplicates = error_on_duplicates
        self.__error_on_synonym_end_missed = error_on_synonym_end_missed

        if opinions is not None:
            for opinion in opinions:
                self.__register_opinion(
                    opinion=opinion,
                    error_on_existence=error_on_duplicates,
                    error_on_synonym_end_missed=error_on_synonym_end_missed)

    # region public methods

    def try_get_synonyms_opinion(self, opinion, label=None):
        """ Provides the stored opinion with the same synonym id
            (and the same label, when `label` is given), or None.
        """
        return self.__try_get_synonyms_opinion(opinion=opinion, label=label)

    def has_synonymous_opinion(self, opinion, label=None):
        """ Checks whether `try_get_synonyms_opinion` would find an opinion.
        """
        found = self.__try_get_synonyms_opinion(opinion=opinion, label=label)
        return found is not None

    def add_opinion(self, opinion):
        """ Registers the opinion; both error flags are always passed as True here,
            regardless of the flags given to the constructor.
        """
        assert(isinstance(opinion, Opinion))
        self.__register_opinion(opinion=opinion,
                                error_on_existence=True,
                                error_on_synonym_end_missed=True)

    # endregion

    # region private methods

    def __try_get_synonyms_opinion(self, opinion, label=None):
        assert(isinstance(opinion, Opinion))
        assert(label is None or isinstance(label, Label))

        # Both ends are required to be known by synonyms collection.
        if any(not opinion.has_synonym_for_end(synonyms=self.__synonyms, end_type=end_type)
               for end_type in OpinionEndTypes):
            return None

        synonym_id = opinion.create_synonym_id(self.__synonyms)
        if synonym_id not in self.__by_synonyms:
            return None

        found = self.__by_synonyms[synonym_id]
        if label is None or found.Label == label:
            return found

        return None

    def __add_synonym(self, value):
        self.__synonyms.add_synonym_value(value)

    def __register_opinion(self, opinion,
                           error_on_existence,
                           error_on_synonym_end_missed,
                           show_duplications=False):
        """ Attempts to store the opinion.
            returns: bool
                True when the opinion has been stored; False when it was rejected
                (unknown end value for read-only synonyms, loop, or duplicate).
        """
        assert(isinstance(error_on_existence, bool))
        assert(isinstance(error_on_synonym_end_missed, bool))

        for end_type in OpinionEndTypes:
            end_value = opinion.get_value(end_type)

            if opinion.has_synonym_for_end(synonyms=self.__synonyms, end_type=end_type):
                # OK. The value is already known.
                continue

            if self.__synonyms.IsReadOnly:
                # There is no way to register the missed value.
                log_utils.log_synonym_for_entity_does_not_exist(
                    entity_value=end_value,
                    end_type=end_type,
                    raise_exception=error_on_synonym_end_missed)

                # Rejecting.
                return False

            # OK. Registering new synonyms as it is possible.
            self.__add_synonym(end_value)

        if opinion.is_loop(self.__synonyms):
            # Ignoring loops.
            return False

        synonym_id = opinion.create_synonym_id(self.__synonyms)
        assert(isinstance(synonym_id, str))

        if synonym_id in self.__by_synonyms:
            log_utils.log_opinion_already_exist(opinion=opinion,
                                                raise_exception=error_on_existence,
                                                display_log=show_duplications)

            # Rejecting.
            return False

        # Perform registration, keeping the order of insertion.
        self.__by_synonyms[synonym_id] = opinion
        self.__ordered_opinion_keys.append(synonym_id)

        return True

    # endregion

    # region base methods

    def __len__(self):
        return len(self.__by_synonyms)

    def __iter__(self):
        for synonym_id in self.__ordered_opinion_keys:
            yield self.__by_synonyms[synonym_id]

    def __getitem__(self, item):
        assert(isinstance(item, int))
        return self.__by_synonyms[self.__ordered_opinion_keys[item]]

    # endregion
@@ -0,0 +1,6 @@
1
+ from enum import Enum
2
+
3
+
4
class OpinionEndTypes(Enum):
    """ Denotes one of the two ends of an opinion.
    """

    # The source end of an opinion.
    Source = 1

    # The target end of an opinion.
    Target = 2
@@ -0,0 +1,4 @@
1
class OpinionCollectionsProvider(object):
    """ Interface of the opinions provider; `iter_opinions` has to be
        implemented by a concrete provider.
    """

    def iter_opinions(self, source, encoding, labels_formatter, error_on_non_supported):
        """ Not implemented in the base class.
        """
        raise NotImplementedError
@@ -0,0 +1,4 @@
1
class OpinionCollectionWriter(object):
    """ Interface of the opinion collection writer; `serialize` has to be
        implemented by a concrete writer.
    """

    def serialize(self, collection, target, encoding, labels_formatter, error_on_non_supported):
        """ Not implemented in the base class.
        """
        raise NotImplementedError
File without changes
@@ -0,0 +1,25 @@
1
+ from arekit.common.pipeline.context import PipelineContext
2
+ from arekit.common.pipeline.items.base import BasePipelineItem
3
+
4
+
5
class BasePipeline(object):
    """ Sequence of pipeline items applied one after another:
        the output of every item becomes the input of the next one.
    """

    def __init__(self, pipeline):
        """ pipeline: list
                items of the pipeline; kept by reference (not copied).
        """
        assert(isinstance(pipeline, list))
        self.__pipeline = pipeline

    def run(self, input_data, params_dict=None, parent_ctx=None):
        """ Passes `input_data` through all the items, skipping None entries.

            params_dict: dict or None
                initial parameters of the pipeline context; an empty dict is used when None.
                NOTE: the dict is handed to PipelineContext as is, which stores
                PARENT_CTX in it and asserts that the key was absent; hence
                reusing the same dict for another run trips that assertion.
            parent_ctx:
                forwarded to PipelineContext.
            returns:
                output of the last applied item (or `input_data` when nothing is applied).
        """
        assert(isinstance(params_dict, dict) or params_dict is None)

        ctx_params = dict() if params_dict is None else params_dict
        pipeline_ctx = PipelineContext(d=ctx_params, parent_ctx=parent_ctx)

        for item in self.__pipeline:
            if item is None:
                continue
            assert(isinstance(item, BasePipelineItem))
            input_data = item.apply(input_data=input_data, pipeline_ctx=pipeline_ctx)

        return input_data

    def append(self, item):
        """ Adds the item to the end of the pipeline.
        """
        assert(isinstance(item, BasePipelineItem))
        self.__pipeline.append(item)
@@ -0,0 +1,36 @@
1
+ from arekit.common.pipeline.conts import PARENT_CTX
2
+
3
+
4
class PipelineContext(object):
    """ Context of parameters utilized in pipeline.
        Wraps the given dictionary (by reference) and keeps the parent
        context inside of it under the PARENT_CTX key.
    """

    def __init__(self, d, parent_ctx=None):
        """ d: dict
                parameters; must not contain the PARENT_CTX key yet.
            parent_ctx: PipelineContext or None
        """
        assert(isinstance(d, dict))
        assert(parent_ctx is None or isinstance(parent_ctx, PipelineContext))
        assert(PARENT_CTX not in d)
        d[PARENT_CTX] = parent_ctx
        self._d = d

    def __provide(self, param):
        return self._d[param]

    # region public

    def provide(self, param):
        """ Value of the parameter; raises KeyError for an absent one.
        """
        return self.__provide(param)

    def provide_or_none(self, param):
        """ Value of the parameter, or None for an absent one.
        """
        if param in self._d:
            return self.__provide(param)
        return None

    def update(self, param, value):
        """ Sets (or overwrites) the value of the parameter.
        """
        self._d[param] = value

    # endregion

    # region base methods

    def __contains__(self, item):
        return item in self._d

    # endregion
@@ -0,0 +1,2 @@
1
+ # Reference to the parent pipeline context.
2
+ PARENT_CTX = "parent_ctx"
File without changes
@@ -0,0 +1,12 @@
1
class BasePipelineItem(object):
    """ Single pipeline item that might be instantiated and embedded into pipeline.
    """

    def apply_core(self, input_data, pipeline_ctx):
        """ Item-specific processing; not provided by the base class.
        """
        raise NotImplementedError

    def apply(self, input_data, pipeline_ctx=None):
        """ Performs input processing and provides the result for the further pipeline items.
            When `apply_core` returns None, the original input is passed further instead.
        """
        result = self.apply_core(input_data=input_data, pipeline_ctx=pipeline_ctx)
        if result is None:
            return input_data
        return result
@@ -0,0 +1,14 @@
1
+ from arekit.common.pipeline.items.base import BasePipelineItem
2
+
3
+
4
class FlattenIterPipelineItem(BasePipelineItem):
    """ Flattens an iterable of iterables into a single lazy sequence of items.
    """

    def __flat_iter(self, iter_data):
        # Lazily walk through every nested iterable, preserving the order.
        for nested in iter_data:
            for element in nested:
                yield element

    def apply_core(self, input_data, pipeline_ctx):
        """ Provides a generator over the elements of the nested iterables;
            `pipeline_ctx` is not utilized.
        """
        flattened = self.__flat_iter(input_data)
        return flattened
@@ -0,0 +1,17 @@
1
+ from arekit.common.pipeline.items.base import BasePipelineItem
2
+
3
+
4
class HandleIterPipelineItem(BasePipelineItem):
    """ Calls a handler for every item that passes through, leaving the items untouched.
    """

    def __init__(self, handle_func=None):
        """ handle_func: func
                function called with every item; despite the None default,
                a callable is required (None does not pass the assertion).
        """
        assert(callable(handle_func))
        self.__handle_func = handle_func

    def __updated_data(self, items_iter):
        handle = self.__handle_func
        for element in items_iter:
            # Perform item handling before passing the item further.
            handle(element)
            yield element

    def apply_core(self, input_data, pipeline_ctx):
        """ Provides a generator that handles and then yields every input item;
            `pipeline_ctx` is not utilized.
        """
        handled = self.__updated_data(input_data)
        return handled
@@ -0,0 +1,11 @@
1
+ from arekit.common.pipeline.items.base import BasePipelineItem
2
+
3
+
4
class FilterPipelineItem(BasePipelineItem):
    """ Keeps only those input items for which the filter function is truthy.
    """

    def __init__(self, filter_func=None):
        """ filter_func: func
                predicate applied to every item; despite the None default,
                a callable is required (None does not pass the assertion).
        """
        assert(callable(filter_func))
        self.__filter_func = filter_func

    def apply_core(self, input_data, pipeline_ctx):
        """ Provides a lazy `filter` object over the input; `pipeline_ctx` is not utilized.
        """
        predicate = self.__filter_func
        return filter(predicate, input_data)