arekit 0.24.0__py3-none-any.whl → 0.25.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (160) hide show
  1. arekit/common/docs/entities_grouping.py +2 -1
  2. arekit/common/docs/parser.py +52 -20
  3. arekit/common/pipeline/base.py +12 -16
  4. arekit/common/pipeline/batching.py +28 -0
  5. arekit/common/pipeline/context.py +5 -1
  6. arekit/common/pipeline/items/base.py +38 -1
  7. arekit/common/pipeline/items/flatten.py +5 -1
  8. arekit/common/pipeline/items/handle.py +2 -1
  9. arekit/common/pipeline/items/iter.py +2 -1
  10. arekit/common/pipeline/items/map.py +2 -1
  11. arekit/common/pipeline/items/map_nested.py +4 -0
  12. arekit/common/pipeline/utils.py +32 -0
  13. arekit/common/service/sqlite.py +36 -0
  14. arekit/common/text/{partitioning/str.py → partitioning.py} +14 -9
  15. arekit/common/utils.py +0 -44
  16. arekit/contrib/utils/data/contents/opinions.py +13 -3
  17. arekit/contrib/utils/data/readers/sqlite.py +14 -0
  18. arekit/contrib/utils/data/storages/row_cache.py +6 -1
  19. arekit/contrib/utils/data/storages/sqlite_based.py +17 -0
  20. arekit/contrib/utils/data/writers/sqlite_native.py +4 -0
  21. arekit/contrib/utils/io_utils/utils.py +1 -18
  22. arekit/contrib/utils/pipelines/items/sampling/base.py +7 -12
  23. arekit/contrib/utils/pipelines/items/sampling/networks.py +3 -2
  24. arekit/contrib/utils/pipelines/items/text/entities_default.py +2 -2
  25. arekit/contrib/utils/pipelines/items/text/frames.py +2 -3
  26. arekit/contrib/utils/pipelines/items/text/frames_lemmatized.py +2 -2
  27. arekit/contrib/utils/pipelines/items/text/frames_negation.py +2 -1
  28. arekit/contrib/utils/pipelines/items/text/tokenizer.py +2 -4
  29. arekit/contrib/utils/pipelines/items/text/translator.py +2 -1
  30. arekit/contrib/utils/pipelines/text_opinion/extraction.py +6 -9
  31. arekit/contrib/utils/serializer.py +1 -2
  32. arekit-0.25.0.data/data/logo.png +0 -0
  33. arekit-0.25.0.dist-info/METADATA +82 -0
  34. {arekit-0.24.0.dist-info → arekit-0.25.0.dist-info}/RECORD +38 -153
  35. {arekit-0.24.0.dist-info → arekit-0.25.0.dist-info}/WHEEL +1 -1
  36. arekit/common/docs/objects_parser.py +0 -37
  37. arekit/common/text/parser.py +0 -12
  38. arekit/common/text/partitioning/base.py +0 -4
  39. arekit/common/text/partitioning/terms.py +0 -35
  40. arekit/contrib/source/__init__.py +0 -0
  41. arekit/contrib/source/brat/__init__.py +0 -0
  42. arekit/contrib/source/brat/annot.py +0 -84
  43. arekit/contrib/source/brat/doc.py +0 -28
  44. arekit/contrib/source/brat/entities/__init__.py +0 -0
  45. arekit/contrib/source/brat/entities/compound.py +0 -13
  46. arekit/contrib/source/brat/entities/entity.py +0 -42
  47. arekit/contrib/source/brat/entities/parser.py +0 -53
  48. arekit/contrib/source/brat/opinions/__init__.py +0 -0
  49. arekit/contrib/source/brat/opinions/converter.py +0 -19
  50. arekit/contrib/source/brat/relation.py +0 -32
  51. arekit/contrib/source/brat/sentence.py +0 -69
  52. arekit/contrib/source/brat/sentences_reader.py +0 -128
  53. arekit/contrib/source/download.py +0 -41
  54. arekit/contrib/source/nerel/__init__.py +0 -0
  55. arekit/contrib/source/nerel/entities.py +0 -55
  56. arekit/contrib/source/nerel/folding/__init__.py +0 -0
  57. arekit/contrib/source/nerel/folding/fixed.py +0 -74
  58. arekit/contrib/source/nerel/io_utils.py +0 -62
  59. arekit/contrib/source/nerel/labels.py +0 -241
  60. arekit/contrib/source/nerel/reader.py +0 -46
  61. arekit/contrib/source/nerel/utils.py +0 -24
  62. arekit/contrib/source/nerel/versions.py +0 -12
  63. arekit/contrib/source/nerelbio/__init__.py +0 -0
  64. arekit/contrib/source/nerelbio/io_utils.py +0 -62
  65. arekit/contrib/source/nerelbio/labels.py +0 -265
  66. arekit/contrib/source/nerelbio/reader.py +0 -8
  67. arekit/contrib/source/nerelbio/versions.py +0 -8
  68. arekit/contrib/source/ruattitudes/__init__.py +0 -0
  69. arekit/contrib/source/ruattitudes/collection.py +0 -36
  70. arekit/contrib/source/ruattitudes/doc.py +0 -51
  71. arekit/contrib/source/ruattitudes/doc_brat.py +0 -44
  72. arekit/contrib/source/ruattitudes/entity/__init__.py +0 -0
  73. arekit/contrib/source/ruattitudes/entity/parser.py +0 -7
  74. arekit/contrib/source/ruattitudes/io_utils.py +0 -56
  75. arekit/contrib/source/ruattitudes/labels_fmt.py +0 -12
  76. arekit/contrib/source/ruattitudes/opinions/__init__.py +0 -0
  77. arekit/contrib/source/ruattitudes/opinions/base.py +0 -28
  78. arekit/contrib/source/ruattitudes/opinions/converter.py +0 -37
  79. arekit/contrib/source/ruattitudes/reader.py +0 -268
  80. arekit/contrib/source/ruattitudes/sentence.py +0 -73
  81. arekit/contrib/source/ruattitudes/synonyms.py +0 -17
  82. arekit/contrib/source/ruattitudes/text_object.py +0 -59
  83. arekit/contrib/source/rusentiframes/__init__.py +0 -0
  84. arekit/contrib/source/rusentiframes/collection.py +0 -157
  85. arekit/contrib/source/rusentiframes/effect.py +0 -24
  86. arekit/contrib/source/rusentiframes/io_utils.py +0 -19
  87. arekit/contrib/source/rusentiframes/labels_fmt.py +0 -22
  88. arekit/contrib/source/rusentiframes/polarity.py +0 -35
  89. arekit/contrib/source/rusentiframes/role.py +0 -15
  90. arekit/contrib/source/rusentiframes/state.py +0 -24
  91. arekit/contrib/source/rusentiframes/types.py +0 -42
  92. arekit/contrib/source/rusentiframes/value.py +0 -2
  93. arekit/contrib/source/rusentrel/__init__.py +0 -0
  94. arekit/contrib/source/rusentrel/const.py +0 -3
  95. arekit/contrib/source/rusentrel/docs_reader.py +0 -51
  96. arekit/contrib/source/rusentrel/entities.py +0 -26
  97. arekit/contrib/source/rusentrel/io_utils.py +0 -125
  98. arekit/contrib/source/rusentrel/labels_fmt.py +0 -12
  99. arekit/contrib/source/rusentrel/opinions/__init__.py +0 -0
  100. arekit/contrib/source/rusentrel/opinions/collection.py +0 -30
  101. arekit/contrib/source/rusentrel/opinions/converter.py +0 -40
  102. arekit/contrib/source/rusentrel/opinions/provider.py +0 -54
  103. arekit/contrib/source/rusentrel/opinions/writer.py +0 -42
  104. arekit/contrib/source/rusentrel/synonyms.py +0 -17
  105. arekit/contrib/source/sentinerel/__init__.py +0 -0
  106. arekit/contrib/source/sentinerel/entities.py +0 -52
  107. arekit/contrib/source/sentinerel/folding/__init__.py +0 -0
  108. arekit/contrib/source/sentinerel/folding/factory.py +0 -31
  109. arekit/contrib/source/sentinerel/folding/fixed.py +0 -70
  110. arekit/contrib/source/sentinerel/io_utils.py +0 -87
  111. arekit/contrib/source/sentinerel/labels.py +0 -53
  112. arekit/contrib/source/sentinerel/labels_scaler.py +0 -30
  113. arekit/contrib/source/sentinerel/reader.py +0 -42
  114. arekit/contrib/source/synonyms/__init__.py +0 -0
  115. arekit/contrib/source/synonyms/utils.py +0 -19
  116. arekit/contrib/source/zip_utils.py +0 -47
  117. arekit/contrib/utils/connotations/__init__.py +0 -0
  118. arekit/contrib/utils/connotations/rusentiframes_sentiment.py +0 -23
  119. arekit/contrib/utils/download.py +0 -77
  120. arekit/contrib/utils/io_utils/opinions.py +0 -37
  121. arekit/contrib/utils/io_utils/samples.py +0 -79
  122. arekit/contrib/utils/lexicons/__init__.py +0 -0
  123. arekit/contrib/utils/lexicons/lexicon.py +0 -41
  124. arekit/contrib/utils/lexicons/relation.py +0 -42
  125. arekit/contrib/utils/lexicons/rusentilex.py +0 -37
  126. arekit/contrib/utils/nn/__init__.py +0 -0
  127. arekit/contrib/utils/nn/rows.py +0 -83
  128. arekit/contrib/utils/pipelines/items/text/terms_splitter.py +0 -10
  129. arekit/contrib/utils/pipelines/sources/__init__.py +0 -0
  130. arekit/contrib/utils/pipelines/sources/nerel/__init__.py +0 -0
  131. arekit/contrib/utils/pipelines/sources/nerel/doc_provider.py +0 -27
  132. arekit/contrib/utils/pipelines/sources/nerel/extract_text_relations.py +0 -65
  133. arekit/contrib/utils/pipelines/sources/nerel/labels_fmt.py +0 -60
  134. arekit/contrib/utils/pipelines/sources/nerel_bio/__init__.py +0 -0
  135. arekit/contrib/utils/pipelines/sources/nerel_bio/doc_provider.py +0 -29
  136. arekit/contrib/utils/pipelines/sources/nerel_bio/extrat_text_relations.py +0 -64
  137. arekit/contrib/utils/pipelines/sources/nerel_bio/labels_fmt.py +0 -79
  138. arekit/contrib/utils/pipelines/sources/ruattitudes/__init__.py +0 -0
  139. arekit/contrib/utils/pipelines/sources/ruattitudes/doc_provider.py +0 -56
  140. arekit/contrib/utils/pipelines/sources/ruattitudes/entity_filter.py +0 -20
  141. arekit/contrib/utils/pipelines/sources/ruattitudes/extract_text_opinions.py +0 -65
  142. arekit/contrib/utils/pipelines/sources/rusentrel/__init__.py +0 -0
  143. arekit/contrib/utils/pipelines/sources/rusentrel/doc_provider.py +0 -21
  144. arekit/contrib/utils/pipelines/sources/rusentrel/extract_text_opinions.py +0 -107
  145. arekit/contrib/utils/pipelines/sources/sentinerel/__init__.py +0 -0
  146. arekit/contrib/utils/pipelines/sources/sentinerel/doc_provider.py +0 -29
  147. arekit/contrib/utils/pipelines/sources/sentinerel/entity_filter.py +0 -62
  148. arekit/contrib/utils/pipelines/sources/sentinerel/extract_text_opinions.py +0 -180
  149. arekit/contrib/utils/pipelines/sources/sentinerel/labels_fmt.py +0 -50
  150. arekit/contrib/utils/pipelines/text_opinion/annot/predefined.py +0 -88
  151. arekit/contrib/utils/resources.py +0 -25
  152. arekit/contrib/utils/sources/__init__.py +0 -0
  153. arekit/contrib/utils/sources/sentinerel/__init__.py +0 -0
  154. arekit/contrib/utils/sources/sentinerel/text_opinion/__init__.py +0 -0
  155. arekit/contrib/utils/sources/sentinerel/text_opinion/prof_per_org_filter.py +0 -63
  156. arekit/download_data.py +0 -11
  157. arekit-0.24.0.dist-info/METADATA +0 -23
  158. /arekit/common/{text/partitioning → service}/__init__.py +0 -0
  159. {arekit-0.24.0.dist-info → arekit-0.25.0.dist-info}/LICENSE +0 -0
  160. {arekit-0.24.0.dist-info → arekit-0.25.0.dist-info}/top_level.txt +0 -0
@@ -8,7 +8,7 @@ from arekit.contrib.utils.pipelines.items.sampling.base import BaseSerializerPip
8
8
 
9
9
  class NetworksInputSerializerPipelineItem(BaseSerializerPipelineItem):
10
10
 
11
- def __init__(self, save_labels_func, rows_provider, samples_io, emb_io, storage, save_embedding=True):
11
+ def __init__(self, save_labels_func, rows_provider, samples_io, emb_io, storage, save_embedding=True, **kwargs):
12
12
  """ This pipeline item allows to perform a data preparation for neural network models.
13
13
 
14
14
  considering a list of the whole data_types with the related pipelines,
@@ -23,7 +23,8 @@ class NetworksInputSerializerPipelineItem(BaseSerializerPipelineItem):
23
23
  rows_provider=rows_provider,
24
24
  samples_io=samples_io,
25
25
  save_labels_func=save_labels_func,
26
- storage=storage)
26
+ storage=storage,
27
+ **kwargs)
27
28
 
28
29
  self.__emb_io = emb_io
29
30
  self.__save_embedding = save_embedding
@@ -4,8 +4,8 @@ from arekit.common.pipeline.items.base import BasePipelineItem
4
4
 
5
5
  class TextEntitiesParser(BasePipelineItem):
6
6
 
7
- def __init__(self):
8
- super(TextEntitiesParser, self).__init__()
7
+ def __init__(self, **kwargs):
8
+ super(TextEntitiesParser, self).__init__(**kwargs)
9
9
 
10
10
  @staticmethod
11
11
  def __process_word(word):
@@ -6,11 +6,10 @@ from arekit.common.pipeline.items.base import BasePipelineItem
6
6
 
7
7
  class FrameVariantsParser(BasePipelineItem):
8
8
 
9
- def __init__(self, frame_variants):
9
+ def __init__(self, frame_variants, **kwargs):
10
10
  assert(isinstance(frame_variants, FrameVariantsCollection))
11
11
  assert(len(frame_variants) > 0)
12
-
13
- super(FrameVariantsParser, self).__init__()
12
+ super(FrameVariantsParser, self).__init__(**kwargs)
14
13
 
15
14
  self.__frame_variants = frame_variants
16
15
  self.__max_variant_len = max([len(variant) for _, variant in frame_variants.iter_variants()])
@@ -5,10 +5,10 @@ from arekit.contrib.utils.processing.languages.ru.mods import RussianLanguageMod
5
5
 
6
6
  class LemmasBasedFrameVariantsParser(FrameVariantsParser):
7
7
 
8
- def __init__(self, frame_variants, stemmer, locale_mods=RussianLanguageMods, save_lemmas=False):
8
+ def __init__(self, frame_variants, stemmer, locale_mods=RussianLanguageMods, save_lemmas=False, **kwargs):
9
9
  assert(isinstance(stemmer, Stemmer))
10
10
  assert(isinstance(save_lemmas, bool))
11
- super(LemmasBasedFrameVariantsParser, self).__init__(frame_variants=frame_variants)
11
+ super(LemmasBasedFrameVariantsParser, self).__init__(frame_variants=frame_variants, **kwargs)
12
12
 
13
13
  self.__frame_variants = frame_variants
14
14
  self.__stemmer = stemmer
@@ -7,8 +7,9 @@ from arekit.contrib.utils.processing.languages.ru.mods import RussianLanguageMod
7
7
 
8
8
  class FrameVariantsSentimentNegation(BasePipelineItem):
9
9
 
10
- def __init__(self, locale_mods=RussianLanguageMods):
10
+ def __init__(self, locale_mods=RussianLanguageMods, **kwargs):
11
11
  assert(issubclass(locale_mods, BaseLanguageMods))
12
+ super(FrameVariantsSentimentNegation, self).__init__(**kwargs)
12
13
  self._locale_mods = locale_mods
13
14
 
14
15
  @staticmethod
@@ -1,7 +1,6 @@
1
1
  import logging
2
2
 
3
3
  from arekit.common.context.token import Token
4
- from arekit.common.pipeline.context import PipelineContext
5
4
  from arekit.common.pipeline.items.base import BasePipelineItem
6
5
  from arekit.common.utils import split_by_whitespaces
7
6
  from arekit.contrib.utils.processing.text.tokens import Tokens
@@ -14,14 +13,13 @@ class DefaultTextTokenizer(BasePipelineItem):
14
13
  """ Default parser implementation.
15
14
  """
16
15
 
17
- def __init__(self, keep_tokens=True):
18
- super(DefaultTextTokenizer, self).__init__()
16
+ def __init__(self, keep_tokens=True, **kwargs):
17
+ super(DefaultTextTokenizer, self).__init__(**kwargs)
19
18
  self.__keep_tokens = keep_tokens
20
19
 
21
20
  # region protected methods
22
21
 
23
22
  def apply_core(self, input_data, pipeline_ctx):
24
- assert(isinstance(pipeline_ctx, PipelineContext))
25
23
  output_data = self.__process_parts(input_data)
26
24
  if not self.__keep_tokens:
27
25
  output_data = [word for word in output_data if not isinstance(word, Token)]
@@ -9,10 +9,11 @@ class MLTextTranslatorPipelineItem(BasePipelineItem):
9
9
  """ Machine learning based translator pipeline item.
10
10
  """
11
11
 
12
- def __init__(self, batch_translate_model, do_translate_entity=True):
12
+ def __init__(self, batch_translate_model, do_translate_entity=True, **kwargs):
13
13
  """ Model, which is based on translation of the text,
14
14
  represented as a list of words.
15
15
  """
16
+ super(MLTextTranslatorPipelineItem, self).__init__(**kwargs)
16
17
  self.__do_translate_entity = do_translate_entity
17
18
  self.__translate = batch_translate_model
18
19
 
@@ -3,12 +3,10 @@ from arekit.common.linkage.text_opinions import TextOpinionsLinkage
3
3
  from arekit.common.docs.parsed.base import ParsedDocument
4
4
  from arekit.common.docs.parsed.providers.entity_service import EntityServiceProvider
5
5
  from arekit.common.docs.parsed.service import ParsedDocumentService
6
- from arekit.common.docs.parser import DocumentParser
7
- from arekit.common.pipeline.base import BasePipeline
6
+ from arekit.common.docs.parser import DocumentParsers
8
7
  from arekit.common.pipeline.items.flatten import FlattenIterPipelineItem
9
8
  from arekit.common.pipeline.items.map import MapPipelineItem
10
9
  from arekit.common.pipeline.items.map_nested import MapNestedPipelineItem
11
- from arekit.common.text.parser import BaseTextParser
12
10
  from arekit.common.text_opinions.base import TextOpinion
13
11
  from arekit.contrib.utils.pipelines.text_opinion.filters.base import TextOpinionFilter
14
12
  from arekit.contrib.utils.pipelines.text_opinion.filters.limitation import FrameworkLimitationsTextOpinionFilter
@@ -64,9 +62,8 @@ def __iter_text_opinion_linkages(parsed_doc, annotators, entity_index_func,
64
62
  yield MetaEmptyLinkedDataWrapper(doc_id=parsed_doc.RelatedDocID)
65
63
 
66
64
 
67
- def text_opinion_extraction_pipeline(text_parser, get_doc_by_id_func, annotators, entity_index_func,
65
+ def text_opinion_extraction_pipeline(pipeline_items, get_doc_by_id_func, annotators, entity_index_func,
68
66
  text_opinion_filters=None, use_meta_between_docs=True):
69
- assert(isinstance(text_parser, BaseTextParser))
70
67
  assert(callable(get_doc_by_id_func))
71
68
  assert(isinstance(annotators, list))
72
69
  assert(isinstance(text_opinion_filters, list) or text_opinion_filters is None)
@@ -75,13 +72,13 @@ def text_opinion_extraction_pipeline(text_parser, get_doc_by_id_func, annotators
75
72
  extra_filters = [] if text_opinion_filters is None else text_opinion_filters
76
73
  actual_text_opinion_filters = [FrameworkLimitationsTextOpinionFilter()] + extra_filters
77
74
 
78
- return BasePipeline([
75
+ return [
79
76
  # (doc_id) -> (doc)
80
77
  MapPipelineItem(map_func=lambda doc_id: get_doc_by_id_func(doc_id)),
81
78
 
82
79
  # (doc, ppl_ctx) -> (parsed_doc)
83
- MapNestedPipelineItem(map_func=lambda doc, ppl_ctx: DocumentParser.parse(
84
- doc=doc, text_parser=text_parser, parent_ppl_ctx=ppl_ctx)),
80
+ MapNestedPipelineItem(map_func=lambda doc, ppl_ctx: DocumentParsers.parse(
81
+ doc=doc, pipeline_items=pipeline_items, parent_ppl_ctx=ppl_ctx)),
85
82
 
86
83
  # (parsed_doc) -> (text_opinions)
87
84
  MapPipelineItem(map_func=lambda parsed_doc: __iter_text_opinion_linkages(
@@ -90,4 +87,4 @@ def text_opinion_extraction_pipeline(text_parser, get_doc_by_id_func, annotators
90
87
 
91
88
  # linkages[] -> linkages
92
89
  FlattenIterPipelineItem()
93
- ])
90
+ ]
@@ -7,7 +7,6 @@ from arekit.common.data.input.providers.rows.base import BaseRowProvider
7
7
  from arekit.common.data.input.repositories.base import BaseInputRepository
8
8
  from arekit.common.data.input.repositories.sample import BaseInputSamplesRepository
9
9
  from arekit.common.data.storages.base import BaseRowsStorage
10
- from arekit.common.pipeline.base import BasePipeline
11
10
  from arekit.contrib.utils.data.contents.opinions import InputTextOpinionProvider
12
11
 
13
12
  logger = logging.getLogger(__name__)
@@ -28,7 +27,7 @@ class InputDataSerializationHelper(object):
28
27
 
29
28
  @staticmethod
30
29
  def fill_and_write(pipeline, repo, target, writer, doc_ids_iter, desc=""):
31
- assert(isinstance(pipeline, BasePipeline))
30
+ assert(isinstance(pipeline, list))
32
31
  assert(isinstance(doc_ids_iter, Iterable))
33
32
  assert(isinstance(repo, BaseInputRepository))
34
33
 
Binary file
@@ -0,0 +1,82 @@
1
+ Metadata-Version: 2.1
2
+ Name: arekit
3
+ Version: 0.25.0
4
+ Summary: Document level Attitude and Relation Extraction toolkit (AREkit) for sampling and prompting mass-media news into datasets for ML-model training
5
+ Home-page: https://github.com/nicolay-r/AREkit
6
+ Author: Nicolay Rusnachenko
7
+ Author-email: rusnicolay@gmail.com
8
+ License: MIT License
9
+ Keywords: natural language processing,relation extraction,sentiment analysis
10
+ Classifier: Programming Language :: Python
11
+ Classifier: Programming Language :: Python :: 3.6
12
+ Classifier: Topic :: Software Development :: Libraries :: Python Modules
13
+ Classifier: Topic :: Scientific/Engineering :: Information Analysis
14
+ Classifier: Topic :: Text Processing :: Linguistic
15
+ Requires-Python: >=3.6
16
+ Description-Content-Type: text/markdown
17
+ License-File: LICENSE
18
+ Requires-Dist: tqdm
19
+ Requires-Dist: enum34==1.1.10
20
+ Requires-Dist: numpy>=1.14.5
21
+ Requires-Dist: pymystem3==0.2.0
22
+
23
+ # AREkit 0.25.0
24
+
25
+ ![](https://img.shields.io/badge/Python-3.9+-brightgreen.svg)
26
+
27
+ <p align="center">
28
+ <img src="logo.png"/>
29
+ </p>
30
+
31
+ **AREkit** (Attitude and Relation Extraction Toolkit) --
32
+ is a Python toolkit devoted to document-level Attitude and Relation Extraction between text objects from mass-media news.
33
+
34
+ ## Description
35
+
36
+
37
+ This toolkit aims at memory-effective data processing in Relation Extraction (RE) related tasks.
38
+
39
+ <p align="center">
40
+ <img src="docs/arekit-pipeline-concept.png"/>
41
+ </p>
42
+
43
+ > Figure: AREkit pipelines design. More on
44
+ > **[ARElight: Context Sampling of Large Texts for Deep Learning Relation Extraction](https://link.springer.com/chapter/10.1007/978-3-031-56069-9_23)** paper
45
+
46
+ In particular, this framework serves the following features:
47
+ * ➿ [pipelines](https://github.com/nicolay-r/AREkit/wiki/Pipelines:-Text-Opinion-Annotation) and iterators for handling large-scale collections serialization without out-of-memory issues.
48
+ * 🔗 EL (entity-linking) API support for objects,
49
+ * ➰ avoidance of cyclic connections,
50
+ * 📏 distance consideration between relation participants (in `terms` or `sentences`),
51
+ * 📑 relations annotations and filtering rules,
52
+ * *️⃣ entities formatting or masking, and more.
53
+
54
+ The core functionality includes:
55
+ * API for document presentation with EL (Entity Linking, i.e. Object Synonymy) support
56
+ for sentence level relations preparation (dubbed as contexts);
57
+ * API for contexts extraction;
58
+ * Relations transferring from sentence-level onto document-level, and more.
59
+
60
+ ## Installation
61
+
62
+ ```bash
63
+ pip install git+https://github.com/nicolay-r/AREkit.git@0.25.0-rc
64
+ ```
65
+
66
+ ## Usage
67
+
68
+ Please follow the **[tutorial section on project Wiki](https://github.com/nicolay-r/AREkit/wiki/Tutorials)** for more details.
69
+
70
+ ## How to cite
71
+ Great research is also accompanied by faithful references.
72
+ If you use or extend our work, please cite as follows:
73
+
74
+ ```bibtex
75
+ @inproceedings{rusnachenko2024arelight,
76
+ title={ARElight: Context Sampling of Large Texts for Deep Learning Relation Extraction},
77
+ author={Rusnachenko, Nicolay and Liang, Huizhi and Kolomeets, Maxim and Shi, Lei},
78
+ booktitle={European Conference on Information Retrieval},
79
+ year={2024},
80
+ organization={Springer}
81
+ }
82
+ ```
@@ -1,9 +1,8 @@
1
1
  arekit/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
2
- arekit/download_data.py,sha256=4hrNTLk2j6w-p7SZzBGsMh4V3OHvJGhFvw_hsZ-iu8A,194
3
2
  arekit/common/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
4
3
  arekit/common/bound.py,sha256=lPpHY6ct_CU9e4qXeYjhJfWbTj6Sb_NVtZ1CJheQPNE,1402
5
4
  arekit/common/log_utils.py,sha256=OfEQxbExkuRAl9dxlgFEqcFhI4HHoMYT7WE8ud0IPOM,924
6
- arekit/common/utils.py,sha256=iHUIeiGRY2tyXK0ac3vpdEfwLBJqfrrB6TDJClEmy_k,3880
5
+ arekit/common/utils.py,sha256=eVRGhRy882ow-63Glncc3pJ-_43KSI0ukBePjC8ogAY,2394
7
6
  arekit/common/context/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
8
7
  arekit/common/context/terms_mapper.py,sha256=QA02Cv7D2JKTlXkez_0w0J8HuvNziNF2vrqLgy4Bwc8,1447
9
8
  arekit/common/context/token.py,sha256=CpWAlvprUnJfCtYvO8lwdfU_ofSKAOGOudXTwppyzSk,459
@@ -45,10 +44,9 @@ arekit/common/data/views/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZ
45
44
  arekit/common/data/views/samples.py,sha256=LDqUDqArGt90ujRB4kDFgDHLmR2_AQoUnzhxpXYWYaM,882
46
45
  arekit/common/docs/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
47
46
  arekit/common/docs/base.py,sha256=uXUOtpR9BEsDBfDHg4eLqOjfSVOV_o9VPii3nSxLZuY,734
48
- arekit/common/docs/entities_grouping.py,sha256=AxNQMDKCqGVV8tiT05lZ7gA7KG4mPrWJAqGQJl0DZ58,625
47
+ arekit/common/docs/entities_grouping.py,sha256=_r254fNr0j6BjHuLZBLjj21yWm4_k__5aOcBXcAaQUQ,704
49
48
  arekit/common/docs/entity.py,sha256=TxrZMdIEgjk-PgCyskCkVis2KAw_M7vTBp3ppP6G05M,662
50
- arekit/common/docs/objects_parser.py,sha256=srsykElofnztKdnhdLCOp8xaEoERmqsmBcAy682-4Hk,1168
51
- arekit/common/docs/parser.py,sha256=26nOcfXzfsoYCwxjBms_qBtO4T_b66G4nLvmNJeSZmE,1505
49
+ arekit/common/docs/parser.py,sha256=514lQNrZiwU_mxgyuWBkDhqjS5SVAvcIHx9GQUTuVG8,2883
52
50
  arekit/common/docs/sentence.py,sha256=nZCCFj2yk71POoXCBfEMN3pteM2qQdj60eEzxMVY_3k,302
53
51
  arekit/common/docs/parsed/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
54
52
  arekit/common/docs/parsed/base.py,sha256=WPstqOpBuLKjtz6UO_bI0DpOPF3Sm0wYEVwjtldbPXE,3175
@@ -111,28 +109,28 @@ arekit/common/opinions/annot/algo/base.py,sha256=ymll-4-SplCY7CLswjOZEC1vsVHIEzU
111
109
  arekit/common/opinions/annot/algo/pair_based.py,sha256=HbYn1mAsn5g11NiC9pfrMqNtJn_GzvqPFGpafMqqB2o,4419
112
110
  arekit/common/opinions/annot/algo/predefined.py,sha256=zU39SADPKnykHCNB-Bmn_0bvd6gYWWYmfgfi-68hHSs,741
113
111
  arekit/common/pipeline/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
114
- arekit/common/pipeline/base.py,sha256=aTk0xbaZadTKyeRFzoT_WGLbHL0LVcuHTEKfxD6CpNU,917
115
- arekit/common/pipeline/context.py,sha256=ueah-0cOOGJEtzkEm07FAWvNZLKrW_bOPILrY0swPdU,853
112
+ arekit/common/pipeline/base.py,sha256=8TgWNy5QrnKEp1bq3lhyGSgIfYe5ZIZU3c-DYBJ9LPA,957
113
+ arekit/common/pipeline/batching.py,sha256=DdOvOladOo2aEv3JZ8NQnCvsNGcWk4TFzENrZqTGyXk,1239
114
+ arekit/common/pipeline/context.py,sha256=Fw25lBVakHNAXjtkdEqopR-Jh59cDKGWD2jCJxBrj7Y,1126
116
115
  arekit/common/pipeline/conts.py,sha256=NAQNsHt1kK3HnxWv3M6yXi0c7C6Mx6ZZ6KZc0yE0eas,70
116
+ arekit/common/pipeline/utils.py,sha256=5VqH1LtRa4tYUbyiRvWdBmP4biFhTKq9vhr8QiRFFkY,882
117
117
  arekit/common/pipeline/items/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
118
- arekit/common/pipeline/items/base.py,sha256=hWW1m4dTIsbodq27P29IGOpN1KOdubtsqOEOSyAlmTw,516
119
- arekit/common/pipeline/items/flatten.py,sha256=2zGu56AEkkH2SJJjNwBA4uPsfx_99zNaQp0eaWhOhWk,429
120
- arekit/common/pipeline/items/handle.py,sha256=OGL40tfonB3r0qegfKVjedsSC1922AHryuvipg0_bxM,505
121
- arekit/common/pipeline/items/iter.py,sha256=NLV0c8Jw-sAvde2MLAAiM10fRw7Hk2OiaksZy6cUNq4,337
122
- arekit/common/pipeline/items/map.py,sha256=WA3bBn9_rtsXceEkKUmEbae--fR1V1ui0Yvs0qSTYfM,315
123
- arekit/common/pipeline/items/map_nested.py,sha256=x_zUOKiCQqKA9JpuFTZnfuTaTBlPJuXgmHXkhfyk5VY,520
118
+ arekit/common/pipeline/items/base.py,sha256=dWIZVGJjYuURLCiZj8YQHWtsS725SOi9SPZaCPV7NvI,1694
119
+ arekit/common/pipeline/items/flatten.py,sha256=9T4jWqPGv4UDxajlM0Nm0-gvwUgqqYB8XH0efTum9a0,542
120
+ arekit/common/pipeline/items/handle.py,sha256=QS5Byj7-o5jmFi0ag58NE3zm2-JzVIunIgc3Pn1ij6g,578
121
+ arekit/common/pipeline/items/iter.py,sha256=Tk9WdUMPOq20s7jEWEpU4PmillnVtQ8nIa2ct7iw-3s,406
122
+ arekit/common/pipeline/items/map.py,sha256=G5wBdjaaxePD0pijrxsfpJACeP7kzj7HerjCkNIhmII,381
123
+ arekit/common/pipeline/items/map_nested.py,sha256=vs0GdJNr3qSF9p2yd1nWji5E1HGzECbvOfN2MqoHc2A,630
124
+ arekit/common/service/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
125
+ arekit/common/service/sqlite.py,sha256=1jLIszkcJGeT0hUos8Y0Chp3o9XRUfljG2P9q0T2_Ds,1440
124
126
  arekit/common/synonyms/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
125
127
  arekit/common/synonyms/base.py,sha256=YxD-CKCjlEtar1zTdumnfC3vKgbP2wLODR9mMEwbbnA,4237
126
128
  arekit/common/synonyms/grouping.py,sha256=fi7QQbBvsTvvP2CPTesSPEsPNmGfc6euqj-HPhVvtlg,698
127
129
  arekit/common/text/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
128
130
  arekit/common/text/enums.py,sha256=nelEI7B-szLUtl8xds8Kw_vgK5JWg_Aj7IadEj2q_1Y,141
129
131
  arekit/common/text/parsed.py,sha256=YxGRHtozDd3sDVI3hMT_hOO7Wmsy7_zLkblfnSXeJ9g,1104
130
- arekit/common/text/parser.py,sha256=5Xy_s8KWjlrhPLy6mASkJL690-kTYb3erTrTJoiZH4k,494
132
+ arekit/common/text/partitioning.py,sha256=OL8r3-xaMafnT7FuPXDHINlA-BQgx6cLaMqm366WKCU,1153
131
133
  arekit/common/text/stemmer.py,sha256=OJ5XelxLN-7m3uLPDU9C7CWdkXDeK-xieexQN6RYLXc,341
132
- arekit/common/text/partitioning/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
133
- arekit/common/text/partitioning/base.py,sha256=R6YPCN71SZa9sQJsxEaAY6IdgFRlt7tXstrjA79ADtI,108
134
- arekit/common/text/partitioning/str.py,sha256=gHadcKtELxvrk_XurVsMk63ZSNmvKRqTMtN2OtTw_Fs,1002
135
- arekit/common/text/partitioning/terms.py,sha256=_5P-xZJOHqOwSij_bi-b0hATCmOme5a38n1fAh67iAo,969
136
134
  arekit/common/text_opinions/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
137
135
  arekit/common/text_opinions/base.py,sha256=KootNvGAbUVCV5uFgLjK-bm9bbQSIvZUz0q9CBToGa8,3447
138
136
  arekit/contrib/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -164,94 +162,13 @@ arekit/contrib/networks/input/providers/term_connotation.py,sha256=Q90pVN4hQgYAk
164
162
  arekit/contrib/networks/input/providers/text.py,sha256=kucezKm6Ilmy5wuM2jUP5xk9zh1K1Pf8KcMd1prrp8k,917
165
163
  arekit/contrib/prompt/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
166
164
  arekit/contrib/prompt/sample.py,sha256=MxpbDR0ww7WmdtuPu74B8R6QKVXeuzO0CKGOJIYwbRk,3164
167
- arekit/contrib/source/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
168
- arekit/contrib/source/download.py,sha256=O5epK1eGC-ZeLzG4Q7jJDiFfpxur-vX8X6XqDtfjswM,2438
169
- arekit/contrib/source/zip_utils.py,sha256=cS7aCPOUkTOfZyqDwI23U1Aqplq-yXW8E1ayrTQiBl0,1468
170
- arekit/contrib/source/brat/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
171
- arekit/contrib/source/brat/annot.py,sha256=wrPU88mqmud0CYeHVxR3U55f_QhiWU5p6qvCJnWPSkQ,2556
172
- arekit/contrib/source/brat/doc.py,sha256=MvFu8arXvyHRsW7jsIcdryfg_dLL-SBY3RQZ5uravVo,1095
173
- arekit/contrib/source/brat/relation.py,sha256=6FA5gYcaD0_X5vzhF0ft42gzfDcq9qzDKALSBmw8N5E,733
174
- arekit/contrib/source/brat/sentence.py,sha256=q10cCwQtvXCMv2y_lSrdGH_5FzR5etsNPc50GGKO0KY,2633
175
- arekit/contrib/source/brat/sentences_reader.py,sha256=kWCAQqlIfQXP925bRLDeK9e2UusVxhGE_1IIDCLrjGQ,4677
176
- arekit/contrib/source/brat/entities/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
177
- arekit/contrib/source/brat/entities/compound.py,sha256=GnZC2dlgMdoJecCIm-uUoQ7fppyWYiwki5ClPeVvwfY,505
178
- arekit/contrib/source/brat/entities/entity.py,sha256=rHm15x2AaK8SzkhrKTIu298N0huFiGPRlY4kGfiQYYA,1337
179
- arekit/contrib/source/brat/entities/parser.py,sha256=W9S2TmreDIAf4jdhuA3SSp_LFYCOsglRKJb5xQQ9_1E,1934
180
- arekit/contrib/source/brat/opinions/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
181
- arekit/contrib/source/brat/opinions/converter.py,sha256=bRzzI_fEojWCGsmwx-Z_dqISeWTaR1ZBJAHcALNViWo,822
182
- arekit/contrib/source/nerel/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
183
- arekit/contrib/source/nerel/entities.py,sha256=f_qgvArMdiBikjr9QZIgWfIDEp4TADpiGymqPIpwY14,2806
184
- arekit/contrib/source/nerel/io_utils.py,sha256=Nb2PwC3udoHOLVXDuIuMd8rQiVMc8z7LeGYZjCAQ1Kg,2181
185
- arekit/contrib/source/nerel/labels.py,sha256=nUjSmmR6nDvjr08jIEDWf7kCuw3NzoPwvzMqn3BFycU,2297
186
- arekit/contrib/source/nerel/reader.py,sha256=2UXtRIhhZHfI7DbBOqJ2DoUcgfOanQObOyk-tU-BPXI,2072
187
- arekit/contrib/source/nerel/utils.py,sha256=_2da8rUCsg19CChb8Aj7nh0s_tWbrCVqz7wO1bUFmCI,756
188
- arekit/contrib/source/nerel/versions.py,sha256=XYibavD7cPM8TV7DnDl_8daPg80Sb_twBpR4LtsRuqA,182
189
- arekit/contrib/source/nerel/folding/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
190
- arekit/contrib/source/nerel/folding/fixed.py,sha256=jcArXQfawEtmIMYEyzwgMavIeAoyI2EzkY_zF7ZnpHs,2127
191
- arekit/contrib/source/nerelbio/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
192
- arekit/contrib/source/nerelbio/io_utils.py,sha256=E_3ReImB57PoUAyxMx6xYaI-pxoCabWLRkleWeAc-X8,2208
193
- arekit/contrib/source/nerelbio/labels.py,sha256=_TYsVW9VWrCLsF3milcfjBKnE0O7Tvvs6Z-kUg8UZeU,2517
194
- arekit/contrib/source/nerelbio/reader.py,sha256=T99emDZRLyqw3EREZ_Eha3moX8UyPW1hFPCCF7-Q_k8,300
195
- arekit/contrib/source/nerelbio/versions.py,sha256=a5q_JM9hOu9btHX9pmzS-wblRs5daG_so1lX-_dTqEs,105
196
- arekit/contrib/source/ruattitudes/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
197
- arekit/contrib/source/ruattitudes/collection.py,sha256=AfqmpnmWmQylGAjJcqk4hWmurfO24JdsZijlR6ygbx0,1492
198
- arekit/contrib/source/ruattitudes/doc.py,sha256=_gSyAMEIFklYll-8PU3wpatDZfwg7Gl1h2c9xRUbGFo,1262
199
- arekit/contrib/source/ruattitudes/doc_brat.py,sha256=xzP3KUb-qFgdDydgzHfhPfFHPVUlB6nX5vZSvInz0f0,2196
200
- arekit/contrib/source/ruattitudes/io_utils.py,sha256=ER0OWlDw7pdFIoATEiiXMwBxAjy3I09UXS96A4LGs5Q,1442
201
- arekit/contrib/source/ruattitudes/labels_fmt.py,sha256=rHlDijvLJAVwgs6EpaCUqZHbjjc1oW9TvhwSSgKa1hk,479
202
- arekit/contrib/source/ruattitudes/reader.py,sha256=yNxbU6qFTbWoaSjazIOlfOeDkymnKK7zcBKUxMUAh9I,10182
203
- arekit/contrib/source/ruattitudes/sentence.py,sha256=vIe-HJrd-Dtd9fBMUCPX8x3IBQrwgm2b84ZBfxaZFCE,2079
204
- arekit/contrib/source/ruattitudes/synonyms.py,sha256=l-Oh37Pqf5AhNYVW7XVlulUNoZ3tZYWUYpruKZlp1B4,596
205
- arekit/contrib/source/ruattitudes/text_object.py,sha256=zppsQdEM-ViaP1ufBhds92k_vXGTdz_TvH3Cal5A07Y,1985
206
- arekit/contrib/source/ruattitudes/entity/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
207
- arekit/contrib/source/ruattitudes/entity/parser.py,sha256=XRfwL_x-TNbxrSdca4GY5ABj1wZr6HEk_kxgfbuSqlk,248
208
- arekit/contrib/source/ruattitudes/opinions/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
209
- arekit/contrib/source/ruattitudes/opinions/base.py,sha256=tKVjOK2kZxXLSClVPYZYSTqgenTupnQfZrO2g7o9yZg,705
210
- arekit/contrib/source/ruattitudes/opinions/converter.py,sha256=4VVuLuJvpIoz4SPGVxlCDE9m8YbPcoWENV0PqijF2Mg,1560
211
- arekit/contrib/source/rusentiframes/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
212
- arekit/contrib/source/rusentiframes/collection.py,sha256=BQXk5G2fazW3Bn5xX5EXP4tA8tA6HbPft5P5gO3L_ew,5822
213
- arekit/contrib/source/rusentiframes/effect.py,sha256=LqBf6e-d32MPMK7rdOEyO22lSbEv8hD9jkdyCYjUAo4,507
214
- arekit/contrib/source/rusentiframes/io_utils.py,sha256=VUdg_XkDotsqOS9gmNYIpz60Zj1ALWXlUYm82-v3QM0,476
215
- arekit/contrib/source/rusentiframes/labels_fmt.py,sha256=XsunvZaspVAqVxHzrvCdwo6fG3tDNaUpUDSybF1JSDw,876
216
- arekit/contrib/source/rusentiframes/polarity.py,sha256=PWHKdmNjyD7-EB-eYQ8h4R2TAAbybAgEut5toICPEuk,953
217
- arekit/contrib/source/rusentiframes/role.py,sha256=ri4EETg_YGygiCkoq9_waCHWhvtQwm_P4bEIF6DBrSQ,369
218
- arekit/contrib/source/rusentiframes/state.py,sha256=J9tuPf52qNgrIj91wH8NHlA11kMwU7doP8aOyvmTMvw,505
219
- arekit/contrib/source/rusentiframes/types.py,sha256=vgMK1CKdr9ytLaivD8seTYJbN4XOQxHVcvHlbEs5zAc,1348
220
- arekit/contrib/source/rusentiframes/value.py,sha256=anL8KDWY4np0oKJb_49ord3Hi_TVViQgD1Stytuli_4,34
221
- arekit/contrib/source/rusentrel/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
222
- arekit/contrib/source/rusentrel/const.py,sha256=C2-PlavM4PUaCtL0-J0AVIssseDD-mmAvsTjygKvvks,72
223
- arekit/contrib/source/rusentrel/docs_reader.py,sha256=gPIzSaY5EQfRdRNOUyhWQGWgE_oIACoP3lUEj2uOjPI,2005
224
- arekit/contrib/source/rusentrel/entities.py,sha256=olw6zNo4ii_cBqe0Ns2TmYa4_QWki6aUfw3EToSNvpU,1197
225
- arekit/contrib/source/rusentrel/io_utils.py,sha256=Lxnru6PSNp_Jqti8FFaIOO7pzWqXx4GjIeow9wsG_kg,4105
226
- arekit/contrib/source/rusentrel/labels_fmt.py,sha256=-l_0RvfQoPmAYC5F89o3t8Q_9LbL2hiAjH5Ip_Rrouc,544
227
- arekit/contrib/source/rusentrel/synonyms.py,sha256=wjCPdJCL-pABYa3m7egDSYzUhsGFY9dzcZBX5b7MVC0,576
228
- arekit/contrib/source/rusentrel/opinions/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
229
- arekit/contrib/source/rusentrel/opinions/collection.py,sha256=0rP9csiUZe8rgetS_XHFQtJJGvjhnhunprgLfzkf4sQ,1400
230
- arekit/contrib/source/rusentrel/opinions/converter.py,sha256=I1jqfTAiCs1Gm9KP5l5R7QYhAgS_LIFYO9Cfm5i6Dto,1208
231
- arekit/contrib/source/rusentrel/opinions/provider.py,sha256=oKJ3BVxrgquWJRkKuAawOy6pjSNzDOYefXaQARrefyA,1910
232
- arekit/contrib/source/rusentrel/opinions/writer.py,sha256=dJicPj61HGrxwd0iELkykhtbwTrSWKeTc1KzlOPXG2w,1631
233
- arekit/contrib/source/sentinerel/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
234
- arekit/contrib/source/sentinerel/entities.py,sha256=tvy6CsvlVrtsJQkslCyA8KeLo6ihbPpxdI9l1jngNF8,2682
235
- arekit/contrib/source/sentinerel/io_utils.py,sha256=fQ5pc3FDjM7afWxObwavSX76KEucd8pIn7u9V5mFJ9M,2751
236
- arekit/contrib/source/sentinerel/labels.py,sha256=xjQw5KWN8G6LfpVufvY63WFCmrJ8p2caCZNnWqKO5vo,550
237
- arekit/contrib/source/sentinerel/labels_scaler.py,sha256=kHmooy876IAdX-ib-Ou7xVf0U3UWTDowG5uFP5DWNOY,1091
238
- arekit/contrib/source/sentinerel/reader.py,sha256=ZMdGc5pLUkmS0YsvmVR-9rMzme0cvTegVSqvo7HMNfU,2048
239
- arekit/contrib/source/sentinerel/folding/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
240
- arekit/contrib/source/sentinerel/folding/factory.py,sha256=juLkpX8f9khJGYHQH2eTCMhW74A5DFFNKBFfIvXpQZQ,1176
241
- arekit/contrib/source/sentinerel/folding/fixed.py,sha256=vWDNidHdRjpPnjspzMhanmbh3S8vWJ9cszQT9mY-cqU,1772
242
- arekit/contrib/source/synonyms/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
243
- arekit/contrib/source/synonyms/utils.py,sha256=j8iqGw6M3bxgdV4sGRtNj3NCnHSMokq8QSPITuKKsDU,547
244
165
  arekit/contrib/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
245
- arekit/contrib/utils/download.py,sha256=Se65pImAU9qmqX1oBRERy1OnPVjBWRCHqom2tR2FWoI,2888
246
- arekit/contrib/utils/resources.py,sha256=xEENsaid0NDBNJzHJvd7M_le5T0lihHjN9DKKuBgnEg,1217
247
- arekit/contrib/utils/serializer.py,sha256=4cc9_asXjcWk-Y1lRtLJM736MYpKyZf9svYiQpdk5-M,1687
166
+ arekit/contrib/utils/serializer.py,sha256=D9LJ2ZXeVx3YntV-HqEnt32xW-s4GauwD97XRVlqr0g,1626
248
167
  arekit/contrib/utils/bert/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
249
168
  arekit/contrib/utils/bert/samplers.py,sha256=ZVe3rbUAH0Jw1xR_yHE1DoUJf3CI0pDgbBQQzlLWevc,989
250
- arekit/contrib/utils/connotations/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
251
- arekit/contrib/utils/connotations/rusentiframes_sentiment.py,sha256=DYxgoPiiQ0kKXx7Fy4HpozH71kXkaVwuAMU61m7BRGY,1062
252
169
  arekit/contrib/utils/data/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
253
170
  arekit/contrib/utils/data/contents/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
254
- arekit/contrib/utils/data/contents/opinions.py,sha256=PNoZ5Ih2qqHCvfvLprEb_3t9cixiteHDP7aTJo4BclM,1480
171
+ arekit/contrib/utils/data/contents/opinions.py,sha256=MSV7NytEe15adKhhHCq5KiCj6ZBq31nV-u2rcSfFCgE,1738
255
172
  arekit/contrib/utils/data/doc_provider/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
256
173
  arekit/contrib/utils/data/doc_provider/dict_based.py,sha256=zUOiiIbj5zby4xqMb0m9N-a6enavJJ7wFmPaGErykWU,371
257
174
  arekit/contrib/utils/data/doc_provider/dir_based.py,sha256=FTw3kLV_CYtPoUoHl39IrP6RjLvTecCno9May95jVXw,1916
@@ -259,18 +176,20 @@ arekit/contrib/utils/data/readers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeR
259
176
  arekit/contrib/utils/data/readers/base.py,sha256=zAsZLX5ng0_gb_ysL6wQchptmBHlNgqgQilw295Y5Aw,153
260
177
  arekit/contrib/utils/data/readers/csv_pd.py,sha256=Ym49j04Z-_WQN-7xJMiiN1y2TIMnMDtPxy5h0mT3WBQ,1383
261
178
  arekit/contrib/utils/data/readers/jsonl.py,sha256=c2bHwnTfNEwb1c8B9fRwaQyeze5x3nOd2UXXAp4MbxQ,426
179
+ arekit/contrib/utils/data/readers/sqlite.py,sha256=U1138XNCIwqycNivxwzwIUnowj3jDkP4M6J_Kvyedbc,416
262
180
  arekit/contrib/utils/data/service/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
263
181
  arekit/contrib/utils/data/service/balance.py,sha256=PgA5B6qSPmt8ITPLsQuCkniE8-u2NO_eQ2m-U9Akh98,1547
264
182
  arekit/contrib/utils/data/storages/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
265
183
  arekit/contrib/utils/data/storages/jsonl_based.py,sha256=Oj5u8aW_UtVDSDxMpIQsgMlZlV-KBD0qVHPVVT3m8nA,450
266
184
  arekit/contrib/utils/data/storages/pandas_based.py,sha256=m8z34tO_7NupYd_zQ4L1miTXJQkmMMB90zPFqEeYCNs,4301
267
- arekit/contrib/utils/data/storages/row_cache.py,sha256=NnWdtRj47xuDBPdpapIya5NnDRb7lEBEssUflMA1Cik,1814
185
+ arekit/contrib/utils/data/storages/row_cache.py,sha256=V1InYIqRf5WMWV_JndHNH9JzAjFS3ZL38f4_pDPLo_8,1985
186
+ arekit/contrib/utils/data/storages/sqlite_based.py,sha256=ARwVisVbPKBap_mVdpvTpp28iXgJbCJ3dAj41UYu03Q,609
268
187
  arekit/contrib/utils/data/writers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
269
188
  arekit/contrib/utils/data/writers/base.py,sha256=JLwf5WVl_U319sdMev8YOn4OoCcrgNIUZtrOuG1JLjI,766
270
189
  arekit/contrib/utils/data/writers/csv_native.py,sha256=7fPxYeu9YDK8Cvjp1n-sbKT63ZuhDIEv3VwghHuKk5k,2252
271
190
  arekit/contrib/utils/data/writers/csv_pd.py,sha256=WhBjDJCHUBy_TabngMF42Qicx0ye8xIus0m6c7qotto,1330
272
191
  arekit/contrib/utils/data/writers/json_opennre.py,sha256=EkhXmONgtMe7A9VKrs9ElFHc8RoMumjFbkKfwuOVOoU,5067
273
- arekit/contrib/utils/data/writers/sqlite_native.py,sha256=xzO-DyZYzdwld5SZbGWWa7EDz87LJnIXFmBmItccQUU,4336
192
+ arekit/contrib/utils/data/writers/sqlite_native.py,sha256=MnbLU8iPvYvpYgEbOXhBKH_G8DJs0W9iSuhr_TPKBAQ,4601
274
193
  arekit/contrib/utils/embeddings/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
275
194
  arekit/contrib/utils/embeddings/rusvectores.py,sha256=WA0HejE2U5kgeBvh4_vty2QzoAkFXiMk94BK8FHxoxw,1931
276
195
  arekit/contrib/utils/embeddings/tokens.py,sha256=z3lJ30JTX9zvZtPgzRl3yANECmuA1qboMDTcJsr_4E4,872
@@ -281,15 +200,7 @@ arekit/contrib/utils/entities/formatters/str_display.py,sha256=N8igv7EVaTFayvLXk
281
200
  arekit/contrib/utils/entities/formatters/str_simple_sharp_prefixed_fmt.py,sha256=rEUIma9O3kOBWIguGtJ69JH-00Dhm0vUBOd5yNcKweY,653
282
201
  arekit/contrib/utils/io_utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
283
202
  arekit/contrib/utils/io_utils/embedding.py,sha256=cBDRv_1LROJ262QaL3QVfGt2W9EvBfbh83oL41PJn60,2543
284
- arekit/contrib/utils/io_utils/opinions.py,sha256=eh925ODPw6_t1N0qDh3THNwj3OE86DNEBXFxkD9mbh0,1395
285
- arekit/contrib/utils/io_utils/samples.py,sha256=ZA3UeURysxeLbilXBNf2PuFoqOxjsXgx-BBMprS9vWw,2737
286
- arekit/contrib/utils/io_utils/utils.py,sha256=NNuebsehP0eVCpqYoZE6hRsYgklKu89SEvm2VqSQvUw,1012
287
- arekit/contrib/utils/lexicons/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
288
- arekit/contrib/utils/lexicons/lexicon.py,sha256=VEyORMUzhqDm4lNbKU_ex9pRFHjZMwe2cw0V_TLCBNQ,1153
289
- arekit/contrib/utils/lexicons/relation.py,sha256=PkF6FZsbQz617l5bqUvwZKC3uCYduxpoL-xdy9ZkwWY,1205
290
- arekit/contrib/utils/lexicons/rusentilex.py,sha256=pA_M1OTRr1HpeUa27GuJEBrFsKUX6Vet0iGYqkcCoIY,954
291
- arekit/contrib/utils/nn/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
292
- arekit/contrib/utils/nn/rows.py,sha256=xN-AKBrfv-RC08DxKoSsDLMitXcQ-IW33gep-Av1T3I,3966
203
+ arekit/contrib/utils/io_utils/utils.py,sha256=310SIJTsNLn2OZrGPer9W4ZP52PHkjBK3zsyqxVs3h0,537
293
204
  arekit/contrib/utils/np_utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
294
205
  arekit/contrib/utils/np_utils/embedding.py,sha256=G7Ls_ClzbskLLy-opRcVzQlfUfhdwbqoXgk0zoGrmHM,798
295
206
  arekit/contrib/utils/np_utils/npz_utils.py,sha256=XoUHNmOlcr2X674R1xKGUJitEpFCIBJ8DOpNEPhtJFk,234
@@ -298,42 +209,19 @@ arekit/contrib/utils/pipelines/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5
298
209
  arekit/contrib/utils/pipelines/opinion_collections.py,sha256=y9-klVJGCN9mPd7t1ECllAiCnAb3MKVXC1PnYddp5sQ,3195
299
210
  arekit/contrib/utils/pipelines/items/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
300
211
  arekit/contrib/utils/pipelines/items/sampling/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
301
- arekit/contrib/utils/pipelines/items/sampling/base.py,sha256=F4XcaJBswvwEso26SHEtXimivl2BlxU7mvEPlYBwu7U,4374
302
- arekit/contrib/utils/pipelines/items/sampling/networks.py,sha256=u1m__iOXuKcmBpE04mXo93g2hVk5_D6KhbWqHzkjy00,2621
212
+ arekit/contrib/utils/pipelines/items/sampling/base.py,sha256=-H-r5GIi9ee7CxxpJs8KnHC91l7Y1dYaWPR_OK17E8g,4245
213
+ arekit/contrib/utils/pipelines/items/sampling/networks.py,sha256=E0EjQ4KRd3oYLFVbie05XJa00JqR26eLRoMrDnuQySQ,2653
303
214
  arekit/contrib/utils/pipelines/items/text/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
304
- arekit/contrib/utils/pipelines/items/text/entities_default.py,sha256=d9qg-5RxXsZ8J0_ArzRGI7zpaJCciRq5zZ5JYBjt98o,709
305
- arekit/contrib/utils/pipelines/items/text/frames.py,sha256=Ibo5ztHAUw2dI5ToBkhTdHHOTZhJxDoXe9oXDD6zOS4,2569
306
- arekit/contrib/utils/pipelines/items/text/frames_lemmatized.py,sha256=QALLpt6aJR3a0NoPXKSI-SAj78_6pd2OWcrU-yoL_Og,1756
307
- arekit/contrib/utils/pipelines/items/text/frames_negation.py,sha256=ON70iFGU6FozItTr_2Hjxx0yjtLy0mCQrBhDiP86F0Y,1222
308
- arekit/contrib/utils/pipelines/items/text/terms_splitter.py,sha256=PZabs0oMCj67UXuV4awTooxgUyNi1uFgRGhOfWcePQ0,381
309
- arekit/contrib/utils/pipelines/items/text/tokenizer.py,sha256=3Vo5xAtYpcoK-a_HAG1e8Q7F6K7WdXm_enpGtQBQBlE,3246
310
- arekit/contrib/utils/pipelines/items/text/translator.py,sha256=3JtvIVPhSCy1xxl2BEhbJ0kRtwNwFLJXEHe8PQEsLqI,5286
311
- arekit/contrib/utils/pipelines/sources/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
312
- arekit/contrib/utils/pipelines/sources/nerel/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
313
- arekit/contrib/utils/pipelines/sources/nerel/doc_provider.py,sha256=0japm9EJoBKawGtfgrHSMfICByQEvPlmEGZkCLv8W18,1105
314
- arekit/contrib/utils/pipelines/sources/nerel/extract_text_relations.py,sha256=Dc16ioq6SPuYHIHZaKuOrMkRR8QMcR0JCq_k1IYVY1I,3813
315
- arekit/contrib/utils/pipelines/sources/nerel/labels_fmt.py,sha256=X410yjP6P4mhHDsAfUthPtzCjDbL64wu5HViQFkMjrw,2624
316
- arekit/contrib/utils/pipelines/sources/nerel_bio/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
317
- arekit/contrib/utils/pipelines/sources/nerel_bio/doc_provider.py,sha256=gn3-pGkONybArufwRtH3xl0EcC2HuhmgbJnKGEjKff4,1246
318
- arekit/contrib/utils/pipelines/sources/nerel_bio/extrat_text_relations.py,sha256=BQ5FQfQHITWnTj24580PSXaOEmf5c8NHOwRuXDHS9qw,3835
319
- arekit/contrib/utils/pipelines/sources/nerel_bio/labels_fmt.py,sha256=uOJWpQyYBsESw1Z8tOle3MH0HZPVMPUbQxmE1EGdzjs,3457
320
- arekit/contrib/utils/pipelines/sources/ruattitudes/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
321
- arekit/contrib/utils/pipelines/sources/ruattitudes/doc_provider.py,sha256=zzlbEMYnqMu9ezQ3AX-71LWIiE4Ln_v9xHTCLYrNvfs,2613
322
- arekit/contrib/utils/pipelines/sources/ruattitudes/entity_filter.py,sha256=Yyyk-RbWtXCnBJ9tNyObZ4rHNICrl5Ho4wP2rtVGvBY,807
323
- arekit/contrib/utils/pipelines/sources/ruattitudes/extract_text_opinions.py,sha256=8CKusj0IyF4RZKNOV0wG4Hcbq34o4kCE_sZmfYjW9_Y,3513
324
- arekit/contrib/utils/pipelines/sources/rusentrel/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
325
- arekit/contrib/utils/pipelines/sources/rusentrel/doc_provider.py,sha256=2_CtVC_pDr-SDbhuh7GqBY5XYvDQQPR5LV-cGSz_xi4,872
326
- arekit/contrib/utils/pipelines/sources/rusentrel/extract_text_opinions.py,sha256=LS6DD9yRyuGk6NAOiJHukhO1QmQrKsXIKr8QqoA_c0E,5804
327
- arekit/contrib/utils/pipelines/sources/sentinerel/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
328
- arekit/contrib/utils/pipelines/sources/sentinerel/doc_provider.py,sha256=LiIOa0p_MOmzVCrvMZR-sNalOUQb8iu_knIADC9pt4E,1304
329
- arekit/contrib/utils/pipelines/sources/sentinerel/entity_filter.py,sha256=6iROY3G455P_olxhxjhI2WgrOywbTpuNYO9wiNzPPck,1832
330
- arekit/contrib/utils/pipelines/sources/sentinerel/extract_text_opinions.py,sha256=Kom_EwzTiQRggJFof1e1OI0RFJQSeylH3ad-HVnvwrA,9841
331
- arekit/contrib/utils/pipelines/sources/sentinerel/labels_fmt.py,sha256=OzU65CVF4E_7JJRknexknFjRGB8qD9n25Nk-G4g91hM,1649
215
+ arekit/contrib/utils/pipelines/items/text/entities_default.py,sha256=vNx5ir2mf7a1gg_OeqUsf_p1Fu2k7QIFxVpe-CuwZ84,727
216
+ arekit/contrib/utils/pipelines/items/text/frames.py,sha256=pZQybYfgEQB1DM3PtmsgrtB2Xl0HejmP4rhT0nR_YKE,2586
217
+ arekit/contrib/utils/pipelines/items/text/frames_lemmatized.py,sha256=4rIAAB-_GeWNbu5KyaDm5qttH4o2Bzpdvy-D9YR5bRk,1776
218
+ arekit/contrib/utils/pipelines/items/text/frames_negation.py,sha256=AdoY7lqSAT0RApp0DbqeI7xxyRVF6NPJLAfR59lsIec,1303
219
+ arekit/contrib/utils/pipelines/items/text/tokenizer.py,sha256=FmV5flziDLCNttxrUzRr-FGCcKK6venZEcZ-KwcqwNE,3147
220
+ arekit/contrib/utils/pipelines/items/text/translator.py,sha256=TkXVyZYRbS8P4S2Pnn2GzQMRa-9ba-nS4_zXvsf16vU,5365
332
221
  arekit/contrib/utils/pipelines/text_opinion/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
333
- arekit/contrib/utils/pipelines/text_opinion/extraction.py,sha256=JJTxD1_qLznrvxnAL-6OUWW0R-C8WsX0i1tDobKT4-Q,4294
222
+ arekit/contrib/utils/pipelines/text_opinion/extraction.py,sha256=QoK0-dfMl27uOOfUhvnbvzYX23jCpZbm97Qs27Na7VA,4133
334
223
  arekit/contrib/utils/pipelines/text_opinion/annot/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
335
224
  arekit/contrib/utils/pipelines/text_opinion/annot/algo_based.py,sha256=bwS-UR2x3rgp_xqnf6z-73T-eIZE_kltRSGYxgd_WpU,1751
336
- arekit/contrib/utils/pipelines/text_opinion/annot/predefined.py,sha256=h8usYdA-selpDdd55NLe1LsJLlI5Bf79e3DXCk4eFVc,3819
337
225
  arekit/contrib/utils/pipelines/text_opinion/filters/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
338
226
  arekit/contrib/utils/pipelines/text_opinion/filters/base.py,sha256=GnKnJB4MKqiMSJny3a9Na7l7Csm7abbt6GADBCY18Mw,143
339
227
  arekit/contrib/utils/pipelines/text_opinion/filters/distance_based.py,sha256=3Pjq4IJJMT7dYpK266lN66WQJUnQO3P0rG6wcAvJOOA,649
@@ -357,18 +245,15 @@ arekit/contrib/utils/processing/pos/mystem_wrap.py,sha256=C9AnRIAZL4e8DMNte9LDuv
357
245
  arekit/contrib/utils/processing/pos/russian.py,sha256=POCo6xKmK7vAEq-kWlODg611kLOtOj37OVc3L_GWL-8,229
358
246
  arekit/contrib/utils/processing/text/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
359
247
  arekit/contrib/utils/processing/text/tokens.py,sha256=_3u5Oy1MG_QfHH8wi0x0nA588qSaCp3Wmnp2SzMWjXY,3573
360
- arekit/contrib/utils/sources/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
361
- arekit/contrib/utils/sources/sentinerel/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
362
- arekit/contrib/utils/sources/sentinerel/text_opinion/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
363
- arekit/contrib/utils/sources/sentinerel/text_opinion/prof_per_org_filter.py,sha256=rtKdSDdaiwrCcSIdvzUIZwjaOm68xoq0TWpy3ISe6o0,2747
364
248
  arekit/contrib/utils/synonyms/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
365
249
  arekit/contrib/utils/synonyms/simple.py,sha256=ST9EwuWP88FzbyV8Gi0-biTPgGOsZ7OWyaBWHL_U_eo,557
366
250
  arekit/contrib/utils/synonyms/stemmer_based.py,sha256=q19P_XOCWN2_JrBtybAt7ToMIr1ambw4ahr0fSEEHmQ,1400
367
251
  arekit/contrib/utils/vectorizers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
368
252
  arekit/contrib/utils/vectorizers/bpe.py,sha256=bFS5MZytvU1L21YS5aAeb3FZl7RMjyog4lWwysvKD-8,3047
369
253
  arekit/contrib/utils/vectorizers/random_norm.py,sha256=TL86Kz6p59lJqoLg8RwQRTvfhr0e-tiULGHhO4vhBbo,1339
370
- arekit-0.24.0.dist-info/LICENSE,sha256=JO9tIbxAvhwDv73cX-gUStr9yA-TY7wusUeLHRx7JuY,1076
371
- arekit-0.24.0.dist-info/METADATA,sha256=Vr2Kb1uxb7PgJhUd14_dKQietezD0j391gEfUhj5tWU,844
372
- arekit-0.24.0.dist-info/WHEEL,sha256=G16H4A3IeoQmnOrYV4ueZGKSjhipXx8zc8nu9FGlvMA,92
373
- arekit-0.24.0.dist-info/top_level.txt,sha256=4pXuFE8IE0lBsqi6ZsR7figx0H939VIX4_-76YIbkOQ,7
374
- arekit-0.24.0.dist-info/RECORD,,
254
+ arekit-0.25.0.data/data/logo.png,sha256=S8OZ4MGGD72Pf5co7ngYbXKkJH1EUhbErUXv1ZjUWiU,45718
255
+ arekit-0.25.0.dist-info/LICENSE,sha256=JO9tIbxAvhwDv73cX-gUStr9yA-TY7wusUeLHRx7JuY,1076
256
+ arekit-0.25.0.dist-info/METADATA,sha256=4DSUy6aTidHG9jFR7jMwQe3uJGER-e8E9vU0q2G20Uo,3145
257
+ arekit-0.25.0.dist-info/WHEEL,sha256=eOLhNAGa2EW3wWl_TU484h7q1UNgy0JXjjoqKoxAAQc,92
258
+ arekit-0.25.0.dist-info/top_level.txt,sha256=4pXuFE8IE0lBsqi6ZsR7figx0H939VIX4_-76YIbkOQ,7
259
+ arekit-0.25.0.dist-info/RECORD,,
@@ -1,5 +1,5 @@
1
1
  Wheel-Version: 1.0
2
- Generator: bdist_wheel (0.37.1)
2
+ Generator: bdist_wheel (0.44.0)
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
5
5
 
@@ -1,37 +0,0 @@
1
- from arekit.common.pipeline.items.base import BasePipelineItem
2
- from arekit.common.text.partitioning.base import BasePartitioning
3
- from arekit.common.pipeline.context import PipelineContext
4
-
5
-
6
- class SentenceObjectsParserPipelineItem(BasePipelineItem):
7
-
8
- def __init__(self, partitioning):
9
- assert(isinstance(partitioning, BasePartitioning))
10
- self.__partitioning = partitioning
11
-
12
- # region protected
13
-
14
- def _get_text(self, pipeline_ctx):
15
- return None
16
-
17
- def _get_parts_provider_func(self, input_data, pipeline_ctx):
18
- raise NotImplementedError()
19
-
20
- # endregion
21
-
22
- def apply_core(self, input_data, pipeline_ctx):
23
- assert(isinstance(pipeline_ctx, PipelineContext))
24
- external_input = self._get_text(pipeline_ctx)
25
- actual_input = input_data if external_input is None else external_input
26
- parts_it = self._get_parts_provider_func(input_data=actual_input, pipeline_ctx=pipeline_ctx)
27
- return self.__partitioning.provide(text=actual_input, parts_it=parts_it)
28
-
29
- # region base
30
-
31
- def __enter__(self):
32
- return self
33
-
34
- def __exit__(self, exc_type, exc_val, exc_tb):
35
- pass
36
-
37
- # endregion
@@ -1,12 +0,0 @@
1
- from arekit.common.pipeline.base import BasePipeline
2
- from arekit.common.text.parsed import BaseParsedText
3
-
4
-
5
- class BaseTextParser(BasePipeline):
6
-
7
- def run(self, input_data, params_dict=None, parent_ctx=None):
8
- output_data = super(BaseTextParser, self).run(input_data=input_data,
9
- params_dict=params_dict,
10
- parent_ctx=parent_ctx)
11
-
12
- return BaseParsedText(terms=output_data)
@@ -1,4 +0,0 @@
1
- class BasePartitioning(object):
2
-
3
- def provide(self, text, parts_it):
4
- raise NotImplementedError()