arekit 0.24.0__py3-none-any.whl → 0.25.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- arekit/common/docs/entities_grouping.py +2 -1
- arekit/common/docs/parser.py +52 -20
- arekit/common/pipeline/base.py +12 -16
- arekit/common/pipeline/batching.py +28 -0
- arekit/common/pipeline/context.py +5 -1
- arekit/common/pipeline/items/base.py +38 -1
- arekit/common/pipeline/items/flatten.py +5 -1
- arekit/common/pipeline/items/handle.py +2 -1
- arekit/common/pipeline/items/iter.py +2 -1
- arekit/common/pipeline/items/map.py +2 -1
- arekit/common/pipeline/items/map_nested.py +4 -0
- arekit/common/pipeline/utils.py +32 -0
- arekit/common/service/sqlite.py +36 -0
- arekit/common/text/{partitioning/str.py → partitioning.py} +14 -9
- arekit/common/utils.py +0 -44
- arekit/contrib/utils/data/contents/opinions.py +13 -3
- arekit/contrib/utils/data/readers/sqlite.py +14 -0
- arekit/contrib/utils/data/storages/row_cache.py +6 -1
- arekit/contrib/utils/data/storages/sqlite_based.py +17 -0
- arekit/contrib/utils/data/writers/sqlite_native.py +4 -0
- arekit/contrib/utils/io_utils/utils.py +1 -18
- arekit/contrib/utils/pipelines/items/sampling/base.py +7 -12
- arekit/contrib/utils/pipelines/items/sampling/networks.py +3 -2
- arekit/contrib/utils/pipelines/items/text/entities_default.py +2 -2
- arekit/contrib/utils/pipelines/items/text/frames.py +2 -3
- arekit/contrib/utils/pipelines/items/text/frames_lemmatized.py +2 -2
- arekit/contrib/utils/pipelines/items/text/frames_negation.py +2 -1
- arekit/contrib/utils/pipelines/items/text/tokenizer.py +2 -4
- arekit/contrib/utils/pipelines/items/text/translator.py +2 -1
- arekit/contrib/utils/pipelines/text_opinion/extraction.py +6 -9
- arekit/contrib/utils/serializer.py +1 -2
- arekit-0.25.0.data/data/logo.png +0 -0
- arekit-0.25.0.dist-info/METADATA +82 -0
- {arekit-0.24.0.dist-info → arekit-0.25.0.dist-info}/RECORD +38 -153
- {arekit-0.24.0.dist-info → arekit-0.25.0.dist-info}/WHEEL +1 -1
- arekit/common/docs/objects_parser.py +0 -37
- arekit/common/text/parser.py +0 -12
- arekit/common/text/partitioning/base.py +0 -4
- arekit/common/text/partitioning/terms.py +0 -35
- arekit/contrib/source/__init__.py +0 -0
- arekit/contrib/source/brat/__init__.py +0 -0
- arekit/contrib/source/brat/annot.py +0 -84
- arekit/contrib/source/brat/doc.py +0 -28
- arekit/contrib/source/brat/entities/__init__.py +0 -0
- arekit/contrib/source/brat/entities/compound.py +0 -13
- arekit/contrib/source/brat/entities/entity.py +0 -42
- arekit/contrib/source/brat/entities/parser.py +0 -53
- arekit/contrib/source/brat/opinions/__init__.py +0 -0
- arekit/contrib/source/brat/opinions/converter.py +0 -19
- arekit/contrib/source/brat/relation.py +0 -32
- arekit/contrib/source/brat/sentence.py +0 -69
- arekit/contrib/source/brat/sentences_reader.py +0 -128
- arekit/contrib/source/download.py +0 -41
- arekit/contrib/source/nerel/__init__.py +0 -0
- arekit/contrib/source/nerel/entities.py +0 -55
- arekit/contrib/source/nerel/folding/__init__.py +0 -0
- arekit/contrib/source/nerel/folding/fixed.py +0 -74
- arekit/contrib/source/nerel/io_utils.py +0 -62
- arekit/contrib/source/nerel/labels.py +0 -241
- arekit/contrib/source/nerel/reader.py +0 -46
- arekit/contrib/source/nerel/utils.py +0 -24
- arekit/contrib/source/nerel/versions.py +0 -12
- arekit/contrib/source/nerelbio/__init__.py +0 -0
- arekit/contrib/source/nerelbio/io_utils.py +0 -62
- arekit/contrib/source/nerelbio/labels.py +0 -265
- arekit/contrib/source/nerelbio/reader.py +0 -8
- arekit/contrib/source/nerelbio/versions.py +0 -8
- arekit/contrib/source/ruattitudes/__init__.py +0 -0
- arekit/contrib/source/ruattitudes/collection.py +0 -36
- arekit/contrib/source/ruattitudes/doc.py +0 -51
- arekit/contrib/source/ruattitudes/doc_brat.py +0 -44
- arekit/contrib/source/ruattitudes/entity/__init__.py +0 -0
- arekit/contrib/source/ruattitudes/entity/parser.py +0 -7
- arekit/contrib/source/ruattitudes/io_utils.py +0 -56
- arekit/contrib/source/ruattitudes/labels_fmt.py +0 -12
- arekit/contrib/source/ruattitudes/opinions/__init__.py +0 -0
- arekit/contrib/source/ruattitudes/opinions/base.py +0 -28
- arekit/contrib/source/ruattitudes/opinions/converter.py +0 -37
- arekit/contrib/source/ruattitudes/reader.py +0 -268
- arekit/contrib/source/ruattitudes/sentence.py +0 -73
- arekit/contrib/source/ruattitudes/synonyms.py +0 -17
- arekit/contrib/source/ruattitudes/text_object.py +0 -59
- arekit/contrib/source/rusentiframes/__init__.py +0 -0
- arekit/contrib/source/rusentiframes/collection.py +0 -157
- arekit/contrib/source/rusentiframes/effect.py +0 -24
- arekit/contrib/source/rusentiframes/io_utils.py +0 -19
- arekit/contrib/source/rusentiframes/labels_fmt.py +0 -22
- arekit/contrib/source/rusentiframes/polarity.py +0 -35
- arekit/contrib/source/rusentiframes/role.py +0 -15
- arekit/contrib/source/rusentiframes/state.py +0 -24
- arekit/contrib/source/rusentiframes/types.py +0 -42
- arekit/contrib/source/rusentiframes/value.py +0 -2
- arekit/contrib/source/rusentrel/__init__.py +0 -0
- arekit/contrib/source/rusentrel/const.py +0 -3
- arekit/contrib/source/rusentrel/docs_reader.py +0 -51
- arekit/contrib/source/rusentrel/entities.py +0 -26
- arekit/contrib/source/rusentrel/io_utils.py +0 -125
- arekit/contrib/source/rusentrel/labels_fmt.py +0 -12
- arekit/contrib/source/rusentrel/opinions/__init__.py +0 -0
- arekit/contrib/source/rusentrel/opinions/collection.py +0 -30
- arekit/contrib/source/rusentrel/opinions/converter.py +0 -40
- arekit/contrib/source/rusentrel/opinions/provider.py +0 -54
- arekit/contrib/source/rusentrel/opinions/writer.py +0 -42
- arekit/contrib/source/rusentrel/synonyms.py +0 -17
- arekit/contrib/source/sentinerel/__init__.py +0 -0
- arekit/contrib/source/sentinerel/entities.py +0 -52
- arekit/contrib/source/sentinerel/folding/__init__.py +0 -0
- arekit/contrib/source/sentinerel/folding/factory.py +0 -31
- arekit/contrib/source/sentinerel/folding/fixed.py +0 -70
- arekit/contrib/source/sentinerel/io_utils.py +0 -87
- arekit/contrib/source/sentinerel/labels.py +0 -53
- arekit/contrib/source/sentinerel/labels_scaler.py +0 -30
- arekit/contrib/source/sentinerel/reader.py +0 -42
- arekit/contrib/source/synonyms/__init__.py +0 -0
- arekit/contrib/source/synonyms/utils.py +0 -19
- arekit/contrib/source/zip_utils.py +0 -47
- arekit/contrib/utils/connotations/__init__.py +0 -0
- arekit/contrib/utils/connotations/rusentiframes_sentiment.py +0 -23
- arekit/contrib/utils/download.py +0 -77
- arekit/contrib/utils/io_utils/opinions.py +0 -37
- arekit/contrib/utils/io_utils/samples.py +0 -79
- arekit/contrib/utils/lexicons/__init__.py +0 -0
- arekit/contrib/utils/lexicons/lexicon.py +0 -41
- arekit/contrib/utils/lexicons/relation.py +0 -42
- arekit/contrib/utils/lexicons/rusentilex.py +0 -37
- arekit/contrib/utils/nn/__init__.py +0 -0
- arekit/contrib/utils/nn/rows.py +0 -83
- arekit/contrib/utils/pipelines/items/text/terms_splitter.py +0 -10
- arekit/contrib/utils/pipelines/sources/__init__.py +0 -0
- arekit/contrib/utils/pipelines/sources/nerel/__init__.py +0 -0
- arekit/contrib/utils/pipelines/sources/nerel/doc_provider.py +0 -27
- arekit/contrib/utils/pipelines/sources/nerel/extract_text_relations.py +0 -65
- arekit/contrib/utils/pipelines/sources/nerel/labels_fmt.py +0 -60
- arekit/contrib/utils/pipelines/sources/nerel_bio/__init__.py +0 -0
- arekit/contrib/utils/pipelines/sources/nerel_bio/doc_provider.py +0 -29
- arekit/contrib/utils/pipelines/sources/nerel_bio/extrat_text_relations.py +0 -64
- arekit/contrib/utils/pipelines/sources/nerel_bio/labels_fmt.py +0 -79
- arekit/contrib/utils/pipelines/sources/ruattitudes/__init__.py +0 -0
- arekit/contrib/utils/pipelines/sources/ruattitudes/doc_provider.py +0 -56
- arekit/contrib/utils/pipelines/sources/ruattitudes/entity_filter.py +0 -20
- arekit/contrib/utils/pipelines/sources/ruattitudes/extract_text_opinions.py +0 -65
- arekit/contrib/utils/pipelines/sources/rusentrel/__init__.py +0 -0
- arekit/contrib/utils/pipelines/sources/rusentrel/doc_provider.py +0 -21
- arekit/contrib/utils/pipelines/sources/rusentrel/extract_text_opinions.py +0 -107
- arekit/contrib/utils/pipelines/sources/sentinerel/__init__.py +0 -0
- arekit/contrib/utils/pipelines/sources/sentinerel/doc_provider.py +0 -29
- arekit/contrib/utils/pipelines/sources/sentinerel/entity_filter.py +0 -62
- arekit/contrib/utils/pipelines/sources/sentinerel/extract_text_opinions.py +0 -180
- arekit/contrib/utils/pipelines/sources/sentinerel/labels_fmt.py +0 -50
- arekit/contrib/utils/pipelines/text_opinion/annot/predefined.py +0 -88
- arekit/contrib/utils/resources.py +0 -25
- arekit/contrib/utils/sources/__init__.py +0 -0
- arekit/contrib/utils/sources/sentinerel/__init__.py +0 -0
- arekit/contrib/utils/sources/sentinerel/text_opinion/__init__.py +0 -0
- arekit/contrib/utils/sources/sentinerel/text_opinion/prof_per_org_filter.py +0 -63
- arekit/download_data.py +0 -11
- arekit-0.24.0.dist-info/METADATA +0 -23
- /arekit/common/{text/partitioning → service}/__init__.py +0 -0
- {arekit-0.24.0.dist-info → arekit-0.25.0.dist-info}/LICENSE +0 -0
- {arekit-0.24.0.dist-info → arekit-0.25.0.dist-info}/top_level.txt +0 -0
|
@@ -8,7 +8,7 @@ from arekit.contrib.utils.pipelines.items.sampling.base import BaseSerializerPip
|
|
|
8
8
|
|
|
9
9
|
class NetworksInputSerializerPipelineItem(BaseSerializerPipelineItem):
|
|
10
10
|
|
|
11
|
-
def __init__(self, save_labels_func, rows_provider, samples_io, emb_io, storage, save_embedding=True):
|
|
11
|
+
def __init__(self, save_labels_func, rows_provider, samples_io, emb_io, storage, save_embedding=True, **kwargs):
|
|
12
12
|
""" This pipeline item allows to perform a data preparation for neural network models.
|
|
13
13
|
|
|
14
14
|
considering a list of the whole data_types with the related pipelines,
|
|
@@ -23,7 +23,8 @@ class NetworksInputSerializerPipelineItem(BaseSerializerPipelineItem):
|
|
|
23
23
|
rows_provider=rows_provider,
|
|
24
24
|
samples_io=samples_io,
|
|
25
25
|
save_labels_func=save_labels_func,
|
|
26
|
-
storage=storage
|
|
26
|
+
storage=storage,
|
|
27
|
+
**kwargs)
|
|
27
28
|
|
|
28
29
|
self.__emb_io = emb_io
|
|
29
30
|
self.__save_embedding = save_embedding
|
|
@@ -4,8 +4,8 @@ from arekit.common.pipeline.items.base import BasePipelineItem
|
|
|
4
4
|
|
|
5
5
|
class TextEntitiesParser(BasePipelineItem):
|
|
6
6
|
|
|
7
|
-
def __init__(self):
|
|
8
|
-
super(TextEntitiesParser, self).__init__()
|
|
7
|
+
def __init__(self, **kwargs):
|
|
8
|
+
super(TextEntitiesParser, self).__init__(**kwargs)
|
|
9
9
|
|
|
10
10
|
@staticmethod
|
|
11
11
|
def __process_word(word):
|
|
@@ -6,11 +6,10 @@ from arekit.common.pipeline.items.base import BasePipelineItem
|
|
|
6
6
|
|
|
7
7
|
class FrameVariantsParser(BasePipelineItem):
|
|
8
8
|
|
|
9
|
-
def __init__(self, frame_variants):
|
|
9
|
+
def __init__(self, frame_variants, **kwargs):
|
|
10
10
|
assert(isinstance(frame_variants, FrameVariantsCollection))
|
|
11
11
|
assert(len(frame_variants) > 0)
|
|
12
|
-
|
|
13
|
-
super(FrameVariantsParser, self).__init__()
|
|
12
|
+
super(FrameVariantsParser, self).__init__(**kwargs)
|
|
14
13
|
|
|
15
14
|
self.__frame_variants = frame_variants
|
|
16
15
|
self.__max_variant_len = max([len(variant) for _, variant in frame_variants.iter_variants()])
|
|
@@ -5,10 +5,10 @@ from arekit.contrib.utils.processing.languages.ru.mods import RussianLanguageMod
|
|
|
5
5
|
|
|
6
6
|
class LemmasBasedFrameVariantsParser(FrameVariantsParser):
|
|
7
7
|
|
|
8
|
-
def __init__(self, frame_variants, stemmer, locale_mods=RussianLanguageMods, save_lemmas=False):
|
|
8
|
+
def __init__(self, frame_variants, stemmer, locale_mods=RussianLanguageMods, save_lemmas=False, **kwargs):
|
|
9
9
|
assert(isinstance(stemmer, Stemmer))
|
|
10
10
|
assert(isinstance(save_lemmas, bool))
|
|
11
|
-
super(LemmasBasedFrameVariantsParser, self).__init__(frame_variants=frame_variants)
|
|
11
|
+
super(LemmasBasedFrameVariantsParser, self).__init__(frame_variants=frame_variants, **kwargs)
|
|
12
12
|
|
|
13
13
|
self.__frame_variants = frame_variants
|
|
14
14
|
self.__stemmer = stemmer
|
|
@@ -7,8 +7,9 @@ from arekit.contrib.utils.processing.languages.ru.mods import RussianLanguageMod
|
|
|
7
7
|
|
|
8
8
|
class FrameVariantsSentimentNegation(BasePipelineItem):
|
|
9
9
|
|
|
10
|
-
def __init__(self, locale_mods=RussianLanguageMods):
|
|
10
|
+
def __init__(self, locale_mods=RussianLanguageMods, **kwargs):
|
|
11
11
|
assert(issubclass(locale_mods, BaseLanguageMods))
|
|
12
|
+
super(FrameVariantsSentimentNegation, self).__init__(**kwargs)
|
|
12
13
|
self._locale_mods = locale_mods
|
|
13
14
|
|
|
14
15
|
@staticmethod
|
|
@@ -1,7 +1,6 @@
|
|
|
1
1
|
import logging
|
|
2
2
|
|
|
3
3
|
from arekit.common.context.token import Token
|
|
4
|
-
from arekit.common.pipeline.context import PipelineContext
|
|
5
4
|
from arekit.common.pipeline.items.base import BasePipelineItem
|
|
6
5
|
from arekit.common.utils import split_by_whitespaces
|
|
7
6
|
from arekit.contrib.utils.processing.text.tokens import Tokens
|
|
@@ -14,14 +13,13 @@ class DefaultTextTokenizer(BasePipelineItem):
|
|
|
14
13
|
""" Default parser implementation.
|
|
15
14
|
"""
|
|
16
15
|
|
|
17
|
-
def __init__(self, keep_tokens=True):
|
|
18
|
-
super(DefaultTextTokenizer, self).__init__()
|
|
16
|
+
def __init__(self, keep_tokens=True, **kwargs):
|
|
17
|
+
super(DefaultTextTokenizer, self).__init__(**kwargs)
|
|
19
18
|
self.__keep_tokens = keep_tokens
|
|
20
19
|
|
|
21
20
|
# region protected methods
|
|
22
21
|
|
|
23
22
|
def apply_core(self, input_data, pipeline_ctx):
|
|
24
|
-
assert(isinstance(pipeline_ctx, PipelineContext))
|
|
25
23
|
output_data = self.__process_parts(input_data)
|
|
26
24
|
if not self.__keep_tokens:
|
|
27
25
|
output_data = [word for word in output_data if not isinstance(word, Token)]
|
|
@@ -9,10 +9,11 @@ class MLTextTranslatorPipelineItem(BasePipelineItem):
|
|
|
9
9
|
""" Machine learning based translator pipeline item.
|
|
10
10
|
"""
|
|
11
11
|
|
|
12
|
-
def __init__(self, batch_translate_model, do_translate_entity=True):
|
|
12
|
+
def __init__(self, batch_translate_model, do_translate_entity=True, **kwargs):
|
|
13
13
|
""" Model, which is based on translation of the text,
|
|
14
14
|
represented as a list of words.
|
|
15
15
|
"""
|
|
16
|
+
super(MLTextTranslatorPipelineItem, self).__init__(**kwargs)
|
|
16
17
|
self.__do_translate_entity = do_translate_entity
|
|
17
18
|
self.__translate = batch_translate_model
|
|
18
19
|
|
|
@@ -3,12 +3,10 @@ from arekit.common.linkage.text_opinions import TextOpinionsLinkage
|
|
|
3
3
|
from arekit.common.docs.parsed.base import ParsedDocument
|
|
4
4
|
from arekit.common.docs.parsed.providers.entity_service import EntityServiceProvider
|
|
5
5
|
from arekit.common.docs.parsed.service import ParsedDocumentService
|
|
6
|
-
from arekit.common.docs.parser import
|
|
7
|
-
from arekit.common.pipeline.base import BasePipeline
|
|
6
|
+
from arekit.common.docs.parser import DocumentParsers
|
|
8
7
|
from arekit.common.pipeline.items.flatten import FlattenIterPipelineItem
|
|
9
8
|
from arekit.common.pipeline.items.map import MapPipelineItem
|
|
10
9
|
from arekit.common.pipeline.items.map_nested import MapNestedPipelineItem
|
|
11
|
-
from arekit.common.text.parser import BaseTextParser
|
|
12
10
|
from arekit.common.text_opinions.base import TextOpinion
|
|
13
11
|
from arekit.contrib.utils.pipelines.text_opinion.filters.base import TextOpinionFilter
|
|
14
12
|
from arekit.contrib.utils.pipelines.text_opinion.filters.limitation import FrameworkLimitationsTextOpinionFilter
|
|
@@ -64,9 +62,8 @@ def __iter_text_opinion_linkages(parsed_doc, annotators, entity_index_func,
|
|
|
64
62
|
yield MetaEmptyLinkedDataWrapper(doc_id=parsed_doc.RelatedDocID)
|
|
65
63
|
|
|
66
64
|
|
|
67
|
-
def text_opinion_extraction_pipeline(
|
|
65
|
+
def text_opinion_extraction_pipeline(pipeline_items, get_doc_by_id_func, annotators, entity_index_func,
|
|
68
66
|
text_opinion_filters=None, use_meta_between_docs=True):
|
|
69
|
-
assert(isinstance(text_parser, BaseTextParser))
|
|
70
67
|
assert(callable(get_doc_by_id_func))
|
|
71
68
|
assert(isinstance(annotators, list))
|
|
72
69
|
assert(isinstance(text_opinion_filters, list) or text_opinion_filters is None)
|
|
@@ -75,13 +72,13 @@ def text_opinion_extraction_pipeline(text_parser, get_doc_by_id_func, annotators
|
|
|
75
72
|
extra_filters = [] if text_opinion_filters is None else text_opinion_filters
|
|
76
73
|
actual_text_opinion_filters = [FrameworkLimitationsTextOpinionFilter()] + extra_filters
|
|
77
74
|
|
|
78
|
-
return
|
|
75
|
+
return [
|
|
79
76
|
# (doc_id) -> (doc)
|
|
80
77
|
MapPipelineItem(map_func=lambda doc_id: get_doc_by_id_func(doc_id)),
|
|
81
78
|
|
|
82
79
|
# (doc, ppl_ctx) -> (parsed_doc)
|
|
83
|
-
MapNestedPipelineItem(map_func=lambda doc, ppl_ctx:
|
|
84
|
-
doc=doc,
|
|
80
|
+
MapNestedPipelineItem(map_func=lambda doc, ppl_ctx: DocumentParsers.parse(
|
|
81
|
+
doc=doc, pipeline_items=pipeline_items, parent_ppl_ctx=ppl_ctx)),
|
|
85
82
|
|
|
86
83
|
# (parsed_doc) -> (text_opinions)
|
|
87
84
|
MapPipelineItem(map_func=lambda parsed_doc: __iter_text_opinion_linkages(
|
|
@@ -90,4 +87,4 @@ def text_opinion_extraction_pipeline(text_parser, get_doc_by_id_func, annotators
|
|
|
90
87
|
|
|
91
88
|
# linkages[] -> linkages
|
|
92
89
|
FlattenIterPipelineItem()
|
|
93
|
-
]
|
|
90
|
+
]
|
|
@@ -7,7 +7,6 @@ from arekit.common.data.input.providers.rows.base import BaseRowProvider
|
|
|
7
7
|
from arekit.common.data.input.repositories.base import BaseInputRepository
|
|
8
8
|
from arekit.common.data.input.repositories.sample import BaseInputSamplesRepository
|
|
9
9
|
from arekit.common.data.storages.base import BaseRowsStorage
|
|
10
|
-
from arekit.common.pipeline.base import BasePipeline
|
|
11
10
|
from arekit.contrib.utils.data.contents.opinions import InputTextOpinionProvider
|
|
12
11
|
|
|
13
12
|
logger = logging.getLogger(__name__)
|
|
@@ -28,7 +27,7 @@ class InputDataSerializationHelper(object):
|
|
|
28
27
|
|
|
29
28
|
@staticmethod
|
|
30
29
|
def fill_and_write(pipeline, repo, target, writer, doc_ids_iter, desc=""):
|
|
31
|
-
assert(isinstance(pipeline,
|
|
30
|
+
assert(isinstance(pipeline, list))
|
|
32
31
|
assert(isinstance(doc_ids_iter, Iterable))
|
|
33
32
|
assert(isinstance(repo, BaseInputRepository))
|
|
34
33
|
|
|
Binary file
|
|
@@ -0,0 +1,82 @@
|
|
|
1
|
+
Metadata-Version: 2.1
|
|
2
|
+
Name: arekit
|
|
3
|
+
Version: 0.25.0
|
|
4
|
+
Summary: Document level Attitude and Relation Extraction toolkit (AREkit) for sampling and prompting mass-media news into datasets for ML-model training
|
|
5
|
+
Home-page: https://github.com/nicolay-r/AREkit
|
|
6
|
+
Author: Nicolay Rusnachenko
|
|
7
|
+
Author-email: rusnicolay@gmail.com
|
|
8
|
+
License: MIT License
|
|
9
|
+
Keywords: natural language processing,relation extraction,sentiment analysis
|
|
10
|
+
Classifier: Programming Language :: Python
|
|
11
|
+
Classifier: Programming Language :: Python :: 3.6
|
|
12
|
+
Classifier: Topic :: Software Development :: Libraries :: Python Modules
|
|
13
|
+
Classifier: Topic :: Scientific/Engineering :: Information Analysis
|
|
14
|
+
Classifier: Topic :: Text Processing :: Linguistic
|
|
15
|
+
Requires-Python: >=3.6
|
|
16
|
+
Description-Content-Type: text/markdown
|
|
17
|
+
License-File: LICENSE
|
|
18
|
+
Requires-Dist: tqdm
|
|
19
|
+
Requires-Dist: enum34==1.1.10
|
|
20
|
+
Requires-Dist: numpy>=1.14.5
|
|
21
|
+
Requires-Dist: pymystem3==0.2.0
|
|
22
|
+
|
|
23
|
+
# AREkit 0.25.0
|
|
24
|
+
|
|
25
|
+

|
|
26
|
+
|
|
27
|
+
<p align="center">
|
|
28
|
+
<img src="logo.png"/>
|
|
29
|
+
</p>
|
|
30
|
+
|
|
31
|
+
**AREkit** (Attitude and Relation Extraction Toolkit) --
|
|
32
|
+
is a Python toolkit devoted to document-level Attitude and Relation Extraction between text objects from mass-media news.
|
|
33
|
+
|
|
34
|
+
## Description
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
This toolkit aims at memory-effective data processing in Relation Extraction (RE) related tasks.
|
|
38
|
+
|
|
39
|
+
<p align="center">
|
|
40
|
+
<img src="docs/arekit-pipeline-concept.png"/>
|
|
41
|
+
</p>
|
|
42
|
+
|
|
43
|
+
> Figure: AREkit pipelines design. More on
|
|
44
|
+
> **[ARElight: Context Sampling of Large Texts for Deep Learning Relation Extraction](https://link.springer.com/chapter/10.1007/978-3-031-56069-9_23)** paper
|
|
45
|
+
|
|
46
|
+
In particular, this framework serves the following features:
|
|
47
|
+
* ➿ [pipelines](https://github.com/nicolay-r/AREkit/wiki/Pipelines:-Text-Opinion-Annotation) and iterators for handling large-scale collections serialization without out-of-memory issues.
|
|
48
|
+
* 🔗 EL (entity-linking) API support for objects,
|
|
49
|
+
* ➰ avoidance of cyclic connections,
|
|
50
|
+
* :straight_ruler: distance consideration between relation participants (in `terms` or `sentences`),
|
|
51
|
+
* 📑 relations annotations and filtering rules,
|
|
52
|
+
* *️⃣ entities formatting or masking, and more.
|
|
53
|
+
|
|
54
|
+
The core functionality includes:
|
|
55
|
+
* API for document presentation with EL (Entity Linking, i.e. Object Synonymy) support
|
|
56
|
+
for sentence-level relation preparation (dubbed contexts);
|
|
57
|
+
* API for contexts extraction;
|
|
58
|
+
* Relations transferring from sentence-level onto document-level, and more.
|
|
59
|
+
|
|
60
|
+
## Installation
|
|
61
|
+
|
|
62
|
+
```bash
|
|
63
|
+
pip install git+https://github.com/nicolay-r/AREkit.git@0.25.0-rc
|
|
64
|
+
```
|
|
65
|
+
|
|
66
|
+
## Usage
|
|
67
|
+
|
|
68
|
+
Please follow the **[tutorial section on project Wiki](https://github.com/nicolay-r/AREkit/wiki/Tutorials)** for more details.
|
|
69
|
+
|
|
70
|
+
## How to cite
|
|
71
|
+
Great research is also accompanied by a faithful reference.
|
|
72
|
+
If you use or extend our work, please cite it as follows:
|
|
73
|
+
|
|
74
|
+
```bibtex
|
|
75
|
+
@inproceedings{rusnachenko2024arelight,
|
|
76
|
+
title={ARElight: Context Sampling of Large Texts for Deep Learning Relation Extraction},
|
|
77
|
+
author={Rusnachenko, Nicolay and Liang, Huizhi and Kolomeets, Maxim and Shi, Lei},
|
|
78
|
+
booktitle={European Conference on Information Retrieval},
|
|
79
|
+
year={2024},
|
|
80
|
+
organization={Springer}
|
|
81
|
+
}
|
|
82
|
+
```
|
|
@@ -1,9 +1,8 @@
|
|
|
1
1
|
arekit/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
2
|
-
arekit/download_data.py,sha256=4hrNTLk2j6w-p7SZzBGsMh4V3OHvJGhFvw_hsZ-iu8A,194
|
|
3
2
|
arekit/common/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
4
3
|
arekit/common/bound.py,sha256=lPpHY6ct_CU9e4qXeYjhJfWbTj6Sb_NVtZ1CJheQPNE,1402
|
|
5
4
|
arekit/common/log_utils.py,sha256=OfEQxbExkuRAl9dxlgFEqcFhI4HHoMYT7WE8ud0IPOM,924
|
|
6
|
-
arekit/common/utils.py,sha256=
|
|
5
|
+
arekit/common/utils.py,sha256=eVRGhRy882ow-63Glncc3pJ-_43KSI0ukBePjC8ogAY,2394
|
|
7
6
|
arekit/common/context/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
8
7
|
arekit/common/context/terms_mapper.py,sha256=QA02Cv7D2JKTlXkez_0w0J8HuvNziNF2vrqLgy4Bwc8,1447
|
|
9
8
|
arekit/common/context/token.py,sha256=CpWAlvprUnJfCtYvO8lwdfU_ofSKAOGOudXTwppyzSk,459
|
|
@@ -45,10 +44,9 @@ arekit/common/data/views/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZ
|
|
|
45
44
|
arekit/common/data/views/samples.py,sha256=LDqUDqArGt90ujRB4kDFgDHLmR2_AQoUnzhxpXYWYaM,882
|
|
46
45
|
arekit/common/docs/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
47
46
|
arekit/common/docs/base.py,sha256=uXUOtpR9BEsDBfDHg4eLqOjfSVOV_o9VPii3nSxLZuY,734
|
|
48
|
-
arekit/common/docs/entities_grouping.py,sha256=
|
|
47
|
+
arekit/common/docs/entities_grouping.py,sha256=_r254fNr0j6BjHuLZBLjj21yWm4_k__5aOcBXcAaQUQ,704
|
|
49
48
|
arekit/common/docs/entity.py,sha256=TxrZMdIEgjk-PgCyskCkVis2KAw_M7vTBp3ppP6G05M,662
|
|
50
|
-
arekit/common/docs/
|
|
51
|
-
arekit/common/docs/parser.py,sha256=26nOcfXzfsoYCwxjBms_qBtO4T_b66G4nLvmNJeSZmE,1505
|
|
49
|
+
arekit/common/docs/parser.py,sha256=514lQNrZiwU_mxgyuWBkDhqjS5SVAvcIHx9GQUTuVG8,2883
|
|
52
50
|
arekit/common/docs/sentence.py,sha256=nZCCFj2yk71POoXCBfEMN3pteM2qQdj60eEzxMVY_3k,302
|
|
53
51
|
arekit/common/docs/parsed/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
54
52
|
arekit/common/docs/parsed/base.py,sha256=WPstqOpBuLKjtz6UO_bI0DpOPF3Sm0wYEVwjtldbPXE,3175
|
|
@@ -111,28 +109,28 @@ arekit/common/opinions/annot/algo/base.py,sha256=ymll-4-SplCY7CLswjOZEC1vsVHIEzU
|
|
|
111
109
|
arekit/common/opinions/annot/algo/pair_based.py,sha256=HbYn1mAsn5g11NiC9pfrMqNtJn_GzvqPFGpafMqqB2o,4419
|
|
112
110
|
arekit/common/opinions/annot/algo/predefined.py,sha256=zU39SADPKnykHCNB-Bmn_0bvd6gYWWYmfgfi-68hHSs,741
|
|
113
111
|
arekit/common/pipeline/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
114
|
-
arekit/common/pipeline/base.py,sha256=
|
|
115
|
-
arekit/common/pipeline/
|
|
112
|
+
arekit/common/pipeline/base.py,sha256=8TgWNy5QrnKEp1bq3lhyGSgIfYe5ZIZU3c-DYBJ9LPA,957
|
|
113
|
+
arekit/common/pipeline/batching.py,sha256=DdOvOladOo2aEv3JZ8NQnCvsNGcWk4TFzENrZqTGyXk,1239
|
|
114
|
+
arekit/common/pipeline/context.py,sha256=Fw25lBVakHNAXjtkdEqopR-Jh59cDKGWD2jCJxBrj7Y,1126
|
|
116
115
|
arekit/common/pipeline/conts.py,sha256=NAQNsHt1kK3HnxWv3M6yXi0c7C6Mx6ZZ6KZc0yE0eas,70
|
|
116
|
+
arekit/common/pipeline/utils.py,sha256=5VqH1LtRa4tYUbyiRvWdBmP4biFhTKq9vhr8QiRFFkY,882
|
|
117
117
|
arekit/common/pipeline/items/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
118
|
-
arekit/common/pipeline/items/base.py,sha256=
|
|
119
|
-
arekit/common/pipeline/items/flatten.py,sha256=
|
|
120
|
-
arekit/common/pipeline/items/handle.py,sha256=
|
|
121
|
-
arekit/common/pipeline/items/iter.py,sha256=
|
|
122
|
-
arekit/common/pipeline/items/map.py,sha256=
|
|
123
|
-
arekit/common/pipeline/items/map_nested.py,sha256=
|
|
118
|
+
arekit/common/pipeline/items/base.py,sha256=dWIZVGJjYuURLCiZj8YQHWtsS725SOi9SPZaCPV7NvI,1694
|
|
119
|
+
arekit/common/pipeline/items/flatten.py,sha256=9T4jWqPGv4UDxajlM0Nm0-gvwUgqqYB8XH0efTum9a0,542
|
|
120
|
+
arekit/common/pipeline/items/handle.py,sha256=QS5Byj7-o5jmFi0ag58NE3zm2-JzVIunIgc3Pn1ij6g,578
|
|
121
|
+
arekit/common/pipeline/items/iter.py,sha256=Tk9WdUMPOq20s7jEWEpU4PmillnVtQ8nIa2ct7iw-3s,406
|
|
122
|
+
arekit/common/pipeline/items/map.py,sha256=G5wBdjaaxePD0pijrxsfpJACeP7kzj7HerjCkNIhmII,381
|
|
123
|
+
arekit/common/pipeline/items/map_nested.py,sha256=vs0GdJNr3qSF9p2yd1nWji5E1HGzECbvOfN2MqoHc2A,630
|
|
124
|
+
arekit/common/service/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
125
|
+
arekit/common/service/sqlite.py,sha256=1jLIszkcJGeT0hUos8Y0Chp3o9XRUfljG2P9q0T2_Ds,1440
|
|
124
126
|
arekit/common/synonyms/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
125
127
|
arekit/common/synonyms/base.py,sha256=YxD-CKCjlEtar1zTdumnfC3vKgbP2wLODR9mMEwbbnA,4237
|
|
126
128
|
arekit/common/synonyms/grouping.py,sha256=fi7QQbBvsTvvP2CPTesSPEsPNmGfc6euqj-HPhVvtlg,698
|
|
127
129
|
arekit/common/text/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
128
130
|
arekit/common/text/enums.py,sha256=nelEI7B-szLUtl8xds8Kw_vgK5JWg_Aj7IadEj2q_1Y,141
|
|
129
131
|
arekit/common/text/parsed.py,sha256=YxGRHtozDd3sDVI3hMT_hOO7Wmsy7_zLkblfnSXeJ9g,1104
|
|
130
|
-
arekit/common/text/
|
|
132
|
+
arekit/common/text/partitioning.py,sha256=OL8r3-xaMafnT7FuPXDHINlA-BQgx6cLaMqm366WKCU,1153
|
|
131
133
|
arekit/common/text/stemmer.py,sha256=OJ5XelxLN-7m3uLPDU9C7CWdkXDeK-xieexQN6RYLXc,341
|
|
132
|
-
arekit/common/text/partitioning/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
133
|
-
arekit/common/text/partitioning/base.py,sha256=R6YPCN71SZa9sQJsxEaAY6IdgFRlt7tXstrjA79ADtI,108
|
|
134
|
-
arekit/common/text/partitioning/str.py,sha256=gHadcKtELxvrk_XurVsMk63ZSNmvKRqTMtN2OtTw_Fs,1002
|
|
135
|
-
arekit/common/text/partitioning/terms.py,sha256=_5P-xZJOHqOwSij_bi-b0hATCmOme5a38n1fAh67iAo,969
|
|
136
134
|
arekit/common/text_opinions/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
137
135
|
arekit/common/text_opinions/base.py,sha256=KootNvGAbUVCV5uFgLjK-bm9bbQSIvZUz0q9CBToGa8,3447
|
|
138
136
|
arekit/contrib/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
@@ -164,94 +162,13 @@ arekit/contrib/networks/input/providers/term_connotation.py,sha256=Q90pVN4hQgYAk
|
|
|
164
162
|
arekit/contrib/networks/input/providers/text.py,sha256=kucezKm6Ilmy5wuM2jUP5xk9zh1K1Pf8KcMd1prrp8k,917
|
|
165
163
|
arekit/contrib/prompt/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
166
164
|
arekit/contrib/prompt/sample.py,sha256=MxpbDR0ww7WmdtuPu74B8R6QKVXeuzO0CKGOJIYwbRk,3164
|
|
167
|
-
arekit/contrib/source/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
168
|
-
arekit/contrib/source/download.py,sha256=O5epK1eGC-ZeLzG4Q7jJDiFfpxur-vX8X6XqDtfjswM,2438
|
|
169
|
-
arekit/contrib/source/zip_utils.py,sha256=cS7aCPOUkTOfZyqDwI23U1Aqplq-yXW8E1ayrTQiBl0,1468
|
|
170
|
-
arekit/contrib/source/brat/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
171
|
-
arekit/contrib/source/brat/annot.py,sha256=wrPU88mqmud0CYeHVxR3U55f_QhiWU5p6qvCJnWPSkQ,2556
|
|
172
|
-
arekit/contrib/source/brat/doc.py,sha256=MvFu8arXvyHRsW7jsIcdryfg_dLL-SBY3RQZ5uravVo,1095
|
|
173
|
-
arekit/contrib/source/brat/relation.py,sha256=6FA5gYcaD0_X5vzhF0ft42gzfDcq9qzDKALSBmw8N5E,733
|
|
174
|
-
arekit/contrib/source/brat/sentence.py,sha256=q10cCwQtvXCMv2y_lSrdGH_5FzR5etsNPc50GGKO0KY,2633
|
|
175
|
-
arekit/contrib/source/brat/sentences_reader.py,sha256=kWCAQqlIfQXP925bRLDeK9e2UusVxhGE_1IIDCLrjGQ,4677
|
|
176
|
-
arekit/contrib/source/brat/entities/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
177
|
-
arekit/contrib/source/brat/entities/compound.py,sha256=GnZC2dlgMdoJecCIm-uUoQ7fppyWYiwki5ClPeVvwfY,505
|
|
178
|
-
arekit/contrib/source/brat/entities/entity.py,sha256=rHm15x2AaK8SzkhrKTIu298N0huFiGPRlY4kGfiQYYA,1337
|
|
179
|
-
arekit/contrib/source/brat/entities/parser.py,sha256=W9S2TmreDIAf4jdhuA3SSp_LFYCOsglRKJb5xQQ9_1E,1934
|
|
180
|
-
arekit/contrib/source/brat/opinions/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
181
|
-
arekit/contrib/source/brat/opinions/converter.py,sha256=bRzzI_fEojWCGsmwx-Z_dqISeWTaR1ZBJAHcALNViWo,822
|
|
182
|
-
arekit/contrib/source/nerel/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
183
|
-
arekit/contrib/source/nerel/entities.py,sha256=f_qgvArMdiBikjr9QZIgWfIDEp4TADpiGymqPIpwY14,2806
|
|
184
|
-
arekit/contrib/source/nerel/io_utils.py,sha256=Nb2PwC3udoHOLVXDuIuMd8rQiVMc8z7LeGYZjCAQ1Kg,2181
|
|
185
|
-
arekit/contrib/source/nerel/labels.py,sha256=nUjSmmR6nDvjr08jIEDWf7kCuw3NzoPwvzMqn3BFycU,2297
|
|
186
|
-
arekit/contrib/source/nerel/reader.py,sha256=2UXtRIhhZHfI7DbBOqJ2DoUcgfOanQObOyk-tU-BPXI,2072
|
|
187
|
-
arekit/contrib/source/nerel/utils.py,sha256=_2da8rUCsg19CChb8Aj7nh0s_tWbrCVqz7wO1bUFmCI,756
|
|
188
|
-
arekit/contrib/source/nerel/versions.py,sha256=XYibavD7cPM8TV7DnDl_8daPg80Sb_twBpR4LtsRuqA,182
|
|
189
|
-
arekit/contrib/source/nerel/folding/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
190
|
-
arekit/contrib/source/nerel/folding/fixed.py,sha256=jcArXQfawEtmIMYEyzwgMavIeAoyI2EzkY_zF7ZnpHs,2127
|
|
191
|
-
arekit/contrib/source/nerelbio/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
192
|
-
arekit/contrib/source/nerelbio/io_utils.py,sha256=E_3ReImB57PoUAyxMx6xYaI-pxoCabWLRkleWeAc-X8,2208
|
|
193
|
-
arekit/contrib/source/nerelbio/labels.py,sha256=_TYsVW9VWrCLsF3milcfjBKnE0O7Tvvs6Z-kUg8UZeU,2517
|
|
194
|
-
arekit/contrib/source/nerelbio/reader.py,sha256=T99emDZRLyqw3EREZ_Eha3moX8UyPW1hFPCCF7-Q_k8,300
|
|
195
|
-
arekit/contrib/source/nerelbio/versions.py,sha256=a5q_JM9hOu9btHX9pmzS-wblRs5daG_so1lX-_dTqEs,105
|
|
196
|
-
arekit/contrib/source/ruattitudes/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
197
|
-
arekit/contrib/source/ruattitudes/collection.py,sha256=AfqmpnmWmQylGAjJcqk4hWmurfO24JdsZijlR6ygbx0,1492
|
|
198
|
-
arekit/contrib/source/ruattitudes/doc.py,sha256=_gSyAMEIFklYll-8PU3wpatDZfwg7Gl1h2c9xRUbGFo,1262
|
|
199
|
-
arekit/contrib/source/ruattitudes/doc_brat.py,sha256=xzP3KUb-qFgdDydgzHfhPfFHPVUlB6nX5vZSvInz0f0,2196
|
|
200
|
-
arekit/contrib/source/ruattitudes/io_utils.py,sha256=ER0OWlDw7pdFIoATEiiXMwBxAjy3I09UXS96A4LGs5Q,1442
|
|
201
|
-
arekit/contrib/source/ruattitudes/labels_fmt.py,sha256=rHlDijvLJAVwgs6EpaCUqZHbjjc1oW9TvhwSSgKa1hk,479
|
|
202
|
-
arekit/contrib/source/ruattitudes/reader.py,sha256=yNxbU6qFTbWoaSjazIOlfOeDkymnKK7zcBKUxMUAh9I,10182
|
|
203
|
-
arekit/contrib/source/ruattitudes/sentence.py,sha256=vIe-HJrd-Dtd9fBMUCPX8x3IBQrwgm2b84ZBfxaZFCE,2079
|
|
204
|
-
arekit/contrib/source/ruattitudes/synonyms.py,sha256=l-Oh37Pqf5AhNYVW7XVlulUNoZ3tZYWUYpruKZlp1B4,596
|
|
205
|
-
arekit/contrib/source/ruattitudes/text_object.py,sha256=zppsQdEM-ViaP1ufBhds92k_vXGTdz_TvH3Cal5A07Y,1985
|
|
206
|
-
arekit/contrib/source/ruattitudes/entity/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
207
|
-
arekit/contrib/source/ruattitudes/entity/parser.py,sha256=XRfwL_x-TNbxrSdca4GY5ABj1wZr6HEk_kxgfbuSqlk,248
|
|
208
|
-
arekit/contrib/source/ruattitudes/opinions/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
209
|
-
arekit/contrib/source/ruattitudes/opinions/base.py,sha256=tKVjOK2kZxXLSClVPYZYSTqgenTupnQfZrO2g7o9yZg,705
|
|
210
|
-
arekit/contrib/source/ruattitudes/opinions/converter.py,sha256=4VVuLuJvpIoz4SPGVxlCDE9m8YbPcoWENV0PqijF2Mg,1560
|
|
211
|
-
arekit/contrib/source/rusentiframes/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
212
|
-
arekit/contrib/source/rusentiframes/collection.py,sha256=BQXk5G2fazW3Bn5xX5EXP4tA8tA6HbPft5P5gO3L_ew,5822
|
|
213
|
-
arekit/contrib/source/rusentiframes/effect.py,sha256=LqBf6e-d32MPMK7rdOEyO22lSbEv8hD9jkdyCYjUAo4,507
|
|
214
|
-
arekit/contrib/source/rusentiframes/io_utils.py,sha256=VUdg_XkDotsqOS9gmNYIpz60Zj1ALWXlUYm82-v3QM0,476
|
|
215
|
-
arekit/contrib/source/rusentiframes/labels_fmt.py,sha256=XsunvZaspVAqVxHzrvCdwo6fG3tDNaUpUDSybF1JSDw,876
|
|
216
|
-
arekit/contrib/source/rusentiframes/polarity.py,sha256=PWHKdmNjyD7-EB-eYQ8h4R2TAAbybAgEut5toICPEuk,953
|
|
217
|
-
arekit/contrib/source/rusentiframes/role.py,sha256=ri4EETg_YGygiCkoq9_waCHWhvtQwm_P4bEIF6DBrSQ,369
|
|
218
|
-
arekit/contrib/source/rusentiframes/state.py,sha256=J9tuPf52qNgrIj91wH8NHlA11kMwU7doP8aOyvmTMvw,505
|
|
219
|
-
arekit/contrib/source/rusentiframes/types.py,sha256=vgMK1CKdr9ytLaivD8seTYJbN4XOQxHVcvHlbEs5zAc,1348
|
|
220
|
-
arekit/contrib/source/rusentiframes/value.py,sha256=anL8KDWY4np0oKJb_49ord3Hi_TVViQgD1Stytuli_4,34
|
|
221
|
-
arekit/contrib/source/rusentrel/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
222
|
-
arekit/contrib/source/rusentrel/const.py,sha256=C2-PlavM4PUaCtL0-J0AVIssseDD-mmAvsTjygKvvks,72
|
|
223
|
-
arekit/contrib/source/rusentrel/docs_reader.py,sha256=gPIzSaY5EQfRdRNOUyhWQGWgE_oIACoP3lUEj2uOjPI,2005
|
|
224
|
-
arekit/contrib/source/rusentrel/entities.py,sha256=olw6zNo4ii_cBqe0Ns2TmYa4_QWki6aUfw3EToSNvpU,1197
|
|
225
|
-
arekit/contrib/source/rusentrel/io_utils.py,sha256=Lxnru6PSNp_Jqti8FFaIOO7pzWqXx4GjIeow9wsG_kg,4105
|
|
226
|
-
arekit/contrib/source/rusentrel/labels_fmt.py,sha256=-l_0RvfQoPmAYC5F89o3t8Q_9LbL2hiAjH5Ip_Rrouc,544
|
|
227
|
-
arekit/contrib/source/rusentrel/synonyms.py,sha256=wjCPdJCL-pABYa3m7egDSYzUhsGFY9dzcZBX5b7MVC0,576
|
|
228
|
-
arekit/contrib/source/rusentrel/opinions/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
229
|
-
arekit/contrib/source/rusentrel/opinions/collection.py,sha256=0rP9csiUZe8rgetS_XHFQtJJGvjhnhunprgLfzkf4sQ,1400
|
|
230
|
-
arekit/contrib/source/rusentrel/opinions/converter.py,sha256=I1jqfTAiCs1Gm9KP5l5R7QYhAgS_LIFYO9Cfm5i6Dto,1208
|
|
231
|
-
arekit/contrib/source/rusentrel/opinions/provider.py,sha256=oKJ3BVxrgquWJRkKuAawOy6pjSNzDOYefXaQARrefyA,1910
|
|
232
|
-
arekit/contrib/source/rusentrel/opinions/writer.py,sha256=dJicPj61HGrxwd0iELkykhtbwTrSWKeTc1KzlOPXG2w,1631
|
|
233
|
-
arekit/contrib/source/sentinerel/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
234
|
-
arekit/contrib/source/sentinerel/entities.py,sha256=tvy6CsvlVrtsJQkslCyA8KeLo6ihbPpxdI9l1jngNF8,2682
|
|
235
|
-
arekit/contrib/source/sentinerel/io_utils.py,sha256=fQ5pc3FDjM7afWxObwavSX76KEucd8pIn7u9V5mFJ9M,2751
|
|
236
|
-
arekit/contrib/source/sentinerel/labels.py,sha256=xjQw5KWN8G6LfpVufvY63WFCmrJ8p2caCZNnWqKO5vo,550
|
|
237
|
-
arekit/contrib/source/sentinerel/labels_scaler.py,sha256=kHmooy876IAdX-ib-Ou7xVf0U3UWTDowG5uFP5DWNOY,1091
|
|
238
|
-
arekit/contrib/source/sentinerel/reader.py,sha256=ZMdGc5pLUkmS0YsvmVR-9rMzme0cvTegVSqvo7HMNfU,2048
|
|
239
|
-
arekit/contrib/source/sentinerel/folding/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
240
|
-
arekit/contrib/source/sentinerel/folding/factory.py,sha256=juLkpX8f9khJGYHQH2eTCMhW74A5DFFNKBFfIvXpQZQ,1176
|
|
241
|
-
arekit/contrib/source/sentinerel/folding/fixed.py,sha256=vWDNidHdRjpPnjspzMhanmbh3S8vWJ9cszQT9mY-cqU,1772
|
|
242
|
-
arekit/contrib/source/synonyms/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
243
|
-
arekit/contrib/source/synonyms/utils.py,sha256=j8iqGw6M3bxgdV4sGRtNj3NCnHSMokq8QSPITuKKsDU,547
|
|
244
165
|
arekit/contrib/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
245
|
-
arekit/contrib/utils/
|
|
246
|
-
arekit/contrib/utils/resources.py,sha256=xEENsaid0NDBNJzHJvd7M_le5T0lihHjN9DKKuBgnEg,1217
|
|
247
|
-
arekit/contrib/utils/serializer.py,sha256=4cc9_asXjcWk-Y1lRtLJM736MYpKyZf9svYiQpdk5-M,1687
|
|
166
|
+
arekit/contrib/utils/serializer.py,sha256=D9LJ2ZXeVx3YntV-HqEnt32xW-s4GauwD97XRVlqr0g,1626
|
|
248
167
|
arekit/contrib/utils/bert/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
249
168
|
arekit/contrib/utils/bert/samplers.py,sha256=ZVe3rbUAH0Jw1xR_yHE1DoUJf3CI0pDgbBQQzlLWevc,989
|
|
250
|
-
arekit/contrib/utils/connotations/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
251
|
-
arekit/contrib/utils/connotations/rusentiframes_sentiment.py,sha256=DYxgoPiiQ0kKXx7Fy4HpozH71kXkaVwuAMU61m7BRGY,1062
|
|
252
169
|
arekit/contrib/utils/data/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
253
170
|
arekit/contrib/utils/data/contents/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
254
|
-
arekit/contrib/utils/data/contents/opinions.py,sha256=
|
|
171
|
+
arekit/contrib/utils/data/contents/opinions.py,sha256=MSV7NytEe15adKhhHCq5KiCj6ZBq31nV-u2rcSfFCgE,1738
|
|
255
172
|
arekit/contrib/utils/data/doc_provider/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
256
173
|
arekit/contrib/utils/data/doc_provider/dict_based.py,sha256=zUOiiIbj5zby4xqMb0m9N-a6enavJJ7wFmPaGErykWU,371
|
|
257
174
|
arekit/contrib/utils/data/doc_provider/dir_based.py,sha256=FTw3kLV_CYtPoUoHl39IrP6RjLvTecCno9May95jVXw,1916
|
|
@@ -259,18 +176,20 @@ arekit/contrib/utils/data/readers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeR
|
|
|
259
176
|
arekit/contrib/utils/data/readers/base.py,sha256=zAsZLX5ng0_gb_ysL6wQchptmBHlNgqgQilw295Y5Aw,153
|
|
260
177
|
arekit/contrib/utils/data/readers/csv_pd.py,sha256=Ym49j04Z-_WQN-7xJMiiN1y2TIMnMDtPxy5h0mT3WBQ,1383
|
|
261
178
|
arekit/contrib/utils/data/readers/jsonl.py,sha256=c2bHwnTfNEwb1c8B9fRwaQyeze5x3nOd2UXXAp4MbxQ,426
|
|
179
|
+
arekit/contrib/utils/data/readers/sqlite.py,sha256=U1138XNCIwqycNivxwzwIUnowj3jDkP4M6J_Kvyedbc,416
|
|
262
180
|
arekit/contrib/utils/data/service/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
263
181
|
arekit/contrib/utils/data/service/balance.py,sha256=PgA5B6qSPmt8ITPLsQuCkniE8-u2NO_eQ2m-U9Akh98,1547
|
|
264
182
|
arekit/contrib/utils/data/storages/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
265
183
|
arekit/contrib/utils/data/storages/jsonl_based.py,sha256=Oj5u8aW_UtVDSDxMpIQsgMlZlV-KBD0qVHPVVT3m8nA,450
|
|
266
184
|
arekit/contrib/utils/data/storages/pandas_based.py,sha256=m8z34tO_7NupYd_zQ4L1miTXJQkmMMB90zPFqEeYCNs,4301
|
|
267
|
-
arekit/contrib/utils/data/storages/row_cache.py,sha256=
|
|
185
|
+
arekit/contrib/utils/data/storages/row_cache.py,sha256=V1InYIqRf5WMWV_JndHNH9JzAjFS3ZL38f4_pDPLo_8,1985
|
|
186
|
+
arekit/contrib/utils/data/storages/sqlite_based.py,sha256=ARwVisVbPKBap_mVdpvTpp28iXgJbCJ3dAj41UYu03Q,609
|
|
268
187
|
arekit/contrib/utils/data/writers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
269
188
|
arekit/contrib/utils/data/writers/base.py,sha256=JLwf5WVl_U319sdMev8YOn4OoCcrgNIUZtrOuG1JLjI,766
|
|
270
189
|
arekit/contrib/utils/data/writers/csv_native.py,sha256=7fPxYeu9YDK8Cvjp1n-sbKT63ZuhDIEv3VwghHuKk5k,2252
|
|
271
190
|
arekit/contrib/utils/data/writers/csv_pd.py,sha256=WhBjDJCHUBy_TabngMF42Qicx0ye8xIus0m6c7qotto,1330
|
|
272
191
|
arekit/contrib/utils/data/writers/json_opennre.py,sha256=EkhXmONgtMe7A9VKrs9ElFHc8RoMumjFbkKfwuOVOoU,5067
|
|
273
|
-
arekit/contrib/utils/data/writers/sqlite_native.py,sha256=
|
|
192
|
+
arekit/contrib/utils/data/writers/sqlite_native.py,sha256=MnbLU8iPvYvpYgEbOXhBKH_G8DJs0W9iSuhr_TPKBAQ,4601
|
|
274
193
|
arekit/contrib/utils/embeddings/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
275
194
|
arekit/contrib/utils/embeddings/rusvectores.py,sha256=WA0HejE2U5kgeBvh4_vty2QzoAkFXiMk94BK8FHxoxw,1931
|
|
276
195
|
arekit/contrib/utils/embeddings/tokens.py,sha256=z3lJ30JTX9zvZtPgzRl3yANECmuA1qboMDTcJsr_4E4,872
|
|
@@ -281,15 +200,7 @@ arekit/contrib/utils/entities/formatters/str_display.py,sha256=N8igv7EVaTFayvLXk
|
|
|
281
200
|
arekit/contrib/utils/entities/formatters/str_simple_sharp_prefixed_fmt.py,sha256=rEUIma9O3kOBWIguGtJ69JH-00Dhm0vUBOd5yNcKweY,653
|
|
282
201
|
arekit/contrib/utils/io_utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
283
202
|
arekit/contrib/utils/io_utils/embedding.py,sha256=cBDRv_1LROJ262QaL3QVfGt2W9EvBfbh83oL41PJn60,2543
|
|
284
|
-
arekit/contrib/utils/io_utils/
|
|
285
|
-
arekit/contrib/utils/io_utils/samples.py,sha256=ZA3UeURysxeLbilXBNf2PuFoqOxjsXgx-BBMprS9vWw,2737
|
|
286
|
-
arekit/contrib/utils/io_utils/utils.py,sha256=NNuebsehP0eVCpqYoZE6hRsYgklKu89SEvm2VqSQvUw,1012
|
|
287
|
-
arekit/contrib/utils/lexicons/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
288
|
-
arekit/contrib/utils/lexicons/lexicon.py,sha256=VEyORMUzhqDm4lNbKU_ex9pRFHjZMwe2cw0V_TLCBNQ,1153
|
|
289
|
-
arekit/contrib/utils/lexicons/relation.py,sha256=PkF6FZsbQz617l5bqUvwZKC3uCYduxpoL-xdy9ZkwWY,1205
|
|
290
|
-
arekit/contrib/utils/lexicons/rusentilex.py,sha256=pA_M1OTRr1HpeUa27GuJEBrFsKUX6Vet0iGYqkcCoIY,954
|
|
291
|
-
arekit/contrib/utils/nn/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
292
|
-
arekit/contrib/utils/nn/rows.py,sha256=xN-AKBrfv-RC08DxKoSsDLMitXcQ-IW33gep-Av1T3I,3966
|
|
203
|
+
arekit/contrib/utils/io_utils/utils.py,sha256=310SIJTsNLn2OZrGPer9W4ZP52PHkjBK3zsyqxVs3h0,537
|
|
293
204
|
arekit/contrib/utils/np_utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
294
205
|
arekit/contrib/utils/np_utils/embedding.py,sha256=G7Ls_ClzbskLLy-opRcVzQlfUfhdwbqoXgk0zoGrmHM,798
|
|
295
206
|
arekit/contrib/utils/np_utils/npz_utils.py,sha256=XoUHNmOlcr2X674R1xKGUJitEpFCIBJ8DOpNEPhtJFk,234
|
|
@@ -298,42 +209,19 @@ arekit/contrib/utils/pipelines/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5
|
|
|
298
209
|
arekit/contrib/utils/pipelines/opinion_collections.py,sha256=y9-klVJGCN9mPd7t1ECllAiCnAb3MKVXC1PnYddp5sQ,3195
|
|
299
210
|
arekit/contrib/utils/pipelines/items/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
300
211
|
arekit/contrib/utils/pipelines/items/sampling/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
301
|
-
arekit/contrib/utils/pipelines/items/sampling/base.py,sha256
|
|
302
|
-
arekit/contrib/utils/pipelines/items/sampling/networks.py,sha256=
|
|
212
|
+
arekit/contrib/utils/pipelines/items/sampling/base.py,sha256=-H-r5GIi9ee7CxxpJs8KnHC91l7Y1dYaWPR_OK17E8g,4245
|
|
213
|
+
arekit/contrib/utils/pipelines/items/sampling/networks.py,sha256=E0EjQ4KRd3oYLFVbie05XJa00JqR26eLRoMrDnuQySQ,2653
|
|
303
214
|
arekit/contrib/utils/pipelines/items/text/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
304
|
-
arekit/contrib/utils/pipelines/items/text/entities_default.py,sha256=
|
|
305
|
-
arekit/contrib/utils/pipelines/items/text/frames.py,sha256=
|
|
306
|
-
arekit/contrib/utils/pipelines/items/text/frames_lemmatized.py,sha256=
|
|
307
|
-
arekit/contrib/utils/pipelines/items/text/frames_negation.py,sha256=
|
|
308
|
-
arekit/contrib/utils/pipelines/items/text/
|
|
309
|
-
arekit/contrib/utils/pipelines/items/text/
|
|
310
|
-
arekit/contrib/utils/pipelines/items/text/translator.py,sha256=3JtvIVPhSCy1xxl2BEhbJ0kRtwNwFLJXEHe8PQEsLqI,5286
|
|
311
|
-
arekit/contrib/utils/pipelines/sources/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
312
|
-
arekit/contrib/utils/pipelines/sources/nerel/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
313
|
-
arekit/contrib/utils/pipelines/sources/nerel/doc_provider.py,sha256=0japm9EJoBKawGtfgrHSMfICByQEvPlmEGZkCLv8W18,1105
|
|
314
|
-
arekit/contrib/utils/pipelines/sources/nerel/extract_text_relations.py,sha256=Dc16ioq6SPuYHIHZaKuOrMkRR8QMcR0JCq_k1IYVY1I,3813
|
|
315
|
-
arekit/contrib/utils/pipelines/sources/nerel/labels_fmt.py,sha256=X410yjP6P4mhHDsAfUthPtzCjDbL64wu5HViQFkMjrw,2624
|
|
316
|
-
arekit/contrib/utils/pipelines/sources/nerel_bio/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
317
|
-
arekit/contrib/utils/pipelines/sources/nerel_bio/doc_provider.py,sha256=gn3-pGkONybArufwRtH3xl0EcC2HuhmgbJnKGEjKff4,1246
|
|
318
|
-
arekit/contrib/utils/pipelines/sources/nerel_bio/extrat_text_relations.py,sha256=BQ5FQfQHITWnTj24580PSXaOEmf5c8NHOwRuXDHS9qw,3835
|
|
319
|
-
arekit/contrib/utils/pipelines/sources/nerel_bio/labels_fmt.py,sha256=uOJWpQyYBsESw1Z8tOle3MH0HZPVMPUbQxmE1EGdzjs,3457
|
|
320
|
-
arekit/contrib/utils/pipelines/sources/ruattitudes/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
321
|
-
arekit/contrib/utils/pipelines/sources/ruattitudes/doc_provider.py,sha256=zzlbEMYnqMu9ezQ3AX-71LWIiE4Ln_v9xHTCLYrNvfs,2613
|
|
322
|
-
arekit/contrib/utils/pipelines/sources/ruattitudes/entity_filter.py,sha256=Yyyk-RbWtXCnBJ9tNyObZ4rHNICrl5Ho4wP2rtVGvBY,807
|
|
323
|
-
arekit/contrib/utils/pipelines/sources/ruattitudes/extract_text_opinions.py,sha256=8CKusj0IyF4RZKNOV0wG4Hcbq34o4kCE_sZmfYjW9_Y,3513
|
|
324
|
-
arekit/contrib/utils/pipelines/sources/rusentrel/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
325
|
-
arekit/contrib/utils/pipelines/sources/rusentrel/doc_provider.py,sha256=2_CtVC_pDr-SDbhuh7GqBY5XYvDQQPR5LV-cGSz_xi4,872
|
|
326
|
-
arekit/contrib/utils/pipelines/sources/rusentrel/extract_text_opinions.py,sha256=LS6DD9yRyuGk6NAOiJHukhO1QmQrKsXIKr8QqoA_c0E,5804
|
|
327
|
-
arekit/contrib/utils/pipelines/sources/sentinerel/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
328
|
-
arekit/contrib/utils/pipelines/sources/sentinerel/doc_provider.py,sha256=LiIOa0p_MOmzVCrvMZR-sNalOUQb8iu_knIADC9pt4E,1304
|
|
329
|
-
arekit/contrib/utils/pipelines/sources/sentinerel/entity_filter.py,sha256=6iROY3G455P_olxhxjhI2WgrOywbTpuNYO9wiNzPPck,1832
|
|
330
|
-
arekit/contrib/utils/pipelines/sources/sentinerel/extract_text_opinions.py,sha256=Kom_EwzTiQRggJFof1e1OI0RFJQSeylH3ad-HVnvwrA,9841
|
|
331
|
-
arekit/contrib/utils/pipelines/sources/sentinerel/labels_fmt.py,sha256=OzU65CVF4E_7JJRknexknFjRGB8qD9n25Nk-G4g91hM,1649
|
|
215
|
+
arekit/contrib/utils/pipelines/items/text/entities_default.py,sha256=vNx5ir2mf7a1gg_OeqUsf_p1Fu2k7QIFxVpe-CuwZ84,727
|
|
216
|
+
arekit/contrib/utils/pipelines/items/text/frames.py,sha256=pZQybYfgEQB1DM3PtmsgrtB2Xl0HejmP4rhT0nR_YKE,2586
|
|
217
|
+
arekit/contrib/utils/pipelines/items/text/frames_lemmatized.py,sha256=4rIAAB-_GeWNbu5KyaDm5qttH4o2Bzpdvy-D9YR5bRk,1776
|
|
218
|
+
arekit/contrib/utils/pipelines/items/text/frames_negation.py,sha256=AdoY7lqSAT0RApp0DbqeI7xxyRVF6NPJLAfR59lsIec,1303
|
|
219
|
+
arekit/contrib/utils/pipelines/items/text/tokenizer.py,sha256=FmV5flziDLCNttxrUzRr-FGCcKK6venZEcZ-KwcqwNE,3147
|
|
220
|
+
arekit/contrib/utils/pipelines/items/text/translator.py,sha256=TkXVyZYRbS8P4S2Pnn2GzQMRa-9ba-nS4_zXvsf16vU,5365
|
|
332
221
|
arekit/contrib/utils/pipelines/text_opinion/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
333
|
-
arekit/contrib/utils/pipelines/text_opinion/extraction.py,sha256=
|
|
222
|
+
arekit/contrib/utils/pipelines/text_opinion/extraction.py,sha256=QoK0-dfMl27uOOfUhvnbvzYX23jCpZbm97Qs27Na7VA,4133
|
|
334
223
|
arekit/contrib/utils/pipelines/text_opinion/annot/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
335
224
|
arekit/contrib/utils/pipelines/text_opinion/annot/algo_based.py,sha256=bwS-UR2x3rgp_xqnf6z-73T-eIZE_kltRSGYxgd_WpU,1751
|
|
336
|
-
arekit/contrib/utils/pipelines/text_opinion/annot/predefined.py,sha256=h8usYdA-selpDdd55NLe1LsJLlI5Bf79e3DXCk4eFVc,3819
|
|
337
225
|
arekit/contrib/utils/pipelines/text_opinion/filters/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
338
226
|
arekit/contrib/utils/pipelines/text_opinion/filters/base.py,sha256=GnKnJB4MKqiMSJny3a9Na7l7Csm7abbt6GADBCY18Mw,143
|
|
339
227
|
arekit/contrib/utils/pipelines/text_opinion/filters/distance_based.py,sha256=3Pjq4IJJMT7dYpK266lN66WQJUnQO3P0rG6wcAvJOOA,649
|
|
@@ -357,18 +245,15 @@ arekit/contrib/utils/processing/pos/mystem_wrap.py,sha256=C9AnRIAZL4e8DMNte9LDuv
|
|
|
357
245
|
arekit/contrib/utils/processing/pos/russian.py,sha256=POCo6xKmK7vAEq-kWlODg611kLOtOj37OVc3L_GWL-8,229
|
|
358
246
|
arekit/contrib/utils/processing/text/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
359
247
|
arekit/contrib/utils/processing/text/tokens.py,sha256=_3u5Oy1MG_QfHH8wi0x0nA588qSaCp3Wmnp2SzMWjXY,3573
|
|
360
|
-
arekit/contrib/utils/sources/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
361
|
-
arekit/contrib/utils/sources/sentinerel/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
362
|
-
arekit/contrib/utils/sources/sentinerel/text_opinion/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
363
|
-
arekit/contrib/utils/sources/sentinerel/text_opinion/prof_per_org_filter.py,sha256=rtKdSDdaiwrCcSIdvzUIZwjaOm68xoq0TWpy3ISe6o0,2747
|
|
364
248
|
arekit/contrib/utils/synonyms/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
365
249
|
arekit/contrib/utils/synonyms/simple.py,sha256=ST9EwuWP88FzbyV8Gi0-biTPgGOsZ7OWyaBWHL_U_eo,557
|
|
366
250
|
arekit/contrib/utils/synonyms/stemmer_based.py,sha256=q19P_XOCWN2_JrBtybAt7ToMIr1ambw4ahr0fSEEHmQ,1400
|
|
367
251
|
arekit/contrib/utils/vectorizers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
368
252
|
arekit/contrib/utils/vectorizers/bpe.py,sha256=bFS5MZytvU1L21YS5aAeb3FZl7RMjyog4lWwysvKD-8,3047
|
|
369
253
|
arekit/contrib/utils/vectorizers/random_norm.py,sha256=TL86Kz6p59lJqoLg8RwQRTvfhr0e-tiULGHhO4vhBbo,1339
|
|
370
|
-
arekit-0.
|
|
371
|
-
arekit-0.
|
|
372
|
-
arekit-0.
|
|
373
|
-
arekit-0.
|
|
374
|
-
arekit-0.
|
|
254
|
+
arekit-0.25.0.data/data/logo.png,sha256=S8OZ4MGGD72Pf5co7ngYbXKkJH1EUhbErUXv1ZjUWiU,45718
|
|
255
|
+
arekit-0.25.0.dist-info/LICENSE,sha256=JO9tIbxAvhwDv73cX-gUStr9yA-TY7wusUeLHRx7JuY,1076
|
|
256
|
+
arekit-0.25.0.dist-info/METADATA,sha256=4DSUy6aTidHG9jFR7jMwQe3uJGER-e8E9vU0q2G20Uo,3145
|
|
257
|
+
arekit-0.25.0.dist-info/WHEEL,sha256=eOLhNAGa2EW3wWl_TU484h7q1UNgy0JXjjoqKoxAAQc,92
|
|
258
|
+
arekit-0.25.0.dist-info/top_level.txt,sha256=4pXuFE8IE0lBsqi6ZsR7figx0H939VIX4_-76YIbkOQ,7
|
|
259
|
+
arekit-0.25.0.dist-info/RECORD,,
|
|
@@ -1,37 +0,0 @@
|
|
|
1
|
-
from arekit.common.pipeline.items.base import BasePipelineItem
|
|
2
|
-
from arekit.common.text.partitioning.base import BasePartitioning
|
|
3
|
-
from arekit.common.pipeline.context import PipelineContext
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
class SentenceObjectsParserPipelineItem(BasePipelineItem):
|
|
7
|
-
|
|
8
|
-
def __init__(self, partitioning):
|
|
9
|
-
assert(isinstance(partitioning, BasePartitioning))
|
|
10
|
-
self.__partitioning = partitioning
|
|
11
|
-
|
|
12
|
-
# region protected
|
|
13
|
-
|
|
14
|
-
def _get_text(self, pipeline_ctx):
|
|
15
|
-
return None
|
|
16
|
-
|
|
17
|
-
def _get_parts_provider_func(self, input_data, pipeline_ctx):
|
|
18
|
-
raise NotImplementedError()
|
|
19
|
-
|
|
20
|
-
# endregion
|
|
21
|
-
|
|
22
|
-
def apply_core(self, input_data, pipeline_ctx):
|
|
23
|
-
assert(isinstance(pipeline_ctx, PipelineContext))
|
|
24
|
-
external_input = self._get_text(pipeline_ctx)
|
|
25
|
-
actual_input = input_data if external_input is None else external_input
|
|
26
|
-
parts_it = self._get_parts_provider_func(input_data=actual_input, pipeline_ctx=pipeline_ctx)
|
|
27
|
-
return self.__partitioning.provide(text=actual_input, parts_it=parts_it)
|
|
28
|
-
|
|
29
|
-
# region base
|
|
30
|
-
|
|
31
|
-
def __enter__(self):
|
|
32
|
-
return self
|
|
33
|
-
|
|
34
|
-
def __exit__(self, exc_type, exc_val, exc_tb):
|
|
35
|
-
pass
|
|
36
|
-
|
|
37
|
-
# endregion
|
arekit/common/text/parser.py
DELETED
|
@@ -1,12 +0,0 @@
|
|
|
1
|
-
from arekit.common.pipeline.base import BasePipeline
|
|
2
|
-
from arekit.common.text.parsed import BaseParsedText
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
class BaseTextParser(BasePipeline):
|
|
6
|
-
|
|
7
|
-
def run(self, input_data, params_dict=None, parent_ctx=None):
|
|
8
|
-
output_data = super(BaseTextParser, self).run(input_data=input_data,
|
|
9
|
-
params_dict=params_dict,
|
|
10
|
-
parent_ctx=parent_ctx)
|
|
11
|
-
|
|
12
|
-
return BaseParsedText(terms=output_data)
|