arekit 0.23.1__py3-none-any.whl → 0.25.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- arekit/common/context/terms_mapper.py +2 -2
- arekit/common/data/const.py +5 -4
- arekit/common/{experiment/api/ops_doc.py → data/doc_provider.py} +1 -1
- arekit/common/data/input/providers/columns/sample.py +6 -1
- arekit/common/data/input/providers/instances/base.py +1 -1
- arekit/common/data/input/providers/rows/base.py +36 -13
- arekit/common/data/input/providers/rows/samples.py +57 -55
- arekit/common/data/input/providers/sample/cropped.py +2 -2
- arekit/common/data/input/sample.py +1 -1
- arekit/common/data/rows_fmt.py +82 -0
- arekit/common/data/rows_parser.py +43 -0
- arekit/common/data/storages/base.py +23 -18
- arekit/common/data/views/samples.py +2 -8
- arekit/common/{news → docs}/base.py +2 -2
- arekit/common/{news → docs}/entities_grouping.py +2 -1
- arekit/common/{news → docs}/entity.py +2 -1
- arekit/common/{news → docs}/parsed/base.py +5 -5
- arekit/common/docs/parsed/providers/base.py +68 -0
- arekit/common/{news → docs}/parsed/providers/base_pairs.py +2 -2
- arekit/common/{news → docs}/parsed/providers/entity_service.py +27 -22
- arekit/common/{news → docs}/parsed/providers/opinion_pairs.py +2 -2
- arekit/common/{news → docs}/parsed/providers/text_opinion_pairs.py +6 -6
- arekit/common/docs/parsed/service.py +31 -0
- arekit/common/docs/parser.py +66 -0
- arekit/common/{news → docs}/sentence.py +1 -1
- arekit/common/entities/base.py +11 -2
- arekit/common/experiment/api/base_samples_io.py +1 -1
- arekit/common/frames/variants/collection.py +2 -2
- arekit/common/linkage/base.py +2 -2
- arekit/common/linkage/meta.py +23 -0
- arekit/common/linkage/opinions.py +1 -1
- arekit/common/linkage/text_opinions.py +2 -2
- arekit/common/opinions/annot/algo/base.py +1 -1
- arekit/common/opinions/annot/algo/pair_based.py +15 -13
- arekit/common/opinions/annot/algo/predefined.py +4 -4
- arekit/common/opinions/annot/algo_based.py +5 -5
- arekit/common/opinions/annot/base.py +3 -3
- arekit/common/opinions/base.py +7 -7
- arekit/common/opinions/collection.py +3 -3
- arekit/common/pipeline/base.py +12 -16
- arekit/common/pipeline/batching.py +28 -0
- arekit/common/pipeline/context.py +5 -1
- arekit/common/pipeline/items/base.py +38 -1
- arekit/common/pipeline/items/flatten.py +5 -1
- arekit/common/pipeline/items/handle.py +2 -1
- arekit/common/pipeline/items/iter.py +2 -1
- arekit/common/pipeline/items/map.py +2 -1
- arekit/common/pipeline/items/map_nested.py +4 -0
- arekit/common/pipeline/utils.py +32 -0
- arekit/common/service/sqlite.py +36 -0
- arekit/common/synonyms/base.py +2 -2
- arekit/common/text/{partitioning/str.py → partitioning.py} +16 -11
- arekit/common/text_opinions/base.py +11 -11
- arekit/common/utils.py +33 -46
- arekit/contrib/networks/embedding.py +3 -3
- arekit/contrib/networks/embedding_io.py +5 -5
- arekit/contrib/networks/input/const.py +0 -2
- arekit/contrib/networks/input/providers/sample.py +15 -29
- arekit/contrib/networks/input/rows_parser.py +47 -134
- arekit/contrib/prompt/sample.py +18 -16
- arekit/contrib/utils/data/contents/opinions.py +17 -5
- arekit/contrib/utils/data/doc_provider/dict_based.py +13 -0
- arekit/contrib/utils/data/{doc_ops → doc_provider}/dir_based.py +7 -7
- arekit/contrib/utils/data/readers/base.py +3 -0
- arekit/contrib/utils/data/readers/csv_pd.py +10 -4
- arekit/contrib/utils/data/readers/jsonl.py +3 -0
- arekit/contrib/utils/data/readers/sqlite.py +14 -0
- arekit/contrib/utils/data/service/balance.py +0 -1
- arekit/contrib/utils/data/storages/pandas_based.py +3 -5
- arekit/contrib/utils/data/storages/row_cache.py +18 -6
- arekit/contrib/utils/data/storages/sqlite_based.py +17 -0
- arekit/contrib/utils/data/writers/base.py +5 -0
- arekit/contrib/utils/data/writers/csv_native.py +3 -0
- arekit/contrib/utils/data/writers/csv_pd.py +3 -0
- arekit/contrib/utils/data/writers/json_opennre.py +31 -13
- arekit/contrib/utils/data/writers/sqlite_native.py +114 -0
- arekit/contrib/utils/io_utils/embedding.py +25 -33
- arekit/contrib/utils/io_utils/utils.py +3 -24
- arekit/contrib/utils/pipelines/items/sampling/base.py +31 -26
- arekit/contrib/utils/pipelines/items/sampling/networks.py +7 -10
- arekit/contrib/utils/pipelines/items/text/entities_default.py +2 -2
- arekit/contrib/utils/pipelines/items/text/frames.py +2 -3
- arekit/contrib/utils/pipelines/items/text/frames_lemmatized.py +3 -3
- arekit/contrib/utils/pipelines/items/text/frames_negation.py +2 -1
- arekit/contrib/utils/pipelines/items/text/tokenizer.py +3 -5
- arekit/contrib/utils/pipelines/items/text/translator.py +136 -0
- arekit/contrib/utils/pipelines/opinion_collections.py +5 -5
- arekit/contrib/utils/pipelines/text_opinion/annot/algo_based.py +7 -7
- arekit/contrib/utils/pipelines/text_opinion/extraction.py +34 -22
- arekit/contrib/utils/pipelines/text_opinion/filters/base.py +1 -1
- arekit/contrib/utils/pipelines/text_opinion/filters/distance_based.py +1 -1
- arekit/contrib/utils/pipelines/text_opinion/filters/entity_based.py +3 -3
- arekit/contrib/utils/pipelines/text_opinion/filters/limitation.py +4 -4
- arekit/contrib/utils/serializer.py +4 -23
- arekit-0.25.0.data/data/logo.png +0 -0
- arekit-0.25.0.dist-info/METADATA +82 -0
- arekit-0.25.0.dist-info/RECORD +259 -0
- {arekit-0.23.1.dist-info → arekit-0.25.0.dist-info}/WHEEL +1 -1
- arekit/common/data/row_ids/base.py +0 -79
- arekit/common/data/row_ids/binary.py +0 -38
- arekit/common/data/row_ids/multiple.py +0 -14
- arekit/common/folding/base.py +0 -36
- arekit/common/folding/fixed.py +0 -42
- arekit/common/folding/nofold.py +0 -15
- arekit/common/folding/united.py +0 -46
- arekit/common/news/objects_parser.py +0 -37
- arekit/common/news/parsed/providers/base.py +0 -48
- arekit/common/news/parsed/service.py +0 -31
- arekit/common/news/parser.py +0 -34
- arekit/common/text/parser.py +0 -12
- arekit/common/text/partitioning/__init__.py +0 -0
- arekit/common/text/partitioning/base.py +0 -4
- arekit/common/text/partitioning/terms.py +0 -35
- arekit/contrib/source/__init__.py +0 -0
- arekit/contrib/source/brat/__init__.py +0 -0
- arekit/contrib/source/brat/annot.py +0 -83
- arekit/contrib/source/brat/entities/__init__.py +0 -0
- arekit/contrib/source/brat/entities/compound.py +0 -33
- arekit/contrib/source/brat/entities/entity.py +0 -42
- arekit/contrib/source/brat/entities/parser.py +0 -53
- arekit/contrib/source/brat/news.py +0 -28
- arekit/contrib/source/brat/opinions/__init__.py +0 -0
- arekit/contrib/source/brat/opinions/converter.py +0 -19
- arekit/contrib/source/brat/relation.py +0 -32
- arekit/contrib/source/brat/sentence.py +0 -69
- arekit/contrib/source/brat/sentences_reader.py +0 -128
- arekit/contrib/source/download.py +0 -41
- arekit/contrib/source/nerel/__init__.py +0 -0
- arekit/contrib/source/nerel/entities.py +0 -55
- arekit/contrib/source/nerel/folding/__init__.py +0 -0
- arekit/contrib/source/nerel/folding/fixed.py +0 -75
- arekit/contrib/source/nerel/io_utils.py +0 -62
- arekit/contrib/source/nerel/labels.py +0 -241
- arekit/contrib/source/nerel/reader.py +0 -46
- arekit/contrib/source/nerel/utils.py +0 -24
- arekit/contrib/source/nerel/versions.py +0 -12
- arekit/contrib/source/nerelbio/__init__.py +0 -0
- arekit/contrib/source/nerelbio/io_utils.py +0 -62
- arekit/contrib/source/nerelbio/labels.py +0 -265
- arekit/contrib/source/nerelbio/reader.py +0 -8
- arekit/contrib/source/nerelbio/versions.py +0 -8
- arekit/contrib/source/ruattitudes/__init__.py +0 -0
- arekit/contrib/source/ruattitudes/collection.py +0 -36
- arekit/contrib/source/ruattitudes/entity/__init__.py +0 -0
- arekit/contrib/source/ruattitudes/entity/parser.py +0 -7
- arekit/contrib/source/ruattitudes/io_utils.py +0 -56
- arekit/contrib/source/ruattitudes/labels_fmt.py +0 -12
- arekit/contrib/source/ruattitudes/news.py +0 -51
- arekit/contrib/source/ruattitudes/news_brat.py +0 -44
- arekit/contrib/source/ruattitudes/opinions/__init__.py +0 -0
- arekit/contrib/source/ruattitudes/opinions/base.py +0 -28
- arekit/contrib/source/ruattitudes/opinions/converter.py +0 -37
- arekit/contrib/source/ruattitudes/reader.py +0 -268
- arekit/contrib/source/ruattitudes/sentence.py +0 -73
- arekit/contrib/source/ruattitudes/synonyms.py +0 -17
- arekit/contrib/source/ruattitudes/text_object.py +0 -57
- arekit/contrib/source/rusentiframes/__init__.py +0 -0
- arekit/contrib/source/rusentiframes/collection.py +0 -157
- arekit/contrib/source/rusentiframes/effect.py +0 -24
- arekit/contrib/source/rusentiframes/io_utils.py +0 -19
- arekit/contrib/source/rusentiframes/labels_fmt.py +0 -22
- arekit/contrib/source/rusentiframes/polarity.py +0 -35
- arekit/contrib/source/rusentiframes/role.py +0 -15
- arekit/contrib/source/rusentiframes/state.py +0 -24
- arekit/contrib/source/rusentiframes/types.py +0 -42
- arekit/contrib/source/rusentiframes/value.py +0 -2
- arekit/contrib/source/rusentrel/__init__.py +0 -0
- arekit/contrib/source/rusentrel/const.py +0 -3
- arekit/contrib/source/rusentrel/entities.py +0 -26
- arekit/contrib/source/rusentrel/io_utils.py +0 -125
- arekit/contrib/source/rusentrel/labels_fmt.py +0 -12
- arekit/contrib/source/rusentrel/news_reader.py +0 -51
- arekit/contrib/source/rusentrel/opinions/__init__.py +0 -0
- arekit/contrib/source/rusentrel/opinions/collection.py +0 -30
- arekit/contrib/source/rusentrel/opinions/converter.py +0 -40
- arekit/contrib/source/rusentrel/opinions/provider.py +0 -54
- arekit/contrib/source/rusentrel/opinions/writer.py +0 -42
- arekit/contrib/source/rusentrel/synonyms.py +0 -17
- arekit/contrib/source/sentinerel/__init__.py +0 -0
- arekit/contrib/source/sentinerel/entities.py +0 -52
- arekit/contrib/source/sentinerel/folding/__init__.py +0 -0
- arekit/contrib/source/sentinerel/folding/factory.py +0 -32
- arekit/contrib/source/sentinerel/folding/fixed.py +0 -73
- arekit/contrib/source/sentinerel/io_utils.py +0 -87
- arekit/contrib/source/sentinerel/labels.py +0 -53
- arekit/contrib/source/sentinerel/labels_scaler.py +0 -30
- arekit/contrib/source/sentinerel/reader.py +0 -42
- arekit/contrib/source/synonyms/__init__.py +0 -0
- arekit/contrib/source/synonyms/utils.py +0 -19
- arekit/contrib/source/zip_utils.py +0 -47
- arekit/contrib/utils/bert/rows.py +0 -0
- arekit/contrib/utils/bert/text_b_rus.py +0 -18
- arekit/contrib/utils/connotations/__init__.py +0 -0
- arekit/contrib/utils/connotations/rusentiframes_sentiment.py +0 -23
- arekit/contrib/utils/cv/__init__.py +0 -0
- arekit/contrib/utils/cv/doc_stat/__init__.py +0 -0
- arekit/contrib/utils/cv/doc_stat/base.py +0 -37
- arekit/contrib/utils/cv/doc_stat/sentence.py +0 -12
- arekit/contrib/utils/cv/splitters/__init__.py +0 -0
- arekit/contrib/utils/cv/splitters/base.py +0 -4
- arekit/contrib/utils/cv/splitters/default.py +0 -53
- arekit/contrib/utils/cv/splitters/statistical.py +0 -57
- arekit/contrib/utils/cv/two_class.py +0 -77
- arekit/contrib/utils/data/doc_ops/__init__.py +0 -0
- arekit/contrib/utils/data/doc_ops/dict_based.py +0 -13
- arekit/contrib/utils/data/ext.py +0 -31
- arekit/contrib/utils/data/views/__init__.py +0 -0
- arekit/contrib/utils/data/views/linkages/__init__.py +0 -0
- arekit/contrib/utils/data/views/linkages/base.py +0 -58
- arekit/contrib/utils/data/views/linkages/multilabel.py +0 -48
- arekit/contrib/utils/data/views/linkages/utils.py +0 -24
- arekit/contrib/utils/data/views/opinions.py +0 -14
- arekit/contrib/utils/download.py +0 -78
- arekit/contrib/utils/entities/formatters/str_rus_cased_fmt.py +0 -78
- arekit/contrib/utils/entities/formatters/str_rus_nocased_fmt.py +0 -15
- arekit/contrib/utils/entities/formatters/str_simple_fmt.py +0 -24
- arekit/contrib/utils/entities/formatters/str_simple_uppercase_fmt.py +0 -21
- arekit/contrib/utils/io_utils/opinions.py +0 -39
- arekit/contrib/utils/io_utils/samples.py +0 -78
- arekit/contrib/utils/lexicons/__init__.py +0 -0
- arekit/contrib/utils/lexicons/lexicon.py +0 -43
- arekit/contrib/utils/lexicons/relation.py +0 -45
- arekit/contrib/utils/lexicons/rusentilex.py +0 -34
- arekit/contrib/utils/nn/__init__.py +0 -0
- arekit/contrib/utils/nn/rows.py +0 -83
- arekit/contrib/utils/pipelines/items/sampling/bert.py +0 -5
- arekit/contrib/utils/pipelines/items/text/terms_splitter.py +0 -10
- arekit/contrib/utils/pipelines/items/to_output.py +0 -101
- arekit/contrib/utils/pipelines/sources/__init__.py +0 -0
- arekit/contrib/utils/pipelines/sources/nerel/__init__.py +0 -0
- arekit/contrib/utils/pipelines/sources/nerel/doc_ops.py +0 -27
- arekit/contrib/utils/pipelines/sources/nerel/extract_text_relations.py +0 -59
- arekit/contrib/utils/pipelines/sources/nerel/labels_fmt.py +0 -60
- arekit/contrib/utils/pipelines/sources/nerel_bio/__init__.py +0 -0
- arekit/contrib/utils/pipelines/sources/nerel_bio/doc_ops.py +0 -29
- arekit/contrib/utils/pipelines/sources/nerel_bio/extrat_text_relations.py +0 -59
- arekit/contrib/utils/pipelines/sources/nerel_bio/labels_fmt.py +0 -79
- arekit/contrib/utils/pipelines/sources/ruattitudes/__init__.py +0 -0
- arekit/contrib/utils/pipelines/sources/ruattitudes/doc_ops.py +0 -56
- arekit/contrib/utils/pipelines/sources/ruattitudes/entity_filter.py +0 -19
- arekit/contrib/utils/pipelines/sources/ruattitudes/extract_text_opinions.py +0 -58
- arekit/contrib/utils/pipelines/sources/rusentrel/__init__.py +0 -0
- arekit/contrib/utils/pipelines/sources/rusentrel/doc_ops.py +0 -21
- arekit/contrib/utils/pipelines/sources/rusentrel/extract_text_opinions.py +0 -100
- arekit/contrib/utils/pipelines/sources/sentinerel/__init__.py +0 -0
- arekit/contrib/utils/pipelines/sources/sentinerel/doc_ops.py +0 -29
- arekit/contrib/utils/pipelines/sources/sentinerel/entity_filter.py +0 -62
- arekit/contrib/utils/pipelines/sources/sentinerel/extract_text_opinions.py +0 -175
- arekit/contrib/utils/pipelines/sources/sentinerel/labels_fmt.py +0 -50
- arekit/contrib/utils/pipelines/text_opinion/annot/predefined.py +0 -88
- arekit/contrib/utils/resources.py +0 -26
- arekit/contrib/utils/sources/__init__.py +0 -0
- arekit/contrib/utils/sources/sentinerel/__init__.py +0 -0
- arekit/contrib/utils/sources/sentinerel/text_opinion/__init__.py +0 -0
- arekit/contrib/utils/sources/sentinerel/text_opinion/prof_per_org_filter.py +0 -63
- arekit/contrib/utils/utils_folding.py +0 -19
- arekit/download_data.py +0 -11
- arekit-0.23.1.dist-info/METADATA +0 -23
- arekit-0.23.1.dist-info/RECORD +0 -403
- /arekit/common/{data/row_ids → docs}/__init__.py +0 -0
- /arekit/common/{folding → docs/parsed}/__init__.py +0 -0
- /arekit/common/{news → docs/parsed/providers}/__init__.py +0 -0
- /arekit/common/{news → docs}/parsed/term_position.py +0 -0
- /arekit/common/{news/parsed → service}/__init__.py +0 -0
- /arekit/{common/news/parsed/providers → contrib/utils/data/doc_provider}/__init__.py +0 -0
- {arekit-0.23.1.dist-info → arekit-0.25.0.dist-info}/LICENSE +0 -0
- {arekit-0.23.1.dist-info → arekit-0.25.0.dist-info}/top_level.txt +0 -0
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import
|
|
1
|
+
from collections.abc import Iterable
|
|
2
2
|
|
|
3
3
|
from arekit.common.labels.scaler.base import BaseLabelScaler
|
|
4
4
|
from arekit.common.linkage.base import LinkedDataWrapper
|
|
@@ -14,11 +14,11 @@ def __create_labeled_opinion(item, label):
|
|
|
14
14
|
assert(isinstance(item, Opinion))
|
|
15
15
|
return Opinion(source_value=item.SourceValue,
|
|
16
16
|
target_value=item.TargetValue,
|
|
17
|
-
|
|
17
|
+
label=label)
|
|
18
18
|
|
|
19
19
|
|
|
20
20
|
def __linkages_to_opinions(linkages_iter, labels_helper, label_calc_mode):
|
|
21
|
-
assert(isinstance(linkages_iter,
|
|
21
|
+
assert(isinstance(linkages_iter, Iterable))
|
|
22
22
|
|
|
23
23
|
for linkage in linkages_iter:
|
|
24
24
|
assert(isinstance(linkage, LinkedDataWrapper))
|
|
@@ -31,7 +31,7 @@ def __linkages_to_opinions(linkages_iter, labels_helper, label_calc_mode):
|
|
|
31
31
|
|
|
32
32
|
|
|
33
33
|
def __fill_opinion_collection(opinions_iter, collection, supported_labels):
|
|
34
|
-
assert(isinstance(opinions_iter,
|
|
34
|
+
assert(isinstance(opinions_iter, Iterable))
|
|
35
35
|
assert(isinstance(collection, OpinionCollection))
|
|
36
36
|
assert(isinstance(supported_labels, set) or supported_labels is None)
|
|
37
37
|
|
|
@@ -39,7 +39,7 @@ def __fill_opinion_collection(opinions_iter, collection, supported_labels):
|
|
|
39
39
|
assert(isinstance(opinion, Opinion))
|
|
40
40
|
|
|
41
41
|
if supported_labels is not None:
|
|
42
|
-
if opinion.
|
|
42
|
+
if opinion.Label not in supported_labels:
|
|
43
43
|
continue
|
|
44
44
|
|
|
45
45
|
if collection.has_synonymous_opinion(opinion):
|
|
@@ -1,5 +1,5 @@
|
|
|
1
|
-
from arekit.common.
|
|
2
|
-
from arekit.common.
|
|
1
|
+
from arekit.common.docs.parsed.providers.text_opinion_pairs import TextOpinionPairsProvider
|
|
2
|
+
from arekit.common.docs.parsed.service import ParsedDocumentService
|
|
3
3
|
from arekit.common.opinions.annot.algo_based import AlgorithmBasedOpinionAnnotator
|
|
4
4
|
|
|
5
5
|
|
|
@@ -21,14 +21,14 @@ class AlgorithmBasedTextOpinionAnnotator(AlgorithmBasedOpinionAnnotator):
|
|
|
21
21
|
get_doc_existed_opinions_func=get_doc_existed_opinions_func)
|
|
22
22
|
self.__value_to_group_id_func = value_to_group_id_func
|
|
23
23
|
|
|
24
|
-
def __create_service(self,
|
|
25
|
-
return
|
|
24
|
+
def __create_service(self, parsed_doc):
|
|
25
|
+
return ParsedDocumentService(parsed_doc=parsed_doc, providers=[
|
|
26
26
|
TextOpinionPairsProvider(self.__value_to_group_id_func)
|
|
27
27
|
])
|
|
28
28
|
|
|
29
|
-
def annotate_collection(self,
|
|
30
|
-
service = self.__create_service(
|
|
29
|
+
def annotate_collection(self, parsed_doc):
|
|
30
|
+
service = self.__create_service(parsed_doc)
|
|
31
31
|
topp = service.get_provider(TextOpinionPairsProvider.NAME)
|
|
32
|
-
for opinion in super(AlgorithmBasedTextOpinionAnnotator, self).annotate_collection(
|
|
32
|
+
for opinion in super(AlgorithmBasedTextOpinionAnnotator, self).annotate_collection(parsed_doc):
|
|
33
33
|
for text_opinion in topp.iter_from_opinion(opinion):
|
|
34
34
|
yield text_opinion
|
|
@@ -1,39 +1,45 @@
|
|
|
1
|
+
from arekit.common.linkage.meta import MetaEmptyLinkedDataWrapper
|
|
1
2
|
from arekit.common.linkage.text_opinions import TextOpinionsLinkage
|
|
2
|
-
from arekit.common.
|
|
3
|
-
from arekit.common.
|
|
4
|
-
from arekit.common.
|
|
5
|
-
from arekit.common.
|
|
6
|
-
from arekit.common.pipeline.base import BasePipeline
|
|
3
|
+
from arekit.common.docs.parsed.base import ParsedDocument
|
|
4
|
+
from arekit.common.docs.parsed.providers.entity_service import EntityServiceProvider
|
|
5
|
+
from arekit.common.docs.parsed.service import ParsedDocumentService
|
|
6
|
+
from arekit.common.docs.parser import DocumentParsers
|
|
7
7
|
from arekit.common.pipeline.items.flatten import FlattenIterPipelineItem
|
|
8
8
|
from arekit.common.pipeline.items.map import MapPipelineItem
|
|
9
9
|
from arekit.common.pipeline.items.map_nested import MapNestedPipelineItem
|
|
10
|
-
from arekit.common.text.parser import BaseTextParser
|
|
11
10
|
from arekit.common.text_opinions.base import TextOpinion
|
|
12
11
|
from arekit.contrib.utils.pipelines.text_opinion.filters.base import TextOpinionFilter
|
|
13
12
|
from arekit.contrib.utils.pipelines.text_opinion.filters.limitation import FrameworkLimitationsTextOpinionFilter
|
|
14
13
|
|
|
15
14
|
|
|
16
|
-
def __iter_text_opinion_linkages(
|
|
15
|
+
def __iter_text_opinion_linkages(parsed_doc, annotators, entity_index_func,
|
|
16
|
+
text_opinion_filters, use_meta):
|
|
17
|
+
""" use_meta: bool
|
|
18
|
+
this is mainly for tqdm and other console parameters to stay up-to-date
|
|
19
|
+
with the state in the case we do not have that much output results
|
|
20
|
+
across multiple amount of documents.
|
|
21
|
+
"""
|
|
17
22
|
assert(isinstance(annotators, list))
|
|
18
|
-
assert(isinstance(
|
|
23
|
+
assert(isinstance(parsed_doc, ParsedDocument))
|
|
19
24
|
assert(isinstance(text_opinion_filters, list))
|
|
25
|
+
assert(isinstance(use_meta, bool))
|
|
20
26
|
|
|
21
27
|
def __to_id(text_opinion):
|
|
22
28
|
return "{}_{}".format(text_opinion.SourceId, text_opinion.TargetId)
|
|
23
29
|
|
|
24
|
-
service =
|
|
30
|
+
service = ParsedDocumentService(parsed_doc=parsed_doc, providers=[EntityServiceProvider(entity_index_func)])
|
|
25
31
|
esp = service.get_provider(EntityServiceProvider.NAME)
|
|
26
32
|
|
|
27
33
|
predefined = set()
|
|
28
34
|
|
|
29
35
|
for annotator in annotators:
|
|
30
|
-
for text_opinion in annotator.annotate_collection(
|
|
36
|
+
for text_opinion in annotator.annotate_collection(parsed_doc=parsed_doc):
|
|
31
37
|
assert(isinstance(text_opinion, TextOpinion))
|
|
32
38
|
|
|
33
39
|
passed = True
|
|
34
40
|
for f in text_opinion_filters:
|
|
35
41
|
assert(isinstance(f, TextOpinionFilter))
|
|
36
|
-
if not f.filter(text_opinion=text_opinion,
|
|
42
|
+
if not f.filter(text_opinion=text_opinion, parsed_doc=parsed_doc, entity_service_provider=esp):
|
|
37
43
|
passed = False
|
|
38
44
|
break
|
|
39
45
|
|
|
@@ -51,28 +57,34 @@ def __iter_text_opinion_linkages(parsed_news, annotators, text_opinion_filters):
|
|
|
51
57
|
text_opinion_linkage.set_tag(service)
|
|
52
58
|
yield text_opinion_linkage
|
|
53
59
|
|
|
60
|
+
# This is the case to consider the end of the document.
|
|
61
|
+
if use_meta:
|
|
62
|
+
yield MetaEmptyLinkedDataWrapper(doc_id=parsed_doc.RelatedDocID)
|
|
54
63
|
|
|
55
|
-
|
|
56
|
-
|
|
64
|
+
|
|
65
|
+
def text_opinion_extraction_pipeline(pipeline_items, get_doc_by_id_func, annotators, entity_index_func,
|
|
66
|
+
text_opinion_filters=None, use_meta_between_docs=True):
|
|
57
67
|
assert(callable(get_doc_by_id_func))
|
|
58
68
|
assert(isinstance(annotators, list))
|
|
59
69
|
assert(isinstance(text_opinion_filters, list) or text_opinion_filters is None)
|
|
70
|
+
assert(isinstance(use_meta_between_docs, bool))
|
|
60
71
|
|
|
61
72
|
extra_filters = [] if text_opinion_filters is None else text_opinion_filters
|
|
62
73
|
actual_text_opinion_filters = [FrameworkLimitationsTextOpinionFilter()] + extra_filters
|
|
63
74
|
|
|
64
|
-
return
|
|
65
|
-
# (doc_id) -> (
|
|
75
|
+
return [
|
|
76
|
+
# (doc_id) -> (doc)
|
|
66
77
|
MapPipelineItem(map_func=lambda doc_id: get_doc_by_id_func(doc_id)),
|
|
67
78
|
|
|
68
|
-
# (
|
|
69
|
-
MapNestedPipelineItem(map_func=lambda
|
|
70
|
-
|
|
79
|
+
# (doc, ppl_ctx) -> (parsed_doc)
|
|
80
|
+
MapNestedPipelineItem(map_func=lambda doc, ppl_ctx: DocumentParsers.parse(
|
|
81
|
+
doc=doc, pipeline_items=pipeline_items, parent_ppl_ctx=ppl_ctx)),
|
|
71
82
|
|
|
72
|
-
# (
|
|
73
|
-
MapPipelineItem(map_func=lambda
|
|
74
|
-
annotators=annotators,
|
|
83
|
+
# (parsed_doc) -> (text_opinions)
|
|
84
|
+
MapPipelineItem(map_func=lambda parsed_doc: __iter_text_opinion_linkages(
|
|
85
|
+
annotators=annotators, parsed_doc=parsed_doc, entity_index_func=entity_index_func,
|
|
86
|
+
text_opinion_filters=actual_text_opinion_filters, use_meta=use_meta_between_docs)),
|
|
75
87
|
|
|
76
88
|
# linkages[] -> linkages
|
|
77
89
|
FlattenIterPipelineItem()
|
|
78
|
-
]
|
|
90
|
+
]
|
|
@@ -8,7 +8,7 @@ class DistanceLimitedTextOpinionFilter(TextOpinionFilter):
|
|
|
8
8
|
super(DistanceLimitedTextOpinionFilter, self).__init__()
|
|
9
9
|
self.__terms_per_context = terms_per_context
|
|
10
10
|
|
|
11
|
-
def filter(self, text_opinion,
|
|
11
|
+
def filter(self, text_opinion, parsed_doc, entity_service_provider):
|
|
12
12
|
|
|
13
13
|
return InputSampleBase.check_ability_to_create_sample(
|
|
14
14
|
entity_service=entity_service_provider,
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
from arekit.common.entities.types import OpinionEntityType
|
|
2
|
-
from arekit.common.
|
|
2
|
+
from arekit.common.docs.parsed.base import ParsedDocument
|
|
3
3
|
from arekit.common.text_opinions.base import TextOpinion
|
|
4
4
|
|
|
5
5
|
from arekit.contrib.utils.entities.filter import EntityFilter
|
|
@@ -13,9 +13,9 @@ class EntityBasedTextOpinionFilter(TextOpinionFilter):
|
|
|
13
13
|
assert(isinstance(entity_filter, EntityFilter) or entity_filter is None)
|
|
14
14
|
self.__entity_filter = entity_filter
|
|
15
15
|
|
|
16
|
-
def filter(self, text_opinion,
|
|
16
|
+
def filter(self, text_opinion, parsed_doc, entity_service_provider):
|
|
17
17
|
assert(isinstance(text_opinion, TextOpinion))
|
|
18
|
-
assert(isinstance(
|
|
18
|
+
assert(isinstance(parsed_doc, ParsedDocument))
|
|
19
19
|
|
|
20
20
|
if self.__entity_filter is not None:
|
|
21
21
|
e_source = entity_service_provider._doc_entities[text_opinion.SourceId]
|
|
@@ -1,5 +1,5 @@
|
|
|
1
|
-
from arekit.common.
|
|
2
|
-
from arekit.common.
|
|
1
|
+
from arekit.common.docs.parsed.base import ParsedDocument
|
|
2
|
+
from arekit.common.docs.parsed.term_position import TermPositionTypes
|
|
3
3
|
from arekit.common.text_opinions.base import TextOpinion
|
|
4
4
|
from arekit.contrib.utils.pipelines.text_opinion.filters.base import TextOpinionFilter
|
|
5
5
|
|
|
@@ -10,9 +10,9 @@ class FrameworkLimitationsTextOpinionFilter(TextOpinionFilter):
|
|
|
10
10
|
It is require to hide and provide known limitations.
|
|
11
11
|
"""
|
|
12
12
|
|
|
13
|
-
def filter(self, text_opinion,
|
|
13
|
+
def filter(self, text_opinion, parsed_doc, entity_service_provider):
|
|
14
14
|
assert(isinstance(text_opinion, TextOpinion))
|
|
15
|
-
assert(isinstance(
|
|
15
|
+
assert(isinstance(parsed_doc, ParsedDocument))
|
|
16
16
|
|
|
17
17
|
s_ind = entity_service_provider.get_entity_position(
|
|
18
18
|
text_opinion.SourceId, position_type=TermPositionTypes.SentenceIndex)
|
|
@@ -1,17 +1,13 @@
|
|
|
1
|
-
import collections
|
|
2
1
|
import logging
|
|
3
2
|
|
|
4
|
-
from
|
|
3
|
+
from collections.abc import Iterable
|
|
5
4
|
|
|
6
5
|
from arekit.common.data.input.providers.columns.sample import SampleColumnsProvider
|
|
7
6
|
from arekit.common.data.input.providers.rows.base import BaseRowProvider
|
|
8
7
|
from arekit.common.data.input.repositories.base import BaseInputRepository
|
|
9
8
|
from arekit.common.data.input.repositories.sample import BaseInputSamplesRepository
|
|
10
9
|
from arekit.common.data.storages.base import BaseRowsStorage
|
|
11
|
-
from arekit.common.pipeline.base import BasePipeline
|
|
12
10
|
from arekit.contrib.utils.data.contents.opinions import InputTextOpinionProvider
|
|
13
|
-
from arekit.contrib.utils.data.readers.csv_pd import PandasCsvReader
|
|
14
|
-
from arekit.contrib.utils.data.service.balance import PandasBasedStorageBalancing
|
|
15
11
|
|
|
16
12
|
logger = logging.getLogger(__name__)
|
|
17
13
|
logging.basicConfig(level=logging.INFO)
|
|
@@ -30,11 +26,10 @@ class InputDataSerializationHelper(object):
|
|
|
30
26
|
storage=storage)
|
|
31
27
|
|
|
32
28
|
@staticmethod
|
|
33
|
-
def fill_and_write(pipeline, repo, target, writer, doc_ids_iter, desc=""
|
|
34
|
-
assert(isinstance(pipeline,
|
|
35
|
-
assert(isinstance(doc_ids_iter,
|
|
29
|
+
def fill_and_write(pipeline, repo, target, writer, doc_ids_iter, desc=""):
|
|
30
|
+
assert(isinstance(pipeline, list))
|
|
31
|
+
assert(isinstance(doc_ids_iter, Iterable))
|
|
36
32
|
assert(isinstance(repo, BaseInputRepository))
|
|
37
|
-
assert(isinstance(do_balance, bool))
|
|
38
33
|
|
|
39
34
|
doc_ids = list(doc_ids_iter)
|
|
40
35
|
|
|
@@ -45,17 +40,3 @@ class InputDataSerializationHelper(object):
|
|
|
45
40
|
target=target)
|
|
46
41
|
|
|
47
42
|
repo.push(writer=writer, target=target)
|
|
48
|
-
|
|
49
|
-
if do_balance:
|
|
50
|
-
|
|
51
|
-
# We perform a complete and clean data reading from scratch.
|
|
52
|
-
reader = PandasCsvReader()
|
|
53
|
-
balanced_storage = PandasBasedStorageBalancing.create_balanced_from(
|
|
54
|
-
storage=reader.read(target=target), column_name=const.LABEL, free_origin=True)
|
|
55
|
-
|
|
56
|
-
# Initializing the new repository instance.
|
|
57
|
-
repo = BaseInputSamplesRepository(columns_provider=repo._columns_provider,
|
|
58
|
-
rows_provider=repo._rows_provider,
|
|
59
|
-
storage=balanced_storage)
|
|
60
|
-
|
|
61
|
-
repo.push(writer=writer, target=target)
|
|
Binary file
|
|
@@ -0,0 +1,82 @@
|
|
|
1
|
+
Metadata-Version: 2.1
|
|
2
|
+
Name: arekit
|
|
3
|
+
Version: 0.25.0
|
|
4
|
+
Summary: Document level Attitude and Relation Extraction toolkit (AREkit) for sampling and prompting mass-media news into datasets for ML-model training
|
|
5
|
+
Home-page: https://github.com/nicolay-r/AREkit
|
|
6
|
+
Author: Nicolay Rusnachenko
|
|
7
|
+
Author-email: rusnicolay@gmail.com
|
|
8
|
+
License: MIT License
|
|
9
|
+
Keywords: natural language processing,relation extraction,sentiment analysis
|
|
10
|
+
Classifier: Programming Language :: Python
|
|
11
|
+
Classifier: Programming Language :: Python :: 3.6
|
|
12
|
+
Classifier: Topic :: Software Development :: Libraries :: Python Modules
|
|
13
|
+
Classifier: Topic :: Scientific/Engineering :: Information Analysis
|
|
14
|
+
Classifier: Topic :: Text Processing :: Linguistic
|
|
15
|
+
Requires-Python: >=3.6
|
|
16
|
+
Description-Content-Type: text/markdown
|
|
17
|
+
License-File: LICENSE
|
|
18
|
+
Requires-Dist: tqdm
|
|
19
|
+
Requires-Dist: enum34==1.1.10
|
|
20
|
+
Requires-Dist: numpy>=1.14.5
|
|
21
|
+
Requires-Dist: pymystem3==0.2.0
|
|
22
|
+
|
|
23
|
+
# AREkit 0.25.0
|
|
24
|
+
|
|
25
|
+

|
|
26
|
+
|
|
27
|
+
<p align="center">
|
|
28
|
+
<img src="logo.png"/>
|
|
29
|
+
</p>
|
|
30
|
+
|
|
31
|
+
**AREkit** (Attitude and Relation Extraction Toolkit) --
|
|
32
|
+
is a Python toolkit devoted to document-level Attitude and Relation Extraction between text objects from mass-media news.
|
|
33
|
+
|
|
34
|
+
## Description
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
This toolkit aims at memory-efficient data processing in Relation Extraction (RE) related tasks.
|
|
38
|
+
|
|
39
|
+
<p align="center">
|
|
40
|
+
<img src="docs/arekit-pipeline-concept.png"/>
|
|
41
|
+
</p>
|
|
42
|
+
|
|
43
|
+
> Figure: AREkit pipelines design. More on
|
|
44
|
+
> **[ARElight: Context Sampling of Large Texts for Deep Learning Relation Extraction](https://link.springer.com/chapter/10.1007/978-3-031-56069-9_23)** paper
|
|
45
|
+
|
|
46
|
+
In particular, this framework serves the following features:
|
|
47
|
+
* ➿ [pipelines](https://github.com/nicolay-r/AREkit/wiki/Pipelines:-Text-Opinion-Annotation) and iterators for handling large-scale collections serialization without out-of-memory issues.
|
|
48
|
+
* 🔗 EL (entity-linking) API support for objects,
|
|
49
|
+
* ➰ avoidance of cyclic connections,
|
|
50
|
+
* :straight_ruler: distance consideration between relation participants (in `terms` or `sentences`),
|
|
51
|
+
* 📑 relations annotations and filtering rules,
|
|
52
|
+
* *️⃣ entities formatting or masking, and more.
|
|
53
|
+
|
|
54
|
+
The core functionality includes:
|
|
55
|
+
* API for document presentation with EL (Entity Linking, i.e. Object Synonymy) support
|
|
56
|
+
for sentence level relations preparation (dubbed as contexts);
|
|
57
|
+
* API for contexts extraction;
|
|
58
|
+
* Relations transferring from sentence-level onto document-level, and more.
|
|
59
|
+
|
|
60
|
+
## Installation
|
|
61
|
+
|
|
62
|
+
```bash
|
|
63
|
+
pip install git+https://github.com/nicolay-r/AREkit.git@0.25.0-rc
|
|
64
|
+
```
|
|
65
|
+
|
|
66
|
+
## Usage
|
|
67
|
+
|
|
68
|
+
Please follow the **[tutorial section on project Wiki](https://github.com/nicolay-r/AREkit/wiki/Tutorials)** for more details.
|
|
69
|
+
|
|
70
|
+
## How to cite
|
|
71
|
+
Great research is also accompanied by faithful references.
|
|
72
|
+
If you use or extend our work, please cite as follows:
|
|
73
|
+
|
|
74
|
+
```bibtex
|
|
75
|
+
@inproceedings{rusnachenko2024arelight,
|
|
76
|
+
title={ARElight: Context Sampling of Large Texts for Deep Learning Relation Extraction},
|
|
77
|
+
author={Rusnachenko, Nicolay and Liang, Huizhi and Kolomeets, Maxim and Shi, Lei},
|
|
78
|
+
booktitle={European Conference on Information Retrieval},
|
|
79
|
+
year={2024},
|
|
80
|
+
organization={Springer}
|
|
81
|
+
}
|
|
82
|
+
```
|
|
@@ -0,0 +1,259 @@
|
|
|
1
|
+
arekit/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
2
|
+
arekit/common/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
3
|
+
arekit/common/bound.py,sha256=lPpHY6ct_CU9e4qXeYjhJfWbTj6Sb_NVtZ1CJheQPNE,1402
|
|
4
|
+
arekit/common/log_utils.py,sha256=OfEQxbExkuRAl9dxlgFEqcFhI4HHoMYT7WE8ud0IPOM,924
|
|
5
|
+
arekit/common/utils.py,sha256=eVRGhRy882ow-63Glncc3pJ-_43KSI0ukBePjC8ogAY,2394
|
|
6
|
+
arekit/common/context/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
7
|
+
arekit/common/context/terms_mapper.py,sha256=QA02Cv7D2JKTlXkez_0w0J8HuvNziNF2vrqLgy4Bwc8,1447
|
|
8
|
+
arekit/common/context/token.py,sha256=CpWAlvprUnJfCtYvO8lwdfU_ofSKAOGOudXTwppyzSk,459
|
|
9
|
+
arekit/common/data/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
10
|
+
arekit/common/data/const.py,sha256=J74zim3CGJlLJp-AVn5z9TTuBfmttjiM_8sRW1Pc-iE,457
|
|
11
|
+
arekit/common/data/doc_provider.py,sha256=KU6Q2-B8_cUuFhSBHYp-cDI8OCwFk3fwOahv2QLIR2c,149
|
|
12
|
+
arekit/common/data/rows_fmt.py,sha256=klq9HdzSnhbRBhOw7O4ctp3PZ5L6ZVy-0eIV2vLLYY8,2694
|
|
13
|
+
arekit/common/data/rows_parser.py,sha256=qYSEETvhX_0_JuAqm0bjK_V28_53qq7OY9JAnBdRC78,1513
|
|
14
|
+
arekit/common/data/input/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
15
|
+
arekit/common/data/input/sample.py,sha256=6JeGxsLbEUXVKPWA1hIlkTDNOaYg4bHCJWw0ULrLByg,2143
|
|
16
|
+
arekit/common/data/input/terms_mapper.py,sha256=DUOMbGwiQETY7qhztoU8uU30d1cQPsIsgNLldpjcufg,3197
|
|
17
|
+
arekit/common/data/input/providers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
18
|
+
arekit/common/data/input/providers/const.py,sha256=GDvPkgP7hllHW3QiueMBQgQyu2CtNFI4JYNNja2Im6Q,187
|
|
19
|
+
arekit/common/data/input/providers/contents.py,sha256=jT1LJE_5Igw5H2e1jKsWWciHSbPVg649phT177SzhEA,261
|
|
20
|
+
arekit/common/data/input/providers/columns/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
21
|
+
arekit/common/data/input/providers/columns/base.py,sha256=Ar4GkC1L8YFMgeVNM-pIkOOUvKqf2CgIIdh5DA0V8uI,225
|
|
22
|
+
arekit/common/data/input/providers/columns/sample.py,sha256=3onDT6LGkFwU3GOAm6M1MvgjD3fEgapTslAV6-9gvIE,1756
|
|
23
|
+
arekit/common/data/input/providers/instances/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
24
|
+
arekit/common/data/input/providers/instances/base.py,sha256=ybaHQNRpuebdHGU937yzkgZ0E7mO-S7Dm41NwFj44ew,420
|
|
25
|
+
arekit/common/data/input/providers/instances/multiple.py,sha256=6agaTA3srLiLEhBTU0RnD01GUFqMcsITV5NjVkUgR10,1144
|
|
26
|
+
arekit/common/data/input/providers/instances/single.py,sha256=bZKIn_Kw79c8pH1a3aUq1dmOsDu__BoFwQDLGjEtg5I,253
|
|
27
|
+
arekit/common/data/input/providers/label/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
28
|
+
arekit/common/data/input/providers/label/base.py,sha256=1MOCKw_OP_IbYT5OR5C3b9VZdYnLGg-TxPc_qHpuZJs,620
|
|
29
|
+
arekit/common/data/input/providers/label/binary.py,sha256=jPD6Jn8DYMrdI3jN8ueoWvuGMouUKbelmI07sP9Wau4,337
|
|
30
|
+
arekit/common/data/input/providers/label/multiple.py,sha256=HWbHF_CwwbiLQbYm5dgvnXAm0b6tJOyFYFEUBxuWAqI,492
|
|
31
|
+
arekit/common/data/input/providers/rows/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
32
|
+
arekit/common/data/input/providers/rows/base.py,sha256=syH7ZEW3Agwfb1IR0G7n_Amy3Kkg0EZk2V7kH3r7ADg,2517
|
|
33
|
+
arekit/common/data/input/providers/rows/samples.py,sha256=uqLTP8fnz-0wC7ALLlIDUYtXTG4OpnRqp70Fgv_1Iiw,9427
|
|
34
|
+
arekit/common/data/input/providers/sample/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
35
|
+
arekit/common/data/input/providers/sample/cropped.py,sha256=jJSos4Si-qy-wb-QmomXxxgURR1UhJnvY0tZoowlfVc,1885
|
|
36
|
+
arekit/common/data/input/providers/text/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
37
|
+
arekit/common/data/input/providers/text/single.py,sha256=vm3sShIYZcmses-hmZX9cOfveWXCYGwvKLgQ0qs3VXQ,1604
|
|
38
|
+
arekit/common/data/input/repositories/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
39
|
+
arekit/common/data/input/repositories/base.py,sha256=4DmLVORc85gu6bxtXVZgxi176NxnIaqHz2tVebMyGZ8,2557
|
|
40
|
+
arekit/common/data/input/repositories/sample.py,sha256=LAdpaA1N_nq1iInLwkWQVvL6HGH64JYWSJ9tywU0llY,784
|
|
41
|
+
arekit/common/data/storages/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
42
|
+
arekit/common/data/storages/base.py,sha256=L9OLpVOZwlAXZION0YP1T6ZN1t_dfQpnAPAU4ztSs48,2956
|
|
43
|
+
arekit/common/data/views/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
44
|
+
arekit/common/data/views/samples.py,sha256=LDqUDqArGt90ujRB4kDFgDHLmR2_AQoUnzhxpXYWYaM,882
|
|
45
|
+
arekit/common/docs/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
46
|
+
arekit/common/docs/base.py,sha256=uXUOtpR9BEsDBfDHg4eLqOjfSVOV_o9VPii3nSxLZuY,734
|
|
47
|
+
arekit/common/docs/entities_grouping.py,sha256=_r254fNr0j6BjHuLZBLjj21yWm4_k__5aOcBXcAaQUQ,704
|
|
48
|
+
arekit/common/docs/entity.py,sha256=TxrZMdIEgjk-PgCyskCkVis2KAw_M7vTBp3ppP6G05M,662
|
|
49
|
+
arekit/common/docs/parser.py,sha256=514lQNrZiwU_mxgyuWBkDhqjS5SVAvcIHx9GQUTuVG8,2883
|
|
50
|
+
arekit/common/docs/sentence.py,sha256=nZCCFj2yk71POoXCBfEMN3pteM2qQdj60eEzxMVY_3k,302
|
|
51
|
+
arekit/common/docs/parsed/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
52
|
+
arekit/common/docs/parsed/base.py,sha256=WPstqOpBuLKjtz6UO_bI0DpOPF3Sm0wYEVwjtldbPXE,3175
|
|
53
|
+
arekit/common/docs/parsed/service.py,sha256=fSzwtRcSvmvlW8LyK6XPf7wJAx66GWlbRgH_3oQf-BU,1029
|
|
54
|
+
arekit/common/docs/parsed/term_position.py,sha256=H9eQQeanLxwP6og30TQUnpcXymGEPwXClRpaE8VnpLs,1040
|
|
55
|
+
arekit/common/docs/parsed/providers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
56
|
+
arekit/common/docs/parsed/providers/base.py,sha256=IjnG7c6Q78cYYAPTrwuZCOiMQDfMaujDQ6U0gK7JCcw,2587
|
|
57
|
+
arekit/common/docs/parsed/providers/base_pairs.py,sha256=RDYjspkENPQU2pn7Jp5mFrL9566eVWgXMEzWBQlMdRo,2195
|
|
58
|
+
arekit/common/docs/parsed/providers/entity_service.py,sha256=oaBfferpkDXfAFL17vpecSZUsV1Pjvq6lqgHDHsIEZY,6657
|
|
59
|
+
arekit/common/docs/parsed/providers/opinion_pairs.py,sha256=ibeFmvpMBBARtqQ3EKEocIOulgzavv0DeYxePGQK5-U,633
|
|
60
|
+
arekit/common/docs/parsed/providers/text_opinion_pairs.py,sha256=BC4uVgFxy3oZTkCq9VgOlqoqhODia2Z3anoGyGoy0ao,3139
|
|
61
|
+
arekit/common/entities/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
62
|
+
arekit/common/entities/base.py,sha256=kpJFo4pCRVBQX6T8PibLKspp9UwoIrkHDoFMTM9KkUs,1646
|
|
63
|
+
arekit/common/entities/collection.py,sha256=ySSriMYP6zzdto1mC0V9VPXmkAqyJN3mmGoqoNValGI,1931
|
|
64
|
+
arekit/common/entities/str_fmt.py,sha256=gAPeS8RXdhh8Px_u5eOAPbtLREiiyMueid0lQoa4EbQ,250
|
|
65
|
+
arekit/common/entities/types.py,sha256=pxFB0gsevdsmnduN_Ffk7_P2TRiMt6NAHyrutuKOFvs,145
|
|
66
|
+
arekit/common/experiment/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
67
|
+
arekit/common/experiment/data_type.py,sha256=DezUkfwLTf6XLYheqPiaWyx3ZwcldsJ8wDV8aNgJtDk,227
|
|
68
|
+
arekit/common/experiment/api/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
69
|
+
arekit/common/experiment/api/base_samples_io.py,sha256=SN8CnbEYaazE3SldvnENfjoNRHsTejtrg4jJfqfZLMs,516
|
|
70
|
+
arekit/common/frames/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
71
|
+
arekit/common/frames/text_variant.py,sha256=TlWR4jnuF7HW9BMHhOTKkr768V_Ub0wd0E5A4YTwD0c,875
|
|
72
|
+
arekit/common/frames/connotations/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
73
|
+
arekit/common/frames/connotations/descriptor.py,sha256=yow1Wo-Hf52rx2hiQlpeSkpP4WFFcFB25ewgXtwm588,408
|
|
74
|
+
arekit/common/frames/connotations/provider.py,sha256=Zm-NFL-aVKJM_NhvTWizIAiNENt6B1tegTrj0k2afoc,114
|
|
75
|
+
arekit/common/frames/variants/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
76
|
+
arekit/common/frames/variants/base.py,sha256=PhFxJZl-g9bGLfg1OlPKIUETAsTx4wwSPuBS5yOEPg8,489
|
|
77
|
+
arekit/common/frames/variants/collection.py,sha256=28_DRBny_iAWMdHpupdCnLvBp0FtF2tjz-uUctyrmhY,1935
|
|
78
|
+
arekit/common/labels/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
79
|
+
arekit/common/labels/base.py,sha256=m7EjvPcQPHtzZ0txVqNXIQPUzgNuaU2FmDyND7K4yTE,412
|
|
80
|
+
arekit/common/labels/str_fmt.py,sha256=ecDsP1-7NNHk_aEaBlPaNaNoA_aqy28QBOHoIxtEnDk,1707
|
|
81
|
+
arekit/common/labels/provider/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
82
|
+
arekit/common/labels/provider/base.py,sha256=KIWvRwXGWNWYhrzEV8A0g9r0Yk7N2E0qQpf9-UpVnbw,151
|
|
83
|
+
arekit/common/labels/provider/constant.py,sha256=bU6DCm1iuk_W2fMkg-NxABMJqgS9DtwxnoHpD_vSnLc,462
|
|
84
|
+
arekit/common/labels/scaler/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
85
|
+
arekit/common/labels/scaler/base.py,sha256=FTZ7eTPTVK9IXLcZaXbpidsTTqTjX0-l1Qt-N1bpqWg,2349
|
|
86
|
+
arekit/common/labels/scaler/sentiment.py,sha256=TbYdM9mdtFTQL_fgh9rS9TEc-7U4Fpskp8JvnvN8TAA,180
|
|
87
|
+
arekit/common/labels/scaler/single.py,sha256=tybF3-fO4CHd_QUFnDCEmTbfbljfJA9aZEv9MtpM5Ss,308
|
|
88
|
+
arekit/common/linkage/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
89
|
+
arekit/common/linkage/base.py,sha256=toZmKjTr444eHhvNLMSNU23KXtfH5DtOGtS99qGNcOo,1014
|
|
90
|
+
arekit/common/linkage/meta.py,sha256=LFHHhAkzQzym8rha4uuXb0BKwIb61SVGtxnU4iF_Nuk,692
|
|
91
|
+
arekit/common/linkage/opinions.py,sha256=8OQscnh1-5JJL3KX_lCm_6ayGCezDuFnvidfuwkjClI,255
|
|
92
|
+
arekit/common/linkage/text_opinions.py,sha256=qR1-zGEYaVPSpNISnGGXnABpdP6Qx8tc1i5DsEyn9wo,571
|
|
93
|
+
arekit/common/model/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
94
|
+
arekit/common/model/labeling/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
95
|
+
arekit/common/model/labeling/base.py,sha256=uj7_igCWEU23OjnzabNy0LyxoZ6S_qSfCA-ZaoL1erA,727
|
|
96
|
+
arekit/common/model/labeling/modes.py,sha256=DiwC6Aomke-ojwwpR2pcd4qgQSwmRdGCvQlyHHhN3YY,127
|
|
97
|
+
arekit/common/model/labeling/single.py,sha256=Eggi0obocjiT9ofv_U0zLiFoEIeUQhaMCqjCWn14Fh8,773
|
|
98
|
+
arekit/common/opinions/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
99
|
+
arekit/common/opinions/base.py,sha256=eIx1RzsngCkpnF2Utju5i_Qp7gqF_rDIe_UDeMGXtmo,2112
|
|
100
|
+
arekit/common/opinions/collection.py,sha256=bdx-CIYYdE-DrjyB1mRTGtkLb-lrGPTSLl25xv5EHnM,4938
|
|
101
|
+
arekit/common/opinions/enums.py,sha256=TE5AGN_xb0NdZ636UtHuYFRMNl24iwXzmyf8WUfvr6w,83
|
|
102
|
+
arekit/common/opinions/provider.py,sha256=q4hXRFDuGoo9fGOf_L9CM048YBtel1v3__ZqfSXL8Xc,168
|
|
103
|
+
arekit/common/opinions/writer.py,sha256=-IbWTIVlX2rhLpSP_8iuQ3_WyzzGwhto7ujfnNL6jhA,173
|
|
104
|
+
arekit/common/opinions/annot/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
105
|
+
arekit/common/opinions/annot/algo_based.py,sha256=cvDGDmUoUaQ1Xcbyouxrjs0CkHRfRogW8Mfs5O5cOlc,2240
|
|
106
|
+
arekit/common/opinions/annot/base.py,sha256=IvwrwT8O3s6b2_R0arpMR4Uog7kuWQZUAyRP5cq_27A,382
|
|
107
|
+
arekit/common/opinions/annot/algo/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
108
|
+
arekit/common/opinions/annot/algo/base.py,sha256=ymll-4-SplCY7CLswjOZEC1vsVHIEzUP0JMYgvL8hbo,124
|
|
109
|
+
arekit/common/opinions/annot/algo/pair_based.py,sha256=HbYn1mAsn5g11NiC9pfrMqNtJn_GzvqPFGpafMqqB2o,4419
|
|
110
|
+
arekit/common/opinions/annot/algo/predefined.py,sha256=zU39SADPKnykHCNB-Bmn_0bvd6gYWWYmfgfi-68hHSs,741
|
|
111
|
+
arekit/common/pipeline/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
112
|
+
arekit/common/pipeline/base.py,sha256=8TgWNy5QrnKEp1bq3lhyGSgIfYe5ZIZU3c-DYBJ9LPA,957
|
|
113
|
+
arekit/common/pipeline/batching.py,sha256=DdOvOladOo2aEv3JZ8NQnCvsNGcWk4TFzENrZqTGyXk,1239
|
|
114
|
+
arekit/common/pipeline/context.py,sha256=Fw25lBVakHNAXjtkdEqopR-Jh59cDKGWD2jCJxBrj7Y,1126
|
|
115
|
+
arekit/common/pipeline/conts.py,sha256=NAQNsHt1kK3HnxWv3M6yXi0c7C6Mx6ZZ6KZc0yE0eas,70
|
|
116
|
+
arekit/common/pipeline/utils.py,sha256=5VqH1LtRa4tYUbyiRvWdBmP4biFhTKq9vhr8QiRFFkY,882
|
|
117
|
+
arekit/common/pipeline/items/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
118
|
+
arekit/common/pipeline/items/base.py,sha256=dWIZVGJjYuURLCiZj8YQHWtsS725SOi9SPZaCPV7NvI,1694
|
|
119
|
+
arekit/common/pipeline/items/flatten.py,sha256=9T4jWqPGv4UDxajlM0Nm0-gvwUgqqYB8XH0efTum9a0,542
|
|
120
|
+
arekit/common/pipeline/items/handle.py,sha256=QS5Byj7-o5jmFi0ag58NE3zm2-JzVIunIgc3Pn1ij6g,578
|
|
121
|
+
arekit/common/pipeline/items/iter.py,sha256=Tk9WdUMPOq20s7jEWEpU4PmillnVtQ8nIa2ct7iw-3s,406
|
|
122
|
+
arekit/common/pipeline/items/map.py,sha256=G5wBdjaaxePD0pijrxsfpJACeP7kzj7HerjCkNIhmII,381
|
|
123
|
+
arekit/common/pipeline/items/map_nested.py,sha256=vs0GdJNr3qSF9p2yd1nWji5E1HGzECbvOfN2MqoHc2A,630
|
|
124
|
+
arekit/common/service/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
125
|
+
arekit/common/service/sqlite.py,sha256=1jLIszkcJGeT0hUos8Y0Chp3o9XRUfljG2P9q0T2_Ds,1440
|
|
126
|
+
arekit/common/synonyms/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
127
|
+
arekit/common/synonyms/base.py,sha256=YxD-CKCjlEtar1zTdumnfC3vKgbP2wLODR9mMEwbbnA,4237
|
|
128
|
+
arekit/common/synonyms/grouping.py,sha256=fi7QQbBvsTvvP2CPTesSPEsPNmGfc6euqj-HPhVvtlg,698
|
|
129
|
+
arekit/common/text/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
130
|
+
arekit/common/text/enums.py,sha256=nelEI7B-szLUtl8xds8Kw_vgK5JWg_Aj7IadEj2q_1Y,141
|
|
131
|
+
arekit/common/text/parsed.py,sha256=YxGRHtozDd3sDVI3hMT_hOO7Wmsy7_zLkblfnSXeJ9g,1104
|
|
132
|
+
arekit/common/text/partitioning.py,sha256=OL8r3-xaMafnT7FuPXDHINlA-BQgx6cLaMqm366WKCU,1153
|
|
133
|
+
arekit/common/text/stemmer.py,sha256=OJ5XelxLN-7m3uLPDU9C7CWdkXDeK-xieexQN6RYLXc,341
|
|
134
|
+
arekit/common/text_opinions/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
135
|
+
arekit/common/text_opinions/base.py,sha256=KootNvGAbUVCV5uFgLjK-bm9bbQSIvZUz0q9CBToGa8,3447
|
|
136
|
+
arekit/contrib/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
137
|
+
arekit/contrib/bert/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
138
|
+
arekit/contrib/bert/input/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
139
|
+
arekit/contrib/bert/input/providers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
140
|
+
arekit/contrib/bert/input/providers/cropped_sample.py,sha256=46uHHhAe8cGxV2JlfO3thog5XV6T2niUIflFghfUSBM,866
|
|
141
|
+
arekit/contrib/bert/input/providers/text_pair.py,sha256=_1d-he0n42y3ksj8RjJlNHgHnaQUEq0aQhUdTPRMKgg,2817
|
|
142
|
+
arekit/contrib/bert/terms/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
143
|
+
arekit/contrib/bert/terms/mapper.py,sha256=oHX-lsaZYjBFLjngzSKT5z_JPJCHbclUsEe4i4fup_8,992
|
|
144
|
+
arekit/contrib/networks/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
145
|
+
arekit/contrib/networks/embedding.py,sha256=lrLdB6CdmnmzwavAL6MZuLHceNM3PsZZiWLZ4BjGeXc,3845
|
|
146
|
+
arekit/contrib/networks/embedding_io.py,sha256=hV1MBr9wu9-10gQgnAzLuC-l897aB-8KNcw4h69B5VM,460
|
|
147
|
+
arekit/contrib/networks/vectorizer.py,sha256=KKV_f0GZD10ZpeYgqZfvMapJtsKa3NBddR6W_GdYqrM,155
|
|
148
|
+
arekit/contrib/networks/input/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
149
|
+
arekit/contrib/networks/input/const.py,sha256=nPeuO-G6MILNlIkGc5HzSDj_RmTwLflReF7n5htFAUI,176
|
|
150
|
+
arekit/contrib/networks/input/ctx_serialization.py,sha256=eCOw4xjp8A7Z2WFanshooS3MqSy7dbZ8ywf_DA2LZO8,982
|
|
151
|
+
arekit/contrib/networks/input/rows_parser.py,sha256=6_43LbAelveY9yEWMU5BdvQlpWwm4RDOjUEmqHuPYdE,1807
|
|
152
|
+
arekit/contrib/networks/input/term_types.py,sha256=P8E5LKegZE5ZEh4vNtC55Lu8USbQt8_Eo14op_anmvU,348
|
|
153
|
+
arekit/contrib/networks/input/terms_mapping.py,sha256=NAnuTAbj7tBTe1Ga4js2IfnUdAWlTV9fcgSQEgYqQUQ,2129
|
|
154
|
+
arekit/contrib/networks/input/embedding/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
155
|
+
arekit/contrib/networks/input/embedding/matrix.py,sha256=BFn7eXhiqXY7c4tUfy1fzemIqRnZYx_GiEv873QnIEs,952
|
|
156
|
+
arekit/contrib/networks/input/embedding/offsets.py,sha256=HrBfbFD03o_Y0ZvEGTd-FRxmPx55_5vqItTranMFy88,1313
|
|
157
|
+
arekit/contrib/networks/input/formatters/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
158
|
+
arekit/contrib/networks/input/formatters/pos_mapper.py,sha256=yftPKYU7noVb_q0KAflHf7bqjuUXt5siIgbnwMEoWrw,773
|
|
159
|
+
arekit/contrib/networks/input/providers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
160
|
+
arekit/contrib/networks/input/providers/sample.py,sha256=MHtXhhBD-kM0yzTACTbY14KMPIuhiLgUKEYXfhvumfo,5445
|
|
161
|
+
arekit/contrib/networks/input/providers/term_connotation.py,sha256=Q90pVN4hQgYAk3oBSCPYc6_1xQUQE1b6ksiU_k8frcM,1157
|
|
162
|
+
arekit/contrib/networks/input/providers/text.py,sha256=kucezKm6Ilmy5wuM2jUP5xk9zh1K1Pf8KcMd1prrp8k,917
|
|
163
|
+
arekit/contrib/prompt/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
164
|
+
arekit/contrib/prompt/sample.py,sha256=MxpbDR0ww7WmdtuPu74B8R6QKVXeuzO0CKGOJIYwbRk,3164
|
|
165
|
+
arekit/contrib/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
166
|
+
arekit/contrib/utils/serializer.py,sha256=D9LJ2ZXeVx3YntV-HqEnt32xW-s4GauwD97XRVlqr0g,1626
|
|
167
|
+
arekit/contrib/utils/bert/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
168
|
+
arekit/contrib/utils/bert/samplers.py,sha256=ZVe3rbUAH0Jw1xR_yHE1DoUJf3CI0pDgbBQQzlLWevc,989
|
|
169
|
+
arekit/contrib/utils/data/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
170
|
+
arekit/contrib/utils/data/contents/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
171
|
+
arekit/contrib/utils/data/contents/opinions.py,sha256=MSV7NytEe15adKhhHCq5KiCj6ZBq31nV-u2rcSfFCgE,1738
|
|
172
|
+
arekit/contrib/utils/data/doc_provider/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
173
|
+
arekit/contrib/utils/data/doc_provider/dict_based.py,sha256=zUOiiIbj5zby4xqMb0m9N-a6enavJJ7wFmPaGErykWU,371
|
|
174
|
+
arekit/contrib/utils/data/doc_provider/dir_based.py,sha256=FTw3kLV_CYtPoUoHl39IrP6RjLvTecCno9May95jVXw,1916
|
|
175
|
+
arekit/contrib/utils/data/readers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
176
|
+
arekit/contrib/utils/data/readers/base.py,sha256=zAsZLX5ng0_gb_ysL6wQchptmBHlNgqgQilw295Y5Aw,153
|
|
177
|
+
arekit/contrib/utils/data/readers/csv_pd.py,sha256=Ym49j04Z-_WQN-7xJMiiN1y2TIMnMDtPxy5h0mT3WBQ,1383
|
|
178
|
+
arekit/contrib/utils/data/readers/jsonl.py,sha256=c2bHwnTfNEwb1c8B9fRwaQyeze5x3nOd2UXXAp4MbxQ,426
|
|
179
|
+
arekit/contrib/utils/data/readers/sqlite.py,sha256=U1138XNCIwqycNivxwzwIUnowj3jDkP4M6J_Kvyedbc,416
|
|
180
|
+
arekit/contrib/utils/data/service/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
181
|
+
arekit/contrib/utils/data/service/balance.py,sha256=PgA5B6qSPmt8ITPLsQuCkniE8-u2NO_eQ2m-U9Akh98,1547
|
|
182
|
+
arekit/contrib/utils/data/storages/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
183
|
+
arekit/contrib/utils/data/storages/jsonl_based.py,sha256=Oj5u8aW_UtVDSDxMpIQsgMlZlV-KBD0qVHPVVT3m8nA,450
|
|
184
|
+
arekit/contrib/utils/data/storages/pandas_based.py,sha256=m8z34tO_7NupYd_zQ4L1miTXJQkmMMB90zPFqEeYCNs,4301
|
|
185
|
+
arekit/contrib/utils/data/storages/row_cache.py,sha256=V1InYIqRf5WMWV_JndHNH9JzAjFS3ZL38f4_pDPLo_8,1985
|
|
186
|
+
arekit/contrib/utils/data/storages/sqlite_based.py,sha256=ARwVisVbPKBap_mVdpvTpp28iXgJbCJ3dAj41UYu03Q,609
|
|
187
|
+
arekit/contrib/utils/data/writers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
188
|
+
arekit/contrib/utils/data/writers/base.py,sha256=JLwf5WVl_U319sdMev8YOn4OoCcrgNIUZtrOuG1JLjI,766
|
|
189
|
+
arekit/contrib/utils/data/writers/csv_native.py,sha256=7fPxYeu9YDK8Cvjp1n-sbKT63ZuhDIEv3VwghHuKk5k,2252
|
|
190
|
+
arekit/contrib/utils/data/writers/csv_pd.py,sha256=WhBjDJCHUBy_TabngMF42Qicx0ye8xIus0m6c7qotto,1330
|
|
191
|
+
arekit/contrib/utils/data/writers/json_opennre.py,sha256=EkhXmONgtMe7A9VKrs9ElFHc8RoMumjFbkKfwuOVOoU,5067
|
|
192
|
+
arekit/contrib/utils/data/writers/sqlite_native.py,sha256=MnbLU8iPvYvpYgEbOXhBKH_G8DJs0W9iSuhr_TPKBAQ,4601
|
|
193
|
+
arekit/contrib/utils/embeddings/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
194
|
+
arekit/contrib/utils/embeddings/rusvectores.py,sha256=WA0HejE2U5kgeBvh4_vty2QzoAkFXiMk94BK8FHxoxw,1931
|
|
195
|
+
arekit/contrib/utils/embeddings/tokens.py,sha256=z3lJ30JTX9zvZtPgzRl3yANECmuA1qboMDTcJsr_4E4,872
|
|
196
|
+
arekit/contrib/utils/entities/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
197
|
+
arekit/contrib/utils/entities/filter.py,sha256=aHTExIMFaMdy4QL8iYE23eiby3qLImAakXR6gNqG6fs,145
|
|
198
|
+
arekit/contrib/utils/entities/formatters/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
199
|
+
arekit/contrib/utils/entities/formatters/str_display.py,sha256=N8igv7EVaTFayvLXkyBGtm67KwHaeP-M-L8d7oqBG9Q,401
|
|
200
|
+
arekit/contrib/utils/entities/formatters/str_simple_sharp_prefixed_fmt.py,sha256=rEUIma9O3kOBWIguGtJ69JH-00Dhm0vUBOd5yNcKweY,653
|
|
201
|
+
arekit/contrib/utils/io_utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
202
|
+
arekit/contrib/utils/io_utils/embedding.py,sha256=cBDRv_1LROJ262QaL3QVfGt2W9EvBfbh83oL41PJn60,2543
|
|
203
|
+
arekit/contrib/utils/io_utils/utils.py,sha256=310SIJTsNLn2OZrGPer9W4ZP52PHkjBK3zsyqxVs3h0,537
|
|
204
|
+
arekit/contrib/utils/np_utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
205
|
+
arekit/contrib/utils/np_utils/embedding.py,sha256=G7Ls_ClzbskLLy-opRcVzQlfUfhdwbqoXgk0zoGrmHM,798
|
|
206
|
+
arekit/contrib/utils/np_utils/npz_utils.py,sha256=XoUHNmOlcr2X674R1xKGUJitEpFCIBJ8DOpNEPhtJFk,234
|
|
207
|
+
arekit/contrib/utils/np_utils/vocab.py,sha256=FsS18chMLU4WfMeGwBbvmfB5Qmoj5tZTOo-4zqWPm3Q,580
|
|
208
|
+
arekit/contrib/utils/pipelines/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
209
|
+
arekit/contrib/utils/pipelines/opinion_collections.py,sha256=y9-klVJGCN9mPd7t1ECllAiCnAb3MKVXC1PnYddp5sQ,3195
|
|
210
|
+
arekit/contrib/utils/pipelines/items/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
211
|
+
arekit/contrib/utils/pipelines/items/sampling/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
212
|
+
arekit/contrib/utils/pipelines/items/sampling/base.py,sha256=-H-r5GIi9ee7CxxpJs8KnHC91l7Y1dYaWPR_OK17E8g,4245
|
|
213
|
+
arekit/contrib/utils/pipelines/items/sampling/networks.py,sha256=E0EjQ4KRd3oYLFVbie05XJa00JqR26eLRoMrDnuQySQ,2653
|
|
214
|
+
arekit/contrib/utils/pipelines/items/text/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
215
|
+
arekit/contrib/utils/pipelines/items/text/entities_default.py,sha256=vNx5ir2mf7a1gg_OeqUsf_p1Fu2k7QIFxVpe-CuwZ84,727
|
|
216
|
+
arekit/contrib/utils/pipelines/items/text/frames.py,sha256=pZQybYfgEQB1DM3PtmsgrtB2Xl0HejmP4rhT0nR_YKE,2586
|
|
217
|
+
arekit/contrib/utils/pipelines/items/text/frames_lemmatized.py,sha256=4rIAAB-_GeWNbu5KyaDm5qttH4o2Bzpdvy-D9YR5bRk,1776
|
|
218
|
+
arekit/contrib/utils/pipelines/items/text/frames_negation.py,sha256=AdoY7lqSAT0RApp0DbqeI7xxyRVF6NPJLAfR59lsIec,1303
|
|
219
|
+
arekit/contrib/utils/pipelines/items/text/tokenizer.py,sha256=FmV5flziDLCNttxrUzRr-FGCcKK6venZEcZ-KwcqwNE,3147
|
|
220
|
+
arekit/contrib/utils/pipelines/items/text/translator.py,sha256=TkXVyZYRbS8P4S2Pnn2GzQMRa-9ba-nS4_zXvsf16vU,5365
|
|
221
|
+
arekit/contrib/utils/pipelines/text_opinion/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
222
|
+
arekit/contrib/utils/pipelines/text_opinion/extraction.py,sha256=QoK0-dfMl27uOOfUhvnbvzYX23jCpZbm97Qs27Na7VA,4133
|
|
223
|
+
arekit/contrib/utils/pipelines/text_opinion/annot/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
224
|
+
arekit/contrib/utils/pipelines/text_opinion/annot/algo_based.py,sha256=bwS-UR2x3rgp_xqnf6z-73T-eIZE_kltRSGYxgd_WpU,1751
|
|
225
|
+
arekit/contrib/utils/pipelines/text_opinion/filters/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
226
|
+
arekit/contrib/utils/pipelines/text_opinion/filters/base.py,sha256=GnKnJB4MKqiMSJny3a9Na7l7Csm7abbt6GADBCY18Mw,143
|
|
227
|
+
arekit/contrib/utils/pipelines/text_opinion/filters/distance_based.py,sha256=3Pjq4IJJMT7dYpK266lN66WQJUnQO3P0rG6wcAvJOOA,649
|
|
228
|
+
arekit/contrib/utils/pipelines/text_opinion/filters/entity_based.py,sha256=pdWFJaKh4kKIsUuBNp3WNy5Rj80CjWEy2wp-0axFnrI,1254
|
|
229
|
+
arekit/contrib/utils/pipelines/text_opinion/filters/limitation.py,sha256=4AFS5zhocJuYphGO2ZMWmYTtIhGItKDTkB0--AmjgnA,1151
|
|
230
|
+
arekit/contrib/utils/processing/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
231
|
+
arekit/contrib/utils/processing/languages/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
232
|
+
arekit/contrib/utils/processing/languages/mods.py,sha256=OERKcglI4pJEIQxlWMYuYg_uHnNWVpP-mqhnFsQbY7A,263
|
|
233
|
+
arekit/contrib/utils/processing/languages/pos.py,sha256=etC3ueLGgZorgKEc3TWpeIuv46vs392xPi1lM31Cg0s,278
|
|
234
|
+
arekit/contrib/utils/processing/languages/ru/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
235
|
+
arekit/contrib/utils/processing/languages/ru/cases.py,sha256=27sIQsU5_0aT4EVuPtKCK-tfi1Q0TH11phV1x5hIzLs,1492
|
|
236
|
+
arekit/contrib/utils/processing/languages/ru/constants.py,sha256=f4z7ivILKqYju9rkagi9_FIvPm1FnWHbXgxigyb3zm4,147
|
|
237
|
+
arekit/contrib/utils/processing/languages/ru/mods.py,sha256=j4xKgRbCC834i9n-RyU607v9Qph9sP_B31WLrKFByRk,343
|
|
238
|
+
arekit/contrib/utils/processing/languages/ru/number.py,sha256=kHyP0Lp_iHVDwkbN7tkZUJpGFQ40QRm-j_1g0dFU-sM,401
|
|
239
|
+
arekit/contrib/utils/processing/languages/ru/pos_service.py,sha256=BWHLPybjmTVNXjJM2QmrZlEDcl7nZY7keLmXZcG_PFM,1125
|
|
240
|
+
arekit/contrib/utils/processing/lemmatization/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
241
|
+
arekit/contrib/utils/processing/lemmatization/mystem.py,sha256=_FRqEGWUlgAbhSJ-dsyoFg_qbbUxePDSAOWWuveRqCo,1340
|
|
242
|
+
arekit/contrib/utils/processing/pos/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
243
|
+
arekit/contrib/utils/processing/pos/base.py,sha256=CrMr3u6lRs2NoV7uch5HZgV71A-0M-pwJfwXjfudHBY,259
|
|
244
|
+
arekit/contrib/utils/processing/pos/mystem_wrap.py,sha256=C9AnRIAZL4e8DMNte9LDuvxS-cbEQpo2AYdQtP9uIJ4,4336
|
|
245
|
+
arekit/contrib/utils/processing/pos/russian.py,sha256=POCo6xKmK7vAEq-kWlODg611kLOtOj37OVc3L_GWL-8,229
|
|
246
|
+
arekit/contrib/utils/processing/text/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
247
|
+
arekit/contrib/utils/processing/text/tokens.py,sha256=_3u5Oy1MG_QfHH8wi0x0nA588qSaCp3Wmnp2SzMWjXY,3573
|
|
248
|
+
arekit/contrib/utils/synonyms/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
249
|
+
arekit/contrib/utils/synonyms/simple.py,sha256=ST9EwuWP88FzbyV8Gi0-biTPgGOsZ7OWyaBWHL_U_eo,557
|
|
250
|
+
arekit/contrib/utils/synonyms/stemmer_based.py,sha256=q19P_XOCWN2_JrBtybAt7ToMIr1ambw4ahr0fSEEHmQ,1400
|
|
251
|
+
arekit/contrib/utils/vectorizers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
252
|
+
arekit/contrib/utils/vectorizers/bpe.py,sha256=bFS5MZytvU1L21YS5aAeb3FZl7RMjyog4lWwysvKD-8,3047
|
|
253
|
+
arekit/contrib/utils/vectorizers/random_norm.py,sha256=TL86Kz6p59lJqoLg8RwQRTvfhr0e-tiULGHhO4vhBbo,1339
|
|
254
|
+
arekit-0.25.0.data/data/logo.png,sha256=S8OZ4MGGD72Pf5co7ngYbXKkJH1EUhbErUXv1ZjUWiU,45718
|
|
255
|
+
arekit-0.25.0.dist-info/LICENSE,sha256=JO9tIbxAvhwDv73cX-gUStr9yA-TY7wusUeLHRx7JuY,1076
|
|
256
|
+
arekit-0.25.0.dist-info/METADATA,sha256=4DSUy6aTidHG9jFR7jMwQe3uJGER-e8E9vU0q2G20Uo,3145
|
|
257
|
+
arekit-0.25.0.dist-info/WHEEL,sha256=eOLhNAGa2EW3wWl_TU484h7q1UNgy0JXjjoqKoxAAQc,92
|
|
258
|
+
arekit-0.25.0.dist-info/top_level.txt,sha256=4pXuFE8IE0lBsqi6ZsR7figx0H939VIX4_-76YIbkOQ,7
|
|
259
|
+
arekit-0.25.0.dist-info/RECORD,,
|