arekit 0.25.0__py3-none-any.whl → 0.25.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- arekit/common/context/terms_mapper.py +5 -2
- arekit/common/data/input/providers/rows/samples.py +8 -12
- arekit/common/data/input/providers/sample/cropped.py +4 -3
- arekit/common/data/input/terms_mapper.py +4 -8
- arekit/common/data/storages/base.py +4 -18
- arekit/common/docs/entities_grouping.py +5 -3
- arekit/common/docs/parsed/base.py +3 -3
- arekit/common/docs/parsed/providers/base.py +3 -5
- arekit/common/docs/parsed/providers/entity_service.py +7 -28
- arekit/common/docs/parsed/providers/opinion_pairs.py +6 -6
- arekit/common/docs/parsed/providers/text_opinion_pairs.py +4 -4
- arekit/common/docs/parsed/service.py +2 -2
- arekit/common/docs/parser.py +3 -30
- arekit/common/model/labeling/single.py +7 -3
- arekit/common/opinions/annot/algo/pair_based.py +9 -5
- arekit/common/pipeline/base.py +0 -2
- arekit/common/pipeline/batching.py +0 -3
- arekit/common/pipeline/items/base.py +1 -1
- arekit/common/utils.py +11 -8
- arekit/contrib/bert/input/providers/cropped_sample.py +2 -5
- arekit/contrib/bert/terms/mapper.py +2 -2
- arekit/contrib/prompt/sample.py +2 -6
- arekit/contrib/utils/bert/samplers.py +4 -2
- arekit/contrib/utils/data/storages/jsonl_based.py +2 -1
- arekit/contrib/utils/data/storages/row_cache.py +2 -1
- arekit/contrib/utils/data/storages/sqlite_based.py +2 -1
- arekit/contrib/utils/pipelines/text_opinion/annot/algo_based.py +8 -5
- arekit/contrib/utils/pipelines/text_opinion/extraction.py +16 -8
- {arekit-0.25.0.dist-info → arekit-0.25.2.dist-info}/METADATA +10 -8
- {arekit-0.25.0.dist-info → arekit-0.25.2.dist-info}/RECORD +34 -115
- {arekit-0.25.0.dist-info → arekit-0.25.2.dist-info}/WHEEL +1 -1
- arekit/common/data/input/repositories/__init__.py +0 -0
- arekit/common/data/input/repositories/base.py +0 -68
- arekit/common/data/input/repositories/sample.py +0 -22
- arekit/common/data/views/__init__.py +0 -0
- arekit/common/data/views/samples.py +0 -26
- arekit/common/experiment/__init__.py +0 -0
- arekit/common/experiment/api/__init__.py +0 -0
- arekit/common/experiment/api/base_samples_io.py +0 -20
- arekit/common/experiment/data_type.py +0 -17
- arekit/common/service/__init__.py +0 -0
- arekit/common/service/sqlite.py +0 -36
- arekit/contrib/networks/__init__.py +0 -0
- arekit/contrib/networks/embedding.py +0 -149
- arekit/contrib/networks/embedding_io.py +0 -18
- arekit/contrib/networks/input/__init__.py +0 -0
- arekit/contrib/networks/input/const.py +0 -6
- arekit/contrib/networks/input/ctx_serialization.py +0 -28
- arekit/contrib/networks/input/embedding/__init__.py +0 -0
- arekit/contrib/networks/input/embedding/matrix.py +0 -29
- arekit/contrib/networks/input/embedding/offsets.py +0 -55
- arekit/contrib/networks/input/formatters/__init__.py +0 -0
- arekit/contrib/networks/input/formatters/pos_mapper.py +0 -22
- arekit/contrib/networks/input/providers/__init__.py +0 -0
- arekit/contrib/networks/input/providers/sample.py +0 -129
- arekit/contrib/networks/input/providers/term_connotation.py +0 -23
- arekit/contrib/networks/input/providers/text.py +0 -24
- arekit/contrib/networks/input/rows_parser.py +0 -47
- arekit/contrib/networks/input/term_types.py +0 -13
- arekit/contrib/networks/input/terms_mapping.py +0 -60
- arekit/contrib/networks/vectorizer.py +0 -6
- arekit/contrib/utils/data/readers/__init__.py +0 -0
- arekit/contrib/utils/data/readers/base.py +0 -7
- arekit/contrib/utils/data/readers/csv_pd.py +0 -38
- arekit/contrib/utils/data/readers/jsonl.py +0 -15
- arekit/contrib/utils/data/readers/sqlite.py +0 -14
- arekit/contrib/utils/data/service/__init__.py +0 -0
- arekit/contrib/utils/data/service/balance.py +0 -50
- arekit/contrib/utils/data/storages/pandas_based.py +0 -123
- arekit/contrib/utils/data/writers/csv_native.py +0 -63
- arekit/contrib/utils/data/writers/csv_pd.py +0 -40
- arekit/contrib/utils/data/writers/json_opennre.py +0 -132
- arekit/contrib/utils/data/writers/sqlite_native.py +0 -114
- arekit/contrib/utils/embeddings/__init__.py +0 -0
- arekit/contrib/utils/embeddings/rusvectores.py +0 -58
- arekit/contrib/utils/embeddings/tokens.py +0 -30
- arekit/contrib/utils/entities/formatters/str_display.py +0 -11
- arekit/contrib/utils/io_utils/embedding.py +0 -72
- arekit/contrib/utils/np_utils/__init__.py +0 -0
- arekit/contrib/utils/np_utils/embedding.py +0 -22
- arekit/contrib/utils/np_utils/npz_utils.py +0 -13
- arekit/contrib/utils/np_utils/vocab.py +0 -20
- arekit/contrib/utils/pipelines/items/sampling/__init__.py +0 -0
- arekit/contrib/utils/pipelines/items/sampling/base.py +0 -94
- arekit/contrib/utils/pipelines/items/sampling/networks.py +0 -55
- arekit/contrib/utils/pipelines/items/text/entities_default.py +0 -23
- arekit/contrib/utils/pipelines/items/text/frames_lemmatized.py +0 -36
- arekit/contrib/utils/pipelines/items/text/frames_negation.py +0 -33
- arekit/contrib/utils/pipelines/items/text/tokenizer.py +0 -105
- arekit/contrib/utils/pipelines/items/text/translator.py +0 -136
- arekit/contrib/utils/processing/__init__.py +0 -0
- arekit/contrib/utils/processing/languages/__init__.py +0 -0
- arekit/contrib/utils/processing/languages/mods.py +0 -12
- arekit/contrib/utils/processing/languages/pos.py +0 -23
- arekit/contrib/utils/processing/languages/ru/__init__.py +0 -0
- arekit/contrib/utils/processing/languages/ru/cases.py +0 -78
- arekit/contrib/utils/processing/languages/ru/constants.py +0 -6
- arekit/contrib/utils/processing/languages/ru/mods.py +0 -13
- arekit/contrib/utils/processing/languages/ru/number.py +0 -23
- arekit/contrib/utils/processing/languages/ru/pos_service.py +0 -36
- arekit/contrib/utils/processing/lemmatization/__init__.py +0 -0
- arekit/contrib/utils/processing/lemmatization/mystem.py +0 -51
- arekit/contrib/utils/processing/pos/__init__.py +0 -0
- arekit/contrib/utils/processing/pos/base.py +0 -12
- arekit/contrib/utils/processing/pos/mystem_wrap.py +0 -134
- arekit/contrib/utils/processing/pos/russian.py +0 -10
- arekit/contrib/utils/processing/text/__init__.py +0 -0
- arekit/contrib/utils/processing/text/tokens.py +0 -127
- arekit/contrib/utils/serializer.py +0 -42
- arekit/contrib/utils/vectorizers/__init__.py +0 -0
- arekit/contrib/utils/vectorizers/bpe.py +0 -93
- arekit/contrib/utils/vectorizers/random_norm.py +0 -39
- {arekit-0.25.0.data → arekit-0.25.2.data}/data/logo.png +0 -0
- {arekit-0.25.0.dist-info → arekit-0.25.2.dist-info}/LICENSE +0 -0
- {arekit-0.25.0.dist-info → arekit-0.25.2.dist-info}/top_level.txt +0 -0
|
@@ -5,13 +5,10 @@ from arekit.contrib.bert.input.providers.text_pair import PairTextProvider
|
|
|
5
5
|
|
|
6
6
|
class CroppedBertSampleRowProvider(CroppedSampleRowProvider):
|
|
7
7
|
|
|
8
|
-
def __init__(self,
|
|
8
|
+
def __init__(self, text_b_template, text_terms_mapper, **kwargs):
|
|
9
9
|
|
|
10
10
|
text_provider = BaseSingleTextProvider(text_terms_mapper=text_terms_mapper) \
|
|
11
11
|
if text_b_template is None else PairTextProvider(text_b_prompt=text_b_template,
|
|
12
12
|
text_terms_mapper=text_terms_mapper)
|
|
13
13
|
|
|
14
|
-
super(CroppedBertSampleRowProvider, self).__init__(
|
|
15
|
-
crop_window_size=crop_window_size,
|
|
16
|
-
label_scaler=label_scaler,
|
|
17
|
-
text_provider=text_provider)
|
|
14
|
+
super(CroppedBertSampleRowProvider, self).__init__(text_provider=text_provider, **kwargs)
|
|
@@ -7,11 +7,11 @@ class BertDefaultStringTextTermsMapper(OpinionContainingTextTermsMapper):
|
|
|
7
7
|
a base class assumes to provide an orginal frame variant value.
|
|
8
8
|
"""
|
|
9
9
|
|
|
10
|
-
def __init__(self,
|
|
10
|
+
def __init__(self, word_separator=' ', **kwargs):
|
|
11
11
|
""" See https://github.com/nicolay-r/AREkit/issues/377
|
|
12
12
|
for a greater details.
|
|
13
13
|
"""
|
|
14
|
-
super(BertDefaultStringTextTermsMapper, self).__init__(
|
|
14
|
+
super(BertDefaultStringTextTermsMapper, self).__init__(**kwargs)
|
|
15
15
|
self.__word_separator = word_separator
|
|
16
16
|
|
|
17
17
|
def map_entity(self, e_ind, entity):
|
arekit/contrib/prompt/sample.py
CHANGED
|
@@ -8,7 +8,7 @@ class PromptedSampleRowProvider(CroppedSampleRowProvider):
|
|
|
8
8
|
""" Sample, enriched with the prompt technique.
|
|
9
9
|
"""
|
|
10
10
|
|
|
11
|
-
def __init__(self,
|
|
11
|
+
def __init__(self, prompt, label_fmt=None, **kwargs):
|
|
12
12
|
""" crop_window_size: int
|
|
13
13
|
crop window size for the original text.
|
|
14
14
|
prompt: str
|
|
@@ -17,12 +17,8 @@ class PromptedSampleRowProvider(CroppedSampleRowProvider):
|
|
|
17
17
|
text, s_ind, t_ind, s_val, t_val, label_uint
|
|
18
18
|
"""
|
|
19
19
|
assert(isinstance(prompt, str))
|
|
20
|
-
assert(isinstance(text_provider, BaseSingleTextProvider))
|
|
21
20
|
assert(isinstance(label_fmt, StringLabelsFormatter) or label_fmt is None)
|
|
22
|
-
|
|
23
|
-
super(PromptedSampleRowProvider, self).__init__(crop_window_size=crop_window_size,
|
|
24
|
-
label_scaler=label_scaler,
|
|
25
|
-
text_provider=text_provider)
|
|
21
|
+
super(PromptedSampleRowProvider, self).__init__(**kwargs)
|
|
26
22
|
|
|
27
23
|
self.__prompt = prompt
|
|
28
24
|
self.__labels_fmt = label_fmt
|
|
@@ -5,7 +5,7 @@ from arekit.common.data.input.terms_mapper import OpinionContainingTextTermsMapp
|
|
|
5
5
|
from arekit.contrib.bert.input.providers.text_pair import PairTextProvider
|
|
6
6
|
|
|
7
7
|
|
|
8
|
-
def create_sample_provider(label_scaler, text_terms_mapper, text_b_prompt=None):
|
|
8
|
+
def create_sample_provider(is_entity_func, label_scaler, text_terms_mapper, text_b_prompt=None):
|
|
9
9
|
assert(isinstance(text_terms_mapper, OpinionContainingTextTermsMapper))
|
|
10
10
|
|
|
11
11
|
text_provider = BaseSingleTextProvider(text_terms_mapper=text_terms_mapper) \
|
|
@@ -14,4 +14,6 @@ def create_sample_provider(label_scaler, text_terms_mapper, text_b_prompt=None):
|
|
|
14
14
|
|
|
15
15
|
label_provider = MultipleLabelProvider(label_scaler=label_scaler)
|
|
16
16
|
|
|
17
|
-
return BaseSampleRowProvider(text_provider=text_provider,
|
|
17
|
+
return BaseSampleRowProvider(text_provider=text_provider,
|
|
18
|
+
label_provider=label_provider,
|
|
19
|
+
is_entity_func=is_entity_func)
|
|
@@ -5,8 +5,9 @@ from arekit.common.data.storages.base import BaseRowsStorage
|
|
|
5
5
|
|
|
6
6
|
class JsonlBasedRowsStorage(BaseRowsStorage):
|
|
7
7
|
|
|
8
|
-
def __init__(self, rows):
|
|
8
|
+
def __init__(self, rows, **kwargs):
|
|
9
9
|
assert(isinstance(rows, list))
|
|
10
|
+
super(JsonlBasedRowsStorage, self).__init__(**kwargs)
|
|
10
11
|
self.__rows = rows
|
|
11
12
|
|
|
12
13
|
def _iter_rows(self):
|
|
@@ -6,13 +6,14 @@ class RowCacheStorage(BaseRowsStorage):
|
|
|
6
6
|
""" Row Caching storage kernel, based on python dictionary.
|
|
7
7
|
"""
|
|
8
8
|
|
|
9
|
-
def __init__(self, force_collect_columns=None):
|
|
9
|
+
def __init__(self, force_collect_columns=None, **kwargs):
|
|
10
10
|
""" This is a particular/related solution for the following issue:
|
|
11
11
|
https://github.com/nicolay-r/AREkit/issues/464
|
|
12
12
|
force_collect_columns: list
|
|
13
13
|
columns that supposed to be additionally considered in output.
|
|
14
14
|
"""
|
|
15
15
|
assert(isinstance(force_collect_columns, list) or force_collect_columns is None)
|
|
16
|
+
super(RowCacheStorage, self).__init__(**kwargs)
|
|
16
17
|
self.__f = None
|
|
17
18
|
self.__row_cache = {}
|
|
18
19
|
self.__column_names = []
|
|
@@ -4,7 +4,8 @@ from arekit.common.data.storages.base import BaseRowsStorage
|
|
|
4
4
|
|
|
5
5
|
class SQliteBasedRowsStorage(BaseRowsStorage):
|
|
6
6
|
|
|
7
|
-
def __init__(self, path, table_name):
|
|
7
|
+
def __init__(self, path, table_name, **kwargs):
|
|
8
|
+
super(SQliteBasedRowsStorage, self).__init__(**kwargs)
|
|
8
9
|
self.__path = path
|
|
9
10
|
self.__table_name = table_name
|
|
10
11
|
self.__conn = None
|
|
@@ -9,7 +9,7 @@ class AlgorithmBasedTextOpinionAnnotator(AlgorithmBasedOpinionAnnotator):
|
|
|
9
9
|
"""
|
|
10
10
|
|
|
11
11
|
def __init__(self, value_to_group_id_func, annot_algo, create_empty_collection_func,
|
|
12
|
-
get_doc_existed_opinions_func=None):
|
|
12
|
+
is_entity_func, get_doc_existed_opinions_func=None):
|
|
13
13
|
""" get_doc_existed_opinions_func: func or None
|
|
14
14
|
function that provides existed opinions for a document;
|
|
15
15
|
if None, then we consider an absence of the existed document-level opinions.
|
|
@@ -20,14 +20,17 @@ class AlgorithmBasedTextOpinionAnnotator(AlgorithmBasedOpinionAnnotator):
|
|
|
20
20
|
create_empty_collection_func=create_empty_collection_func,
|
|
21
21
|
get_doc_existed_opinions_func=get_doc_existed_opinions_func)
|
|
22
22
|
self.__value_to_group_id_func = value_to_group_id_func
|
|
23
|
+
self.__is_entity_func = is_entity_func
|
|
23
24
|
|
|
24
25
|
def __create_service(self, parsed_doc):
|
|
25
|
-
return ParsedDocumentService(
|
|
26
|
-
|
|
27
|
-
|
|
26
|
+
return ParsedDocumentService(
|
|
27
|
+
parsed_doc=parsed_doc,
|
|
28
|
+
providers=[TextOpinionPairsProvider(self.__value_to_group_id_func, entity_index_func=None)],
|
|
29
|
+
is_entity_func=self.__is_entity_func
|
|
30
|
+
)
|
|
28
31
|
|
|
29
32
|
def annotate_collection(self, parsed_doc):
|
|
30
|
-
service = self.__create_service(parsed_doc)
|
|
33
|
+
service = self.__create_service(parsed_doc=parsed_doc)
|
|
31
34
|
topp = service.get_provider(TextOpinionPairsProvider.NAME)
|
|
32
35
|
for opinion in super(AlgorithmBasedTextOpinionAnnotator, self).annotate_collection(parsed_doc):
|
|
33
36
|
for text_opinion in topp.iter_from_opinion(opinion):
|
|
@@ -12,10 +12,11 @@ from arekit.contrib.utils.pipelines.text_opinion.filters.base import TextOpinion
|
|
|
12
12
|
from arekit.contrib.utils.pipelines.text_opinion.filters.limitation import FrameworkLimitationsTextOpinionFilter
|
|
13
13
|
|
|
14
14
|
|
|
15
|
-
def __iter_text_opinion_linkages(parsed_doc, annotators,
|
|
15
|
+
def __iter_text_opinion_linkages(parsed_doc, annotators,
|
|
16
|
+
is_entity_func, entity_index_func,
|
|
16
17
|
text_opinion_filters, use_meta):
|
|
17
18
|
""" use_meta: bool
|
|
18
|
-
this is mainly for
|
|
19
|
+
this is mainly for the progress-bar and other console parameters to stay up-to-date
|
|
19
20
|
with the state in the case we do not have that much output results
|
|
20
21
|
across multiple amount of documents.
|
|
21
22
|
"""
|
|
@@ -27,7 +28,9 @@ def __iter_text_opinion_linkages(parsed_doc, annotators, entity_index_func,
|
|
|
27
28
|
def __to_id(text_opinion):
|
|
28
29
|
return "{}_{}".format(text_opinion.SourceId, text_opinion.TargetId)
|
|
29
30
|
|
|
30
|
-
service = ParsedDocumentService(parsed_doc=parsed_doc,
|
|
31
|
+
service = ParsedDocumentService(parsed_doc=parsed_doc,
|
|
32
|
+
providers=[EntityServiceProvider(entity_index_func=entity_index_func)],
|
|
33
|
+
is_entity_func=is_entity_func)
|
|
31
34
|
esp = service.get_provider(EntityServiceProvider.NAME)
|
|
32
35
|
|
|
33
36
|
predefined = set()
|
|
@@ -62,12 +65,16 @@ def __iter_text_opinion_linkages(parsed_doc, annotators, entity_index_func,
|
|
|
62
65
|
yield MetaEmptyLinkedDataWrapper(doc_id=parsed_doc.RelatedDocID)
|
|
63
66
|
|
|
64
67
|
|
|
65
|
-
def text_opinion_extraction_pipeline(pipeline_items, get_doc_by_id_func, annotators,
|
|
68
|
+
def text_opinion_extraction_pipeline(pipeline_items, get_doc_by_id_func, annotators,
|
|
69
|
+
is_entity_func, entity_index_func, batch_size,
|
|
66
70
|
text_opinion_filters=None, use_meta_between_docs=True):
|
|
67
71
|
assert(callable(get_doc_by_id_func))
|
|
72
|
+
assert(callable(is_entity_func))
|
|
73
|
+
assert(callable(entity_index_func))
|
|
68
74
|
assert(isinstance(annotators, list))
|
|
69
75
|
assert(isinstance(text_opinion_filters, list) or text_opinion_filters is None)
|
|
70
76
|
assert(isinstance(use_meta_between_docs, bool))
|
|
77
|
+
assert(isinstance(batch_size, int) and batch_size > 0)
|
|
71
78
|
|
|
72
79
|
extra_filters = [] if text_opinion_filters is None else text_opinion_filters
|
|
73
80
|
actual_text_opinion_filters = [FrameworkLimitationsTextOpinionFilter()] + extra_filters
|
|
@@ -77,14 +84,15 @@ def text_opinion_extraction_pipeline(pipeline_items, get_doc_by_id_func, annotat
|
|
|
77
84
|
MapPipelineItem(map_func=lambda doc_id: get_doc_by_id_func(doc_id)),
|
|
78
85
|
|
|
79
86
|
# (doc, ppl_ctx) -> (parsed_doc)
|
|
80
|
-
MapNestedPipelineItem(map_func=lambda doc, ppl_ctx: DocumentParsers.
|
|
81
|
-
doc=doc, pipeline_items=pipeline_items, parent_ppl_ctx=ppl_ctx)),
|
|
87
|
+
MapNestedPipelineItem(map_func=lambda doc, ppl_ctx: DocumentParsers.parse_batch(
|
|
88
|
+
doc=doc, pipeline_items=pipeline_items, parent_ppl_ctx=ppl_ctx, batch_size=batch_size)),
|
|
82
89
|
|
|
83
90
|
# (parsed_doc) -> (text_opinions)
|
|
84
91
|
MapPipelineItem(map_func=lambda parsed_doc: __iter_text_opinion_linkages(
|
|
85
|
-
annotators=annotators, parsed_doc=parsed_doc,
|
|
92
|
+
annotators=annotators, parsed_doc=parsed_doc,
|
|
93
|
+
is_entity_func=is_entity_func, entity_index_func=entity_index_func,
|
|
86
94
|
text_opinion_filters=actual_text_opinion_filters, use_meta=use_meta_between_docs)),
|
|
87
95
|
|
|
88
96
|
# linkages[] -> linkages
|
|
89
97
|
FlattenIterPipelineItem()
|
|
90
|
-
]
|
|
98
|
+
]
|
|
@@ -1,12 +1,13 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: arekit
|
|
3
|
-
Version: 0.25.0
|
|
3
|
+
Version: 0.25.2
|
|
4
4
|
Summary: Document level Attitude and Relation Extraction toolkit (AREkit) for sampling and prompting mass-media news into datasets for ML-model training
|
|
5
5
|
Home-page: https://github.com/nicolay-r/AREkit
|
|
6
6
|
Author: Nicolay Rusnachenko
|
|
7
7
|
Author-email: rusnicolay@gmail.com
|
|
8
8
|
License: MIT License
|
|
9
9
|
Keywords: natural language processing,relation extraction,sentiment analysis
|
|
10
|
+
Platform: UNKNOWN
|
|
10
11
|
Classifier: Programming Language :: Python
|
|
11
12
|
Classifier: Programming Language :: Python :: 3.6
|
|
12
13
|
Classifier: Topic :: Software Development :: Libraries :: Python Modules
|
|
@@ -14,15 +15,14 @@ Classifier: Topic :: Scientific/Engineering :: Information Analysis
|
|
|
14
15
|
Classifier: Topic :: Text Processing :: Linguistic
|
|
15
16
|
Requires-Python: >=3.6
|
|
16
17
|
Description-Content-Type: text/markdown
|
|
17
|
-
|
|
18
|
+
Requires-Dist: enum34 (==1.1.10)
|
|
18
19
|
Requires-Dist: tqdm
|
|
19
|
-
Requires-Dist: enum34==1.1.10
|
|
20
|
-
Requires-Dist: numpy>=1.14.5
|
|
21
|
-
Requires-Dist: pymystem3==0.2.0
|
|
22
20
|
|
|
23
|
-
# AREkit 0.25.0
|
|
21
|
+
# AREkit 0.25.2
|
|
24
22
|
|
|
25
23
|

|
|
24
|
+
[](https://pypistats.org/packages/arekit)
|
|
25
|
+
|
|
26
26
|
|
|
27
27
|
<p align="center">
|
|
28
28
|
<img src="logo.png"/>
|
|
@@ -34,7 +34,7 @@ is a python toolkit, devoted to document level Attitude and Relation Extraction
|
|
|
34
34
|
## Description
|
|
35
35
|
|
|
36
36
|
|
|
37
|
-
This toolkit aims at memory-effective data processing in Relation Extraction (RE) related tasks.
|
|
37
|
+
This toolkit aims at memory-effective data processing in [Relation Extraction (RE)](https://nlpprogress.com/english/relationship_extraction.html) related tasks.
|
|
38
38
|
|
|
39
39
|
<p align="center">
|
|
40
40
|
<img src="docs/arekit-pipeline-concept.png"/>
|
|
@@ -60,7 +60,7 @@ for sentence level relations preparation (dubbed as contexts);
|
|
|
60
60
|
## Installation
|
|
61
61
|
|
|
62
62
|
```bash
|
|
63
|
-
pip install git+https://github.com/nicolay-r/AREkit.git@0.25.0-rc
|
|
63
|
+
pip install git+https://github.com/nicolay-r/AREkit.git@0.25.2-rc
|
|
64
64
|
```
|
|
65
65
|
|
|
66
66
|
## Usage
|
|
@@ -80,3 +80,5 @@ if you use or extend our work, please cite as follows:
|
|
|
80
80
|
organization={Springer}
|
|
81
81
|
}
|
|
82
82
|
```
|
|
83
|
+
|
|
84
|
+
|
|
@@ -2,9 +2,9 @@ arekit/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
|
2
2
|
arekit/common/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
3
3
|
arekit/common/bound.py,sha256=lPpHY6ct_CU9e4qXeYjhJfWbTj6Sb_NVtZ1CJheQPNE,1402
|
|
4
4
|
arekit/common/log_utils.py,sha256=OfEQxbExkuRAl9dxlgFEqcFhI4HHoMYT7WE8ud0IPOM,924
|
|
5
|
-
arekit/common/utils.py,sha256=
|
|
5
|
+
arekit/common/utils.py,sha256=N061ENJJgvsB338Q9cixc6RWyuikSPQq4Tc8mmgwy9s,2659
|
|
6
6
|
arekit/common/context/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
7
|
-
arekit/common/context/terms_mapper.py,sha256=
|
|
7
|
+
arekit/common/context/terms_mapper.py,sha256=tBs_dMettLjVrqwPwTMZg3Pgxo6PZJpu-Qh6ZOWWFJA,1532
|
|
8
8
|
arekit/common/context/token.py,sha256=CpWAlvprUnJfCtYvO8lwdfU_ofSKAOGOudXTwppyzSk,459
|
|
9
9
|
arekit/common/data/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
10
10
|
arekit/common/data/const.py,sha256=J74zim3CGJlLJp-AVn5z9TTuBfmttjiM_8sRW1Pc-iE,457
|
|
@@ -13,7 +13,7 @@ arekit/common/data/rows_fmt.py,sha256=klq9HdzSnhbRBhOw7O4ctp3PZ5L6ZVy-0eIV2vLLYY
|
|
|
13
13
|
arekit/common/data/rows_parser.py,sha256=qYSEETvhX_0_JuAqm0bjK_V28_53qq7OY9JAnBdRC78,1513
|
|
14
14
|
arekit/common/data/input/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
15
15
|
arekit/common/data/input/sample.py,sha256=6JeGxsLbEUXVKPWA1hIlkTDNOaYg4bHCJWw0ULrLByg,2143
|
|
16
|
-
arekit/common/data/input/terms_mapper.py,sha256=
|
|
16
|
+
arekit/common/data/input/terms_mapper.py,sha256=pOD8lGsdM-23maXr9nlHM1QMJ3hsx_5HGe6X3aQcq6k,3133
|
|
17
17
|
arekit/common/data/input/providers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
18
18
|
arekit/common/data/input/providers/const.py,sha256=GDvPkgP7hllHW3QiueMBQgQyu2CtNFI4JYNNja2Im6Q,187
|
|
19
19
|
arekit/common/data/input/providers/contents.py,sha256=jT1LJE_5Igw5H2e1jKsWWciHSbPVg649phT177SzhEA,261
|
|
@@ -30,43 +30,34 @@ arekit/common/data/input/providers/label/binary.py,sha256=jPD6Jn8DYMrdI3jN8ueoWv
|
|
|
30
30
|
arekit/common/data/input/providers/label/multiple.py,sha256=HWbHF_CwwbiLQbYm5dgvnXAm0b6tJOyFYFEUBxuWAqI,492
|
|
31
31
|
arekit/common/data/input/providers/rows/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
32
32
|
arekit/common/data/input/providers/rows/base.py,sha256=syH7ZEW3Agwfb1IR0G7n_Amy3Kkg0EZk2V7kH3r7ADg,2517
|
|
33
|
-
arekit/common/data/input/providers/rows/samples.py,sha256=
|
|
33
|
+
arekit/common/data/input/providers/rows/samples.py,sha256=iUBmKTnevAyfXDb4d6_Wntfw59wWASqSteXOhD5ez64,9334
|
|
34
34
|
arekit/common/data/input/providers/sample/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
35
|
-
arekit/common/data/input/providers/sample/cropped.py,sha256=
|
|
35
|
+
arekit/common/data/input/providers/sample/cropped.py,sha256=RSoDIoqIodANBW7zmj91ltgw4eYGISCWfl6zLuQXwFM,1831
|
|
36
36
|
arekit/common/data/input/providers/text/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
37
37
|
arekit/common/data/input/providers/text/single.py,sha256=vm3sShIYZcmses-hmZX9cOfveWXCYGwvKLgQ0qs3VXQ,1604
|
|
38
|
-
arekit/common/data/input/repositories/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
39
|
-
arekit/common/data/input/repositories/base.py,sha256=4DmLVORc85gu6bxtXVZgxi176NxnIaqHz2tVebMyGZ8,2557
|
|
40
|
-
arekit/common/data/input/repositories/sample.py,sha256=LAdpaA1N_nq1iInLwkWQVvL6HGH64JYWSJ9tywU0llY,784
|
|
41
38
|
arekit/common/data/storages/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
42
|
-
arekit/common/data/storages/base.py,sha256=
|
|
43
|
-
arekit/common/data/views/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
44
|
-
arekit/common/data/views/samples.py,sha256=LDqUDqArGt90ujRB4kDFgDHLmR2_AQoUnzhxpXYWYaM,882
|
|
39
|
+
arekit/common/data/storages/base.py,sha256=xMMfHhG68ZraERLbipCN_OhqpLBSDq_S56qAtxGsU7Y,2595
|
|
45
40
|
arekit/common/docs/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
46
41
|
arekit/common/docs/base.py,sha256=uXUOtpR9BEsDBfDHg4eLqOjfSVOV_o9VPii3nSxLZuY,734
|
|
47
|
-
arekit/common/docs/entities_grouping.py,sha256=
|
|
42
|
+
arekit/common/docs/entities_grouping.py,sha256=9Xr5NsrWD9_jjKLFE7HOqjkOibzjz840ef04CekkXNU,765
|
|
48
43
|
arekit/common/docs/entity.py,sha256=TxrZMdIEgjk-PgCyskCkVis2KAw_M7vTBp3ppP6G05M,662
|
|
49
|
-
arekit/common/docs/parser.py,sha256=
|
|
44
|
+
arekit/common/docs/parser.py,sha256=dzWjpbbYt-C9UU9sSy_Holnm0kQxJqtz1_6va6kS_L4,1780
|
|
50
45
|
arekit/common/docs/sentence.py,sha256=nZCCFj2yk71POoXCBfEMN3pteM2qQdj60eEzxMVY_3k,302
|
|
51
46
|
arekit/common/docs/parsed/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
52
|
-
arekit/common/docs/parsed/base.py,sha256=
|
|
53
|
-
arekit/common/docs/parsed/service.py,sha256=
|
|
47
|
+
arekit/common/docs/parsed/base.py,sha256=e43kQyxeO-eaPKr3-5SyZ4N33QIDDePTE_CGmEliO7c,3168
|
|
48
|
+
arekit/common/docs/parsed/service.py,sha256=k_4k9EQ7iFq97bvAZHz6dtxCltiJQMd3Suv5W_t7MBE,1076
|
|
54
49
|
arekit/common/docs/parsed/term_position.py,sha256=H9eQQeanLxwP6og30TQUnpcXymGEPwXClRpaE8VnpLs,1040
|
|
55
50
|
arekit/common/docs/parsed/providers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
56
|
-
arekit/common/docs/parsed/providers/base.py,sha256=
|
|
51
|
+
arekit/common/docs/parsed/providers/base.py,sha256=9MPqxC8mTD4naXH_AoOH0bIPNR7wR9GkOL-Nm2D6Kdo,2543
|
|
57
52
|
arekit/common/docs/parsed/providers/base_pairs.py,sha256=RDYjspkENPQU2pn7Jp5mFrL9566eVWgXMEzWBQlMdRo,2195
|
|
58
|
-
arekit/common/docs/parsed/providers/entity_service.py,sha256=
|
|
59
|
-
arekit/common/docs/parsed/providers/opinion_pairs.py,sha256=
|
|
60
|
-
arekit/common/docs/parsed/providers/text_opinion_pairs.py,sha256=
|
|
53
|
+
arekit/common/docs/parsed/providers/entity_service.py,sha256=An_urYXU4r1PKIUNfhlGCjK6UNLwr3EkebkiaodBsRg,5895
|
|
54
|
+
arekit/common/docs/parsed/providers/opinion_pairs.py,sha256=dSd698VSbVefT0VbuQehaErquFixBfs42OAdX3BJH5M,693
|
|
55
|
+
arekit/common/docs/parsed/providers/text_opinion_pairs.py,sha256=MK1-m2_LJgjeis6AvY1hwT2N8rqHRCpIp7oWqXzgk9I,3215
|
|
61
56
|
arekit/common/entities/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
62
57
|
arekit/common/entities/base.py,sha256=kpJFo4pCRVBQX6T8PibLKspp9UwoIrkHDoFMTM9KkUs,1646
|
|
63
58
|
arekit/common/entities/collection.py,sha256=ySSriMYP6zzdto1mC0V9VPXmkAqyJN3mmGoqoNValGI,1931
|
|
64
59
|
arekit/common/entities/str_fmt.py,sha256=gAPeS8RXdhh8Px_u5eOAPbtLREiiyMueid0lQoa4EbQ,250
|
|
65
60
|
arekit/common/entities/types.py,sha256=pxFB0gsevdsmnduN_Ffk7_P2TRiMt6NAHyrutuKOFvs,145
|
|
66
|
-
arekit/common/experiment/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
67
|
-
arekit/common/experiment/data_type.py,sha256=DezUkfwLTf6XLYheqPiaWyx3ZwcldsJ8wDV8aNgJtDk,227
|
|
68
|
-
arekit/common/experiment/api/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
69
|
-
arekit/common/experiment/api/base_samples_io.py,sha256=SN8CnbEYaazE3SldvnENfjoNRHsTejtrg4jJfqfZLMs,516
|
|
70
61
|
arekit/common/frames/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
71
62
|
arekit/common/frames/text_variant.py,sha256=TlWR4jnuF7HW9BMHhOTKkr768V_Ub0wd0E5A4YTwD0c,875
|
|
72
63
|
arekit/common/frames/connotations/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
@@ -94,7 +85,7 @@ arekit/common/model/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSu
|
|
|
94
85
|
arekit/common/model/labeling/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
95
86
|
arekit/common/model/labeling/base.py,sha256=uj7_igCWEU23OjnzabNy0LyxoZ6S_qSfCA-ZaoL1erA,727
|
|
96
87
|
arekit/common/model/labeling/modes.py,sha256=DiwC6Aomke-ojwwpR2pcd4qgQSwmRdGCvQlyHHhN3YY,127
|
|
97
|
-
arekit/common/model/labeling/single.py,sha256=
|
|
88
|
+
arekit/common/model/labeling/single.py,sha256=HJMFffbxfmV6dKK8t-MKjD-bOx_wuWUs35zmcSWcUL0,878
|
|
98
89
|
arekit/common/opinions/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
99
90
|
arekit/common/opinions/base.py,sha256=eIx1RzsngCkpnF2Utju5i_Qp7gqF_rDIe_UDeMGXtmo,2112
|
|
100
91
|
arekit/common/opinions/collection.py,sha256=bdx-CIYYdE-DrjyB1mRTGtkLb-lrGPTSLl25xv5EHnM,4938
|
|
@@ -106,23 +97,21 @@ arekit/common/opinions/annot/algo_based.py,sha256=cvDGDmUoUaQ1Xcbyouxrjs0CkHRfRo
|
|
|
106
97
|
arekit/common/opinions/annot/base.py,sha256=IvwrwT8O3s6b2_R0arpMR4Uog7kuWQZUAyRP5cq_27A,382
|
|
107
98
|
arekit/common/opinions/annot/algo/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
108
99
|
arekit/common/opinions/annot/algo/base.py,sha256=ymll-4-SplCY7CLswjOZEC1vsVHIEzUP0JMYgvL8hbo,124
|
|
109
|
-
arekit/common/opinions/annot/algo/pair_based.py,sha256=
|
|
100
|
+
arekit/common/opinions/annot/algo/pair_based.py,sha256=0m0l-KEDvtARDEnl8Sr_MeEJp3yT1re_VsNAO2ZQQUM,4762
|
|
110
101
|
arekit/common/opinions/annot/algo/predefined.py,sha256=zU39SADPKnykHCNB-Bmn_0bvd6gYWWYmfgfi-68hHSs,741
|
|
111
102
|
arekit/common/pipeline/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
112
|
-
arekit/common/pipeline/base.py,sha256=
|
|
113
|
-
arekit/common/pipeline/batching.py,sha256=
|
|
103
|
+
arekit/common/pipeline/base.py,sha256=RHpZs4OT2t9wGTMUxtpBM7q-zCrNQbf3-BFDy9Bcz4M,839
|
|
104
|
+
arekit/common/pipeline/batching.py,sha256=zm1SLSJz8T9gXrBdiztzS2f7VSWb4uFcYkzEu5TIfrE,1119
|
|
114
105
|
arekit/common/pipeline/context.py,sha256=Fw25lBVakHNAXjtkdEqopR-Jh59cDKGWD2jCJxBrj7Y,1126
|
|
115
106
|
arekit/common/pipeline/conts.py,sha256=NAQNsHt1kK3HnxWv3M6yXi0c7C6Mx6ZZ6KZc0yE0eas,70
|
|
116
107
|
arekit/common/pipeline/utils.py,sha256=5VqH1LtRa4tYUbyiRvWdBmP4biFhTKq9vhr8QiRFFkY,882
|
|
117
108
|
arekit/common/pipeline/items/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
118
|
-
arekit/common/pipeline/items/base.py,sha256=
|
|
109
|
+
arekit/common/pipeline/items/base.py,sha256=15-z8ERQ0QxaRszs7sHQduU0KIBJIm8B0V2nwCva6d0,1695
|
|
119
110
|
arekit/common/pipeline/items/flatten.py,sha256=9T4jWqPGv4UDxajlM0Nm0-gvwUgqqYB8XH0efTum9a0,542
|
|
120
111
|
arekit/common/pipeline/items/handle.py,sha256=QS5Byj7-o5jmFi0ag58NE3zm2-JzVIunIgc3Pn1ij6g,578
|
|
121
112
|
arekit/common/pipeline/items/iter.py,sha256=Tk9WdUMPOq20s7jEWEpU4PmillnVtQ8nIa2ct7iw-3s,406
|
|
122
113
|
arekit/common/pipeline/items/map.py,sha256=G5wBdjaaxePD0pijrxsfpJACeP7kzj7HerjCkNIhmII,381
|
|
123
114
|
arekit/common/pipeline/items/map_nested.py,sha256=vs0GdJNr3qSF9p2yd1nWji5E1HGzECbvOfN2MqoHc2A,630
|
|
124
|
-
arekit/common/service/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
125
|
-
arekit/common/service/sqlite.py,sha256=1jLIszkcJGeT0hUos8Y0Chp3o9XRUfljG2P9q0T2_Ds,1440
|
|
126
115
|
arekit/common/synonyms/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
127
116
|
arekit/common/synonyms/base.py,sha256=YxD-CKCjlEtar1zTdumnfC3vKgbP2wLODR9mMEwbbnA,4237
|
|
128
117
|
arekit/common/synonyms/grouping.py,sha256=fi7QQbBvsTvvP2CPTesSPEsPNmGfc6euqj-HPhVvtlg,698
|
|
@@ -137,123 +126,53 @@ arekit/contrib/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
|
137
126
|
arekit/contrib/bert/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
138
127
|
arekit/contrib/bert/input/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
139
128
|
arekit/contrib/bert/input/providers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
140
|
-
arekit/contrib/bert/input/providers/cropped_sample.py,sha256=
|
|
129
|
+
arekit/contrib/bert/input/providers/cropped_sample.py,sha256=WJNAzILJDMYYhGpxg1r1F3f1X71kVV30gDhkgwH59H0,755
|
|
141
130
|
arekit/contrib/bert/input/providers/text_pair.py,sha256=_1d-he0n42y3ksj8RjJlNHgHnaQUEq0aQhUdTPRMKgg,2817
|
|
142
131
|
arekit/contrib/bert/terms/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
143
|
-
arekit/contrib/bert/terms/mapper.py,sha256=
|
|
144
|
-
arekit/contrib/networks/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
145
|
-
arekit/contrib/networks/embedding.py,sha256=lrLdB6CdmnmzwavAL6MZuLHceNM3PsZZiWLZ4BjGeXc,3845
|
|
146
|
-
arekit/contrib/networks/embedding_io.py,sha256=hV1MBr9wu9-10gQgnAzLuC-l897aB-8KNcw4h69B5VM,460
|
|
147
|
-
arekit/contrib/networks/vectorizer.py,sha256=KKV_f0GZD10ZpeYgqZfvMapJtsKa3NBddR6W_GdYqrM,155
|
|
148
|
-
arekit/contrib/networks/input/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
149
|
-
arekit/contrib/networks/input/const.py,sha256=nPeuO-G6MILNlIkGc5HzSDj_RmTwLflReF7n5htFAUI,176
|
|
150
|
-
arekit/contrib/networks/input/ctx_serialization.py,sha256=eCOw4xjp8A7Z2WFanshooS3MqSy7dbZ8ywf_DA2LZO8,982
|
|
151
|
-
arekit/contrib/networks/input/rows_parser.py,sha256=6_43LbAelveY9yEWMU5BdvQlpWwm4RDOjUEmqHuPYdE,1807
|
|
152
|
-
arekit/contrib/networks/input/term_types.py,sha256=P8E5LKegZE5ZEh4vNtC55Lu8USbQt8_Eo14op_anmvU,348
|
|
153
|
-
arekit/contrib/networks/input/terms_mapping.py,sha256=NAnuTAbj7tBTe1Ga4js2IfnUdAWlTV9fcgSQEgYqQUQ,2129
|
|
154
|
-
arekit/contrib/networks/input/embedding/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
155
|
-
arekit/contrib/networks/input/embedding/matrix.py,sha256=BFn7eXhiqXY7c4tUfy1fzemIqRnZYx_GiEv873QnIEs,952
|
|
156
|
-
arekit/contrib/networks/input/embedding/offsets.py,sha256=HrBfbFD03o_Y0ZvEGTd-FRxmPx55_5vqItTranMFy88,1313
|
|
157
|
-
arekit/contrib/networks/input/formatters/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
158
|
-
arekit/contrib/networks/input/formatters/pos_mapper.py,sha256=yftPKYU7noVb_q0KAflHf7bqjuUXt5siIgbnwMEoWrw,773
|
|
159
|
-
arekit/contrib/networks/input/providers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
160
|
-
arekit/contrib/networks/input/providers/sample.py,sha256=MHtXhhBD-kM0yzTACTbY14KMPIuhiLgUKEYXfhvumfo,5445
|
|
161
|
-
arekit/contrib/networks/input/providers/term_connotation.py,sha256=Q90pVN4hQgYAk3oBSCPYc6_1xQUQE1b6ksiU_k8frcM,1157
|
|
162
|
-
arekit/contrib/networks/input/providers/text.py,sha256=kucezKm6Ilmy5wuM2jUP5xk9zh1K1Pf8KcMd1prrp8k,917
|
|
132
|
+
arekit/contrib/bert/terms/mapper.py,sha256=YMY1JasNc___83ihiV1KqzwGyC3qs3ZNN90NmHqBEZ0,976
|
|
163
133
|
arekit/contrib/prompt/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
164
|
-
arekit/contrib/prompt/sample.py,sha256=
|
|
134
|
+
arekit/contrib/prompt/sample.py,sha256=iDwe65pUBIrk0Hjh8v7o1XesRPxCVsJojw-dcASPmWc,2867
|
|
165
135
|
arekit/contrib/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
166
|
-
arekit/contrib/utils/serializer.py,sha256=D9LJ2ZXeVx3YntV-HqEnt32xW-s4GauwD97XRVlqr0g,1626
|
|
167
136
|
arekit/contrib/utils/bert/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
168
|
-
arekit/contrib/utils/bert/samplers.py,sha256=
|
|
137
|
+
arekit/contrib/utils/bert/samplers.py,sha256=vleluRLRFzDkGRZ_ReeHsY8IJAS-TxJgoTTro4mYrs4,1102
|
|
169
138
|
arekit/contrib/utils/data/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
170
139
|
arekit/contrib/utils/data/contents/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
171
140
|
arekit/contrib/utils/data/contents/opinions.py,sha256=MSV7NytEe15adKhhHCq5KiCj6ZBq31nV-u2rcSfFCgE,1738
|
|
172
141
|
arekit/contrib/utils/data/doc_provider/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
173
142
|
arekit/contrib/utils/data/doc_provider/dict_based.py,sha256=zUOiiIbj5zby4xqMb0m9N-a6enavJJ7wFmPaGErykWU,371
|
|
174
143
|
arekit/contrib/utils/data/doc_provider/dir_based.py,sha256=FTw3kLV_CYtPoUoHl39IrP6RjLvTecCno9May95jVXw,1916
|
|
175
|
-
arekit/contrib/utils/data/readers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
176
|
-
arekit/contrib/utils/data/readers/base.py,sha256=zAsZLX5ng0_gb_ysL6wQchptmBHlNgqgQilw295Y5Aw,153
|
|
177
|
-
arekit/contrib/utils/data/readers/csv_pd.py,sha256=Ym49j04Z-_WQN-7xJMiiN1y2TIMnMDtPxy5h0mT3WBQ,1383
|
|
178
|
-
arekit/contrib/utils/data/readers/jsonl.py,sha256=c2bHwnTfNEwb1c8B9fRwaQyeze5x3nOd2UXXAp4MbxQ,426
|
|
179
|
-
arekit/contrib/utils/data/readers/sqlite.py,sha256=U1138XNCIwqycNivxwzwIUnowj3jDkP4M6J_Kvyedbc,416
|
|
180
|
-
arekit/contrib/utils/data/service/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
181
|
-
arekit/contrib/utils/data/service/balance.py,sha256=PgA5B6qSPmt8ITPLsQuCkniE8-u2NO_eQ2m-U9Akh98,1547
|
|
182
144
|
arekit/contrib/utils/data/storages/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
183
|
-
arekit/contrib/utils/data/storages/jsonl_based.py,sha256=
|
|
184
|
-
arekit/contrib/utils/data/storages/
|
|
185
|
-
arekit/contrib/utils/data/storages/
|
|
186
|
-
arekit/contrib/utils/data/storages/sqlite_based.py,sha256=ARwVisVbPKBap_mVdpvTpp28iXgJbCJ3dAj41UYu03Q,609
|
|
145
|
+
arekit/contrib/utils/data/storages/jsonl_based.py,sha256=dz8uizu9t1C215o0HEL8y4LiDKR4aC_-OwDu_xF0xIM,522
|
|
146
|
+
arekit/contrib/utils/data/storages/row_cache.py,sha256=MRK0uJFvw6O99k2aFb3JLZhLUBo2JUO-WYQ4EeRRu6M,2051
|
|
147
|
+
arekit/contrib/utils/data/storages/sqlite_based.py,sha256=cIYAHyiB4CMftKgrgLqw-L4F1WnhbspjwWLSPqH5NHk,682
|
|
187
148
|
arekit/contrib/utils/data/writers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
188
149
|
arekit/contrib/utils/data/writers/base.py,sha256=JLwf5WVl_U319sdMev8YOn4OoCcrgNIUZtrOuG1JLjI,766
|
|
189
|
-
arekit/contrib/utils/data/writers/csv_native.py,sha256=7fPxYeu9YDK8Cvjp1n-sbKT63ZuhDIEv3VwghHuKk5k,2252
|
|
190
|
-
arekit/contrib/utils/data/writers/csv_pd.py,sha256=WhBjDJCHUBy_TabngMF42Qicx0ye8xIus0m6c7qotto,1330
|
|
191
|
-
arekit/contrib/utils/data/writers/json_opennre.py,sha256=EkhXmONgtMe7A9VKrs9ElFHc8RoMumjFbkKfwuOVOoU,5067
|
|
192
|
-
arekit/contrib/utils/data/writers/sqlite_native.py,sha256=MnbLU8iPvYvpYgEbOXhBKH_G8DJs0W9iSuhr_TPKBAQ,4601
|
|
193
|
-
arekit/contrib/utils/embeddings/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
194
|
-
arekit/contrib/utils/embeddings/rusvectores.py,sha256=WA0HejE2U5kgeBvh4_vty2QzoAkFXiMk94BK8FHxoxw,1931
|
|
195
|
-
arekit/contrib/utils/embeddings/tokens.py,sha256=z3lJ30JTX9zvZtPgzRl3yANECmuA1qboMDTcJsr_4E4,872
|
|
196
150
|
arekit/contrib/utils/entities/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
197
151
|
arekit/contrib/utils/entities/filter.py,sha256=aHTExIMFaMdy4QL8iYE23eiby3qLImAakXR6gNqG6fs,145
|
|
198
152
|
arekit/contrib/utils/entities/formatters/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
199
|
-
arekit/contrib/utils/entities/formatters/str_display.py,sha256=N8igv7EVaTFayvLXkyBGtm67KwHaeP-M-L8d7oqBG9Q,401
|
|
200
153
|
arekit/contrib/utils/entities/formatters/str_simple_sharp_prefixed_fmt.py,sha256=rEUIma9O3kOBWIguGtJ69JH-00Dhm0vUBOd5yNcKweY,653
|
|
201
154
|
arekit/contrib/utils/io_utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
202
|
-
arekit/contrib/utils/io_utils/embedding.py,sha256=cBDRv_1LROJ262QaL3QVfGt2W9EvBfbh83oL41PJn60,2543
|
|
203
155
|
arekit/contrib/utils/io_utils/utils.py,sha256=310SIJTsNLn2OZrGPer9W4ZP52PHkjBK3zsyqxVs3h0,537
|
|
204
|
-
arekit/contrib/utils/np_utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
205
|
-
arekit/contrib/utils/np_utils/embedding.py,sha256=G7Ls_ClzbskLLy-opRcVzQlfUfhdwbqoXgk0zoGrmHM,798
|
|
206
|
-
arekit/contrib/utils/np_utils/npz_utils.py,sha256=XoUHNmOlcr2X674R1xKGUJitEpFCIBJ8DOpNEPhtJFk,234
|
|
207
|
-
arekit/contrib/utils/np_utils/vocab.py,sha256=FsS18chMLU4WfMeGwBbvmfB5Qmoj5tZTOo-4zqWPm3Q,580
|
|
208
156
|
arekit/contrib/utils/pipelines/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
209
157
|
arekit/contrib/utils/pipelines/opinion_collections.py,sha256=y9-klVJGCN9mPd7t1ECllAiCnAb3MKVXC1PnYddp5sQ,3195
|
|
210
158
|
arekit/contrib/utils/pipelines/items/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
211
|
-
arekit/contrib/utils/pipelines/items/sampling/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
212
|
-
arekit/contrib/utils/pipelines/items/sampling/base.py,sha256=-H-r5GIi9ee7CxxpJs8KnHC91l7Y1dYaWPR_OK17E8g,4245
|
|
213
|
-
arekit/contrib/utils/pipelines/items/sampling/networks.py,sha256=E0EjQ4KRd3oYLFVbie05XJa00JqR26eLRoMrDnuQySQ,2653
|
|
214
159
|
arekit/contrib/utils/pipelines/items/text/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
215
|
-
arekit/contrib/utils/pipelines/items/text/entities_default.py,sha256=vNx5ir2mf7a1gg_OeqUsf_p1Fu2k7QIFxVpe-CuwZ84,727
|
|
216
160
|
arekit/contrib/utils/pipelines/items/text/frames.py,sha256=pZQybYfgEQB1DM3PtmsgrtB2Xl0HejmP4rhT0nR_YKE,2586
|
|
217
|
-
arekit/contrib/utils/pipelines/items/text/frames_lemmatized.py,sha256=4rIAAB-_GeWNbu5KyaDm5qttH4o2Bzpdvy-D9YR5bRk,1776
|
|
218
|
-
arekit/contrib/utils/pipelines/items/text/frames_negation.py,sha256=AdoY7lqSAT0RApp0DbqeI7xxyRVF6NPJLAfR59lsIec,1303
|
|
219
|
-
arekit/contrib/utils/pipelines/items/text/tokenizer.py,sha256=FmV5flziDLCNttxrUzRr-FGCcKK6venZEcZ-KwcqwNE,3147
|
|
220
|
-
arekit/contrib/utils/pipelines/items/text/translator.py,sha256=TkXVyZYRbS8P4S2Pnn2GzQMRa-9ba-nS4_zXvsf16vU,5365
|
|
221
161
|
arekit/contrib/utils/pipelines/text_opinion/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
222
|
-
arekit/contrib/utils/pipelines/text_opinion/extraction.py,sha256=
|
|
162
|
+
arekit/contrib/utils/pipelines/text_opinion/extraction.py,sha256=kKBQTvZxYYf9tBYmUv3Ipj9OOYKmHnYG0y5Gyjt27yA,4587
|
|
223
163
|
arekit/contrib/utils/pipelines/text_opinion/annot/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
224
|
-
arekit/contrib/utils/pipelines/text_opinion/annot/algo_based.py,sha256=
|
|
164
|
+
arekit/contrib/utils/pipelines/text_opinion/annot/algo_based.py,sha256=69xmuxqVmsYxBYpV2gYF7j3Z5iPk0ndjnOZe2Yy5WDA,1911
|
|
225
165
|
arekit/contrib/utils/pipelines/text_opinion/filters/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
226
166
|
arekit/contrib/utils/pipelines/text_opinion/filters/base.py,sha256=GnKnJB4MKqiMSJny3a9Na7l7Csm7abbt6GADBCY18Mw,143
|
|
227
167
|
arekit/contrib/utils/pipelines/text_opinion/filters/distance_based.py,sha256=3Pjq4IJJMT7dYpK266lN66WQJUnQO3P0rG6wcAvJOOA,649
|
|
228
168
|
arekit/contrib/utils/pipelines/text_opinion/filters/entity_based.py,sha256=pdWFJaKh4kKIsUuBNp3WNy5Rj80CjWEy2wp-0axFnrI,1254
|
|
229
169
|
arekit/contrib/utils/pipelines/text_opinion/filters/limitation.py,sha256=4AFS5zhocJuYphGO2ZMWmYTtIhGItKDTkB0--AmjgnA,1151
|
|
230
|
-
arekit/contrib/utils/processing/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
231
|
-
arekit/contrib/utils/processing/languages/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
232
|
-
arekit/contrib/utils/processing/languages/mods.py,sha256=OERKcglI4pJEIQxlWMYuYg_uHnNWVpP-mqhnFsQbY7A,263
|
|
233
|
-
arekit/contrib/utils/processing/languages/pos.py,sha256=etC3ueLGgZorgKEc3TWpeIuv46vs392xPi1lM31Cg0s,278
|
|
234
|
-
arekit/contrib/utils/processing/languages/ru/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
235
|
-
arekit/contrib/utils/processing/languages/ru/cases.py,sha256=27sIQsU5_0aT4EVuPtKCK-tfi1Q0TH11phV1x5hIzLs,1492
|
|
236
|
-
arekit/contrib/utils/processing/languages/ru/constants.py,sha256=f4z7ivILKqYju9rkagi9_FIvPm1FnWHbXgxigyb3zm4,147
|
|
237
|
-
arekit/contrib/utils/processing/languages/ru/mods.py,sha256=j4xKgRbCC834i9n-RyU607v9Qph9sP_B31WLrKFByRk,343
|
|
238
|
-
arekit/contrib/utils/processing/languages/ru/number.py,sha256=kHyP0Lp_iHVDwkbN7tkZUJpGFQ40QRm-j_1g0dFU-sM,401
|
|
239
|
-
arekit/contrib/utils/processing/languages/ru/pos_service.py,sha256=BWHLPybjmTVNXjJM2QmrZlEDcl7nZY7keLmXZcG_PFM,1125
|
|
240
|
-
arekit/contrib/utils/processing/lemmatization/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
241
|
-
arekit/contrib/utils/processing/lemmatization/mystem.py,sha256=_FRqEGWUlgAbhSJ-dsyoFg_qbbUxePDSAOWWuveRqCo,1340
|
|
242
|
-
arekit/contrib/utils/processing/pos/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
243
|
-
arekit/contrib/utils/processing/pos/base.py,sha256=CrMr3u6lRs2NoV7uch5HZgV71A-0M-pwJfwXjfudHBY,259
|
|
244
|
-
arekit/contrib/utils/processing/pos/mystem_wrap.py,sha256=C9AnRIAZL4e8DMNte9LDuvxS-cbEQpo2AYdQtP9uIJ4,4336
|
|
245
|
-
arekit/contrib/utils/processing/pos/russian.py,sha256=POCo6xKmK7vAEq-kWlODg611kLOtOj37OVc3L_GWL-8,229
|
|
246
|
-
arekit/contrib/utils/processing/text/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
247
|
-
arekit/contrib/utils/processing/text/tokens.py,sha256=_3u5Oy1MG_QfHH8wi0x0nA588qSaCp3Wmnp2SzMWjXY,3573
|
|
248
170
|
arekit/contrib/utils/synonyms/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
249
171
|
arekit/contrib/utils/synonyms/simple.py,sha256=ST9EwuWP88FzbyV8Gi0-biTPgGOsZ7OWyaBWHL_U_eo,557
|
|
250
172
|
arekit/contrib/utils/synonyms/stemmer_based.py,sha256=q19P_XOCWN2_JrBtybAt7ToMIr1ambw4ahr0fSEEHmQ,1400
|
|
251
|
-
arekit/
|
|
252
|
-
arekit/
|
|
253
|
-
arekit/
|
|
254
|
-
arekit-0.25.
|
|
255
|
-
arekit-0.25.
|
|
256
|
-
arekit-0.25.
|
|
257
|
-
arekit-0.25.0.dist-info/WHEEL,sha256=eOLhNAGa2EW3wWl_TU484h7q1UNgy0JXjjoqKoxAAQc,92
|
|
258
|
-
arekit-0.25.0.dist-info/top_level.txt,sha256=4pXuFE8IE0lBsqi6ZsR7figx0H939VIX4_-76YIbkOQ,7
|
|
259
|
-
arekit-0.25.0.dist-info/RECORD,,
|
|
173
|
+
arekit-0.25.2.data/data/logo.png,sha256=S8OZ4MGGD72Pf5co7ngYbXKkJH1EUhbErUXv1ZjUWiU,45718
|
|
174
|
+
arekit-0.25.2.dist-info/LICENSE,sha256=JO9tIbxAvhwDv73cX-gUStr9yA-TY7wusUeLHRx7JuY,1076
|
|
175
|
+
arekit-0.25.2.dist-info/METADATA,sha256=CsXviPZIM44LGhiyBRH-MK0DGOP7UAc4GHbvSaLcwxw,3252
|
|
176
|
+
arekit-0.25.2.dist-info/WHEEL,sha256=g4nMs7d-Xl9-xC9XovUrsDHGXt-FT0E17Yqo92DEfvY,92
|
|
177
|
+
arekit-0.25.2.dist-info/top_level.txt,sha256=4pXuFE8IE0lBsqi6ZsR7figx0H939VIX4_-76YIbkOQ,7
|
|
178
|
+
arekit-0.25.2.dist-info/RECORD,,
|
|
File without changes
|
|
@@ -1,68 +0,0 @@
|
|
|
1
|
-
from arekit.common.data.input.providers.columns.base import BaseColumnsProvider
|
|
2
|
-
from arekit.common.data.input.providers.contents import ContentsProvider
|
|
3
|
-
from arekit.common.data.input.providers.rows.base import BaseRowProvider
|
|
4
|
-
from arekit.common.data.storages.base import BaseRowsStorage
|
|
5
|
-
from arekit.contrib.utils.data.storages.row_cache import RowCacheStorage
|
|
6
|
-
from arekit.contrib.utils.data.writers.base import BaseWriter
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
class BaseInputRepository(object):
    """ Binds together a columns provider, a rows provider and a rows storage.

        The repository fills the storage with rows produced by the rows
        provider (see `populate`) and hands the storage contents over to a
        writer (see `populate` for the line-by-line mode and `push` for the
        write-everything-at-once mode).
    """

    def __init__(self, columns_provider, rows_provider, storage):
        """ columns_provider: BaseColumnsProvider
                passed to the storage on initialization and on filling.
            rows_provider: BaseRowProvider
                source of the rows, queried via `iter_by_rows`.
            storage: BaseRowsStorage
                container that is filled with the provided rows.
        """
        assert(isinstance(columns_provider, BaseColumnsProvider))
        assert(isinstance(rows_provider, BaseRowProvider))
        assert(isinstance(storage, BaseRowsStorage))

        self._columns_provider = columns_provider
        self._rows_provider = rows_provider
        self._storage = storage

        # Do setup operations.
        self._setup_columns_provider()
        self._setup_rows_provider()

    # region protected methods

    def _setup_columns_provider(self):
        # No-op hook invoked at the end of `__init__`
        # (presumably meant to be overridden by subclasses).
        pass

    def _setup_rows_provider(self):
        # No-op hook invoked at the end of `__init__`
        # (presumably meant to be overridden by subclasses).
        pass

    # endregion

    def populate(self, contents_provider, doc_ids, desc="", writer=None, target=None):
        """ Initialize the storage as empty and fill it with the rows
            obtained from the rows provider for the given documents.

            contents_provider: ContentsProvider
                forwarded to the rows provider.
            doc_ids: list
                document identifiers, forwarded to the rows provider.
            desc: str
                forwarded to the storage `fill` call as is.
            writer: BaseWriter or None
            target: str or None
                when BOTH `writer` and `target` are given, the target is
                opened before filling, `writer.commit_line` is invoked from
                the row handler passed to the storage, and the target is
                closed once filling is over (even if filling fails).
        """
        assert(isinstance(contents_provider, ContentsProvider))
        assert(isinstance(self._storage, BaseRowsStorage))
        assert(isinstance(doc_ids, list))
        assert(isinstance(writer, BaseWriter) or writer is None)
        assert(isinstance(target, str) or target is None)

        is_async_write_mode_on = writer is not None and target is not None

        def iter_rows(idle_mode):
            return self._rows_provider.iter_by_rows(
                contents_provider=contents_provider,
                doc_ids_iter=doc_ids,
                idle_mode=idle_mode)

        def handle_row():
            # The handler is always a callable; outside of the
            # line-by-line writing mode it does nothing and returns None.
            if is_async_write_mode_on:
                return writer.commit_line(self._storage)
            return None

        self._storage.init_empty(columns_provider=self._columns_provider)

        if is_async_write_mode_on:
            writer.open_target(target)

        try:
            self._storage.fill(iter_rows,
                               columns_provider=self._columns_provider,
                               row_handler=handle_row,
                               desc=desc)
        finally:
            # Release the target even when filling raises, so that
            # the opened target is never left dangling.
            if is_async_write_mode_on:
                writer.close_target()

    def push(self, writer, target, free_storage=True):
        """ Write the whole storage into `target` by means of `writer`
            and, optionally, release the storage contents.

            writer:
                object providing `write_all(storage, target)`.
            target:
                forwarded to `writer.write_all` as is.
            free_storage: bool
                when True, `free()` is called on the storage afterwards.
        """
        # NOTE(review): writing is skipped for RowCacheStorage, presumably
        # because its rows were already committed during `populate` -- confirm.
        if not isinstance(self._storage, RowCacheStorage):
            writer.write_all(self._storage, target)

        # After writing we free the contents of the storage.
        if free_storage:
            self._storage.free()
|