arekit 0.24.0__py3-none-any.whl → 0.25.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- arekit/common/data/storages/base.py +4 -15
- arekit/common/docs/entities_grouping.py +2 -1
- arekit/common/docs/parser.py +27 -22
- arekit/common/pipeline/base.py +12 -16
- arekit/common/pipeline/batching.py +28 -0
- arekit/common/pipeline/context.py +5 -1
- arekit/common/pipeline/items/base.py +39 -2
- arekit/common/pipeline/items/flatten.py +5 -1
- arekit/common/pipeline/items/handle.py +2 -1
- arekit/common/pipeline/items/iter.py +2 -1
- arekit/common/pipeline/items/map.py +2 -1
- arekit/common/pipeline/items/map_nested.py +4 -0
- arekit/common/pipeline/utils.py +32 -0
- arekit/common/text/{partitioning/str.py → partitioning.py} +14 -9
- arekit/common/utils.py +11 -52
- arekit/contrib/utils/data/contents/opinions.py +13 -3
- arekit/contrib/utils/data/storages/jsonl_based.py +2 -1
- arekit/contrib/utils/data/storages/pandas_based.py +2 -17
- arekit/contrib/utils/data/storages/row_cache.py +8 -2
- arekit/contrib/utils/data/storages/sqlite_based.py +18 -0
- arekit/contrib/utils/io_utils/utils.py +1 -18
- arekit/contrib/utils/pipelines/items/text/entities_default.py +2 -2
- arekit/contrib/utils/pipelines/items/text/frames.py +2 -3
- arekit/contrib/utils/pipelines/text_opinion/extraction.py +8 -10
- arekit-0.25.1.data/data/logo.png +0 -0
- arekit-0.25.1.dist-info/METADATA +81 -0
- arekit-0.25.1.dist-info/RECORD +186 -0
- {arekit-0.24.0.dist-info → arekit-0.25.1.dist-info}/WHEEL +1 -1
- arekit/common/data/input/repositories/__init__.py +0 -0
- arekit/common/data/input/repositories/base.py +0 -68
- arekit/common/data/input/repositories/sample.py +0 -22
- arekit/common/data/views/__init__.py +0 -0
- arekit/common/data/views/samples.py +0 -26
- arekit/common/docs/objects_parser.py +0 -37
- arekit/common/text/parser.py +0 -12
- arekit/common/text/partitioning/__init__.py +0 -0
- arekit/common/text/partitioning/base.py +0 -4
- arekit/common/text/partitioning/terms.py +0 -35
- arekit/contrib/networks/__init__.py +0 -0
- arekit/contrib/networks/embedding.py +0 -149
- arekit/contrib/networks/embedding_io.py +0 -18
- arekit/contrib/networks/input/__init__.py +0 -0
- arekit/contrib/networks/input/const.py +0 -6
- arekit/contrib/networks/input/ctx_serialization.py +0 -28
- arekit/contrib/networks/input/embedding/__init__.py +0 -0
- arekit/contrib/networks/input/embedding/matrix.py +0 -29
- arekit/contrib/networks/input/embedding/offsets.py +0 -55
- arekit/contrib/networks/input/formatters/__init__.py +0 -0
- arekit/contrib/networks/input/formatters/pos_mapper.py +0 -22
- arekit/contrib/networks/input/providers/__init__.py +0 -0
- arekit/contrib/networks/input/providers/sample.py +0 -129
- arekit/contrib/networks/input/providers/term_connotation.py +0 -23
- arekit/contrib/networks/input/providers/text.py +0 -24
- arekit/contrib/networks/input/rows_parser.py +0 -47
- arekit/contrib/networks/input/term_types.py +0 -13
- arekit/contrib/networks/input/terms_mapping.py +0 -60
- arekit/contrib/networks/vectorizer.py +0 -6
- arekit/contrib/source/__init__.py +0 -0
- arekit/contrib/source/brat/__init__.py +0 -0
- arekit/contrib/source/brat/annot.py +0 -84
- arekit/contrib/source/brat/doc.py +0 -28
- arekit/contrib/source/brat/entities/__init__.py +0 -0
- arekit/contrib/source/brat/entities/compound.py +0 -13
- arekit/contrib/source/brat/entities/entity.py +0 -42
- arekit/contrib/source/brat/entities/parser.py +0 -53
- arekit/contrib/source/brat/opinions/__init__.py +0 -0
- arekit/contrib/source/brat/opinions/converter.py +0 -19
- arekit/contrib/source/brat/relation.py +0 -32
- arekit/contrib/source/brat/sentence.py +0 -69
- arekit/contrib/source/brat/sentences_reader.py +0 -128
- arekit/contrib/source/download.py +0 -41
- arekit/contrib/source/nerel/__init__.py +0 -0
- arekit/contrib/source/nerel/entities.py +0 -55
- arekit/contrib/source/nerel/folding/__init__.py +0 -0
- arekit/contrib/source/nerel/folding/fixed.py +0 -74
- arekit/contrib/source/nerel/io_utils.py +0 -62
- arekit/contrib/source/nerel/labels.py +0 -241
- arekit/contrib/source/nerel/reader.py +0 -46
- arekit/contrib/source/nerel/utils.py +0 -24
- arekit/contrib/source/nerel/versions.py +0 -12
- arekit/contrib/source/nerelbio/__init__.py +0 -0
- arekit/contrib/source/nerelbio/io_utils.py +0 -62
- arekit/contrib/source/nerelbio/labels.py +0 -265
- arekit/contrib/source/nerelbio/reader.py +0 -8
- arekit/contrib/source/nerelbio/versions.py +0 -8
- arekit/contrib/source/ruattitudes/__init__.py +0 -0
- arekit/contrib/source/ruattitudes/collection.py +0 -36
- arekit/contrib/source/ruattitudes/doc.py +0 -51
- arekit/contrib/source/ruattitudes/doc_brat.py +0 -44
- arekit/contrib/source/ruattitudes/entity/__init__.py +0 -0
- arekit/contrib/source/ruattitudes/entity/parser.py +0 -7
- arekit/contrib/source/ruattitudes/io_utils.py +0 -56
- arekit/contrib/source/ruattitudes/labels_fmt.py +0 -12
- arekit/contrib/source/ruattitudes/opinions/__init__.py +0 -0
- arekit/contrib/source/ruattitudes/opinions/base.py +0 -28
- arekit/contrib/source/ruattitudes/opinions/converter.py +0 -37
- arekit/contrib/source/ruattitudes/reader.py +0 -268
- arekit/contrib/source/ruattitudes/sentence.py +0 -73
- arekit/contrib/source/ruattitudes/synonyms.py +0 -17
- arekit/contrib/source/ruattitudes/text_object.py +0 -59
- arekit/contrib/source/rusentiframes/__init__.py +0 -0
- arekit/contrib/source/rusentiframes/collection.py +0 -157
- arekit/contrib/source/rusentiframes/effect.py +0 -24
- arekit/contrib/source/rusentiframes/io_utils.py +0 -19
- arekit/contrib/source/rusentiframes/labels_fmt.py +0 -22
- arekit/contrib/source/rusentiframes/polarity.py +0 -35
- arekit/contrib/source/rusentiframes/role.py +0 -15
- arekit/contrib/source/rusentiframes/state.py +0 -24
- arekit/contrib/source/rusentiframes/types.py +0 -42
- arekit/contrib/source/rusentiframes/value.py +0 -2
- arekit/contrib/source/rusentrel/__init__.py +0 -0
- arekit/contrib/source/rusentrel/const.py +0 -3
- arekit/contrib/source/rusentrel/docs_reader.py +0 -51
- arekit/contrib/source/rusentrel/entities.py +0 -26
- arekit/contrib/source/rusentrel/io_utils.py +0 -125
- arekit/contrib/source/rusentrel/labels_fmt.py +0 -12
- arekit/contrib/source/rusentrel/opinions/__init__.py +0 -0
- arekit/contrib/source/rusentrel/opinions/collection.py +0 -30
- arekit/contrib/source/rusentrel/opinions/converter.py +0 -40
- arekit/contrib/source/rusentrel/opinions/provider.py +0 -54
- arekit/contrib/source/rusentrel/opinions/writer.py +0 -42
- arekit/contrib/source/rusentrel/synonyms.py +0 -17
- arekit/contrib/source/sentinerel/__init__.py +0 -0
- arekit/contrib/source/sentinerel/entities.py +0 -52
- arekit/contrib/source/sentinerel/folding/__init__.py +0 -0
- arekit/contrib/source/sentinerel/folding/factory.py +0 -31
- arekit/contrib/source/sentinerel/folding/fixed.py +0 -70
- arekit/contrib/source/sentinerel/io_utils.py +0 -87
- arekit/contrib/source/sentinerel/labels.py +0 -53
- arekit/contrib/source/sentinerel/labels_scaler.py +0 -30
- arekit/contrib/source/sentinerel/reader.py +0 -42
- arekit/contrib/source/synonyms/__init__.py +0 -0
- arekit/contrib/source/synonyms/utils.py +0 -19
- arekit/contrib/source/zip_utils.py +0 -47
- arekit/contrib/utils/connotations/__init__.py +0 -0
- arekit/contrib/utils/connotations/rusentiframes_sentiment.py +0 -23
- arekit/contrib/utils/data/readers/__init__.py +0 -0
- arekit/contrib/utils/data/readers/base.py +0 -7
- arekit/contrib/utils/data/readers/csv_pd.py +0 -38
- arekit/contrib/utils/data/readers/jsonl.py +0 -15
- arekit/contrib/utils/data/service/__init__.py +0 -0
- arekit/contrib/utils/data/service/balance.py +0 -50
- arekit/contrib/utils/data/writers/csv_native.py +0 -63
- arekit/contrib/utils/data/writers/csv_pd.py +0 -40
- arekit/contrib/utils/data/writers/json_opennre.py +0 -132
- arekit/contrib/utils/data/writers/sqlite_native.py +0 -110
- arekit/contrib/utils/download.py +0 -77
- arekit/contrib/utils/embeddings/__init__.py +0 -0
- arekit/contrib/utils/embeddings/rusvectores.py +0 -58
- arekit/contrib/utils/embeddings/tokens.py +0 -30
- arekit/contrib/utils/io_utils/embedding.py +0 -72
- arekit/contrib/utils/io_utils/opinions.py +0 -37
- arekit/contrib/utils/io_utils/samples.py +0 -79
- arekit/contrib/utils/lexicons/__init__.py +0 -0
- arekit/contrib/utils/lexicons/lexicon.py +0 -41
- arekit/contrib/utils/lexicons/relation.py +0 -42
- arekit/contrib/utils/lexicons/rusentilex.py +0 -37
- arekit/contrib/utils/nn/__init__.py +0 -0
- arekit/contrib/utils/nn/rows.py +0 -83
- arekit/contrib/utils/np_utils/__init__.py +0 -0
- arekit/contrib/utils/np_utils/embedding.py +0 -22
- arekit/contrib/utils/np_utils/npz_utils.py +0 -13
- arekit/contrib/utils/np_utils/vocab.py +0 -20
- arekit/contrib/utils/pipelines/items/sampling/__init__.py +0 -0
- arekit/contrib/utils/pipelines/items/sampling/base.py +0 -99
- arekit/contrib/utils/pipelines/items/sampling/networks.py +0 -54
- arekit/contrib/utils/pipelines/items/text/frames_lemmatized.py +0 -36
- arekit/contrib/utils/pipelines/items/text/frames_negation.py +0 -32
- arekit/contrib/utils/pipelines/items/text/terms_splitter.py +0 -10
- arekit/contrib/utils/pipelines/items/text/tokenizer.py +0 -107
- arekit/contrib/utils/pipelines/items/text/translator.py +0 -135
- arekit/contrib/utils/pipelines/sources/__init__.py +0 -0
- arekit/contrib/utils/pipelines/sources/nerel/__init__.py +0 -0
- arekit/contrib/utils/pipelines/sources/nerel/doc_provider.py +0 -27
- arekit/contrib/utils/pipelines/sources/nerel/extract_text_relations.py +0 -65
- arekit/contrib/utils/pipelines/sources/nerel/labels_fmt.py +0 -60
- arekit/contrib/utils/pipelines/sources/nerel_bio/__init__.py +0 -0
- arekit/contrib/utils/pipelines/sources/nerel_bio/doc_provider.py +0 -29
- arekit/contrib/utils/pipelines/sources/nerel_bio/extrat_text_relations.py +0 -64
- arekit/contrib/utils/pipelines/sources/nerel_bio/labels_fmt.py +0 -79
- arekit/contrib/utils/pipelines/sources/ruattitudes/__init__.py +0 -0
- arekit/contrib/utils/pipelines/sources/ruattitudes/doc_provider.py +0 -56
- arekit/contrib/utils/pipelines/sources/ruattitudes/entity_filter.py +0 -20
- arekit/contrib/utils/pipelines/sources/ruattitudes/extract_text_opinions.py +0 -65
- arekit/contrib/utils/pipelines/sources/rusentrel/__init__.py +0 -0
- arekit/contrib/utils/pipelines/sources/rusentrel/doc_provider.py +0 -21
- arekit/contrib/utils/pipelines/sources/rusentrel/extract_text_opinions.py +0 -107
- arekit/contrib/utils/pipelines/sources/sentinerel/__init__.py +0 -0
- arekit/contrib/utils/pipelines/sources/sentinerel/doc_provider.py +0 -29
- arekit/contrib/utils/pipelines/sources/sentinerel/entity_filter.py +0 -62
- arekit/contrib/utils/pipelines/sources/sentinerel/extract_text_opinions.py +0 -180
- arekit/contrib/utils/pipelines/sources/sentinerel/labels_fmt.py +0 -50
- arekit/contrib/utils/pipelines/text_opinion/annot/predefined.py +0 -88
- arekit/contrib/utils/processing/languages/__init__.py +0 -0
- arekit/contrib/utils/processing/languages/mods.py +0 -12
- arekit/contrib/utils/processing/languages/pos.py +0 -23
- arekit/contrib/utils/processing/languages/ru/__init__.py +0 -0
- arekit/contrib/utils/processing/languages/ru/cases.py +0 -78
- arekit/contrib/utils/processing/languages/ru/constants.py +0 -6
- arekit/contrib/utils/processing/languages/ru/mods.py +0 -13
- arekit/contrib/utils/processing/languages/ru/number.py +0 -23
- arekit/contrib/utils/processing/languages/ru/pos_service.py +0 -36
- arekit/contrib/utils/processing/lemmatization/__init__.py +0 -0
- arekit/contrib/utils/processing/lemmatization/mystem.py +0 -51
- arekit/contrib/utils/processing/pos/__init__.py +0 -0
- arekit/contrib/utils/processing/pos/base.py +0 -12
- arekit/contrib/utils/processing/pos/mystem_wrap.py +0 -134
- arekit/contrib/utils/processing/pos/russian.py +0 -10
- arekit/contrib/utils/processing/text/__init__.py +0 -0
- arekit/contrib/utils/processing/text/tokens.py +0 -127
- arekit/contrib/utils/resources.py +0 -25
- arekit/contrib/utils/serializer.py +0 -43
- arekit/contrib/utils/sources/__init__.py +0 -0
- arekit/contrib/utils/sources/sentinerel/__init__.py +0 -0
- arekit/contrib/utils/sources/sentinerel/text_opinion/__init__.py +0 -0
- arekit/contrib/utils/sources/sentinerel/text_opinion/prof_per_org_filter.py +0 -63
- arekit/contrib/utils/vectorizers/__init__.py +0 -0
- arekit/contrib/utils/vectorizers/bpe.py +0 -93
- arekit/contrib/utils/vectorizers/random_norm.py +0 -39
- arekit/download_data.py +0 -11
- arekit-0.24.0.dist-info/METADATA +0 -23
- arekit-0.24.0.dist-info/RECORD +0 -374
- {arekit-0.24.0.dist-info → arekit-0.25.1.dist-info}/LICENSE +0 -0
- {arekit-0.24.0.dist-info → arekit-0.25.1.dist-info}/top_level.txt +0 -0
|
File without changes
|
|
@@ -1,36 +0,0 @@
|
|
|
1
|
-
from arekit.contrib.source.ruattitudes.io_utils import RuAttitudesVersions, RuAttitudesIOUtils
|
|
2
|
-
from arekit.contrib.source.ruattitudes.reader import RuAttitudesFormatReader
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
class RuAttitudesCollection(object):
|
|
6
|
-
|
|
7
|
-
@staticmethod
|
|
8
|
-
def __get_reading_handler(input_file, read_inds_only, get_doc_inds_func):
|
|
9
|
-
assert(isinstance(read_inds_only, bool))
|
|
10
|
-
|
|
11
|
-
if read_inds_only:
|
|
12
|
-
return RuAttitudesFormatReader.iter_docs_inds(input_file=input_file,
|
|
13
|
-
get_doc_index_func=get_doc_inds_func)
|
|
14
|
-
else:
|
|
15
|
-
return RuAttitudesFormatReader.iter_docs(input_file=input_file,
|
|
16
|
-
get_doc_index_func=get_doc_inds_func)
|
|
17
|
-
|
|
18
|
-
@staticmethod
|
|
19
|
-
def iter_docs(version, get_doc_index_func, return_inds_only):
|
|
20
|
-
"""
|
|
21
|
-
RuAttitudes collection reader from zip archive
|
|
22
|
-
"""
|
|
23
|
-
assert(isinstance(version, RuAttitudesVersions))
|
|
24
|
-
assert(callable(get_doc_index_func))
|
|
25
|
-
assert(isinstance(return_inds_only, bool))
|
|
26
|
-
|
|
27
|
-
it = RuAttitudesIOUtils.iter_from_zip(
|
|
28
|
-
inner_path=RuAttitudesIOUtils.get_collection_filepath(),
|
|
29
|
-
process_func=lambda input_filepath: RuAttitudesCollection.__get_reading_handler(
|
|
30
|
-
input_file=input_filepath,
|
|
31
|
-
read_inds_only=return_inds_only,
|
|
32
|
-
get_doc_inds_func=get_doc_index_func),
|
|
33
|
-
version=version)
|
|
34
|
-
|
|
35
|
-
for doc in it:
|
|
36
|
-
yield doc
|
|
@@ -1,51 +0,0 @@
|
|
|
1
|
-
from arekit.contrib.source.ruattitudes.sentence import RuAttitudesSentence
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
class RuAttitudesDocument(object):
|
|
5
|
-
|
|
6
|
-
def __init__(self, sentences, doc_index):
|
|
7
|
-
assert(len(sentences) > 0)
|
|
8
|
-
|
|
9
|
-
self.__sentences = sentences
|
|
10
|
-
self.__objects_before_sentence = self.__cache_objects_declared_before()
|
|
11
|
-
self.__doc_index = doc_index
|
|
12
|
-
|
|
13
|
-
self.__set_owners()
|
|
14
|
-
|
|
15
|
-
# region properties
|
|
16
|
-
|
|
17
|
-
@property
|
|
18
|
-
def ID(self):
|
|
19
|
-
return self.__doc_index
|
|
20
|
-
|
|
21
|
-
@property
|
|
22
|
-
def Title(self):
|
|
23
|
-
return self.__sentences[0]
|
|
24
|
-
|
|
25
|
-
# endregion
|
|
26
|
-
|
|
27
|
-
# region private methods
|
|
28
|
-
|
|
29
|
-
def __set_owners(self):
|
|
30
|
-
for sentence in self.__sentences:
|
|
31
|
-
assert(isinstance(sentence, RuAttitudesSentence))
|
|
32
|
-
sentence.set_owner(self)
|
|
33
|
-
|
|
34
|
-
def __cache_objects_declared_before(self):
|
|
35
|
-
d = {}
|
|
36
|
-
before = 0
|
|
37
|
-
for s in self.__sentences:
|
|
38
|
-
assert(isinstance(s, RuAttitudesSentence))
|
|
39
|
-
d[s.SentenceIndex] = before
|
|
40
|
-
before += s.ObjectsCount
|
|
41
|
-
|
|
42
|
-
return d
|
|
43
|
-
|
|
44
|
-
# endregion
|
|
45
|
-
|
|
46
|
-
def get_objects_declared_before(self, sentence_index):
|
|
47
|
-
return self.__objects_before_sentence[sentence_index]
|
|
48
|
-
|
|
49
|
-
def iter_sentences(self):
|
|
50
|
-
for sentence in self.__sentences:
|
|
51
|
-
yield sentence
|
|
@@ -1,44 +0,0 @@
|
|
|
1
|
-
from arekit.contrib.source.brat.doc import BratDocument
|
|
2
|
-
from arekit.contrib.source.brat.sentence import BratSentence
|
|
3
|
-
from arekit.contrib.source.ruattitudes.doc import RuAttitudesDocument
|
|
4
|
-
from arekit.contrib.source.ruattitudes.opinions.base import SentenceOpinion
|
|
5
|
-
from arekit.contrib.source.ruattitudes.opinions.converter import RuAttitudesSentenceOpinionConverter
|
|
6
|
-
from arekit.contrib.source.ruattitudes.sentence import RuAttitudesSentence
|
|
7
|
-
from arekit.common.utils import split_by_whitespaces
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
class RuAttitudesDocumentsConverter(object):
|
|
11
|
-
""" Performs conversion to a brat-based representation.
|
|
12
|
-
The latter allows then allows to adopt pipelines for TextOpnion extraction.
|
|
13
|
-
"""
|
|
14
|
-
|
|
15
|
-
@staticmethod
|
|
16
|
-
def to_brat_doc(doc):
|
|
17
|
-
assert(isinstance(doc, RuAttitudesDocument))
|
|
18
|
-
text_opinions = RuAttitudesDocumentsConverter.__iter_text_opinions(doc=doc)
|
|
19
|
-
brat_sentences = RuAttitudesDocumentsConverter.__to_brat_sentences(doc.iter_sentences())
|
|
20
|
-
return BratDocument(doc_id=doc.ID,
|
|
21
|
-
sentences=brat_sentences,
|
|
22
|
-
text_relations=list(text_opinions))
|
|
23
|
-
|
|
24
|
-
@staticmethod
|
|
25
|
-
def __to_brat_sentences(sentences_iter):
|
|
26
|
-
sentences = []
|
|
27
|
-
for s in sentences_iter:
|
|
28
|
-
assert(isinstance(s, RuAttitudesSentence))
|
|
29
|
-
assert(s.Owner is not None)
|
|
30
|
-
brat_entities = [obj.to_entity(s.get_doc_level_text_object_id) for obj in s.iter_objects()]
|
|
31
|
-
brat_sentence = BratSentence(text=split_by_whitespaces(s.Text), index_begin=0, entities=brat_entities)
|
|
32
|
-
sentences.append(brat_sentence)
|
|
33
|
-
return sentences
|
|
34
|
-
|
|
35
|
-
@staticmethod
|
|
36
|
-
def __iter_text_opinions(doc):
|
|
37
|
-
assert(isinstance(doc, RuAttitudesDocument))
|
|
38
|
-
for sentence in doc.iter_sentences():
|
|
39
|
-
assert(isinstance(sentence, RuAttitudesSentence))
|
|
40
|
-
for sentence_opinion in sentence.iter_sentence_opins():
|
|
41
|
-
assert(isinstance(sentence_opinion, SentenceOpinion))
|
|
42
|
-
yield RuAttitudesSentenceOpinionConverter.to_brat_relation(
|
|
43
|
-
sentence_opinion=sentence_opinion,
|
|
44
|
-
end_to_doc_id_func=sentence.get_doc_level_text_object_id)
|
|
File without changes
|
|
@@ -1,56 +0,0 @@
|
|
|
1
|
-
from os import path
|
|
2
|
-
|
|
3
|
-
from enum import Enum
|
|
4
|
-
|
|
5
|
-
from arekit.contrib.source.zip_utils import ZipArchiveUtils
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
class RuAttitudesVersions(Enum):
|
|
9
|
-
Debug = "dbg"
|
|
10
|
-
V10 = "v1_0"
|
|
11
|
-
V11 = "v1_1"
|
|
12
|
-
V20Base = 'v2_0_base'
|
|
13
|
-
V20Large = 'v2_0_large'
|
|
14
|
-
V20BaseNeut = 'v2_0_base_neut'
|
|
15
|
-
V20LargeNeut = 'v2_0_large_neut'
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
class RuAttitudesVersionsService:
|
|
19
|
-
|
|
20
|
-
@staticmethod
|
|
21
|
-
def __iter_type_and_names():
|
|
22
|
-
for version_type in RuAttitudesVersions:
|
|
23
|
-
yield version_type, version_type.value
|
|
24
|
-
|
|
25
|
-
@staticmethod
|
|
26
|
-
def find_by_name(name):
|
|
27
|
-
for version_type, related_name in RuAttitudesVersionsService.__iter_type_and_names():
|
|
28
|
-
if name == related_name:
|
|
29
|
-
return version_type
|
|
30
|
-
raise Exception("Version `{}` does not supported".format(name))
|
|
31
|
-
|
|
32
|
-
@staticmethod
|
|
33
|
-
def iter_supported_names():
|
|
34
|
-
for _, name in RuAttitudesVersionsService.__iter_type_and_names():
|
|
35
|
-
yield name
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
class RuAttitudesIOUtils(ZipArchiveUtils):
|
|
39
|
-
|
|
40
|
-
# region internal methods
|
|
41
|
-
|
|
42
|
-
@staticmethod
|
|
43
|
-
def get_archive_filepath(version):
|
|
44
|
-
assert(isinstance(version, str))
|
|
45
|
-
return path.join(RuAttitudesIOUtils.get_data_root(),
|
|
46
|
-
"ruattitudes-{version}.zip".format(version=version))
|
|
47
|
-
|
|
48
|
-
@staticmethod
|
|
49
|
-
def get_collection_filepath():
|
|
50
|
-
return "collection.txt"
|
|
51
|
-
|
|
52
|
-
@classmethod
|
|
53
|
-
def get_synonyms_innerpath(cls):
|
|
54
|
-
return "synonyms.txt"
|
|
55
|
-
|
|
56
|
-
# endregion
|
|
@@ -1,12 +0,0 @@
|
|
|
1
|
-
from arekit.common.labels.scaler.base import BaseLabelScaler
|
|
2
|
-
from arekit.common.labels.str_fmt import StringLabelsFormatter
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
class RuAttitudesLabelFormatter(StringLabelsFormatter):
|
|
6
|
-
|
|
7
|
-
def __init__(self, label_scaler):
|
|
8
|
-
assert(isinstance(label_scaler, BaseLabelScaler))
|
|
9
|
-
stol = {}
|
|
10
|
-
for int_label in [-1, 0, 1]:
|
|
11
|
-
stol[str(int_label)] = type(label_scaler.int_to_label(int_label))
|
|
12
|
-
super(RuAttitudesLabelFormatter, self).__init__(stol=stol)
|
|
File without changes
|
|
@@ -1,28 +0,0 @@
|
|
|
1
|
-
class SentenceOpinion(object):
|
|
2
|
-
""" Provides an opinion within a sentence.
|
|
3
|
-
Specific for RuAttitudes collection, as the latter provides
|
|
4
|
-
connections within a sentence.
|
|
5
|
-
"""
|
|
6
|
-
|
|
7
|
-
def __init__(self, source_id, target_id, label_int, tag):
|
|
8
|
-
assert(isinstance(label_int, int))
|
|
9
|
-
self.__label_int = label_int
|
|
10
|
-
self.__source_id = source_id
|
|
11
|
-
self.__target_id = target_id
|
|
12
|
-
self.__tag = tag
|
|
13
|
-
|
|
14
|
-
@property
|
|
15
|
-
def SourceID(self):
|
|
16
|
-
return self.__source_id
|
|
17
|
-
|
|
18
|
-
@property
|
|
19
|
-
def TargetID(self):
|
|
20
|
-
return self.__target_id
|
|
21
|
-
|
|
22
|
-
@property
|
|
23
|
-
def Label(self):
|
|
24
|
-
return self.__label_int
|
|
25
|
-
|
|
26
|
-
@property
|
|
27
|
-
def Tag(self):
|
|
28
|
-
return self.__tag
|
|
@@ -1,37 +0,0 @@
|
|
|
1
|
-
from arekit.common.labels.scaler.base import BaseLabelScaler
|
|
2
|
-
from arekit.common.opinions.base import Opinion
|
|
3
|
-
from arekit.contrib.source.brat.relation import BratRelation
|
|
4
|
-
from arekit.contrib.source.ruattitudes.opinions.base import SentenceOpinion
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
class RuAttitudesSentenceOpinionConverter:
|
|
8
|
-
|
|
9
|
-
@staticmethod
|
|
10
|
-
def to_brat_relation(sentence_opinion, end_to_doc_id_func):
|
|
11
|
-
""" Converts opinion into brat-related relation.
|
|
12
|
-
NOTE: for rel_type we just call str() over int-based value.
|
|
13
|
-
"""
|
|
14
|
-
assert(isinstance(sentence_opinion, SentenceOpinion))
|
|
15
|
-
return BratRelation(id_in_doc="0",
|
|
16
|
-
source_id=end_to_doc_id_func(sentence_opinion.SourceID),
|
|
17
|
-
target_id=end_to_doc_id_func(sentence_opinion.TargetID),
|
|
18
|
-
rel_type=str(sentence_opinion.Label))
|
|
19
|
-
|
|
20
|
-
@staticmethod
|
|
21
|
-
def to_opinion(sentence_opinion, source_value, target_value, label_scaler):
|
|
22
|
-
"""
|
|
23
|
-
Converts onto document, non referenced opinion
|
|
24
|
-
(non bounded to the text).
|
|
25
|
-
"""
|
|
26
|
-
assert(isinstance(sentence_opinion, SentenceOpinion))
|
|
27
|
-
assert(isinstance(label_scaler, BaseLabelScaler))
|
|
28
|
-
|
|
29
|
-
opinion = Opinion(source_value=source_value,
|
|
30
|
-
target_value=target_value,
|
|
31
|
-
label=label_scaler.int_to_label(sentence_opinion.Label))
|
|
32
|
-
|
|
33
|
-
# Using this tag allows to perform a revert operation,
|
|
34
|
-
# i.e. to find opinion_ref by opinion.
|
|
35
|
-
opinion.set_tag(sentence_opinion.Tag)
|
|
36
|
-
|
|
37
|
-
return opinion
|
|
@@ -1,268 +0,0 @@
|
|
|
1
|
-
from arekit.common.utils import split_by_whitespaces
|
|
2
|
-
from arekit.contrib.source.ruattitudes.doc import RuAttitudesDocument
|
|
3
|
-
from arekit.contrib.source.ruattitudes.opinions.base import SentenceOpinion
|
|
4
|
-
from arekit.contrib.source.ruattitudes.sentence import RuAttitudesSentence
|
|
5
|
-
from arekit.contrib.source.ruattitudes.text_object import TextObject
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
class RuAttitudesFormatReader(object):
|
|
9
|
-
|
|
10
|
-
DOC_SEP_KEY = '--------'
|
|
11
|
-
FILE_KEY = "File:"
|
|
12
|
-
OBJ_KEY = "Object:"
|
|
13
|
-
TITLE_KEY = "Title:"
|
|
14
|
-
SINDEX_KEY = "Sentence:"
|
|
15
|
-
OPINION_KEY = "Attitude:"
|
|
16
|
-
STEXT_KEY = "Text:"
|
|
17
|
-
TERMS_IN_TITLE = "TermsInTitle:"
|
|
18
|
-
TERMS_IN_TEXT = "TermsInText:"
|
|
19
|
-
FRAMEVAR_TITLE = "FrameVariant:"
|
|
20
|
-
|
|
21
|
-
AUTH_LABEL = '<AUTH>'
|
|
22
|
-
|
|
23
|
-
def __iter__(self):
|
|
24
|
-
pass
|
|
25
|
-
|
|
26
|
-
# region private methods
|
|
27
|
-
|
|
28
|
-
@staticmethod
|
|
29
|
-
def iter_docs_inds(input_file, get_doc_index_func):
|
|
30
|
-
assert(callable(get_doc_index_func))
|
|
31
|
-
|
|
32
|
-
title = None
|
|
33
|
-
local_doc_ind = 0
|
|
34
|
-
has_sentences = False
|
|
35
|
-
|
|
36
|
-
for line in RuAttitudesFormatReader.__iter_lines(input_file):
|
|
37
|
-
|
|
38
|
-
if RuAttitudesFormatReader.__check_is_title(line):
|
|
39
|
-
# We use a placeholder, there is no need in actual value out there.
|
|
40
|
-
title = "title"
|
|
41
|
-
has_sentences = True
|
|
42
|
-
|
|
43
|
-
if RuAttitudesFormatReader.__check_is_doc_sep(line=line, title=title):
|
|
44
|
-
yield RuAttitudesFormatReader.__assign_doc_index(doc_index_func=get_doc_index_func,
|
|
45
|
-
local_index=local_doc_ind)
|
|
46
|
-
local_doc_ind += 1
|
|
47
|
-
title = None
|
|
48
|
-
|
|
49
|
-
if has_sentences:
|
|
50
|
-
yield RuAttitudesFormatReader.__assign_doc_index(doc_index_func=get_doc_index_func,
|
|
51
|
-
local_index=local_doc_ind)
|
|
52
|
-
|
|
53
|
-
@staticmethod
|
|
54
|
-
def iter_docs(input_file, get_doc_index_func):
|
|
55
|
-
assert(callable(get_doc_index_func))
|
|
56
|
-
|
|
57
|
-
reset = False
|
|
58
|
-
title = None
|
|
59
|
-
title_terms_count = None
|
|
60
|
-
text_terms_count = None
|
|
61
|
-
sentences = []
|
|
62
|
-
opinions_list = []
|
|
63
|
-
objects_list = []
|
|
64
|
-
s_index = 0
|
|
65
|
-
objects_in_prior_sentences_count = 0
|
|
66
|
-
local_doc_ind = 0
|
|
67
|
-
|
|
68
|
-
for line in RuAttitudesFormatReader.__iter_lines(input_file):
|
|
69
|
-
|
|
70
|
-
if RuAttitudesFormatReader.FILE_KEY in line:
|
|
71
|
-
pass
|
|
72
|
-
|
|
73
|
-
if RuAttitudesFormatReader.OBJ_KEY in line:
|
|
74
|
-
object = RuAttitudesFormatReader.__parse_object(line)
|
|
75
|
-
objects_list.append(object)
|
|
76
|
-
|
|
77
|
-
if RuAttitudesFormatReader.OPINION_KEY in line:
|
|
78
|
-
sentence_opin = RuAttitudesFormatReader.__parse_sentence_opin(line)
|
|
79
|
-
opinions_list.append(sentence_opin)
|
|
80
|
-
|
|
81
|
-
if RuAttitudesFormatReader.FRAMEVAR_TITLE in line:
|
|
82
|
-
# TODO. This information is ommited now.
|
|
83
|
-
pass
|
|
84
|
-
|
|
85
|
-
if RuAttitudesFormatReader.TERMS_IN_TITLE in line:
|
|
86
|
-
title_terms_count = RuAttitudesFormatReader.__parse_terms_in_title_count(line)
|
|
87
|
-
|
|
88
|
-
if RuAttitudesFormatReader.SINDEX_KEY in line:
|
|
89
|
-
s_index = RuAttitudesFormatReader.__parse_sentence_index(line)
|
|
90
|
-
|
|
91
|
-
if RuAttitudesFormatReader.__check_is_title(line):
|
|
92
|
-
title = RuAttitudesSentence(is_title=True,
|
|
93
|
-
text=RuAttitudesFormatReader.__parse_sentence(line, True),
|
|
94
|
-
sentence_opins=opinions_list,
|
|
95
|
-
objects_list=objects_list,
|
|
96
|
-
sentence_index=-1)
|
|
97
|
-
sentences.append(title)
|
|
98
|
-
t_len = RuAttitudesFormatReader.__calculate_terms_in_line(line)
|
|
99
|
-
assert(title_terms_count == t_len or title_terms_count is None)
|
|
100
|
-
reset = True
|
|
101
|
-
|
|
102
|
-
if RuAttitudesFormatReader.STEXT_KEY in line and line.index(RuAttitudesFormatReader.STEXT_KEY) == 0:
|
|
103
|
-
sentence = RuAttitudesSentence(is_title=False,
|
|
104
|
-
text=RuAttitudesFormatReader.__parse_sentence(line, False),
|
|
105
|
-
sentence_opins=opinions_list,
|
|
106
|
-
objects_list=objects_list,
|
|
107
|
-
sentence_index=s_index)
|
|
108
|
-
sentences.append(sentence)
|
|
109
|
-
objects_in_prior_sentences_count += len(objects_list)
|
|
110
|
-
t_len = RuAttitudesFormatReader.__calculate_terms_in_line(line)
|
|
111
|
-
assert(text_terms_count == t_len or text_terms_count is None)
|
|
112
|
-
reset = True
|
|
113
|
-
|
|
114
|
-
if RuAttitudesFormatReader.__check_is_doc_sep(line=line, title=title):
|
|
115
|
-
doc_index = RuAttitudesFormatReader.__assign_doc_index(doc_index_func=get_doc_index_func,
|
|
116
|
-
local_index=local_doc_ind)
|
|
117
|
-
yield RuAttitudesDocument(sentences=sentences,
|
|
118
|
-
doc_index=doc_index)
|
|
119
|
-
local_doc_ind += 1
|
|
120
|
-
sentences = []
|
|
121
|
-
reset = True
|
|
122
|
-
|
|
123
|
-
if RuAttitudesFormatReader.TERMS_IN_TEXT in line:
|
|
124
|
-
text_terms_count = RuAttitudesFormatReader.__parse_terms_in_text_count(line)
|
|
125
|
-
|
|
126
|
-
if reset:
|
|
127
|
-
opinions_list = []
|
|
128
|
-
objects_list = []
|
|
129
|
-
title_terms_count = None
|
|
130
|
-
reset = False
|
|
131
|
-
|
|
132
|
-
if len(sentences) > 0:
|
|
133
|
-
doc_index = RuAttitudesFormatReader.__assign_doc_index(doc_index_func=get_doc_index_func,
|
|
134
|
-
local_index=local_doc_ind)
|
|
135
|
-
yield RuAttitudesDocument(sentences=sentences,
|
|
136
|
-
doc_index=doc_index)
|
|
137
|
-
sentences = []
|
|
138
|
-
|
|
139
|
-
assert(len(sentences) == 0)
|
|
140
|
-
|
|
141
|
-
@staticmethod
|
|
142
|
-
def __assign_doc_index(doc_index_func, local_index):
|
|
143
|
-
assert(callable(doc_index_func))
|
|
144
|
-
return doc_index_func(local_index)
|
|
145
|
-
|
|
146
|
-
@staticmethod
|
|
147
|
-
def __check_is_doc_sep(line, title):
|
|
148
|
-
return RuAttitudesFormatReader.DOC_SEP_KEY in line and title is not None
|
|
149
|
-
|
|
150
|
-
@staticmethod
|
|
151
|
-
def __check_is_title(line):
|
|
152
|
-
return RuAttitudesFormatReader.TITLE_KEY in line
|
|
153
|
-
|
|
154
|
-
@staticmethod
|
|
155
|
-
def __iter_lines(input_file):
|
|
156
|
-
for line in input_file.readlines():
|
|
157
|
-
yield line.decode('utf-8')
|
|
158
|
-
|
|
159
|
-
@staticmethod
|
|
160
|
-
def __calculate_terms_in_line(line):
|
|
161
|
-
assert(isinstance(line, str))
|
|
162
|
-
return len(split_by_whitespaces(line))
|
|
163
|
-
|
|
164
|
-
@staticmethod
|
|
165
|
-
def __parse_sentence(line, is_title):
|
|
166
|
-
assert(isinstance(is_title, bool))
|
|
167
|
-
|
|
168
|
-
key = RuAttitudesFormatReader.STEXT_KEY if not is_title else RuAttitudesFormatReader.TITLE_KEY
|
|
169
|
-
text = line[len(key):]
|
|
170
|
-
return text.strip()
|
|
171
|
-
|
|
172
|
-
@staticmethod
|
|
173
|
-
def __parse_sentence_opin(line):
|
|
174
|
-
line = line[len(RuAttitudesFormatReader.OPINION_KEY):]
|
|
175
|
-
|
|
176
|
-
s_from = line.index('b:(')
|
|
177
|
-
s_to = line.index(')', s_from)
|
|
178
|
-
label = int(line[s_from + 3:s_to])
|
|
179
|
-
|
|
180
|
-
o_from = line.index('oi:[')
|
|
181
|
-
o_to = line.index(']', o_from)
|
|
182
|
-
source_object_id_in_sentence, target_object_id_in_sentence = line[o_from + 4:o_to].split(',')
|
|
183
|
-
|
|
184
|
-
source_object_id_in_sentence = int(source_object_id_in_sentence)
|
|
185
|
-
target_object_id_in_sentence = int(target_object_id_in_sentence)
|
|
186
|
-
|
|
187
|
-
s_from = line.index('si:{')
|
|
188
|
-
s_to = line.index('}', s_from)
|
|
189
|
-
opninion_key = line[s_from+4:s_to]
|
|
190
|
-
|
|
191
|
-
sentence_opin = SentenceOpinion(source_id=source_object_id_in_sentence,
|
|
192
|
-
target_id=target_object_id_in_sentence,
|
|
193
|
-
label_int=label,
|
|
194
|
-
tag=opninion_key)
|
|
195
|
-
|
|
196
|
-
return sentence_opin
|
|
197
|
-
|
|
198
|
-
@staticmethod
def __parse_object(line):
    """Build a TextObject from an object line.

    After removing the OBJ_KEY prefix the following fields are read:
      * `oi:[<int>]`        -> id_in_sentence
      * `'<text>'`          -> value (text between the first two apostrophes)
      * `b:(<int>,<int>)`   -> position, terms_count
      * `si:{<int>}`        -> syn_group_index
      * type tag            -> obj_type (via __try_get_type, may be None)
      * `<AUTH>` presence   -> is_auth

    Raises ValueError when a marker is missing or a number cannot be parsed.
    """
    assert(isinstance(line, str))

    payload = line[len(RuAttitudesFormatReader.OBJ_KEY):]

    # Delimiters are located before any value is converted, so a missing
    # marker is reported ahead of a malformed number.
    id_start = payload.index('oi:[', 0)
    id_stop = payload.index(']', id_start + 1)

    quote_open = payload.index("'", 0)
    quote_close = payload.index("'", quote_open + 1)

    bounds_start = payload.index('b:(')
    bounds_stop = payload.index(')', bounds_start)

    id_in_sentence = int(payload[id_start + 4:id_stop])
    term_index, length = payload[bounds_start + 3:bounds_stop].split(',')
    value = payload[quote_open + 1:quote_close]

    obj_type = RuAttitudesFormatReader.__try_get_type(payload)

    group_start = payload.index('si:{')
    group_stop = payload.index('}', group_start)
    group_index = int(payload[group_start + 4:group_stop])

    is_auth = '<AUTH>' in payload

    return TextObject(id_in_sentence=id_in_sentence,
                      value=value,
                      obj_type=obj_type,
                      position=int(term_index),
                      terms_count=int(length),
                      syn_group_index=group_index,
                      is_auth=is_auth)
|
|
234
|
-
|
|
235
|
-
@staticmethod
def __parse_terms_in_title_count(line):
    """Return the integer that follows the TERMS_IN_TITLE prefix of `line`.

    Raises ValueError when the remainder is not a valid integer.
    """
    prefix_len = len(RuAttitudesFormatReader.TERMS_IN_TITLE)
    return int(line[prefix_len:])
|
|
239
|
-
|
|
240
|
-
@staticmethod
def __parse_terms_in_text_count(line):
    """Return the integer that follows the TERMS_IN_TEXT prefix of `line`.

    Raises ValueError when the remainder is not a valid integer.
    """
    prefix_len = len(RuAttitudesFormatReader.TERMS_IN_TEXT)
    return int(line[prefix_len:])
|
|
244
|
-
|
|
245
|
-
@staticmethod
def __parse_sentence_index(line):
    """Return the integer that follows the SINDEX_KEY prefix of `line`.

    Raises ValueError when the remainder is not a valid integer.
    """
    prefix_len = len(RuAttitudesFormatReader.SINDEX_KEY)
    return int(line[prefix_len:])
|
|
249
|
-
|
|
250
|
-
@staticmethod
def __try_get_type(line):
    """Extract the object type from `line`, or return None when no tag exists.

    Two notations are recognised, checked in this order:
      * `type:<value>` (RuAttitudes-2.0): the value runs up to the first
        occurrence of AUTH_LABEL's first character after the tag when
        AUTH_LABEL is present in the line, otherwise to the end of the line.
      * `t:[<value>]` (RuAttitudes-1.*): the value runs up to the next `]`.

    The extracted value is returned stripped of surrounding whitespace.
    """
    # Tag, utilized in RuAttitudes-2.0 format.
    v2_tag = 'type:'
    if v2_tag in line:
        has_auth = RuAttitudesFormatReader.AUTH_LABEL in line
        start = line.index(v2_tag)
        if has_auth:
            stop = line.index(RuAttitudesFormatReader.AUTH_LABEL[0], start)
        else:
            stop = len(line)
        return line[start + len(v2_tag):stop].strip()

    # Tag, utilized in RuAttitudes-1.* format.
    v1_tag = 't:['
    if v1_tag in line:
        start = line.index(v1_tag)
        stop = line.index(']', start)
        return line[start + len(v1_tag):stop].strip()

    return None
|
|
267
|
-
|
|
268
|
-
# endregion
|
|
@@ -1,73 +0,0 @@
|
|
|
1
|
-
from arekit.common.docs.sentence import BaseDocumentSentence
|
|
2
|
-
from arekit.contrib.source.ruattitudes.opinions.base import SentenceOpinion
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
class RuAttitudesSentence(BaseDocumentSentence):
    """Sentence holding its text objects and sentence-level opinions.

    The sentence may later be attached to an owner (set once via `set_owner`),
    which is consulted to translate sentence-local object indices into
    document-level ones.
    """

    def __init__(self, is_title, text, sentence_opins, objects_list, sentence_index):
        assert(isinstance(is_title, bool))
        assert(isinstance(sentence_opins, list))
        assert(isinstance(objects_list, list))
        assert(isinstance(sentence_index, int))
        super().__init__(text)

        self.__is_title = is_title
        self.__sentence_opins = sentence_opins
        self.__objects = objects_list
        self.__sentence_index = sentence_index
        # Assigned later through `set_owner`.
        self.__owner = None

    # region properties

    @property
    def SentenceIndex(self):
        """Index of this sentence as passed to the constructor."""
        return self.__sentence_index

    @property
    def IsTitle(self):
        """True when the sentence was created as a title."""
        return self.__is_title

    @property
    def Owner(self):
        """Owner assigned via `set_owner`, or None when not yet assigned."""
        return self.__owner

    @property
    def ObjectsCount(self):
        """Number of objects stored in this sentence."""
        return len(self.__objects)

    # endregion

    # region public methods

    def set_owner(self, owner):
        """Attach `owner` to the sentence; raises if an owner is already set."""
        if self.__owner is not None:
            raise Exception("Owner is already declared")
        self.__owner = owner

    def get_objects(self, sentence_opin):
        """Return the (source, target) objects referenced by `sentence_opin`.

        SourceID and TargetID of the opinion are used as indices into the
        sentence's objects list.
        """
        assert(isinstance(sentence_opin, SentenceOpinion))
        objects = self.__objects
        return objects[sentence_opin.SourceID], objects[sentence_opin.TargetID]

    def get_doc_level_text_object_id(self, text_object_ind):
        """Shift `text_object_ind` by the owner's count of earlier objects.

        Requires an owner to be set beforehand.
        """
        declared_before = self.__owner.get_objects_declared_before(self.SentenceIndex)
        return text_object_ind + declared_before

    def iter_objects(self):
        """Yield the sentence objects in stored order."""
        yield from self.__objects

    def find_sentence_opin_by_key(self, key):
        """Return the first opinion whose Tag equals `key`, or None."""
        assert(key is not None)
        matching = (opinion for opinion in self.__sentence_opins if opinion.Tag == key)
        return next(matching, None)

    def iter_sentence_opins(self):
        """Yield the sentence opinions in stored order."""
        yield from self.__sentence_opins

    # endregion
|
|
@@ -1,17 +0,0 @@
|
|
|
1
|
-
from arekit.contrib.source.ruattitudes.io_utils import RuAttitudesIOUtils
|
|
2
|
-
from arekit.contrib.source.synonyms.utils import iter_synonym_groups
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
class RuAttitudesSynonymsCollectionHelper(object):
    """Helper for reading synonym groups from the RuAttitudes archive."""

    @staticmethod
    def iter_groups(version):
        """Yield the items produced by `RuAttitudesIOUtils.iter_from_zip`.

        The archive entry is the one named by
        `RuAttitudesIOUtils.get_synonyms_innerpath()`; its file handle is
        passed to `iter_synonym_groups`. `version` is forwarded unchanged.
        """
        def load_groups(input_file):
            return iter_synonym_groups(
                input_file,
                desc="Loading RuAttitudes SynonymsCollection")

        yield from RuAttitudesIOUtils.iter_from_zip(
            inner_path=RuAttitudesIOUtils.get_synonyms_innerpath(),
            process_func=load_groups,
            version=version)
|