arekit 0.24.0__py3-none-any.whl → 0.25.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- arekit/common/data/storages/base.py +4 -15
- arekit/common/docs/entities_grouping.py +2 -1
- arekit/common/docs/parser.py +27 -22
- arekit/common/pipeline/base.py +12 -16
- arekit/common/pipeline/batching.py +28 -0
- arekit/common/pipeline/context.py +5 -1
- arekit/common/pipeline/items/base.py +39 -2
- arekit/common/pipeline/items/flatten.py +5 -1
- arekit/common/pipeline/items/handle.py +2 -1
- arekit/common/pipeline/items/iter.py +2 -1
- arekit/common/pipeline/items/map.py +2 -1
- arekit/common/pipeline/items/map_nested.py +4 -0
- arekit/common/pipeline/utils.py +32 -0
- arekit/common/text/{partitioning/str.py → partitioning.py} +14 -9
- arekit/common/utils.py +11 -52
- arekit/contrib/utils/data/contents/opinions.py +13 -3
- arekit/contrib/utils/data/storages/jsonl_based.py +2 -1
- arekit/contrib/utils/data/storages/pandas_based.py +2 -17
- arekit/contrib/utils/data/storages/row_cache.py +8 -2
- arekit/contrib/utils/data/storages/sqlite_based.py +18 -0
- arekit/contrib/utils/io_utils/utils.py +1 -18
- arekit/contrib/utils/pipelines/items/text/entities_default.py +2 -2
- arekit/contrib/utils/pipelines/items/text/frames.py +2 -3
- arekit/contrib/utils/pipelines/text_opinion/extraction.py +8 -10
- arekit-0.25.1.data/data/logo.png +0 -0
- arekit-0.25.1.dist-info/METADATA +81 -0
- arekit-0.25.1.dist-info/RECORD +186 -0
- {arekit-0.24.0.dist-info → arekit-0.25.1.dist-info}/WHEEL +1 -1
- arekit/common/data/input/repositories/__init__.py +0 -0
- arekit/common/data/input/repositories/base.py +0 -68
- arekit/common/data/input/repositories/sample.py +0 -22
- arekit/common/data/views/__init__.py +0 -0
- arekit/common/data/views/samples.py +0 -26
- arekit/common/docs/objects_parser.py +0 -37
- arekit/common/text/parser.py +0 -12
- arekit/common/text/partitioning/__init__.py +0 -0
- arekit/common/text/partitioning/base.py +0 -4
- arekit/common/text/partitioning/terms.py +0 -35
- arekit/contrib/networks/__init__.py +0 -0
- arekit/contrib/networks/embedding.py +0 -149
- arekit/contrib/networks/embedding_io.py +0 -18
- arekit/contrib/networks/input/__init__.py +0 -0
- arekit/contrib/networks/input/const.py +0 -6
- arekit/contrib/networks/input/ctx_serialization.py +0 -28
- arekit/contrib/networks/input/embedding/__init__.py +0 -0
- arekit/contrib/networks/input/embedding/matrix.py +0 -29
- arekit/contrib/networks/input/embedding/offsets.py +0 -55
- arekit/contrib/networks/input/formatters/__init__.py +0 -0
- arekit/contrib/networks/input/formatters/pos_mapper.py +0 -22
- arekit/contrib/networks/input/providers/__init__.py +0 -0
- arekit/contrib/networks/input/providers/sample.py +0 -129
- arekit/contrib/networks/input/providers/term_connotation.py +0 -23
- arekit/contrib/networks/input/providers/text.py +0 -24
- arekit/contrib/networks/input/rows_parser.py +0 -47
- arekit/contrib/networks/input/term_types.py +0 -13
- arekit/contrib/networks/input/terms_mapping.py +0 -60
- arekit/contrib/networks/vectorizer.py +0 -6
- arekit/contrib/source/__init__.py +0 -0
- arekit/contrib/source/brat/__init__.py +0 -0
- arekit/contrib/source/brat/annot.py +0 -84
- arekit/contrib/source/brat/doc.py +0 -28
- arekit/contrib/source/brat/entities/__init__.py +0 -0
- arekit/contrib/source/brat/entities/compound.py +0 -13
- arekit/contrib/source/brat/entities/entity.py +0 -42
- arekit/contrib/source/brat/entities/parser.py +0 -53
- arekit/contrib/source/brat/opinions/__init__.py +0 -0
- arekit/contrib/source/brat/opinions/converter.py +0 -19
- arekit/contrib/source/brat/relation.py +0 -32
- arekit/contrib/source/brat/sentence.py +0 -69
- arekit/contrib/source/brat/sentences_reader.py +0 -128
- arekit/contrib/source/download.py +0 -41
- arekit/contrib/source/nerel/__init__.py +0 -0
- arekit/contrib/source/nerel/entities.py +0 -55
- arekit/contrib/source/nerel/folding/__init__.py +0 -0
- arekit/contrib/source/nerel/folding/fixed.py +0 -74
- arekit/contrib/source/nerel/io_utils.py +0 -62
- arekit/contrib/source/nerel/labels.py +0 -241
- arekit/contrib/source/nerel/reader.py +0 -46
- arekit/contrib/source/nerel/utils.py +0 -24
- arekit/contrib/source/nerel/versions.py +0 -12
- arekit/contrib/source/nerelbio/__init__.py +0 -0
- arekit/contrib/source/nerelbio/io_utils.py +0 -62
- arekit/contrib/source/nerelbio/labels.py +0 -265
- arekit/contrib/source/nerelbio/reader.py +0 -8
- arekit/contrib/source/nerelbio/versions.py +0 -8
- arekit/contrib/source/ruattitudes/__init__.py +0 -0
- arekit/contrib/source/ruattitudes/collection.py +0 -36
- arekit/contrib/source/ruattitudes/doc.py +0 -51
- arekit/contrib/source/ruattitudes/doc_brat.py +0 -44
- arekit/contrib/source/ruattitudes/entity/__init__.py +0 -0
- arekit/contrib/source/ruattitudes/entity/parser.py +0 -7
- arekit/contrib/source/ruattitudes/io_utils.py +0 -56
- arekit/contrib/source/ruattitudes/labels_fmt.py +0 -12
- arekit/contrib/source/ruattitudes/opinions/__init__.py +0 -0
- arekit/contrib/source/ruattitudes/opinions/base.py +0 -28
- arekit/contrib/source/ruattitudes/opinions/converter.py +0 -37
- arekit/contrib/source/ruattitudes/reader.py +0 -268
- arekit/contrib/source/ruattitudes/sentence.py +0 -73
- arekit/contrib/source/ruattitudes/synonyms.py +0 -17
- arekit/contrib/source/ruattitudes/text_object.py +0 -59
- arekit/contrib/source/rusentiframes/__init__.py +0 -0
- arekit/contrib/source/rusentiframes/collection.py +0 -157
- arekit/contrib/source/rusentiframes/effect.py +0 -24
- arekit/contrib/source/rusentiframes/io_utils.py +0 -19
- arekit/contrib/source/rusentiframes/labels_fmt.py +0 -22
- arekit/contrib/source/rusentiframes/polarity.py +0 -35
- arekit/contrib/source/rusentiframes/role.py +0 -15
- arekit/contrib/source/rusentiframes/state.py +0 -24
- arekit/contrib/source/rusentiframes/types.py +0 -42
- arekit/contrib/source/rusentiframes/value.py +0 -2
- arekit/contrib/source/rusentrel/__init__.py +0 -0
- arekit/contrib/source/rusentrel/const.py +0 -3
- arekit/contrib/source/rusentrel/docs_reader.py +0 -51
- arekit/contrib/source/rusentrel/entities.py +0 -26
- arekit/contrib/source/rusentrel/io_utils.py +0 -125
- arekit/contrib/source/rusentrel/labels_fmt.py +0 -12
- arekit/contrib/source/rusentrel/opinions/__init__.py +0 -0
- arekit/contrib/source/rusentrel/opinions/collection.py +0 -30
- arekit/contrib/source/rusentrel/opinions/converter.py +0 -40
- arekit/contrib/source/rusentrel/opinions/provider.py +0 -54
- arekit/contrib/source/rusentrel/opinions/writer.py +0 -42
- arekit/contrib/source/rusentrel/synonyms.py +0 -17
- arekit/contrib/source/sentinerel/__init__.py +0 -0
- arekit/contrib/source/sentinerel/entities.py +0 -52
- arekit/contrib/source/sentinerel/folding/__init__.py +0 -0
- arekit/contrib/source/sentinerel/folding/factory.py +0 -31
- arekit/contrib/source/sentinerel/folding/fixed.py +0 -70
- arekit/contrib/source/sentinerel/io_utils.py +0 -87
- arekit/contrib/source/sentinerel/labels.py +0 -53
- arekit/contrib/source/sentinerel/labels_scaler.py +0 -30
- arekit/contrib/source/sentinerel/reader.py +0 -42
- arekit/contrib/source/synonyms/__init__.py +0 -0
- arekit/contrib/source/synonyms/utils.py +0 -19
- arekit/contrib/source/zip_utils.py +0 -47
- arekit/contrib/utils/connotations/__init__.py +0 -0
- arekit/contrib/utils/connotations/rusentiframes_sentiment.py +0 -23
- arekit/contrib/utils/data/readers/__init__.py +0 -0
- arekit/contrib/utils/data/readers/base.py +0 -7
- arekit/contrib/utils/data/readers/csv_pd.py +0 -38
- arekit/contrib/utils/data/readers/jsonl.py +0 -15
- arekit/contrib/utils/data/service/__init__.py +0 -0
- arekit/contrib/utils/data/service/balance.py +0 -50
- arekit/contrib/utils/data/writers/csv_native.py +0 -63
- arekit/contrib/utils/data/writers/csv_pd.py +0 -40
- arekit/contrib/utils/data/writers/json_opennre.py +0 -132
- arekit/contrib/utils/data/writers/sqlite_native.py +0 -110
- arekit/contrib/utils/download.py +0 -77
- arekit/contrib/utils/embeddings/__init__.py +0 -0
- arekit/contrib/utils/embeddings/rusvectores.py +0 -58
- arekit/contrib/utils/embeddings/tokens.py +0 -30
- arekit/contrib/utils/io_utils/embedding.py +0 -72
- arekit/contrib/utils/io_utils/opinions.py +0 -37
- arekit/contrib/utils/io_utils/samples.py +0 -79
- arekit/contrib/utils/lexicons/__init__.py +0 -0
- arekit/contrib/utils/lexicons/lexicon.py +0 -41
- arekit/contrib/utils/lexicons/relation.py +0 -42
- arekit/contrib/utils/lexicons/rusentilex.py +0 -37
- arekit/contrib/utils/nn/__init__.py +0 -0
- arekit/contrib/utils/nn/rows.py +0 -83
- arekit/contrib/utils/np_utils/__init__.py +0 -0
- arekit/contrib/utils/np_utils/embedding.py +0 -22
- arekit/contrib/utils/np_utils/npz_utils.py +0 -13
- arekit/contrib/utils/np_utils/vocab.py +0 -20
- arekit/contrib/utils/pipelines/items/sampling/__init__.py +0 -0
- arekit/contrib/utils/pipelines/items/sampling/base.py +0 -99
- arekit/contrib/utils/pipelines/items/sampling/networks.py +0 -54
- arekit/contrib/utils/pipelines/items/text/frames_lemmatized.py +0 -36
- arekit/contrib/utils/pipelines/items/text/frames_negation.py +0 -32
- arekit/contrib/utils/pipelines/items/text/terms_splitter.py +0 -10
- arekit/contrib/utils/pipelines/items/text/tokenizer.py +0 -107
- arekit/contrib/utils/pipelines/items/text/translator.py +0 -135
- arekit/contrib/utils/pipelines/sources/__init__.py +0 -0
- arekit/contrib/utils/pipelines/sources/nerel/__init__.py +0 -0
- arekit/contrib/utils/pipelines/sources/nerel/doc_provider.py +0 -27
- arekit/contrib/utils/pipelines/sources/nerel/extract_text_relations.py +0 -65
- arekit/contrib/utils/pipelines/sources/nerel/labels_fmt.py +0 -60
- arekit/contrib/utils/pipelines/sources/nerel_bio/__init__.py +0 -0
- arekit/contrib/utils/pipelines/sources/nerel_bio/doc_provider.py +0 -29
- arekit/contrib/utils/pipelines/sources/nerel_bio/extrat_text_relations.py +0 -64
- arekit/contrib/utils/pipelines/sources/nerel_bio/labels_fmt.py +0 -79
- arekit/contrib/utils/pipelines/sources/ruattitudes/__init__.py +0 -0
- arekit/contrib/utils/pipelines/sources/ruattitudes/doc_provider.py +0 -56
- arekit/contrib/utils/pipelines/sources/ruattitudes/entity_filter.py +0 -20
- arekit/contrib/utils/pipelines/sources/ruattitudes/extract_text_opinions.py +0 -65
- arekit/contrib/utils/pipelines/sources/rusentrel/__init__.py +0 -0
- arekit/contrib/utils/pipelines/sources/rusentrel/doc_provider.py +0 -21
- arekit/contrib/utils/pipelines/sources/rusentrel/extract_text_opinions.py +0 -107
- arekit/contrib/utils/pipelines/sources/sentinerel/__init__.py +0 -0
- arekit/contrib/utils/pipelines/sources/sentinerel/doc_provider.py +0 -29
- arekit/contrib/utils/pipelines/sources/sentinerel/entity_filter.py +0 -62
- arekit/contrib/utils/pipelines/sources/sentinerel/extract_text_opinions.py +0 -180
- arekit/contrib/utils/pipelines/sources/sentinerel/labels_fmt.py +0 -50
- arekit/contrib/utils/pipelines/text_opinion/annot/predefined.py +0 -88
- arekit/contrib/utils/processing/languages/__init__.py +0 -0
- arekit/contrib/utils/processing/languages/mods.py +0 -12
- arekit/contrib/utils/processing/languages/pos.py +0 -23
- arekit/contrib/utils/processing/languages/ru/__init__.py +0 -0
- arekit/contrib/utils/processing/languages/ru/cases.py +0 -78
- arekit/contrib/utils/processing/languages/ru/constants.py +0 -6
- arekit/contrib/utils/processing/languages/ru/mods.py +0 -13
- arekit/contrib/utils/processing/languages/ru/number.py +0 -23
- arekit/contrib/utils/processing/languages/ru/pos_service.py +0 -36
- arekit/contrib/utils/processing/lemmatization/__init__.py +0 -0
- arekit/contrib/utils/processing/lemmatization/mystem.py +0 -51
- arekit/contrib/utils/processing/pos/__init__.py +0 -0
- arekit/contrib/utils/processing/pos/base.py +0 -12
- arekit/contrib/utils/processing/pos/mystem_wrap.py +0 -134
- arekit/contrib/utils/processing/pos/russian.py +0 -10
- arekit/contrib/utils/processing/text/__init__.py +0 -0
- arekit/contrib/utils/processing/text/tokens.py +0 -127
- arekit/contrib/utils/resources.py +0 -25
- arekit/contrib/utils/serializer.py +0 -43
- arekit/contrib/utils/sources/__init__.py +0 -0
- arekit/contrib/utils/sources/sentinerel/__init__.py +0 -0
- arekit/contrib/utils/sources/sentinerel/text_opinion/__init__.py +0 -0
- arekit/contrib/utils/sources/sentinerel/text_opinion/prof_per_org_filter.py +0 -63
- arekit/contrib/utils/vectorizers/__init__.py +0 -0
- arekit/contrib/utils/vectorizers/bpe.py +0 -93
- arekit/contrib/utils/vectorizers/random_norm.py +0 -39
- arekit/download_data.py +0 -11
- arekit-0.24.0.dist-info/METADATA +0 -23
- arekit-0.24.0.dist-info/RECORD +0 -374
- {arekit-0.24.0.dist-info → arekit-0.25.1.dist-info}/LICENSE +0 -0
- {arekit-0.24.0.dist-info → arekit-0.25.1.dist-info}/top_level.txt +0 -0
|
@@ -1,59 +0,0 @@
|
|
|
1
|
-
from arekit.common.bound import Bound
|
|
2
|
-
from arekit.contrib.source.brat.entities.entity import BratEntity
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
class TextObject(object):
|
|
6
|
-
"""
|
|
7
|
-
Considering any part of text, labeled by 'position', and 'type'
|
|
8
|
-
The latter is used to emphasize the entity type.
|
|
9
|
-
"""
|
|
10
|
-
|
|
11
|
-
def __init__(self, id_in_sentence, value, obj_type, position, terms_count, syn_group_index, is_auth):
|
|
12
|
-
assert(isinstance(id_in_sentence, int))
|
|
13
|
-
assert(isinstance(value, str))
|
|
14
|
-
assert(isinstance(position, int))
|
|
15
|
-
assert(isinstance(terms_count, int) and terms_count > 0)
|
|
16
|
-
assert(isinstance(obj_type, str) or obj_type is None)
|
|
17
|
-
assert(isinstance(syn_group_index, int))
|
|
18
|
-
assert(isinstance(is_auth, bool))
|
|
19
|
-
self.__value = value
|
|
20
|
-
self.__type = obj_type
|
|
21
|
-
self.__id_in_sentence = id_in_sentence
|
|
22
|
-
self.__syn_group_index = syn_group_index
|
|
23
|
-
self.__is_auth = is_auth
|
|
24
|
-
self.__bound = Bound(pos=position, length=terms_count)
|
|
25
|
-
|
|
26
|
-
def to_entity(self, to_doc_id_func):
|
|
27
|
-
assert(callable(to_doc_id_func))
|
|
28
|
-
return BratEntity(id_in_doc=to_doc_id_func(self.__id_in_sentence),
|
|
29
|
-
value=self.__value if len(self.__value) > 0 else '[empty]',
|
|
30
|
-
e_type=self.__type,
|
|
31
|
-
index_begin=self.__bound.Position,
|
|
32
|
-
index_end=self.__bound.Position + self.__bound.Length,
|
|
33
|
-
group_index=self.__syn_group_index,
|
|
34
|
-
# In the case of RuAttitudes collection we do not support childs.
|
|
35
|
-
childs=None)
|
|
36
|
-
|
|
37
|
-
# region properties
|
|
38
|
-
|
|
39
|
-
@property
|
|
40
|
-
def Value(self):
|
|
41
|
-
return self.__value
|
|
42
|
-
|
|
43
|
-
@property
|
|
44
|
-
def Type(self):
|
|
45
|
-
return self.__type
|
|
46
|
-
|
|
47
|
-
@property
|
|
48
|
-
def IdInSentence(self):
|
|
49
|
-
return self.__id_in_sentence
|
|
50
|
-
|
|
51
|
-
@property
|
|
52
|
-
def Bound(self):
|
|
53
|
-
return self.__bound
|
|
54
|
-
|
|
55
|
-
@property
|
|
56
|
-
def IsAuthorized(self):
|
|
57
|
-
return self.__is_auth
|
|
58
|
-
|
|
59
|
-
# endregion
|
|
File without changes
|
|
@@ -1,157 +0,0 @@
|
|
|
1
|
-
import json
|
|
2
|
-
|
|
3
|
-
from arekit.common.labels.str_fmt import StringLabelsFormatter
|
|
4
|
-
from arekit.contrib.source.rusentiframes.effect import FrameEffect
|
|
5
|
-
from arekit.contrib.source.rusentiframes.io_utils import RuSentiFramesIOUtils
|
|
6
|
-
from arekit.contrib.source.rusentiframes.types import RuSentiFramesVersions
|
|
7
|
-
from arekit.contrib.source.rusentiframes.labels_fmt import RuSentiFramesLabelsFormatter, \
|
|
8
|
-
RuSentiFramesEffectLabelsFormatter
|
|
9
|
-
from arekit.contrib.source.rusentiframes.polarity import RuSentiFramesFramePolarity
|
|
10
|
-
from arekit.contrib.source.rusentiframes.role import FrameRole
|
|
11
|
-
from arekit.contrib.source.rusentiframes.state import FrameState
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
class RuSentiFramesCollection(object):
|
|
15
|
-
|
|
16
|
-
__frames_key = "frames"
|
|
17
|
-
__polarity_key = "polarity"
|
|
18
|
-
__state_key = "state"
|
|
19
|
-
__effect_key = "effect"
|
|
20
|
-
__variants_key = "variants"
|
|
21
|
-
|
|
22
|
-
def __init__(self, data, labels_fmt, effect_labels_fmt, lowercase_variants=True):
|
|
23
|
-
""" data: dict
|
|
24
|
-
Has the following structure of the frame contents:
|
|
25
|
-
{
|
|
26
|
-
"frame_id": [ ... variants string list ... ]
|
|
27
|
-
...
|
|
28
|
-
}
|
|
29
|
-
lowercase_variants: bool
|
|
30
|
-
If 'True', forcely treat frame-variants as case-insensitive (lowercased)
|
|
31
|
-
or avoiding lowercasing operation in case of 'False'.
|
|
32
|
-
"""
|
|
33
|
-
assert(isinstance(data, dict))
|
|
34
|
-
assert(isinstance(labels_fmt, StringLabelsFormatter))
|
|
35
|
-
assert(isinstance(effect_labels_fmt, StringLabelsFormatter))
|
|
36
|
-
self.__labels_fmt = labels_fmt
|
|
37
|
-
self.__effect_labels_fmt = effect_labels_fmt
|
|
38
|
-
self.__data = data
|
|
39
|
-
|
|
40
|
-
if lowercase_variants:
|
|
41
|
-
for frame_id, frame in self.__data.items():
|
|
42
|
-
frame[self.__variants_key] = [variant.lower() for variant in frame[self.__variants_key]]
|
|
43
|
-
|
|
44
|
-
# region classmethods
|
|
45
|
-
|
|
46
|
-
@classmethod
|
|
47
|
-
def read(cls, version, labels_fmt, effect_labels_fmt):
|
|
48
|
-
assert(isinstance(version, RuSentiFramesVersions))
|
|
49
|
-
assert(isinstance(labels_fmt, RuSentiFramesLabelsFormatter))
|
|
50
|
-
assert(isinstance(effect_labels_fmt, RuSentiFramesEffectLabelsFormatter))
|
|
51
|
-
|
|
52
|
-
return RuSentiFramesIOUtils.read_from_zip(
|
|
53
|
-
inner_path=RuSentiFramesIOUtils.get_collection_filepath(),
|
|
54
|
-
process_func=lambda input_file: cls.__from_json(
|
|
55
|
-
input_file=input_file,
|
|
56
|
-
labels_fmt=labels_fmt,
|
|
57
|
-
effect_labels_fmt=effect_labels_fmt),
|
|
58
|
-
version=version)
|
|
59
|
-
|
|
60
|
-
@classmethod
|
|
61
|
-
def __from_json(cls, input_file, labels_fmt, effect_labels_fmt):
|
|
62
|
-
data = json.load(input_file)
|
|
63
|
-
return cls(data=data,
|
|
64
|
-
labels_fmt=labels_fmt,
|
|
65
|
-
effect_labels_fmt=effect_labels_fmt)
|
|
66
|
-
|
|
67
|
-
# endregion
|
|
68
|
-
|
|
69
|
-
# region public 'try get' methods
|
|
70
|
-
|
|
71
|
-
def try_get_frame_polarity(self, frame_id, role_src, role_dest):
|
|
72
|
-
assert(isinstance(role_src, str))
|
|
73
|
-
assert(isinstance(role_dest, str))
|
|
74
|
-
|
|
75
|
-
if not self.__check_has_frame_polarity_key(frame_id):
|
|
76
|
-
return None
|
|
77
|
-
|
|
78
|
-
for args in self.__data[frame_id][self.__frames_key][self.__polarity_key]:
|
|
79
|
-
if args[0] == role_src and args[1] == role_dest:
|
|
80
|
-
return self.__frame_polarity_from_args(args)
|
|
81
|
-
return None
|
|
82
|
-
|
|
83
|
-
# endregion
|
|
84
|
-
|
|
85
|
-
# region public 'get' methods
|
|
86
|
-
|
|
87
|
-
def get_frame_roles(self, frame_id):
|
|
88
|
-
assert(isinstance(frame_id, str))
|
|
89
|
-
return [FrameRole(source=key, description=value)
|
|
90
|
-
for key, value in self.__data[frame_id]["roles"].items()]
|
|
91
|
-
|
|
92
|
-
def get_frame_polarities(self, frame_id):
|
|
93
|
-
assert(isinstance(frame_id, str))
|
|
94
|
-
|
|
95
|
-
if not self.__check_has_frame_polarity_key(frame_id):
|
|
96
|
-
return []
|
|
97
|
-
|
|
98
|
-
return [self.__frame_polarity_from_args(args)
|
|
99
|
-
for args in self.__data[frame_id][self.__frames_key][self.__polarity_key]]
|
|
100
|
-
|
|
101
|
-
def get_frame_states(self, frame_id):
|
|
102
|
-
assert(isinstance(frame_id, str))
|
|
103
|
-
|
|
104
|
-
if self.__state_key not in self.__data[frame_id][self.__frames_key]:
|
|
105
|
-
return []
|
|
106
|
-
|
|
107
|
-
return [FrameState(role=args[0], label=self.__labels_fmt.str_to_label(args[1]), prob=args[2])
|
|
108
|
-
for args in self.__data[frame_id][self.__frames_key][self.__state_key]]
|
|
109
|
-
|
|
110
|
-
def get_frame_titles(self, frame_id):
|
|
111
|
-
assert(isinstance(frame_id, str))
|
|
112
|
-
return self.__data[frame_id]["title"]
|
|
113
|
-
|
|
114
|
-
def get_frame_variants(self, frame_id):
|
|
115
|
-
return self.__data[frame_id][self.__variants_key]
|
|
116
|
-
|
|
117
|
-
def get_frame_values(self, frame_id):
|
|
118
|
-
assert(isinstance(frame_id, str))
|
|
119
|
-
# TODO. Not implemented yet.
|
|
120
|
-
pass
|
|
121
|
-
|
|
122
|
-
def get_frame_effects(self, frame_id):
|
|
123
|
-
assert(isinstance(frame_id, str))
|
|
124
|
-
|
|
125
|
-
if self.__effect_key not in self.__data[frame_id][self.__frames_key]:
|
|
126
|
-
return []
|
|
127
|
-
|
|
128
|
-
return [FrameEffect(role=args[0], label=self.__effect_labels_fmt.str_to_label(args[1]), prob=args[2])
|
|
129
|
-
for args in self.__data[frame_id][self.__frames_key][self.__effect_key]]
|
|
130
|
-
|
|
131
|
-
# endregion
|
|
132
|
-
|
|
133
|
-
# region public 'iter' methods
|
|
134
|
-
|
|
135
|
-
def iter_frames_ids(self):
|
|
136
|
-
for frame_id in self.__data.keys():
|
|
137
|
-
yield frame_id
|
|
138
|
-
|
|
139
|
-
def iter_frame_id_and_variants(self):
|
|
140
|
-
for id, frame in self.__data.items():
|
|
141
|
-
for variant in frame[self.__variants_key]:
|
|
142
|
-
yield id, variant
|
|
143
|
-
|
|
144
|
-
# endregion
|
|
145
|
-
|
|
146
|
-
# region private methods
|
|
147
|
-
|
|
148
|
-
def __check_has_frame_polarity_key(self, frame_id):
|
|
149
|
-
return self.__polarity_key in self.__data[frame_id][self.__frames_key]
|
|
150
|
-
|
|
151
|
-
def __frame_polarity_from_args(self, args):
|
|
152
|
-
return RuSentiFramesFramePolarity(role_src=args[0],
|
|
153
|
-
role_dest=args[1],
|
|
154
|
-
label=self.__labels_fmt.str_to_label(args[2]),
|
|
155
|
-
prob=args[3])
|
|
156
|
-
|
|
157
|
-
# endregion
|
|
@@ -1,24 +0,0 @@
|
|
|
1
|
-
from arekit.common.labels.base import Label
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
class FrameEffect(object):
|
|
5
|
-
|
|
6
|
-
def __init__(self, role, label, prob):
|
|
7
|
-
assert(isinstance(role, str))
|
|
8
|
-
assert(isinstance(label, Label))
|
|
9
|
-
assert(isinstance(prob, float))
|
|
10
|
-
self.__role = role
|
|
11
|
-
self.__label = label
|
|
12
|
-
self.__prob = prob
|
|
13
|
-
|
|
14
|
-
@property
|
|
15
|
-
def Role(self):
|
|
16
|
-
return self.__role
|
|
17
|
-
|
|
18
|
-
@property
|
|
19
|
-
def Label(self):
|
|
20
|
-
return self.__label
|
|
21
|
-
|
|
22
|
-
@property
|
|
23
|
-
def Prob(self):
|
|
24
|
-
return self.__prob
|
|
@@ -1,19 +0,0 @@
|
|
|
1
|
-
from os import path
|
|
2
|
-
|
|
3
|
-
from arekit.contrib.source.zip_utils import ZipArchiveUtils
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
class RuSentiFramesIOUtils(ZipArchiveUtils):
|
|
7
|
-
|
|
8
|
-
# region internal methods
|
|
9
|
-
|
|
10
|
-
@staticmethod
|
|
11
|
-
def get_archive_filepath(version):
|
|
12
|
-
assert(isinstance(version, str))
|
|
13
|
-
return path.join(RuSentiFramesIOUtils.get_data_root(), "rusentiframes-{version}.zip".format(version=version))
|
|
14
|
-
|
|
15
|
-
@staticmethod
|
|
16
|
-
def get_collection_filepath():
|
|
17
|
-
return "frames.json"
|
|
18
|
-
|
|
19
|
-
# endregion
|
|
@@ -1,22 +0,0 @@
|
|
|
1
|
-
from arekit.common.labels.base import Label
|
|
2
|
-
from arekit.common.labels.str_fmt import StringLabelsFormatter
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
class RuSentiFramesLabelsFormatter(StringLabelsFormatter):
|
|
6
|
-
|
|
7
|
-
def __init__(self, pos_label_type, neg_label_type):
|
|
8
|
-
assert(issubclass(pos_label_type, Label))
|
|
9
|
-
assert(issubclass(neg_label_type, Label))
|
|
10
|
-
stol = {'neg': neg_label_type, 'pos': pos_label_type}
|
|
11
|
-
super(RuSentiFramesLabelsFormatter, self).__init__(stol=stol)
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
class RuSentiFramesEffectLabelsFormatter(StringLabelsFormatter):
|
|
15
|
-
""" Effect formatter utilizes '-' and '+' signs.
|
|
16
|
-
"""
|
|
17
|
-
|
|
18
|
-
def __init__(self, pos_label_type, neg_label_type):
|
|
19
|
-
assert(issubclass(pos_label_type, Label))
|
|
20
|
-
assert(issubclass(neg_label_type, Label))
|
|
21
|
-
stol = {'-': neg_label_type, '+': pos_label_type}
|
|
22
|
-
super(RuSentiFramesEffectLabelsFormatter, self).__init__(stol=stol)
|
|
@@ -1,35 +0,0 @@
|
|
|
1
|
-
from arekit.common.frames.connotations.descriptor import FrameConnotationDescriptor
|
|
2
|
-
from arekit.common.labels.base import Label
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
class RuSentiFramesFramePolarity(FrameConnotationDescriptor):
|
|
6
|
-
"""
|
|
7
|
-
Polarity description between source (Agent) towards dest (Theme)
|
|
8
|
-
The latter are related to roles of frame polarity.
|
|
9
|
-
"""
|
|
10
|
-
|
|
11
|
-
def __init__(self, role_src, role_dest, label, prob):
|
|
12
|
-
assert(isinstance(role_src, str))
|
|
13
|
-
assert(isinstance(role_dest, str))
|
|
14
|
-
assert(isinstance(label, Label))
|
|
15
|
-
assert(isinstance(prob, float))
|
|
16
|
-
self.__role_src = role_src
|
|
17
|
-
self.__role_dest = role_dest
|
|
18
|
-
self.__label = label
|
|
19
|
-
self.__prob = prob
|
|
20
|
-
|
|
21
|
-
@property
|
|
22
|
-
def Source(self):
|
|
23
|
-
return self.__role_src
|
|
24
|
-
|
|
25
|
-
@property
|
|
26
|
-
def Destination(self):
|
|
27
|
-
return self.__role_dest
|
|
28
|
-
|
|
29
|
-
@property
|
|
30
|
-
def Label(self):
|
|
31
|
-
return self.__label
|
|
32
|
-
|
|
33
|
-
@property
|
|
34
|
-
def Prob(self):
|
|
35
|
-
return self.__prob
|
|
@@ -1,15 +0,0 @@
|
|
|
1
|
-
class FrameRole(object):
|
|
2
|
-
|
|
3
|
-
def __init__(self, source, description):
|
|
4
|
-
assert(isinstance(source, str))
|
|
5
|
-
assert(isinstance(description, str))
|
|
6
|
-
self.__source = source
|
|
7
|
-
self.__description = description
|
|
8
|
-
|
|
9
|
-
@property
|
|
10
|
-
def Source(self):
|
|
11
|
-
return self.__source
|
|
12
|
-
|
|
13
|
-
@property
|
|
14
|
-
def Description(self):
|
|
15
|
-
return self.__description
|
|
@@ -1,24 +0,0 @@
|
|
|
1
|
-
from arekit.common.labels.base import Label
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
class FrameState(object):
|
|
5
|
-
|
|
6
|
-
def __init__(self, role, label, prob):
|
|
7
|
-
assert(isinstance(role, str))
|
|
8
|
-
assert(isinstance(label, Label))
|
|
9
|
-
assert(isinstance(prob, float))
|
|
10
|
-
self.__role = role
|
|
11
|
-
self.__label = label
|
|
12
|
-
self.__prob = prob
|
|
13
|
-
|
|
14
|
-
@property
|
|
15
|
-
def Role(self):
|
|
16
|
-
return self.__role
|
|
17
|
-
|
|
18
|
-
@property
|
|
19
|
-
def Label(self):
|
|
20
|
-
return self.__label
|
|
21
|
-
|
|
22
|
-
@property
|
|
23
|
-
def Prob(self):
|
|
24
|
-
return self.__prob
|
|
@@ -1,42 +0,0 @@
|
|
|
1
|
-
from enum import Enum
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
class RuSentiFramesVersions(Enum):
|
|
5
|
-
|
|
6
|
-
# Papers for description:
|
|
7
|
-
# Distant Supervision for Sentiment Attitude Extraction (RANLP-2019)
|
|
8
|
-
# Nicolay Rusnachenko, Natalia Loukachevitch, Elena Tutubalina
|
|
9
|
-
# https://www.aclweb.org/anthology/R19-1118/
|
|
10
|
-
# https://github.com/nicolay-r/RuSentiFrames/tree/v1.0
|
|
11
|
-
V10 = "v1_0"
|
|
12
|
-
|
|
13
|
-
# Papers for description:
|
|
14
|
-
# Sentiment Frames for Attitude Extraction in Russian (DIALOG-2020)
|
|
15
|
-
# Natalia Loukachevitch, Nicolay Rusnachenko
|
|
16
|
-
# https://github.com/nicolay-r/RuSentiFrames/tree/v2.0
|
|
17
|
-
V20 = "v2_0"
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
class RuSentiFramesVersionsService:
|
|
21
|
-
|
|
22
|
-
@staticmethod
|
|
23
|
-
def __iter_supported_types():
|
|
24
|
-
return iter(RuSentiFramesVersions)
|
|
25
|
-
|
|
26
|
-
@staticmethod
|
|
27
|
-
def get_name_by_type(version_type):
|
|
28
|
-
assert(isinstance(version_type, RuSentiFramesVersions))
|
|
29
|
-
return version_type.value
|
|
30
|
-
|
|
31
|
-
@staticmethod
|
|
32
|
-
def get_type_by_name(name):
|
|
33
|
-
for version_type in RuSentiFramesVersionsService.__iter_supported_types():
|
|
34
|
-
if version_type.value == name:
|
|
35
|
-
return version_type
|
|
36
|
-
|
|
37
|
-
raise Exception("RuSentiFrames version by name `{}` was hot found!".format(name))
|
|
38
|
-
|
|
39
|
-
@staticmethod
|
|
40
|
-
def iter_supported_names():
|
|
41
|
-
for version_type in RuSentiFramesVersionsService.__iter_supported_types():
|
|
42
|
-
yield version_type.value
|
|
File without changes
|
|
@@ -1,51 +0,0 @@
|
|
|
1
|
-
from arekit.common.synonyms.base import SynonymsCollection
|
|
2
|
-
from arekit.contrib.source.brat.doc import BratDocument
|
|
3
|
-
from arekit.contrib.source.brat.sentences_reader import BratDocumentSentencesReader
|
|
4
|
-
from arekit.contrib.source.rusentrel.entities import RuSentRelDocumentEntityCollection
|
|
5
|
-
from arekit.contrib.source.rusentrel.io_utils import RuSentRelVersions, RuSentRelIOUtils
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
class RuSentRelDocumentsReader(object):
|
|
9
|
-
|
|
10
|
-
# region class methods
|
|
11
|
-
|
|
12
|
-
@staticmethod
|
|
13
|
-
def hide_first_entry(line, entry, hide_with=" "):
|
|
14
|
-
|
|
15
|
-
index = line.find(entry)
|
|
16
|
-
|
|
17
|
-
if index >= 0:
|
|
18
|
-
pad = hide_with * len(entry)
|
|
19
|
-
before = line[0:index]
|
|
20
|
-
after = line[index+len(entry):]
|
|
21
|
-
line = "".join([before, pad, after])
|
|
22
|
-
|
|
23
|
-
return line
|
|
24
|
-
|
|
25
|
-
@staticmethod
|
|
26
|
-
def read_document(doc_id, synonyms, version=RuSentRelVersions.V11, target_doc_id=None):
|
|
27
|
-
assert(isinstance(synonyms, SynonymsCollection))
|
|
28
|
-
assert(isinstance(version, RuSentRelVersions))
|
|
29
|
-
assert(isinstance(target_doc_id, int) or target_doc_id is None)
|
|
30
|
-
|
|
31
|
-
def file_to_doc(input_file):
|
|
32
|
-
|
|
33
|
-
sentences = BratDocumentSentencesReader.from_file(
|
|
34
|
-
input_file=input_file,
|
|
35
|
-
entities=entities,
|
|
36
|
-
line_handler=lambda line: RuSentRelDocumentsReader.hide_first_entry(line, entry="{Author, Unknown}"),
|
|
37
|
-
skip_entity_func=lambda entity: entity.Value in ['author', 'unknown'])
|
|
38
|
-
|
|
39
|
-
return BratDocument(doc_id=target_doc_id if target_doc_id is not None else doc_id,
|
|
40
|
-
sentences=sentences,
|
|
41
|
-
text_relations=[])
|
|
42
|
-
|
|
43
|
-
entities = RuSentRelDocumentEntityCollection.read_collection(
|
|
44
|
-
doc_id=doc_id,
|
|
45
|
-
synonyms=synonyms,
|
|
46
|
-
version=version)
|
|
47
|
-
|
|
48
|
-
return RuSentRelIOUtils.read_from_zip(
|
|
49
|
-
inner_path=RuSentRelIOUtils.get_doc_innerpath(index=doc_id, version=version),
|
|
50
|
-
process_func=file_to_doc,
|
|
51
|
-
version=version)
|
|
@@ -1,26 +0,0 @@
|
|
|
1
|
-
from arekit.common.entities.collection import EntityCollection
|
|
2
|
-
from arekit.common.synonyms.base import SynonymsCollection
|
|
3
|
-
from arekit.contrib.source.brat.annot import BratAnnotationParser
|
|
4
|
-
from arekit.contrib.source.rusentrel.io_utils import RuSentRelVersions, RuSentRelIOUtils
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
class RuSentRelDocumentEntityCollection(EntityCollection):
|
|
8
|
-
|
|
9
|
-
def __init__(self, entities, value_to_group_id_func):
|
|
10
|
-
super(RuSentRelDocumentEntityCollection, self).__init__(
|
|
11
|
-
entities=entities,
|
|
12
|
-
value_to_group_id_func=value_to_group_id_func)
|
|
13
|
-
|
|
14
|
-
self._sort_entities(key=lambda entity: entity.IndexBegin)
|
|
15
|
-
|
|
16
|
-
@classmethod
|
|
17
|
-
def read_collection(cls, doc_id, synonyms, version=RuSentRelVersions.V11):
|
|
18
|
-
assert (isinstance(synonyms, SynonymsCollection))
|
|
19
|
-
assert (isinstance(doc_id, int))
|
|
20
|
-
|
|
21
|
-
return RuSentRelIOUtils.read_from_zip(
|
|
22
|
-
inner_path=RuSentRelIOUtils.get_entity_innerpath(index=doc_id, version=version),
|
|
23
|
-
process_func=lambda input_file: cls(
|
|
24
|
-
entities=BratAnnotationParser.parse_annotations(input_file)["entities"],
|
|
25
|
-
value_to_group_id_func=synonyms.get_synonym_group_index),
|
|
26
|
-
version=version)
|
|
@@ -1,125 +0,0 @@
|
|
|
1
|
-
from os import path
|
|
2
|
-
|
|
3
|
-
from enum import Enum
|
|
4
|
-
|
|
5
|
-
from arekit.contrib.source.zip_utils import ZipArchiveUtils
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
class RuSentRelVersions(Enum):
    """Supported versions of the RuSentRel collection.

    Original collection repository: https://github.com/nicolay-r/RuSentRel
    Paper: https://arxiv.org/abs/1808.08932
    """

    V11 = "v1_1"
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
class RuSentRelIOUtils(ZipArchiveUtils):
    """Path and document-index helpers for the zipped RuSentRel collection.

    Documents are stored in the archive under the `train` and `test` folders;
    the `etalon` folder is used in place of `test` when resolving the
    sentiment opinion files of the test documents.
    """

    TEST_FOLDER = "test"
    TRAIN_FOLDER = "train"
    ETALON_FOLDER = "etalon"

    @staticmethod
    def get_archive_filepath(version):
        """Return the path of the collection archive for the given version string.

        version: str, inserted as-is into the archive file name.
        """
        # The former `assert(version, str)` tested a non-empty tuple and therefore
        # could never fail; the file name below is built by formatting `version`
        # directly, so a plain string is what this method expects.
        assert isinstance(version, str)
        return path.join(RuSentRelIOUtils.get_data_root(), "rusentrel-{}.zip".format(version))

    # region internal methods

    @staticmethod
    def get_sentiment_opin_filepath(index, version, prefix='art'):
        """Return the in-archive path of the sentiment opinions file of a document.

        For documents of the test part the `etalon` folder is used.
        """
        root = RuSentRelIOUtils.__get_root_by_index(index, version=version, keep_etalon=True)
        return path.join(root, "{prefix}{index}.opin.txt".format(prefix=prefix, index=index))

    @staticmethod
    def get_entity_innerpath(index, version):
        """Return the in-archive path of the annotation (`.ann`) file of a document."""
        assert isinstance(index, int)
        assert isinstance(version, RuSentRelVersions)
        inner_root = RuSentRelIOUtils.__get_root_by_index(doc_id=index, version=version)
        return path.join(inner_root, "art{}.ann".format(index))

    @staticmethod
    def get_doc_innerpath(index, version):
        """Return the in-archive path of the text (`.txt`) file of a document."""
        assert isinstance(index, int)
        assert isinstance(version, RuSentRelVersions)
        inner_root = RuSentRelIOUtils.__get_root_by_index(doc_id=index, version=version)
        return path.join(inner_root, "art{}.txt".format(index))

    @staticmethod
    def get_synonyms_innerpath():
        """Return the in-archive path of the synonyms file."""
        return "synonyms.txt"

    # endregion

    @staticmethod
    def __get_root_by_index(doc_id, version, keep_etalon=False):
        """Return the archive folder that holds the files of document `doc_id`.

        Documents whose index is found among the test indices resolve to the
        `etalon` folder when `keep_etalon` is set and to the `test` folder
        otherwise; every other document resolves to the `train` folder.
        """
        assert RuSentRelIOUtils.__is_supported(version)
        assert isinstance(version, RuSentRelVersions)
        assert isinstance(doc_id, int)
        other_dir = RuSentRelIOUtils.ETALON_FOLDER if keep_etalon else RuSentRelIOUtils.TEST_FOLDER
        # NOTE(review): the folder name is matched here without the trailing "/"
        # that iter_test_indices uses -- confirm the looser substring match is intended.
        test_indices = set(RuSentRelIOUtils.__iter_indicies_from_dataset(version, RuSentRelIOUtils.TEST_FOLDER))
        return other_dir if doc_id in test_indices else RuSentRelIOUtils.TRAIN_FOLDER

    @staticmethod
    def __is_supported(version):
        """Return True when `version` is a collection version handled by this class."""
        assert isinstance(version, RuSentRelVersions)
        return version == RuSentRelVersions.V11

    @staticmethod
    def __number_from_string(s):
        """Return the int formed by all digit characters of `s`, or None if there are none."""
        digits = "".join(c for c in s if c.isdigit())
        return int(digits) if digits else None

    @staticmethod
    def __iter_indicies_from_dataset(version, folder_name):
        """Yield each distinct document index found in archive file names containing `folder_name`.

        File names without any digit are skipped; every index is yielded once,
        in the order of its first occurrence.
        """
        assert isinstance(folder_name, str)
        assert RuSentRelIOUtils.__is_supported(version)

        used = set()

        for filename in RuSentRelIOUtils.iter_filenames_from_zip(version):
            if folder_name not in filename:
                continue

            index = RuSentRelIOUtils.__number_from_string(filename)

            # Several files may refer to the same document; report it only once.
            if index is None or index in used:
                continue

            used.add(index)

            yield index

    # region public methods

    @staticmethod
    def iter_test_indices(version):
        """Yield the indices of the documents stored in the `test` folder."""
        assert RuSentRelIOUtils.__is_supported(version)
        yield from RuSentRelIOUtils.__iter_indicies_from_dataset(
            version=version, folder_name="{}/".format(RuSentRelIOUtils.TEST_FOLDER))

    @staticmethod
    def iter_train_indices(version):
        """Yield the indices of the documents stored in the `train` folder."""
        assert RuSentRelIOUtils.__is_supported(version)
        yield from RuSentRelIOUtils.__iter_indicies_from_dataset(
            version=version, folder_name="{}/".format(RuSentRelIOUtils.TRAIN_FOLDER))

    @staticmethod
    def iter_collection_indices(version):
        """Yield the indices of all documents: the train part first, then the test part."""
        assert RuSentRelIOUtils.__is_supported(version)
        yield from RuSentRelIOUtils.iter_train_indices(version)
        yield from RuSentRelIOUtils.iter_test_indices(version)

    # endregion
|
|
@@ -1,12 +0,0 @@
|
|
|
1
|
-
from arekit.common.labels.base import Label
|
|
2
|
-
from arekit.common.labels.str_fmt import StringLabelsFormatter
|
|
3
|
-
from arekit.contrib.source.rusentrel.const import NEG_LABEL_STR, POS_LABEL_STR
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
class RuSentRelLabelsFormatter(StringLabelsFormatter):
    """String labels formatter that maps the RuSentRel negative and positive
    label strings onto the given label types.
    """

    def __init__(self, pos_label_type, neg_label_type):
        """
        pos_label_type: Label subclass bound to POS_LABEL_STR.
        neg_label_type: Label subclass bound to NEG_LABEL_STR.
        """
        assert issubclass(pos_label_type, Label)
        assert issubclass(neg_label_type, Label)

        str_to_label_type = {
            NEG_LABEL_STR: neg_label_type,
            POS_LABEL_STR: pos_label_type,
        }
        super().__init__(stol=str_to_label_type)
|
|
File without changes
|
|
@@ -1,30 +0,0 @@
|
|
|
1
|
-
from arekit.contrib.source.rusentrel.const import POS_LABEL_STR, NEG_LABEL_STR
|
|
2
|
-
from arekit.contrib.source.rusentrel.io_utils import RuSentRelIOUtils, RuSentRelVersions
|
|
3
|
-
from arekit.contrib.source.rusentrel.labels_fmt import RuSentRelLabelsFormatter
|
|
4
|
-
from arekit.contrib.source.rusentrel.opinions.provider import RuSentRelOpinionCollectionProvider
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
class RuSentRelOpinions:
    """Access to the sentiment opinions between entities of RuSentRel documents."""

    @staticmethod
    def iter_from_doc(doc_id, labels_fmt, version=RuSentRelVersions.V11):
        """Read the sentiment opinions file of the document `doc_id`.

        doc_id: index of the document within the collection.
        labels_fmt: RuSentRelLabelsFormatter that must support both the
            positive and the negative label strings.
        version: RuSentRelVersions enum value.
        returns: the result of `RuSentRelIOUtils.iter_from_zip` for the
            opinions file (presumably an iterator over the parsed opinions).
        """
        assert isinstance(version, RuSentRelVersions)
        assert isinstance(labels_fmt, RuSentRelLabelsFormatter)
        for label_str in (POS_LABEL_STR, NEG_LABEL_STR):
            assert labels_fmt.supports_value(label_str)

        def iter_opinions(input_file):
            # Labels the formatter does not support are reported as errors.
            return RuSentRelOpinionCollectionProvider._iter_opinions_from_file(
                input_file=input_file,
                labels_formatter=labels_fmt,
                error_on_non_supported=True)

        opin_path = RuSentRelIOUtils.get_sentiment_opin_filepath(index=doc_id, version=version)

        return RuSentRelIOUtils.iter_from_zip(inner_path=opin_path,
                                              process_func=iter_opinions,
                                              version=version)
|