arekit 0.23.1__py3-none-any.whl → 0.25.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- arekit/common/context/terms_mapper.py +2 -2
- arekit/common/data/const.py +5 -4
- arekit/common/{experiment/api/ops_doc.py → data/doc_provider.py} +1 -1
- arekit/common/data/input/providers/columns/sample.py +6 -1
- arekit/common/data/input/providers/instances/base.py +1 -1
- arekit/common/data/input/providers/rows/base.py +36 -13
- arekit/common/data/input/providers/rows/samples.py +57 -55
- arekit/common/data/input/providers/sample/cropped.py +2 -2
- arekit/common/data/input/sample.py +1 -1
- arekit/common/data/rows_fmt.py +82 -0
- arekit/common/data/rows_parser.py +43 -0
- arekit/common/data/storages/base.py +23 -18
- arekit/common/data/views/samples.py +2 -8
- arekit/common/{news → docs}/base.py +2 -2
- arekit/common/{news → docs}/entities_grouping.py +2 -1
- arekit/common/{news → docs}/entity.py +2 -1
- arekit/common/{news → docs}/parsed/base.py +5 -5
- arekit/common/docs/parsed/providers/base.py +68 -0
- arekit/common/{news → docs}/parsed/providers/base_pairs.py +2 -2
- arekit/common/{news → docs}/parsed/providers/entity_service.py +27 -22
- arekit/common/{news → docs}/parsed/providers/opinion_pairs.py +2 -2
- arekit/common/{news → docs}/parsed/providers/text_opinion_pairs.py +6 -6
- arekit/common/docs/parsed/service.py +31 -0
- arekit/common/docs/parser.py +66 -0
- arekit/common/{news → docs}/sentence.py +1 -1
- arekit/common/entities/base.py +11 -2
- arekit/common/experiment/api/base_samples_io.py +1 -1
- arekit/common/frames/variants/collection.py +2 -2
- arekit/common/linkage/base.py +2 -2
- arekit/common/linkage/meta.py +23 -0
- arekit/common/linkage/opinions.py +1 -1
- arekit/common/linkage/text_opinions.py +2 -2
- arekit/common/opinions/annot/algo/base.py +1 -1
- arekit/common/opinions/annot/algo/pair_based.py +15 -13
- arekit/common/opinions/annot/algo/predefined.py +4 -4
- arekit/common/opinions/annot/algo_based.py +5 -5
- arekit/common/opinions/annot/base.py +3 -3
- arekit/common/opinions/base.py +7 -7
- arekit/common/opinions/collection.py +3 -3
- arekit/common/pipeline/base.py +12 -16
- arekit/common/pipeline/batching.py +28 -0
- arekit/common/pipeline/context.py +5 -1
- arekit/common/pipeline/items/base.py +38 -1
- arekit/common/pipeline/items/flatten.py +5 -1
- arekit/common/pipeline/items/handle.py +2 -1
- arekit/common/pipeline/items/iter.py +2 -1
- arekit/common/pipeline/items/map.py +2 -1
- arekit/common/pipeline/items/map_nested.py +4 -0
- arekit/common/pipeline/utils.py +32 -0
- arekit/common/service/sqlite.py +36 -0
- arekit/common/synonyms/base.py +2 -2
- arekit/common/text/{partitioning/str.py → partitioning.py} +16 -11
- arekit/common/text_opinions/base.py +11 -11
- arekit/common/utils.py +33 -46
- arekit/contrib/networks/embedding.py +3 -3
- arekit/contrib/networks/embedding_io.py +5 -5
- arekit/contrib/networks/input/const.py +0 -2
- arekit/contrib/networks/input/providers/sample.py +15 -29
- arekit/contrib/networks/input/rows_parser.py +47 -134
- arekit/contrib/prompt/sample.py +18 -16
- arekit/contrib/utils/data/contents/opinions.py +17 -5
- arekit/contrib/utils/data/doc_provider/dict_based.py +13 -0
- arekit/contrib/utils/data/{doc_ops → doc_provider}/dir_based.py +7 -7
- arekit/contrib/utils/data/readers/base.py +3 -0
- arekit/contrib/utils/data/readers/csv_pd.py +10 -4
- arekit/contrib/utils/data/readers/jsonl.py +3 -0
- arekit/contrib/utils/data/readers/sqlite.py +14 -0
- arekit/contrib/utils/data/service/balance.py +0 -1
- arekit/contrib/utils/data/storages/pandas_based.py +3 -5
- arekit/contrib/utils/data/storages/row_cache.py +18 -6
- arekit/contrib/utils/data/storages/sqlite_based.py +17 -0
- arekit/contrib/utils/data/writers/base.py +5 -0
- arekit/contrib/utils/data/writers/csv_native.py +3 -0
- arekit/contrib/utils/data/writers/csv_pd.py +3 -0
- arekit/contrib/utils/data/writers/json_opennre.py +31 -13
- arekit/contrib/utils/data/writers/sqlite_native.py +114 -0
- arekit/contrib/utils/io_utils/embedding.py +25 -33
- arekit/contrib/utils/io_utils/utils.py +3 -24
- arekit/contrib/utils/pipelines/items/sampling/base.py +31 -26
- arekit/contrib/utils/pipelines/items/sampling/networks.py +7 -10
- arekit/contrib/utils/pipelines/items/text/entities_default.py +2 -2
- arekit/contrib/utils/pipelines/items/text/frames.py +2 -3
- arekit/contrib/utils/pipelines/items/text/frames_lemmatized.py +3 -3
- arekit/contrib/utils/pipelines/items/text/frames_negation.py +2 -1
- arekit/contrib/utils/pipelines/items/text/tokenizer.py +3 -5
- arekit/contrib/utils/pipelines/items/text/translator.py +136 -0
- arekit/contrib/utils/pipelines/opinion_collections.py +5 -5
- arekit/contrib/utils/pipelines/text_opinion/annot/algo_based.py +7 -7
- arekit/contrib/utils/pipelines/text_opinion/extraction.py +34 -22
- arekit/contrib/utils/pipelines/text_opinion/filters/base.py +1 -1
- arekit/contrib/utils/pipelines/text_opinion/filters/distance_based.py +1 -1
- arekit/contrib/utils/pipelines/text_opinion/filters/entity_based.py +3 -3
- arekit/contrib/utils/pipelines/text_opinion/filters/limitation.py +4 -4
- arekit/contrib/utils/serializer.py +4 -23
- arekit-0.25.0.data/data/logo.png +0 -0
- arekit-0.25.0.dist-info/METADATA +82 -0
- arekit-0.25.0.dist-info/RECORD +259 -0
- {arekit-0.23.1.dist-info → arekit-0.25.0.dist-info}/WHEEL +1 -1
- arekit/common/data/row_ids/base.py +0 -79
- arekit/common/data/row_ids/binary.py +0 -38
- arekit/common/data/row_ids/multiple.py +0 -14
- arekit/common/folding/base.py +0 -36
- arekit/common/folding/fixed.py +0 -42
- arekit/common/folding/nofold.py +0 -15
- arekit/common/folding/united.py +0 -46
- arekit/common/news/objects_parser.py +0 -37
- arekit/common/news/parsed/providers/base.py +0 -48
- arekit/common/news/parsed/service.py +0 -31
- arekit/common/news/parser.py +0 -34
- arekit/common/text/parser.py +0 -12
- arekit/common/text/partitioning/__init__.py +0 -0
- arekit/common/text/partitioning/base.py +0 -4
- arekit/common/text/partitioning/terms.py +0 -35
- arekit/contrib/source/__init__.py +0 -0
- arekit/contrib/source/brat/__init__.py +0 -0
- arekit/contrib/source/brat/annot.py +0 -83
- arekit/contrib/source/brat/entities/__init__.py +0 -0
- arekit/contrib/source/brat/entities/compound.py +0 -33
- arekit/contrib/source/brat/entities/entity.py +0 -42
- arekit/contrib/source/brat/entities/parser.py +0 -53
- arekit/contrib/source/brat/news.py +0 -28
- arekit/contrib/source/brat/opinions/__init__.py +0 -0
- arekit/contrib/source/brat/opinions/converter.py +0 -19
- arekit/contrib/source/brat/relation.py +0 -32
- arekit/contrib/source/brat/sentence.py +0 -69
- arekit/contrib/source/brat/sentences_reader.py +0 -128
- arekit/contrib/source/download.py +0 -41
- arekit/contrib/source/nerel/__init__.py +0 -0
- arekit/contrib/source/nerel/entities.py +0 -55
- arekit/contrib/source/nerel/folding/__init__.py +0 -0
- arekit/contrib/source/nerel/folding/fixed.py +0 -75
- arekit/contrib/source/nerel/io_utils.py +0 -62
- arekit/contrib/source/nerel/labels.py +0 -241
- arekit/contrib/source/nerel/reader.py +0 -46
- arekit/contrib/source/nerel/utils.py +0 -24
- arekit/contrib/source/nerel/versions.py +0 -12
- arekit/contrib/source/nerelbio/__init__.py +0 -0
- arekit/contrib/source/nerelbio/io_utils.py +0 -62
- arekit/contrib/source/nerelbio/labels.py +0 -265
- arekit/contrib/source/nerelbio/reader.py +0 -8
- arekit/contrib/source/nerelbio/versions.py +0 -8
- arekit/contrib/source/ruattitudes/__init__.py +0 -0
- arekit/contrib/source/ruattitudes/collection.py +0 -36
- arekit/contrib/source/ruattitudes/entity/__init__.py +0 -0
- arekit/contrib/source/ruattitudes/entity/parser.py +0 -7
- arekit/contrib/source/ruattitudes/io_utils.py +0 -56
- arekit/contrib/source/ruattitudes/labels_fmt.py +0 -12
- arekit/contrib/source/ruattitudes/news.py +0 -51
- arekit/contrib/source/ruattitudes/news_brat.py +0 -44
- arekit/contrib/source/ruattitudes/opinions/__init__.py +0 -0
- arekit/contrib/source/ruattitudes/opinions/base.py +0 -28
- arekit/contrib/source/ruattitudes/opinions/converter.py +0 -37
- arekit/contrib/source/ruattitudes/reader.py +0 -268
- arekit/contrib/source/ruattitudes/sentence.py +0 -73
- arekit/contrib/source/ruattitudes/synonyms.py +0 -17
- arekit/contrib/source/ruattitudes/text_object.py +0 -57
- arekit/contrib/source/rusentiframes/__init__.py +0 -0
- arekit/contrib/source/rusentiframes/collection.py +0 -157
- arekit/contrib/source/rusentiframes/effect.py +0 -24
- arekit/contrib/source/rusentiframes/io_utils.py +0 -19
- arekit/contrib/source/rusentiframes/labels_fmt.py +0 -22
- arekit/contrib/source/rusentiframes/polarity.py +0 -35
- arekit/contrib/source/rusentiframes/role.py +0 -15
- arekit/contrib/source/rusentiframes/state.py +0 -24
- arekit/contrib/source/rusentiframes/types.py +0 -42
- arekit/contrib/source/rusentiframes/value.py +0 -2
- arekit/contrib/source/rusentrel/__init__.py +0 -0
- arekit/contrib/source/rusentrel/const.py +0 -3
- arekit/contrib/source/rusentrel/entities.py +0 -26
- arekit/contrib/source/rusentrel/io_utils.py +0 -125
- arekit/contrib/source/rusentrel/labels_fmt.py +0 -12
- arekit/contrib/source/rusentrel/news_reader.py +0 -51
- arekit/contrib/source/rusentrel/opinions/__init__.py +0 -0
- arekit/contrib/source/rusentrel/opinions/collection.py +0 -30
- arekit/contrib/source/rusentrel/opinions/converter.py +0 -40
- arekit/contrib/source/rusentrel/opinions/provider.py +0 -54
- arekit/contrib/source/rusentrel/opinions/writer.py +0 -42
- arekit/contrib/source/rusentrel/synonyms.py +0 -17
- arekit/contrib/source/sentinerel/__init__.py +0 -0
- arekit/contrib/source/sentinerel/entities.py +0 -52
- arekit/contrib/source/sentinerel/folding/__init__.py +0 -0
- arekit/contrib/source/sentinerel/folding/factory.py +0 -32
- arekit/contrib/source/sentinerel/folding/fixed.py +0 -73
- arekit/contrib/source/sentinerel/io_utils.py +0 -87
- arekit/contrib/source/sentinerel/labels.py +0 -53
- arekit/contrib/source/sentinerel/labels_scaler.py +0 -30
- arekit/contrib/source/sentinerel/reader.py +0 -42
- arekit/contrib/source/synonyms/__init__.py +0 -0
- arekit/contrib/source/synonyms/utils.py +0 -19
- arekit/contrib/source/zip_utils.py +0 -47
- arekit/contrib/utils/bert/rows.py +0 -0
- arekit/contrib/utils/bert/text_b_rus.py +0 -18
- arekit/contrib/utils/connotations/__init__.py +0 -0
- arekit/contrib/utils/connotations/rusentiframes_sentiment.py +0 -23
- arekit/contrib/utils/cv/__init__.py +0 -0
- arekit/contrib/utils/cv/doc_stat/__init__.py +0 -0
- arekit/contrib/utils/cv/doc_stat/base.py +0 -37
- arekit/contrib/utils/cv/doc_stat/sentence.py +0 -12
- arekit/contrib/utils/cv/splitters/__init__.py +0 -0
- arekit/contrib/utils/cv/splitters/base.py +0 -4
- arekit/contrib/utils/cv/splitters/default.py +0 -53
- arekit/contrib/utils/cv/splitters/statistical.py +0 -57
- arekit/contrib/utils/cv/two_class.py +0 -77
- arekit/contrib/utils/data/doc_ops/__init__.py +0 -0
- arekit/contrib/utils/data/doc_ops/dict_based.py +0 -13
- arekit/contrib/utils/data/ext.py +0 -31
- arekit/contrib/utils/data/views/__init__.py +0 -0
- arekit/contrib/utils/data/views/linkages/__init__.py +0 -0
- arekit/contrib/utils/data/views/linkages/base.py +0 -58
- arekit/contrib/utils/data/views/linkages/multilabel.py +0 -48
- arekit/contrib/utils/data/views/linkages/utils.py +0 -24
- arekit/contrib/utils/data/views/opinions.py +0 -14
- arekit/contrib/utils/download.py +0 -78
- arekit/contrib/utils/entities/formatters/str_rus_cased_fmt.py +0 -78
- arekit/contrib/utils/entities/formatters/str_rus_nocased_fmt.py +0 -15
- arekit/contrib/utils/entities/formatters/str_simple_fmt.py +0 -24
- arekit/contrib/utils/entities/formatters/str_simple_uppercase_fmt.py +0 -21
- arekit/contrib/utils/io_utils/opinions.py +0 -39
- arekit/contrib/utils/io_utils/samples.py +0 -78
- arekit/contrib/utils/lexicons/__init__.py +0 -0
- arekit/contrib/utils/lexicons/lexicon.py +0 -43
- arekit/contrib/utils/lexicons/relation.py +0 -45
- arekit/contrib/utils/lexicons/rusentilex.py +0 -34
- arekit/contrib/utils/nn/__init__.py +0 -0
- arekit/contrib/utils/nn/rows.py +0 -83
- arekit/contrib/utils/pipelines/items/sampling/bert.py +0 -5
- arekit/contrib/utils/pipelines/items/text/terms_splitter.py +0 -10
- arekit/contrib/utils/pipelines/items/to_output.py +0 -101
- arekit/contrib/utils/pipelines/sources/__init__.py +0 -0
- arekit/contrib/utils/pipelines/sources/nerel/__init__.py +0 -0
- arekit/contrib/utils/pipelines/sources/nerel/doc_ops.py +0 -27
- arekit/contrib/utils/pipelines/sources/nerel/extract_text_relations.py +0 -59
- arekit/contrib/utils/pipelines/sources/nerel/labels_fmt.py +0 -60
- arekit/contrib/utils/pipelines/sources/nerel_bio/__init__.py +0 -0
- arekit/contrib/utils/pipelines/sources/nerel_bio/doc_ops.py +0 -29
- arekit/contrib/utils/pipelines/sources/nerel_bio/extrat_text_relations.py +0 -59
- arekit/contrib/utils/pipelines/sources/nerel_bio/labels_fmt.py +0 -79
- arekit/contrib/utils/pipelines/sources/ruattitudes/__init__.py +0 -0
- arekit/contrib/utils/pipelines/sources/ruattitudes/doc_ops.py +0 -56
- arekit/contrib/utils/pipelines/sources/ruattitudes/entity_filter.py +0 -19
- arekit/contrib/utils/pipelines/sources/ruattitudes/extract_text_opinions.py +0 -58
- arekit/contrib/utils/pipelines/sources/rusentrel/__init__.py +0 -0
- arekit/contrib/utils/pipelines/sources/rusentrel/doc_ops.py +0 -21
- arekit/contrib/utils/pipelines/sources/rusentrel/extract_text_opinions.py +0 -100
- arekit/contrib/utils/pipelines/sources/sentinerel/__init__.py +0 -0
- arekit/contrib/utils/pipelines/sources/sentinerel/doc_ops.py +0 -29
- arekit/contrib/utils/pipelines/sources/sentinerel/entity_filter.py +0 -62
- arekit/contrib/utils/pipelines/sources/sentinerel/extract_text_opinions.py +0 -175
- arekit/contrib/utils/pipelines/sources/sentinerel/labels_fmt.py +0 -50
- arekit/contrib/utils/pipelines/text_opinion/annot/predefined.py +0 -88
- arekit/contrib/utils/resources.py +0 -26
- arekit/contrib/utils/sources/__init__.py +0 -0
- arekit/contrib/utils/sources/sentinerel/__init__.py +0 -0
- arekit/contrib/utils/sources/sentinerel/text_opinion/__init__.py +0 -0
- arekit/contrib/utils/sources/sentinerel/text_opinion/prof_per_org_filter.py +0 -63
- arekit/contrib/utils/utils_folding.py +0 -19
- arekit/download_data.py +0 -11
- arekit-0.23.1.dist-info/METADATA +0 -23
- arekit-0.23.1.dist-info/RECORD +0 -403
- /arekit/common/{data/row_ids → docs}/__init__.py +0 -0
- /arekit/common/{folding → docs/parsed}/__init__.py +0 -0
- /arekit/common/{news → docs/parsed/providers}/__init__.py +0 -0
- /arekit/common/{news → docs}/parsed/term_position.py +0 -0
- /arekit/common/{news/parsed → service}/__init__.py +0 -0
- /arekit/{common/news/parsed/providers → contrib/utils/data/doc_provider}/__init__.py +0 -0
- {arekit-0.23.1.dist-info → arekit-0.25.0.dist-info}/LICENSE +0 -0
- {arekit-0.23.1.dist-info → arekit-0.25.0.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,68 @@
|
|
|
1
|
+
from arekit.common.entities.base import Entity
|
|
2
|
+
from arekit.common.docs.entity import DocumentEntity
|
|
3
|
+
from arekit.common.docs.parsed.base import ParsedDocument
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
class BaseParsedDocumentServiceProvider(object):
    """ Base class of the providers that are initialized with a parsed document.

        On `init_parsed_doc` every entity of the document, together with each of
        its childs, is wrapped into a `DocumentEntity` that receives a sequential
        in-document id. When `entity_index_func` is given, the wrapped entities
        are additionally indexed by the value this function returns.
    """

    def __init__(self, entity_index_func=None):
        """ Outside entity indexing function
            entity_index_func: provides id for a given entity, i.e.
                func(entity) -> int (id)
                When None, no entity -> DocumentEntity mapping is kept.
        """
        assert(callable(entity_index_func) or entity_index_func is None)
        self._doc_entities = None
        self.__entity_map = {}
        self.__entity_index_func = entity_index_func

    @property
    def Name(self):
        """ Name of the provider; has to be implemented by the derived class.
        """
        raise NotImplementedError()

    def init_parsed_doc(self, parsed_doc):
        """ Rebuilds the list of document entities (and the entity map, when
            the index function is provided) from the given parsed document.
            Everything collected by the previous call is dropped.
        """
        assert(isinstance(parsed_doc, ParsedDocument))

        def iter_childs_and_root_node(entity):
            """ Note: Entity has childs and we would like to iterate over childs
                to consider them as well as keep the root Node.
                Yields pairs (entity, is_child).
            """
            # We first add childs.
            for child_entity in entity.iter_childs():
                yield child_entity, True

            # Return Root node.
            yield entity, False

        self._doc_entities = []
        self.__entity_map.clear()

        current_id = 0
        for entity in parsed_doc.iter_entities():

            # This list is passed to the root DocumentEntity; since childs are
            # yielded before the root, it is already filled at that moment.
            child_doc_entities = []
            for tree_entity, is_child in iter_childs_and_root_node(entity):

                doc_entity = DocumentEntity(id_in_doc=current_id,
                                            value=tree_entity.Value,
                                            e_type=tree_entity.Type,
                                            display_value=tree_entity.DisplayValue,
                                            childs=None if is_child else child_doc_entities,
                                            group_index=tree_entity.GroupIndex)
                current_id += 1

                if is_child:
                    child_doc_entities.append(doc_entity)

                self._doc_entities.append(doc_entity)

                if self.__entity_index_func is not None:
                    self.__entity_map[self.__entity_index_func(tree_entity)] = doc_entity

    def get_document_entity(self, entity):
        """ Maps entity to the related one with DocumentEntity type
            Raises TypeError when the provider has no entity index function and
            KeyError when the entity index is not present in the map.
        """
        assert(isinstance(entity, Entity))
        if self.__entity_index_func is None:
            # Same exception type as calling None, but with an explicit reason.
            raise TypeError("entity_index_func is not provided, entities can't be mapped")
        return self.__entity_map[self.__entity_index_func(entity)]

    def contains_entity(self, entity):
        """ Returns True when the index of the entity is present in the map.
            Without the entity index function nothing is ever registered,
            so the answer is always False in that case.
        """
        if self.__entity_index_func is None:
            return False
        return self.__entity_index_func(entity) in self.__entity_map
|
|
@@ -1,8 +1,8 @@
|
|
|
1
1
|
from arekit.common.labels.provider.base import BasePairLabelProvider
|
|
2
|
-
from arekit.common.
|
|
2
|
+
from arekit.common.docs.parsed.providers.base import BaseParsedDocumentServiceProvider
|
|
3
3
|
|
|
4
4
|
|
|
5
|
-
class BasePairProvider(
|
|
5
|
+
class BasePairProvider(BaseParsedDocumentServiceProvider):
|
|
6
6
|
|
|
7
7
|
@property
|
|
8
8
|
def Name(self):
|
|
@@ -1,10 +1,10 @@
|
|
|
1
1
|
from enum import Enum
|
|
2
2
|
|
|
3
3
|
from arekit.common.entities.base import Entity
|
|
4
|
-
from arekit.common.
|
|
5
|
-
from arekit.common.
|
|
6
|
-
from arekit.common.
|
|
7
|
-
from arekit.common.
|
|
4
|
+
from arekit.common.docs.entity import DocumentEntity
|
|
5
|
+
from arekit.common.docs.parsed.base import ParsedDocument
|
|
6
|
+
from arekit.common.docs.parsed.providers.base import BaseParsedDocumentServiceProvider
|
|
7
|
+
from arekit.common.docs.parsed.term_position import TermPositionTypes, TermPosition
|
|
8
8
|
from arekit.common.text_opinions.base import TextOpinion
|
|
9
9
|
|
|
10
10
|
|
|
@@ -30,18 +30,19 @@ class DistanceType(Enum):
|
|
|
30
30
|
return TermPositionTypes.SentenceIndex
|
|
31
31
|
|
|
32
32
|
|
|
33
|
-
class EntityServiceProvider(
|
|
33
|
+
class EntityServiceProvider(BaseParsedDocumentServiceProvider):
|
|
34
34
|
""" This class provides a helper functions for TextOpinions, which become a part of TextOpinionCollection.
|
|
35
35
|
The latter is important because of the dependency from Owner.
|
|
36
36
|
We utilize 'extract' prefix in methods to emphasize that these are methods of helper.
|
|
37
37
|
|
|
38
38
|
Wrapper over:
|
|
39
|
-
parsed
|
|
39
|
+
parsed doc, positions, text_opinions
|
|
40
40
|
"""
|
|
41
41
|
|
|
42
42
|
NAME = "entity-service-provider"
|
|
43
43
|
|
|
44
44
|
def __init__(self, entity_index_func):
|
|
45
|
+
assert(callable(entity_index_func))
|
|
45
46
|
super(EntityServiceProvider, self).__init__(entity_index_func=entity_index_func)
|
|
46
47
|
# Initialize API.
|
|
47
48
|
self.__iter_raw_terms_func = None
|
|
@@ -52,11 +53,11 @@ class EntityServiceProvider(BaseParsedNewsServiceProvider):
|
|
|
52
53
|
def Name(self):
|
|
53
54
|
return self.NAME
|
|
54
55
|
|
|
55
|
-
def
|
|
56
|
-
super(EntityServiceProvider, self).
|
|
57
|
-
assert(isinstance(
|
|
58
|
-
self.__iter_raw_terms_func = lambda:
|
|
59
|
-
self.
|
|
56
|
+
def init_parsed_doc(self, parsed_doc):
|
|
57
|
+
super(EntityServiceProvider, self).init_parsed_doc(parsed_doc)
|
|
58
|
+
assert(isinstance(parsed_doc, ParsedDocument))
|
|
59
|
+
self.__iter_raw_terms_func = lambda: parsed_doc.iter_terms(filter_func=None, term_only=False)
|
|
60
|
+
self.__entity_positions = self.__calculate_entity_positions()
|
|
60
61
|
|
|
61
62
|
# region public 'extract' methods
|
|
62
63
|
|
|
@@ -146,25 +147,29 @@ class EntityServiceProvider(BaseParsedNewsServiceProvider):
|
|
|
146
147
|
assert(end_type == EntityEndType.Source or end_type == EntityEndType.Target)
|
|
147
148
|
return text_opinion.SourceId if end_type == EntityEndType.Source else text_opinion.TargetId
|
|
148
149
|
|
|
149
|
-
def __init_entity_positions(self):
|
|
150
|
-
self.__entity_positions = self.__calculate_entity_positions()
|
|
151
|
-
|
|
152
150
|
def __calculate_entity_positions(self):
|
|
153
151
|
""" Note: here we consider the same order as in self._entities.
|
|
154
152
|
"""
|
|
155
|
-
|
|
156
|
-
t_ind_in_doc = 0
|
|
153
|
+
t_ind_in_doc = -1
|
|
157
154
|
|
|
155
|
+
positions = {}
|
|
158
156
|
for s_ind, t_ind_in_sent, term in self.__iter_raw_terms_func():
|
|
159
157
|
|
|
160
|
-
if isinstance(term, Entity):
|
|
161
|
-
position = TermPosition(term_ind_in_doc=t_ind_in_doc,
|
|
162
|
-
term_ind_in_sent=t_ind_in_sent,
|
|
163
|
-
s_ind=s_ind)
|
|
164
|
-
positions.append(position)
|
|
165
|
-
|
|
166
158
|
t_ind_in_doc += 1
|
|
167
159
|
|
|
160
|
+
if not isinstance(term, Entity):
|
|
161
|
+
continue
|
|
162
|
+
|
|
163
|
+
# We consider that entities within a single tree has the same positions.
|
|
164
|
+
for tree_entity in list(term.iter_childs()) + [term]:
|
|
165
|
+
|
|
166
|
+
key = self.get_document_entity(tree_entity).IdInDocument
|
|
167
|
+
assert(key not in positions)
|
|
168
|
+
|
|
169
|
+
positions[key] = TermPosition(term_ind_in_doc=t_ind_in_doc,
|
|
170
|
+
term_ind_in_sent=t_ind_in_sent,
|
|
171
|
+
s_ind=s_ind)
|
|
172
|
+
|
|
168
173
|
return positions
|
|
169
174
|
|
|
170
175
|
# endregion
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
from arekit.common.entities.base import Entity
|
|
2
|
-
from arekit.common.
|
|
2
|
+
from arekit.common.docs.parsed.providers.base_pairs import BasePairProvider
|
|
3
3
|
from arekit.common.opinions.base import Opinion
|
|
4
4
|
|
|
5
5
|
|
|
@@ -17,4 +17,4 @@ class OpinionPairsProvider(BasePairProvider):
|
|
|
17
17
|
|
|
18
18
|
return Opinion(source_value=source_entity.Value,
|
|
19
19
|
target_value=target_entity.Value,
|
|
20
|
-
|
|
20
|
+
label=label)
|
|
@@ -1,8 +1,8 @@
|
|
|
1
1
|
import logging
|
|
2
2
|
|
|
3
3
|
from arekit.common.entities.collection import EntityCollection
|
|
4
|
-
from arekit.common.
|
|
5
|
-
from arekit.common.
|
|
4
|
+
from arekit.common.docs.entity import DocumentEntity
|
|
5
|
+
from arekit.common.docs.parsed.providers.base_pairs import BasePairProvider
|
|
6
6
|
from arekit.common.opinions.base import Opinion
|
|
7
7
|
from arekit.common.text_opinions.base import TextOpinion
|
|
8
8
|
from arekit.common.labels.provider.constant import ConstantLabelProvider
|
|
@@ -36,9 +36,9 @@ class TextOpinionPairsProvider(BasePairProvider):
|
|
|
36
36
|
label=label,
|
|
37
37
|
text_opinion_id=None)
|
|
38
38
|
|
|
39
|
-
def
|
|
40
|
-
super(TextOpinionPairsProvider, self).
|
|
41
|
-
self.__doc_id =
|
|
39
|
+
def init_parsed_doc(self, parsed_doc):
|
|
40
|
+
super(TextOpinionPairsProvider, self).init_parsed_doc(parsed_doc)
|
|
41
|
+
self.__doc_id = parsed_doc.RelatedDocID
|
|
42
42
|
self.__entities_collection = EntityCollection(
|
|
43
43
|
entities=list(self._doc_entities),
|
|
44
44
|
value_to_group_id_func=self.__value_to_group_id_func)
|
|
@@ -68,7 +68,7 @@ class TextOpinionPairsProvider(BasePairProvider):
|
|
|
68
68
|
return
|
|
69
69
|
yield
|
|
70
70
|
|
|
71
|
-
label_provider = ConstantLabelProvider(label_instance=opinion.
|
|
71
|
+
label_provider = ConstantLabelProvider(label_instance=opinion.Label)
|
|
72
72
|
|
|
73
73
|
pairs_it = self._iter_from_entities(src_entity_doc_ids=list(map(lambda e: e.IdInDocument, source_entities)),
|
|
74
74
|
tgt_entity_doc_ids=list(map(lambda e: e.IdInDocument, target_entities)),
|
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
from arekit.common.docs.parsed.base import ParsedDocument
|
|
2
|
+
from arekit.common.docs.parsed.providers.base import BaseParsedDocumentServiceProvider
|
|
3
|
+
|
|
4
|
+
|
|
5
|
+
class ParsedDocumentService(object):
    """ Keeps a parsed document together with the providers initialized for it.
        Providers are accessible by their names.
    """

    def __init__(self, parsed_doc, providers):
        """ parsed_doc: ParsedDocument
            providers: list
                list of BaseParsedDocumentServiceProvider instances
                with the unique names.
        """
        assert(isinstance(parsed_doc, ParsedDocument))
        assert(isinstance(providers, list))

        self.__providers = {}
        self.__parsed_doc = parsed_doc

        for service_provider in providers:
            assert(isinstance(service_provider, BaseParsedDocumentServiceProvider))

            provider_name = service_provider.Name
            assert(provider_name not in self.__providers)

            # Register the provider by name and then bind it to the document.
            self.__providers[provider_name] = service_provider
            service_provider.init_parsed_doc(parsed_doc)

    @property
    def ParsedDocument(self):
        """ The parsed document passed to the constructor.
        """
        return self.__parsed_doc

    def get_provider(self, name):
        """ Returns the provider registered under the given name.
        """
        return self.__providers[name]
|
|
@@ -0,0 +1,66 @@
|
|
|
1
|
+
from tqdm import tqdm
|
|
2
|
+
from arekit.common.docs.base import Document
|
|
3
|
+
from arekit.common.docs.parsed.base import ParsedDocument
|
|
4
|
+
from arekit.common.pipeline.base import BasePipelineLauncher
|
|
5
|
+
from arekit.common.pipeline.batching import BatchingPipelineLauncher
|
|
6
|
+
from arekit.common.pipeline.context import PipelineContext
|
|
7
|
+
from arekit.common.pipeline.utils import BatchIterator
|
|
8
|
+
from arekit.common.text.parsed import BaseParsedText
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
class DocumentParsers(object):
    """ Helpers that turn a Document into a ParsedDocument by passing
        its sentences through the list of pipeline items.
    """

    @staticmethod
    def parse(doc, pipeline_items, parent_ppl_ctx=None, src_key="input", show_progress=False):
        """ Sentence-by-sentence parser: every sentence is placed into its own
            pipeline context under `src_key`, the pipeline is launched, and the
            value stored under "result" becomes the terms of the parsed sentence.
        """
        assert(isinstance(doc, Document))
        assert(isinstance(pipeline_items, list))
        assert(parent_ppl_ctx is None or isinstance(parent_ppl_ctx, PipelineContext))

        sentence_indices = tqdm(range(doc.SentencesCount), disable=not show_progress)

        parsed_sentences = []
        for sent_ind in sentence_indices:
            # A fresh context per sentence, linked to the optional parent one.
            sentence_ctx = PipelineContext({src_key: doc.get_sentence(sent_ind)},
                                           parent_ctx=parent_ppl_ctx)

            BasePipelineLauncher.run(pipeline=pipeline_items, pipeline_ctx=sentence_ctx, src_key=src_key)

            terms = sentence_ctx.provide("result")
            parsed_sentences.append(BaseParsedText(terms=terms))

        return ParsedDocument(doc_id=doc.ID, parsed_sentences=parsed_sentences)

    @staticmethod
    def parse_batch(doc, pipeline_items, batch_size, parent_ppl_ctx=None, src_key="input", show_progress=False):
        """ Batch parser: sentences are grouped by BatchIterator, every group is
            placed into a single pipeline context under `src_key`, and every
            element stored under "result" becomes a separate parsed sentence.
        """
        assert(isinstance(batch_size, int) and batch_size > 0)
        assert(isinstance(doc, Document))
        assert(isinstance(pipeline_items, list))
        assert(parent_ppl_ctx is None or isinstance(parent_ppl_ctx, PipelineContext))

        # NOTE(review): round() is only an estimate of the batches count for the
        # progress bar; it may be lower than the actual amount if BatchIterator
        # also yields a trailing incomplete batch -- confirm.
        batches = tqdm(BatchIterator(data_iter=iter(range(doc.SentencesCount)), batch_size=batch_size),
                       total=round(doc.SentencesCount / batch_size),
                       disable=not show_progress)

        parsed_sentences = []
        for batch in batches:
            # One context for the whole batch of sentences.
            sentences = [doc.get_sentence(s_ind) for s_ind in batch]
            batch_ctx = PipelineContext({src_key: sentences}, parent_ctx=parent_ppl_ctx)

            BatchingPipelineLauncher.run(pipeline=pipeline_items, pipeline_ctx=batch_ctx, src_key=src_key)

            parsed_sentences.extend([BaseParsedText(terms=terms) for terms in batch_ctx.provide("result")])

        return ParsedDocument(doc_id=doc.ID, parsed_sentences=parsed_sentences)
|
arekit/common/entities/base.py
CHANGED
|
@@ -1,14 +1,16 @@
|
|
|
1
1
|
class Entity(object):
|
|
2
2
|
|
|
3
|
-
def __init__(self, value, e_type, display_value=None, group_index=None):
|
|
3
|
+
def __init__(self, value, e_type, childs=None, display_value=None, group_index=None):
|
|
4
4
|
assert(isinstance(value, str) and len(value) > 0)
|
|
5
5
|
assert(isinstance(e_type, str) or e_type is None)
|
|
6
6
|
assert(isinstance(display_value, str) or display_value is None)
|
|
7
7
|
assert(isinstance(group_index, int) or group_index is None)
|
|
8
|
-
|
|
8
|
+
assert(isinstance(childs, list) or childs is None)
|
|
9
|
+
self.__value = value
|
|
9
10
|
self.__type = e_type
|
|
10
11
|
self.__display_value = display_value
|
|
11
12
|
self.__group_index = group_index
|
|
13
|
+
self.__childs = childs
|
|
12
14
|
|
|
13
15
|
@property
|
|
14
16
|
def GroupIndex(self):
|
|
@@ -40,3 +42,10 @@ class Entity(object):
|
|
|
40
42
|
assert(isinstance(value, int) and value >= -1)
|
|
41
43
|
assert(self.__group_index is None)
|
|
42
44
|
self.__group_index = value
|
|
45
|
+
|
|
46
|
+
def iter_childs(self):
    """ Iterates over the child entities; yields nothing when the childs were not provided.
    """
    if self.__childs is not None:
        yield from self.__childs
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import
|
|
1
|
+
from collections.abc import Iterable
|
|
2
2
|
from arekit.common.frames.variants.base import FrameVariant
|
|
3
3
|
|
|
4
4
|
|
|
@@ -23,7 +23,7 @@ class FrameVariantsCollection(object):
|
|
|
23
23
|
# region public methods
|
|
24
24
|
|
|
25
25
|
def fill_from_iterable(self, variants_with_id, overwrite_existed_variant, raise_error_on_existed_variant):
|
|
26
|
-
assert(isinstance(variants_with_id,
|
|
26
|
+
assert(isinstance(variants_with_id, Iterable))
|
|
27
27
|
assert(isinstance(overwrite_existed_variant, bool))
|
|
28
28
|
assert(isinstance(raise_error_on_existed_variant, bool))
|
|
29
29
|
assert(len(self.__variants) == 0)
|
arekit/common/linkage/base.py
CHANGED
|
@@ -1,10 +1,10 @@
|
|
|
1
|
-
import
|
|
1
|
+
from collections.abc import Iterable
|
|
2
2
|
|
|
3
3
|
|
|
4
4
|
class LinkedDataWrapper(object):
|
|
5
5
|
|
|
6
6
|
def __init__(self, linked_data):
|
|
7
|
-
assert(isinstance(linked_data,
|
|
7
|
+
assert(isinstance(linked_data, Iterable))
|
|
8
8
|
self.__linked_data = list(linked_data)
|
|
9
9
|
self.__tag = None
|
|
10
10
|
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
from arekit.common.linkage.base import LinkedDataWrapper
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
class MetaEmptyLinkedDataWrapper(LinkedDataWrapper):
    """ Placeholder wrapper without linked data: it only carries the system
        information (document id and optional meta data) through the data pipelines.
    """

    def __init__(self, doc_id, meta_data=None):
        """ doc_id:
                id of the related document.
            meta_data:
                optional, any extra information that might be needed later on.
        """
        # The wrapper is always initialized with the empty linked data.
        super().__init__(linked_data=[])
        self.__meta_data = meta_data
        self.__doc_id = doc_id

    @property
    def RelatedDocID(self):
        """ Document id passed to the constructor.
        """
        return self.__doc_id

    @property
    def MetaData(self):
        """ Meta data passed to the constructor (None by default).
        """
        return self.__meta_data
|
|
@@ -15,8 +15,8 @@ class TextOpinionsLinkage(LinkedDataWrapper):
|
|
|
15
15
|
return self.First.DocID
|
|
16
16
|
|
|
17
17
|
def get_linked_label(self):
|
|
18
|
-
return self.First.
|
|
18
|
+
return self.First.Label
|
|
19
19
|
|
|
20
20
|
def _get_data_label(self, item):
|
|
21
21
|
assert(isinstance(item, TextOpinion))
|
|
22
|
-
return item.
|
|
22
|
+
return item.Label
|
|
@@ -1,9 +1,9 @@
|
|
|
1
1
|
from arekit.common.entities.types import OpinionEntityType
|
|
2
2
|
from arekit.common.labels.provider.base import BasePairLabelProvider
|
|
3
|
-
from arekit.common.
|
|
4
|
-
from arekit.common.
|
|
5
|
-
from arekit.common.
|
|
6
|
-
from arekit.common.
|
|
3
|
+
from arekit.common.docs.entity import DocumentEntity
|
|
4
|
+
from arekit.common.docs.parsed.base import ParsedDocument
|
|
5
|
+
from arekit.common.docs.parsed.providers.entity_service import EntityServiceProvider, DistanceType
|
|
6
|
+
from arekit.common.docs.parsed.providers.opinion_pairs import OpinionPairsProvider
|
|
7
7
|
from arekit.common.opinions.annot.algo.base import BaseOpinionAnnotationAlgorithm
|
|
8
8
|
from arekit.common.opinions.base import Opinion
|
|
9
9
|
|
|
@@ -16,7 +16,8 @@ class PairBasedOpinionAnnotationAlgorithm(BaseOpinionAnnotationAlgorithm):
|
|
|
16
16
|
[1] Extracting Sentiment Attitudes from Analytical Texts https://arxiv.org/pdf/1808.08932.pdf
|
|
17
17
|
"""
|
|
18
18
|
|
|
19
|
-
def __init__(self, dist_in_terms_bound, label_provider, dist_in_sents=0,
|
|
19
|
+
def __init__(self, dist_in_terms_bound, label_provider, entity_index_func, dist_in_sents=0,
|
|
20
|
+
is_entity_ignored_func=None):
|
|
20
21
|
"""
|
|
21
22
|
dist_in_terms_bound: int
|
|
22
23
|
max allowed distance in term (less than passed value)
|
|
@@ -25,6 +26,7 @@ class PairBasedOpinionAnnotationAlgorithm(BaseOpinionAnnotationAlgorithm):
|
|
|
25
26
|
"""
|
|
26
27
|
assert(isinstance(dist_in_terms_bound, int) or dist_in_terms_bound is None)
|
|
27
28
|
assert(isinstance(label_provider, BasePairLabelProvider))
|
|
29
|
+
assert(callable(entity_index_func))
|
|
28
30
|
assert(isinstance(dist_in_sents, int))
|
|
29
31
|
assert(callable(is_entity_ignored_func) or is_entity_ignored_func is None)
|
|
30
32
|
|
|
@@ -32,6 +34,7 @@ class PairBasedOpinionAnnotationAlgorithm(BaseOpinionAnnotationAlgorithm):
|
|
|
32
34
|
self.__dist_in_terms_bound = dist_in_terms_bound
|
|
33
35
|
self.__dist_in_sents = dist_in_sents
|
|
34
36
|
self.__is_entity_ignored_func = is_entity_ignored_func
|
|
37
|
+
self.__entity_index_func = entity_index_func
|
|
35
38
|
|
|
36
39
|
# region private methods
|
|
37
40
|
|
|
@@ -68,7 +71,7 @@ class PairBasedOpinionAnnotationAlgorithm(BaseOpinionAnnotationAlgorithm):
|
|
|
68
71
|
if existed_opinions is not None:
|
|
69
72
|
o = Opinion(source_value=e1.Value,
|
|
70
73
|
target_value=e2.Value,
|
|
71
|
-
|
|
74
|
+
label=self.__label_provider.provide(source=e1, target=e2))
|
|
72
75
|
if existed_opinions.has_synonymous_opinion(opinion=o):
|
|
73
76
|
return
|
|
74
77
|
|
|
@@ -76,8 +79,8 @@ class PairBasedOpinionAnnotationAlgorithm(BaseOpinionAnnotationAlgorithm):
|
|
|
76
79
|
|
|
77
80
|
# endregion
|
|
78
81
|
|
|
79
|
-
def iter_opinions(self,
|
|
80
|
-
assert(isinstance(
|
|
82
|
+
def iter_opinions(self, parsed_doc, existed_opinions=None):
|
|
83
|
+
assert(isinstance(parsed_doc, ParsedDocument))
|
|
81
84
|
|
|
82
85
|
def __filter_pair_func(e1, e2):
|
|
83
86
|
key = self.__try_create_pair_key(entity_service=entity_service_provider,
|
|
@@ -87,11 +90,10 @@ class PairBasedOpinionAnnotationAlgorithm(BaseOpinionAnnotationAlgorithm):
|
|
|
87
90
|
return key is not None
|
|
88
91
|
|
|
89
92
|
# Initialize providers.
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
entity_service_provider.init_parsed_news(parsed_news)
|
|
93
|
+
opinions_provider = OpinionPairsProvider(entity_index_func=self.__entity_index_func)
|
|
94
|
+
entity_service_provider = EntityServiceProvider(entity_index_func=self.__entity_index_func)
|
|
95
|
+
opinions_provider.init_parsed_doc(parsed_doc)
|
|
96
|
+
entity_service_provider.init_parsed_doc(parsed_doc)
|
|
95
97
|
|
|
96
98
|
return opinions_provider.iter_from_all(label_provider=self.__label_provider,
|
|
97
99
|
filter_func=__filter_pair_func)
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
from arekit.common.
|
|
1
|
+
from arekit.common.docs.parsed.base import ParsedDocument
|
|
2
2
|
from arekit.common.opinions.annot.algo.base import BaseOpinionAnnotationAlgorithm
|
|
3
3
|
|
|
4
4
|
|
|
@@ -11,6 +11,6 @@ class PredefinedOpinionAnnotationAlgorithm(BaseOpinionAnnotationAlgorithm):
|
|
|
11
11
|
assert(callable(get_opinions_by_doc_id_func))
|
|
12
12
|
self.__get_opinions_by_doc_id_func = get_opinions_by_doc_id_func
|
|
13
13
|
|
|
14
|
-
def iter_opinions(self,
|
|
15
|
-
assert(isinstance(
|
|
16
|
-
return self.__get_opinions_by_doc_id_func(
|
|
14
|
+
def iter_opinions(self, parsed_doc, existed_opinions=None):
|
|
15
|
+
assert(isinstance(parsed_doc, ParsedDocument))
|
|
16
|
+
return self.__get_opinions_by_doc_id_func(parsed_doc.RelatedDocID)
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
import logging
|
|
2
2
|
|
|
3
|
-
from arekit.common.
|
|
3
|
+
from arekit.common.docs.parsed.base import ParsedDocument
|
|
4
4
|
from arekit.common.opinions.annot.algo.base import BaseOpinionAnnotationAlgorithm
|
|
5
5
|
from arekit.common.opinions.annot.base import BaseOpinionAnnotator
|
|
6
6
|
from arekit.common.opinions.collection import OpinionCollection
|
|
@@ -31,14 +31,14 @@ class AlgorithmBasedOpinionAnnotator(BaseOpinionAnnotator):
|
|
|
31
31
|
|
|
32
32
|
# region private methods
|
|
33
33
|
|
|
34
|
-
def _annot_collection_core(self,
|
|
35
|
-
assert(isinstance(
|
|
34
|
+
def _annot_collection_core(self, parsed_doc):
|
|
35
|
+
assert(isinstance(parsed_doc, ParsedDocument))
|
|
36
36
|
|
|
37
|
-
opinions = self.__get_existed_opinions_func(
|
|
37
|
+
opinions = self.__get_existed_opinions_func(parsed_doc.RelatedDocID)
|
|
38
38
|
assert(isinstance(opinions, OpinionCollection) or opinions is None)
|
|
39
39
|
|
|
40
40
|
annotated_opinions_it = self.__annot_algo.iter_opinions(
|
|
41
|
-
|
|
41
|
+
parsed_doc=parsed_doc, existed_opinions=opinions)
|
|
42
42
|
|
|
43
43
|
collection = self.__create_empty_collection_func()
|
|
44
44
|
assert(isinstance(collection, OpinionCollection))
|
|
@@ -4,12 +4,12 @@ class BaseOpinionAnnotator(object):
|
|
|
4
4
|
using OpinOps and DocOps API.
|
|
5
5
|
"""
|
|
6
6
|
|
|
7
|
-
def _annot_collection_core(self,
|
|
7
|
+
def _annot_collection_core(self, parsed_doc):
|
|
8
8
|
raise NotImplementedError
|
|
9
9
|
|
|
10
10
|
# region public methods
|
|
11
11
|
|
|
12
|
-
def annotate_collection(self,
|
|
13
|
-
return self._annot_collection_core(
|
|
12
|
+
def annotate_collection(self, parsed_doc):
|
|
13
|
+
return self._annot_collection_core(parsed_doc=parsed_doc)
|
|
14
14
|
|
|
15
15
|
# endregion
|
arekit/common/opinions/base.py
CHANGED
|
@@ -7,13 +7,13 @@ class Opinion(object):
|
|
|
7
7
|
""" Source opinion description
|
|
8
8
|
"""
|
|
9
9
|
|
|
10
|
-
def __init__(self, source_value, target_value,
|
|
10
|
+
def __init__(self, source_value, target_value, label):
|
|
11
11
|
assert(isinstance(source_value, str))
|
|
12
12
|
assert(isinstance(target_value, str))
|
|
13
|
-
assert(isinstance(
|
|
14
|
-
self.__source_value = source_value
|
|
15
|
-
self.__target_value = target_value
|
|
16
|
-
self.
|
|
13
|
+
assert(isinstance(label, Label))
|
|
14
|
+
self.__source_value = source_value
|
|
15
|
+
self.__target_value = target_value
|
|
16
|
+
self.__label = label
|
|
17
17
|
self.__tag = None
|
|
18
18
|
|
|
19
19
|
# region properties
|
|
@@ -27,8 +27,8 @@ class Opinion(object):
|
|
|
27
27
|
return self.__target_value
|
|
28
28
|
|
|
29
29
|
@property
|
|
30
|
-
def
|
|
31
|
-
return self.
|
|
30
|
+
def Label(self):
|
|
31
|
+
return self.__label
|
|
32
32
|
|
|
33
33
|
@property
|
|
34
34
|
def Tag(self):
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import
|
|
1
|
+
from collections.abc import Iterable
|
|
2
2
|
|
|
3
3
|
from arekit.common import log_utils
|
|
4
4
|
from arekit.common.labels.base import Label
|
|
@@ -23,7 +23,7 @@ class OpinionCollection(object):
|
|
|
23
23
|
raise_exception_on_duplicates: bool
|
|
24
24
|
denotes whether there is a need to fire exception for duplicates in opinions list.
|
|
25
25
|
"""
|
|
26
|
-
assert(isinstance(opinions,
|
|
26
|
+
assert(isinstance(opinions, Iterable) or isinstance(opinions, type(None)))
|
|
27
27
|
assert(isinstance(synonyms, SynonymsCollection))
|
|
28
28
|
assert(isinstance(error_on_duplicates, bool))
|
|
29
29
|
assert(isinstance(error_on_synonym_end_missed, bool))
|
|
@@ -76,7 +76,7 @@ class OpinionCollection(object):
|
|
|
76
76
|
f_o = self.__by_synonyms[s_id]
|
|
77
77
|
if label is None:
|
|
78
78
|
return f_o
|
|
79
|
-
elif f_o.
|
|
79
|
+
elif f_o.Label == label:
|
|
80
80
|
return f_o
|
|
81
81
|
else:
|
|
82
82
|
return None
|