arekit 0.25.1__py3-none-any.whl → 0.25.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- arekit/common/context/terms_mapper.py +5 -2
- arekit/common/data/input/providers/rows/samples.py +8 -12
- arekit/common/data/input/providers/sample/cropped.py +4 -3
- arekit/common/data/input/terms_mapper.py +4 -8
- arekit/common/data/storages/base.py +0 -3
- arekit/common/docs/entities_grouping.py +5 -3
- arekit/common/docs/parsed/base.py +3 -3
- arekit/common/docs/parsed/providers/base.py +3 -5
- arekit/common/docs/parsed/providers/entity_service.py +7 -28
- arekit/common/docs/parsed/providers/opinion_pairs.py +6 -6
- arekit/common/docs/parsed/providers/text_opinion_pairs.py +4 -4
- arekit/common/docs/parsed/service.py +2 -2
- arekit/common/model/labeling/single.py +7 -3
- arekit/common/opinions/annot/algo/pair_based.py +9 -5
- arekit/common/pipeline/base.py +0 -2
- arekit/common/pipeline/batching.py +0 -3
- arekit/contrib/bert/input/providers/cropped_sample.py +2 -5
- arekit/contrib/bert/terms/mapper.py +2 -2
- arekit/contrib/prompt/sample.py +2 -6
- arekit/contrib/utils/bert/samplers.py +4 -2
- arekit/contrib/utils/pipelines/text_opinion/annot/algo_based.py +8 -5
- arekit/contrib/utils/pipelines/text_opinion/extraction.py +12 -5
- {arekit-0.25.1.dist-info → arekit-0.25.2.dist-info}/METADATA +9 -6
- {arekit-0.25.1.dist-info → arekit-0.25.2.dist-info}/RECORD +28 -36
- {arekit-0.25.1.dist-info → arekit-0.25.2.dist-info}/WHEEL +1 -1
- arekit/common/experiment/__init__.py +0 -0
- arekit/common/experiment/api/__init__.py +0 -0
- arekit/common/experiment/api/base_samples_io.py +0 -20
- arekit/common/experiment/data_type.py +0 -17
- arekit/contrib/utils/data/storages/pandas_based.py +0 -108
- arekit/contrib/utils/entities/formatters/str_display.py +0 -11
- arekit/contrib/utils/pipelines/items/text/entities_default.py +0 -23
- arekit/contrib/utils/processing/__init__.py +0 -0
- {arekit-0.25.1.data → arekit-0.25.2.data}/data/logo.png +0 -0
- {arekit-0.25.1.dist-info → arekit-0.25.2.dist-info}/LICENSE +0 -0
- {arekit-0.25.1.dist-info → arekit-0.25.2.dist-info}/top_level.txt +0 -0
|
@@ -1,12 +1,15 @@
|
|
|
1
1
|
from collections.abc import Iterable
|
|
2
2
|
|
|
3
3
|
from arekit.common.context.token import Token
|
|
4
|
-
from arekit.common.entities.base import Entity
|
|
5
4
|
from arekit.common.frames.text_variant import TextFrameVariant
|
|
6
5
|
|
|
7
6
|
|
|
8
7
|
class TextTermsMapper(object):
|
|
9
8
|
|
|
9
|
+
def __init__(self, is_entity_func):
|
|
10
|
+
assert(callable(is_entity_func))
|
|
11
|
+
self.__is_entity_func = is_entity_func
|
|
12
|
+
|
|
10
13
|
def iter_mapped(self, terms):
|
|
11
14
|
""" Performs mapping operation of each terms in a sequence
|
|
12
15
|
"""
|
|
@@ -22,7 +25,7 @@ class TextTermsMapper(object):
|
|
|
22
25
|
m_term = self.map_token(i, term)
|
|
23
26
|
elif isinstance(term, TextFrameVariant):
|
|
24
27
|
m_term = self.map_text_frame_variant(i, term)
|
|
25
|
-
elif isinstance(term, Entity):
|
|
28
|
+
elif self.__is_entity_func(term):
|
|
26
29
|
m_term = self.map_entity(i, term)
|
|
27
30
|
else:
|
|
28
31
|
raise Exception("Unsupported type {}".format(term))
|
|
@@ -9,13 +9,11 @@ from arekit.common.data.input.providers.label.multiple import MultipleLabelProvi
|
|
|
9
9
|
from arekit.common.data.input.providers.rows.base import BaseRowProvider
|
|
10
10
|
from arekit.common.data.input.providers.text.single import BaseSingleTextProvider
|
|
11
11
|
from arekit.common.data.rows_fmt import create_base_column_fmt
|
|
12
|
-
from arekit.common.entities.base import Entity
|
|
13
|
-
from arekit.common.labels.base import Label
|
|
14
|
-
|
|
15
|
-
from arekit.common.linkage.text_opinions import TextOpinionsLinkage
|
|
16
12
|
from arekit.common.docs.parsed.base import ParsedDocument
|
|
17
13
|
from arekit.common.docs.parsed.providers.entity_service import EntityEndType, EntityServiceProvider
|
|
18
14
|
from arekit.common.docs.parsed.term_position import TermPositionTypes
|
|
15
|
+
from arekit.common.labels.base import Label
|
|
16
|
+
from arekit.common.linkage.text_opinions import TextOpinionsLinkage
|
|
19
17
|
from arekit.common.text_opinions.base import TextOpinion
|
|
20
18
|
|
|
21
19
|
|
|
@@ -26,13 +24,15 @@ class BaseSampleRowProvider(BaseRowProvider):
|
|
|
26
24
|
""" Rows provider for samples storage.
|
|
27
25
|
"""
|
|
28
26
|
|
|
29
|
-
def __init__(self, label_provider, text_provider):
|
|
27
|
+
def __init__(self, is_entity_func, label_provider, text_provider):
|
|
28
|
+
assert(callable(is_entity_func))
|
|
30
29
|
assert(isinstance(label_provider, LabelProvider))
|
|
31
30
|
assert(isinstance(text_provider, BaseSingleTextProvider))
|
|
32
31
|
super(BaseSampleRowProvider, self).__init__()
|
|
33
32
|
|
|
34
33
|
self._label_provider = label_provider
|
|
35
34
|
self.__text_provider = text_provider
|
|
35
|
+
self.__is_entity_func = is_entity_func
|
|
36
36
|
self.__instances_provider = self.__create_instances_provider(label_provider)
|
|
37
37
|
self.__store_labels = None
|
|
38
38
|
self._val_fmt = create_base_column_fmt(fmt_type="writer")
|
|
@@ -65,7 +65,7 @@ class BaseSampleRowProvider(BaseRowProvider):
|
|
|
65
65
|
parsed_doc=parsed_doc, sentence_ind=sentence_ind, s_ind=s_ind, t_ind=t_ind)
|
|
66
66
|
|
|
67
67
|
# Entity indices from the related context.
|
|
68
|
-
entities = list(filter(lambda term: isinstance(term, Entity), sentence_terms))
|
|
68
|
+
entities = list(filter(self.__is_entity_func, sentence_terms))
|
|
69
69
|
|
|
70
70
|
# Values mapping.
|
|
71
71
|
vm = {
|
|
@@ -76,7 +76,7 @@ class BaseSampleRowProvider(BaseRowProvider):
|
|
|
76
76
|
const.SENT_IND: sentence_ind,
|
|
77
77
|
const.ENTITY_VALUES: entities,
|
|
78
78
|
const.ENTITY_TYPES: entities,
|
|
79
|
-
const.ENTITIES: [str(i) for i, t in enumerate(sentence_terms) if isinstance(t, Entity)],
|
|
79
|
+
const.ENTITIES: [str(i) for i, t in enumerate(sentence_terms) if self.__is_entity_func(t)],
|
|
80
80
|
const.S_IND: actual_s_ind,
|
|
81
81
|
const.T_IND: actual_t_ind,
|
|
82
82
|
const.LABEL_UINT: None,
|
|
@@ -143,9 +143,6 @@ class BaseSampleRowProvider(BaseRowProvider):
|
|
|
143
143
|
|
|
144
144
|
def __provide_rows(self, row_dict, parsed_doc, entity_service,
|
|
145
145
|
text_opinion_linkage, index_in_linked, idle_mode):
|
|
146
|
-
"""
|
|
147
|
-
Providing Rows depending on row_id_formatter type
|
|
148
|
-
"""
|
|
149
146
|
assert(isinstance(parsed_doc, ParsedDocument))
|
|
150
147
|
assert(isinstance(row_dict, OrderedDict))
|
|
151
148
|
assert(isinstance(text_opinion_linkage, TextOpinionsLinkage))
|
|
@@ -153,7 +150,6 @@ class BaseSampleRowProvider(BaseRowProvider):
|
|
|
153
150
|
etalon_label = self.__instances_provider.provide_label(text_opinion_linkage)
|
|
154
151
|
for instance in self.__instances_provider.iter_instances(text_opinion_linkage):
|
|
155
152
|
yield self.__create_row(row=row_dict,
|
|
156
|
-
row_id=0,
|
|
157
153
|
parsed_doc=parsed_doc,
|
|
158
154
|
entity_service=entity_service,
|
|
159
155
|
text_opinions_linkage=instance,
|
|
@@ -162,7 +158,7 @@ class BaseSampleRowProvider(BaseRowProvider):
|
|
|
162
158
|
etalon_label=etalon_label,
|
|
163
159
|
idle_mode=idle_mode)
|
|
164
160
|
|
|
165
|
-
def __create_row(self, row,
|
|
161
|
+
def __create_row(self, row, parsed_doc, entity_service, text_opinions_linkage,
|
|
166
162
|
index_in_linked, etalon_label, idle_mode):
|
|
167
163
|
"""
|
|
168
164
|
Composing row in following format:
|
|
@@ -8,10 +8,11 @@ class CroppedSampleRowProvider(BaseSampleRowProvider):
|
|
|
8
8
|
attitude inside.
|
|
9
9
|
"""
|
|
10
10
|
|
|
11
|
-
def __init__(self, crop_window_size, label_scaler,
|
|
11
|
+
def __init__(self, crop_window_size, label_scaler, **kwargs):
|
|
12
12
|
assert(isinstance(crop_window_size, int) and crop_window_size > 0)
|
|
13
|
-
super(CroppedSampleRowProvider, self).__init__(
|
|
14
|
-
|
|
13
|
+
super(CroppedSampleRowProvider, self).__init__(
|
|
14
|
+
label_provider=MultipleLabelProvider(label_scaler),
|
|
15
|
+
**kwargs)
|
|
15
16
|
self.__crop_window_size = crop_window_size
|
|
16
17
|
|
|
17
18
|
@staticmethod
|
|
@@ -1,6 +1,5 @@
|
|
|
1
1
|
from arekit.common.context.terms_mapper import TextTermsMapper
|
|
2
2
|
from arekit.common.context.token import Token
|
|
3
|
-
from arekit.common.entities.base import Entity
|
|
4
3
|
from arekit.common.entities.str_fmt import StringEntitiesFormatter
|
|
5
4
|
from arekit.common.entities.types import OpinionEntityType
|
|
6
5
|
from arekit.common.frames.text_variant import TextFrameVariant
|
|
@@ -12,9 +11,12 @@ class OpinionContainingTextTermsMapper(TextTermsMapper):
|
|
|
12
11
|
The latter might be utilized with synonyms collection
|
|
13
12
|
"""
|
|
14
13
|
|
|
15
|
-
def __init__(self, entity_formatter):
|
|
14
|
+
def __init__(self, entity_formatter, entity_group_ind_func, **kwargs):
|
|
16
15
|
assert(isinstance(entity_formatter, StringEntitiesFormatter))
|
|
16
|
+
assert(callable(entity_group_ind_func))
|
|
17
|
+
super(OpinionContainingTextTermsMapper, self).__init__(**kwargs)
|
|
17
18
|
self.__entities_formatter = entity_formatter
|
|
19
|
+
self.__syn_group = entity_group_ind_func
|
|
18
20
|
self.__s_ind = None
|
|
19
21
|
self.__t_ind = None
|
|
20
22
|
self.__s_group = None
|
|
@@ -24,12 +26,6 @@ class OpinionContainingTextTermsMapper(TextTermsMapper):
|
|
|
24
26
|
def StringEntitiesFormatter(self):
|
|
25
27
|
return self.__entities_formatter
|
|
26
28
|
|
|
27
|
-
def __syn_group(self, entity):
|
|
28
|
-
""" Note: here we guarantee that entity has GroupIndex.
|
|
29
|
-
"""
|
|
30
|
-
assert(isinstance(entity, Entity))
|
|
31
|
-
return entity.GroupIndex if entity is not None else None
|
|
32
|
-
|
|
33
29
|
def set_s_ind(self, s_ind):
|
|
34
30
|
assert(isinstance(s_ind, int))
|
|
35
31
|
self.__s_ind = s_ind
|
|
@@ -1,17 +1,19 @@
|
|
|
1
|
-
from arekit.common.entities.base import Entity
|
|
2
1
|
from arekit.common.pipeline.items.base import BasePipelineItem
|
|
3
2
|
|
|
4
3
|
|
|
5
4
|
class EntitiesGroupingPipelineItem(BasePipelineItem):
|
|
6
5
|
|
|
7
|
-
def __init__(self, value_to_group_id_func, **kwargs):
|
|
6
|
+
def __init__(self, value_to_group_id_func, is_entity_func, **kwargs):
|
|
8
7
|
assert(callable(value_to_group_id_func))
|
|
8
|
+
assert(callable(is_entity_func))
|
|
9
9
|
super(EntitiesGroupingPipelineItem, self).__init__(**kwargs)
|
|
10
|
+
|
|
10
11
|
self.__value_to_group_id_func = value_to_group_id_func
|
|
12
|
+
self.__is_entity_func = is_entity_func
|
|
11
13
|
|
|
12
14
|
def apply_core(self, input_data, pipeline_ctx):
|
|
13
15
|
assert(isinstance(input_data, list))
|
|
14
16
|
|
|
15
|
-
for entity in filter(lambda term: isinstance(term, Entity), input_data):
|
|
17
|
+
for entity in filter(lambda term: self.__is_entity_func(term), input_data):
|
|
16
18
|
group_index = self.__value_to_group_id_func(entity.Value)
|
|
17
19
|
entity.set_group_index(group_index)
|
|
@@ -1,6 +1,5 @@
|
|
|
1
1
|
from collections.abc import Iterable
|
|
2
2
|
|
|
3
|
-
from arekit.common.entities.base import Entity
|
|
4
3
|
from arekit.common.text.enums import TermFormat
|
|
5
4
|
from arekit.common.text.parsed import BaseParsedText
|
|
6
5
|
|
|
@@ -73,8 +72,9 @@ class ParsedDocument(object):
|
|
|
73
72
|
assert(isinstance(s_ind, int))
|
|
74
73
|
return self.__parsed_sentences[s_ind]
|
|
75
74
|
|
|
76
|
-
def iter_entities(self):
|
|
77
|
-
|
|
75
|
+
def iter_entities(self, is_entity_func):
|
|
76
|
+
assert(callable(is_entity_func))
|
|
77
|
+
for entity in self.__iter_all_raw_terms(term_only=True, filter_func=is_entity_func):
|
|
78
78
|
yield entity
|
|
79
79
|
|
|
80
80
|
def iter_terms(self, filter_func=None, term_only=True):
|
|
@@ -1,4 +1,3 @@
|
|
|
1
|
-
from arekit.common.entities.base import Entity
|
|
2
1
|
from arekit.common.docs.entity import DocumentEntity
|
|
3
2
|
from arekit.common.docs.parsed.base import ParsedDocument
|
|
4
3
|
|
|
@@ -6,7 +5,7 @@ from arekit.common.docs.parsed.base import ParsedDocument
|
|
|
6
5
|
class BaseParsedDocumentServiceProvider(object):
|
|
7
6
|
|
|
8
7
|
def __init__(self, entity_index_func=None):
|
|
9
|
-
""" Outside
|
|
8
|
+
""" Outside entity indexing function
|
|
10
9
|
entity_index_func: provides id for a given entity, i.e.
|
|
11
10
|
func(entity) -> int (id)
|
|
12
11
|
"""
|
|
@@ -19,7 +18,7 @@ class BaseParsedDocumentServiceProvider(object):
|
|
|
19
18
|
def Name(self):
|
|
20
19
|
raise NotImplementedError()
|
|
21
20
|
|
|
22
|
-
def init_parsed_doc(self, parsed_doc):
|
|
21
|
+
def init_parsed_doc(self, parsed_doc, is_entity_func):
|
|
23
22
|
assert(isinstance(parsed_doc, ParsedDocument))
|
|
24
23
|
|
|
25
24
|
def __iter_childs_and_root_node(entity):
|
|
@@ -37,7 +36,7 @@ class BaseParsedDocumentServiceProvider(object):
|
|
|
37
36
|
self.__entity_map.clear()
|
|
38
37
|
|
|
39
38
|
current_id = 0
|
|
40
|
-
for _, entity in enumerate(parsed_doc.iter_entities()):
|
|
39
|
+
for _, entity in enumerate(parsed_doc.iter_entities(is_entity_func=is_entity_func)):
|
|
41
40
|
|
|
42
41
|
child_doc_entities = []
|
|
43
42
|
for tree_entity, is_child in __iter_childs_and_root_node(entity):
|
|
@@ -61,7 +60,6 @@ class BaseParsedDocumentServiceProvider(object):
|
|
|
61
60
|
def get_document_entity(self, entity):
|
|
62
61
|
""" Maps entity to the related one with DocumentEntity type
|
|
63
62
|
"""
|
|
64
|
-
assert(isinstance(entity, Entity))
|
|
65
63
|
return self.__entity_map[self.__entity_index_func(entity)]
|
|
66
64
|
|
|
67
65
|
def contains_entity(self, entity):
|
|
@@ -1,8 +1,6 @@
|
|
|
1
1
|
from enum import Enum
|
|
2
2
|
|
|
3
|
-
from arekit.common.entities.base import Entity
|
|
4
3
|
from arekit.common.docs.entity import DocumentEntity
|
|
5
|
-
from arekit.common.docs.parsed.base import ParsedDocument
|
|
6
4
|
from arekit.common.docs.parsed.providers.base import BaseParsedDocumentServiceProvider
|
|
7
5
|
from arekit.common.docs.parsed.term_position import TermPositionTypes, TermPosition
|
|
8
6
|
from arekit.common.text_opinions.base import TextOpinion
|
|
@@ -41,9 +39,8 @@ class EntityServiceProvider(BaseParsedDocumentServiceProvider):
|
|
|
41
39
|
|
|
42
40
|
NAME = "entity-service-provider"
|
|
43
41
|
|
|
44
|
-
def __init__(self,
|
|
45
|
-
|
|
46
|
-
super(EntityServiceProvider, self).__init__(entity_index_func=entity_index_func)
|
|
42
|
+
def __init__(self, **kwargs):
|
|
43
|
+
super(EntityServiceProvider, self).__init__(**kwargs)
|
|
47
44
|
# Initialize API.
|
|
48
45
|
self.__iter_raw_terms_func = None
|
|
49
46
|
# Initialize entity positions.
|
|
@@ -53,24 +50,16 @@ class EntityServiceProvider(BaseParsedDocumentServiceProvider):
|
|
|
53
50
|
def Name(self):
|
|
54
51
|
return self.NAME
|
|
55
52
|
|
|
56
|
-
def init_parsed_doc(self, parsed_doc):
|
|
57
|
-
super(EntityServiceProvider, self).init_parsed_doc(parsed_doc)
|
|
58
|
-
assert(isinstance(parsed_doc, ParsedDocument))
|
|
53
|
+
def init_parsed_doc(self, parsed_doc, is_entity_func):
|
|
54
|
+
super(EntityServiceProvider, self).init_parsed_doc(parsed_doc=parsed_doc, is_entity_func=is_entity_func)
|
|
59
55
|
self.__iter_raw_terms_func = lambda: parsed_doc.iter_terms(filter_func=None, term_only=False)
|
|
60
|
-
self.__entity_positions = self.__calculate_entity_positions()
|
|
61
|
-
|
|
62
|
-
# region public 'extract' methods
|
|
63
|
-
|
|
64
|
-
def extract_entity_value(self, text_opinion, end_type):
|
|
65
|
-
return self.__extract_entity_value(text_opinion=text_opinion, end_type=end_type)
|
|
56
|
+
self.__entity_positions = self.__calculate_entity_positions(is_entity_func=is_entity_func)
|
|
66
57
|
|
|
67
58
|
def extract_entity_position(self, text_opinion, end_type, position_type=None):
|
|
68
59
|
return self.__get_entity_position(text_opinion=text_opinion,
|
|
69
60
|
end_type=end_type,
|
|
70
61
|
position_type=position_type)
|
|
71
62
|
|
|
72
|
-
# endregion
|
|
73
|
-
|
|
74
63
|
# region public 'calculate' methods
|
|
75
64
|
|
|
76
65
|
@staticmethod
|
|
@@ -112,20 +101,10 @@ class EntityServiceProvider(BaseParsedDocumentServiceProvider):
|
|
|
112
101
|
|
|
113
102
|
return e_pos.get_index(position_type)
|
|
114
103
|
|
|
115
|
-
def get_entity_value(self, id_in_document):
|
|
116
|
-
entity = self._doc_entities[id_in_document]
|
|
117
|
-
assert(isinstance(entity, Entity))
|
|
118
|
-
return entity.Value
|
|
119
|
-
|
|
120
104
|
# endregion
|
|
121
105
|
|
|
122
106
|
# region private methods
|
|
123
107
|
|
|
124
|
-
def __extract_entity_value(self, text_opinion, end_type):
|
|
125
|
-
assert(isinstance(text_opinion, TextOpinion))
|
|
126
|
-
end_id = self.__get_end_id(text_opinion=text_opinion, end_type=end_type)
|
|
127
|
-
return self.get_entity_value(end_id)
|
|
128
|
-
|
|
129
108
|
def __get_entity_position(self, text_opinion, end_type, position_type=None):
|
|
130
109
|
assert(isinstance(text_opinion, TextOpinion))
|
|
131
110
|
end_id = self.__get_end_id(text_opinion=text_opinion, end_type=end_type)
|
|
@@ -147,7 +126,7 @@ class EntityServiceProvider(BaseParsedDocumentServiceProvider):
|
|
|
147
126
|
assert(end_type == EntityEndType.Source or end_type == EntityEndType.Target)
|
|
148
127
|
return text_opinion.SourceId if end_type == EntityEndType.Source else text_opinion.TargetId
|
|
149
128
|
|
|
150
|
-
def __calculate_entity_positions(self):
|
|
129
|
+
def __calculate_entity_positions(self, is_entity_func):
|
|
151
130
|
""" Note: here we consider the same order as in self._entities.
|
|
152
131
|
"""
|
|
153
132
|
t_ind_in_doc = -1
|
|
@@ -157,7 +136,7 @@ class EntityServiceProvider(BaseParsedDocumentServiceProvider):
|
|
|
157
136
|
|
|
158
137
|
t_ind_in_doc += 1
|
|
159
138
|
|
|
160
|
-
if not isinstance(term, Entity):
|
|
139
|
+
if not is_entity_func(term):
|
|
161
140
|
continue
|
|
162
141
|
|
|
163
142
|
# We consider that entities within a single tree has the same positions.
|
|
@@ -1,4 +1,3 @@
|
|
|
1
|
-
from arekit.common.entities.base import Entity
|
|
2
1
|
from arekit.common.docs.parsed.providers.base_pairs import BasePairProvider
|
|
3
2
|
from arekit.common.opinions.base import Opinion
|
|
4
3
|
|
|
@@ -7,14 +6,15 @@ class OpinionPairsProvider(BasePairProvider):
|
|
|
7
6
|
|
|
8
7
|
NAME = "opinion-pairs-provider"
|
|
9
8
|
|
|
9
|
+
def __init__(self, entity_value_func, **kwargs):
|
|
10
|
+
super(OpinionPairsProvider, self).__init__(**kwargs)
|
|
11
|
+
self.__entity_value_func = entity_value_func
|
|
12
|
+
|
|
10
13
|
@property
|
|
11
14
|
def Name(self):
|
|
12
15
|
return self.NAME
|
|
13
16
|
|
|
14
17
|
def _create_pair(self, source_entity, target_entity, label):
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
return Opinion(source_value=source_entity.Value,
|
|
19
|
-
target_value=target_entity.Value,
|
|
18
|
+
return Opinion(source_value=self.__entity_value_func(source_entity),
|
|
19
|
+
target_value=self.__entity_value_func(target_entity),
|
|
20
20
|
label=label)
|
|
@@ -16,8 +16,8 @@ class TextOpinionPairsProvider(BasePairProvider):
|
|
|
16
16
|
|
|
17
17
|
NAME = "text-opinion-pairs-provider"
|
|
18
18
|
|
|
19
|
-
def __init__(self, value_to_group_id_func):
|
|
20
|
-
super(TextOpinionPairsProvider, self).__init__()
|
|
19
|
+
def __init__(self, value_to_group_id_func, **kwargs):
|
|
20
|
+
super(TextOpinionPairsProvider, self).__init__(**kwargs)
|
|
21
21
|
self.__value_to_group_id_func = value_to_group_id_func
|
|
22
22
|
self.__doc_id = None
|
|
23
23
|
self.__entities_collection = None
|
|
@@ -36,8 +36,8 @@ class TextOpinionPairsProvider(BasePairProvider):
|
|
|
36
36
|
label=label,
|
|
37
37
|
text_opinion_id=None)
|
|
38
38
|
|
|
39
|
-
def init_parsed_doc(self, parsed_doc):
|
|
40
|
-
super(TextOpinionPairsProvider, self).init_parsed_doc(parsed_doc)
|
|
39
|
+
def init_parsed_doc(self, parsed_doc, is_entity_func):
|
|
40
|
+
super(TextOpinionPairsProvider, self).init_parsed_doc(parsed_doc=parsed_doc, is_entity_func=is_entity_func)
|
|
41
41
|
self.__doc_id = parsed_doc.RelatedDocID
|
|
42
42
|
self.__entities_collection = EntityCollection(
|
|
43
43
|
entities=list(self._doc_entities),
|
|
@@ -6,7 +6,7 @@ class ParsedDocumentService(object):
|
|
|
6
6
|
""" Represents a collection of providers, combined with the parsed doc.
|
|
7
7
|
"""
|
|
8
8
|
|
|
9
|
-
def __init__(self, parsed_doc, providers):
|
|
9
|
+
def __init__(self, parsed_doc, providers, is_entity_func):
|
|
10
10
|
assert(isinstance(parsed_doc, ParsedDocument))
|
|
11
11
|
assert(isinstance(providers, list))
|
|
12
12
|
self.__parsed_doc = parsed_doc
|
|
@@ -20,7 +20,7 @@ class ParsedDocumentService(object):
|
|
|
20
20
|
self.__providers[provider.Name] = provider
|
|
21
21
|
|
|
22
22
|
# Post initialize with the related parsed doc.
|
|
23
|
-
provider.init_parsed_doc(self.__parsed_doc)
|
|
23
|
+
provider.init_parsed_doc(self.__parsed_doc, is_entity_func=is_entity_func)
|
|
24
24
|
|
|
25
25
|
|
|
26
26
|
@property
|
|
@@ -1,11 +1,15 @@
|
|
|
1
|
-
import numpy as np
|
|
2
|
-
|
|
3
1
|
from arekit.common.model.labeling.base import LabelsHelper
|
|
4
2
|
from arekit.common.model.labeling.modes import LabelCalculationMode
|
|
5
3
|
|
|
6
4
|
|
|
7
5
|
class SingleLabelsHelper(LabelsHelper):
|
|
8
6
|
|
|
7
|
+
@staticmethod
|
|
8
|
+
def __sign(x):
|
|
9
|
+
if x == 0:
|
|
10
|
+
return 0
|
|
11
|
+
return -1 if x < 0 else 1
|
|
12
|
+
|
|
9
13
|
def aggregate_labels(self, labels_list, label_calc_mode):
|
|
10
14
|
assert(isinstance(labels_list, list))
|
|
11
15
|
assert(isinstance(label_calc_mode, LabelCalculationMode))
|
|
@@ -18,7 +22,7 @@ class SingleLabelsHelper(LabelsHelper):
|
|
|
18
22
|
if label_calc_mode == LabelCalculationMode.AVERAGE:
|
|
19
23
|
int_labels = [self._label_scaler.label_to_int(label)
|
|
20
24
|
for label in labels_list]
|
|
21
|
-
label = self._label_scaler.int_to_label(np.sign(sum(int_labels)))
|
|
25
|
+
label = self._label_scaler.int_to_label(SingleLabelsHelper.__sign(sum(int_labels)))
|
|
22
26
|
|
|
23
27
|
return label
|
|
24
28
|
|
|
@@ -16,8 +16,9 @@ class PairBasedOpinionAnnotationAlgorithm(BaseOpinionAnnotationAlgorithm):
|
|
|
16
16
|
[1] Extracting Sentiment Attitudes from Analytical Texts https://arxiv.org/pdf/1808.08932.pdf
|
|
17
17
|
"""
|
|
18
18
|
|
|
19
|
-
def __init__(self, dist_in_terms_bound, label_provider,
|
|
20
|
-
|
|
19
|
+
def __init__(self, dist_in_terms_bound, label_provider,
|
|
20
|
+
is_entity_func, entity_index_func, entity_value_func,
|
|
21
|
+
dist_in_sents=0, is_entity_ignored_func=None):
|
|
21
22
|
"""
|
|
22
23
|
dist_in_terms_bound: int
|
|
23
24
|
max allowed distance in term (less than passed value)
|
|
@@ -34,7 +35,9 @@ class PairBasedOpinionAnnotationAlgorithm(BaseOpinionAnnotationAlgorithm):
|
|
|
34
35
|
self.__dist_in_terms_bound = dist_in_terms_bound
|
|
35
36
|
self.__dist_in_sents = dist_in_sents
|
|
36
37
|
self.__is_entity_ignored_func = is_entity_ignored_func
|
|
38
|
+
self.__is_entity_func = is_entity_func
|
|
37
39
|
self.__entity_index_func = entity_index_func
|
|
40
|
+
self.__entity_value_func = entity_value_func
|
|
38
41
|
|
|
39
42
|
# region private methods
|
|
40
43
|
|
|
@@ -90,10 +93,11 @@ class PairBasedOpinionAnnotationAlgorithm(BaseOpinionAnnotationAlgorithm):
|
|
|
90
93
|
return key is not None
|
|
91
94
|
|
|
92
95
|
# Initialize providers.
|
|
93
|
-
opinions_provider = OpinionPairsProvider(entity_index_func=self.__entity_index_func)
|
|
96
|
+
opinions_provider = OpinionPairsProvider(entity_index_func=self.__entity_index_func,
|
|
97
|
+
entity_value_func=self.__entity_value_func)
|
|
94
98
|
entity_service_provider = EntityServiceProvider(entity_index_func=self.__entity_index_func)
|
|
95
|
-
opinions_provider.init_parsed_doc(parsed_doc)
|
|
96
|
-
entity_service_provider.init_parsed_doc(parsed_doc)
|
|
99
|
+
opinions_provider.init_parsed_doc(parsed_doc=parsed_doc, is_entity_func=self.__is_entity_func)
|
|
100
|
+
entity_service_provider.init_parsed_doc(parsed_doc=parsed_doc, is_entity_func=self.__is_entity_func)
|
|
97
101
|
|
|
98
102
|
return opinions_provider.iter_from_all(label_provider=self.__label_provider,
|
|
99
103
|
filter_func=__filter_pair_func)
|
arekit/common/pipeline/base.py
CHANGED
|
@@ -1,5 +1,4 @@
|
|
|
1
1
|
from arekit.common.pipeline.context import PipelineContext
|
|
2
|
-
from arekit.common.pipeline.items.base import BasePipelineItem
|
|
3
2
|
|
|
4
3
|
|
|
5
4
|
class BasePipelineLauncher:
|
|
@@ -11,7 +10,6 @@ class BasePipelineLauncher:
|
|
|
11
10
|
assert(isinstance(src_key, str) or src_key is None)
|
|
12
11
|
|
|
13
12
|
for ind, item in enumerate(filter(lambda itm: itm is not None, pipeline)):
|
|
14
|
-
assert(isinstance(item, BasePipelineItem))
|
|
15
13
|
do_force_key = src_key is not None and ind == 0
|
|
16
14
|
input_data = item.get_source(pipeline_ctx, force_key=src_key if do_force_key else None) \
|
|
17
15
|
if has_input or ind > 0 else None
|
|
@@ -1,5 +1,4 @@
|
|
|
1
1
|
from arekit.common.pipeline.context import PipelineContext
|
|
2
|
-
from arekit.common.pipeline.items.base import BasePipelineItem
|
|
3
2
|
|
|
4
3
|
|
|
5
4
|
class BatchingPipelineLauncher:
|
|
@@ -11,8 +10,6 @@ class BatchingPipelineLauncher:
|
|
|
11
10
|
assert(isinstance(src_key, str) or src_key is None)
|
|
12
11
|
|
|
13
12
|
for ind, item in enumerate(filter(lambda itm: itm is not None, pipeline)):
|
|
14
|
-
assert (isinstance(item, BasePipelineItem))
|
|
15
|
-
|
|
16
13
|
# Handle the content of the batch or batch itself.
|
|
17
14
|
content = item.get_source(pipeline_ctx, call_func=False, force_key=src_key if ind == 0 else None)
|
|
18
15
|
handled_batch = [item._src_func(i) if item._src_func is not None else i for i in content]
|
|
@@ -5,13 +5,10 @@ from arekit.contrib.bert.input.providers.text_pair import PairTextProvider
|
|
|
5
5
|
|
|
6
6
|
class CroppedBertSampleRowProvider(CroppedSampleRowProvider):
|
|
7
7
|
|
|
8
|
-
def __init__(self,
|
|
8
|
+
def __init__(self, text_b_template, text_terms_mapper, **kwargs):
|
|
9
9
|
|
|
10
10
|
text_provider = BaseSingleTextProvider(text_terms_mapper=text_terms_mapper) \
|
|
11
11
|
if text_b_template is None else PairTextProvider(text_b_prompt=text_b_template,
|
|
12
12
|
text_terms_mapper=text_terms_mapper)
|
|
13
13
|
|
|
14
|
-
super(CroppedBertSampleRowProvider, self).__init__(
|
|
15
|
-
crop_window_size=crop_window_size,
|
|
16
|
-
label_scaler=label_scaler,
|
|
17
|
-
text_provider=text_provider)
|
|
14
|
+
super(CroppedBertSampleRowProvider, self).__init__(text_provider=text_provider, **kwargs)
|
|
@@ -7,11 +7,11 @@ class BertDefaultStringTextTermsMapper(OpinionContainingTextTermsMapper):
|
|
|
7
7
|
a base class assumes to provide an orginal frame variant value.
|
|
8
8
|
"""
|
|
9
9
|
|
|
10
|
-
def __init__(self,
|
|
10
|
+
def __init__(self, word_separator=' ', **kwargs):
|
|
11
11
|
""" See https://github.com/nicolay-r/AREkit/issues/377
|
|
12
12
|
for a greater details.
|
|
13
13
|
"""
|
|
14
|
-
super(BertDefaultStringTextTermsMapper, self).__init__(
|
|
14
|
+
super(BertDefaultStringTextTermsMapper, self).__init__(**kwargs)
|
|
15
15
|
self.__word_separator = word_separator
|
|
16
16
|
|
|
17
17
|
def map_entity(self, e_ind, entity):
|
arekit/contrib/prompt/sample.py
CHANGED
|
@@ -8,7 +8,7 @@ class PromptedSampleRowProvider(CroppedSampleRowProvider):
|
|
|
8
8
|
""" Sample, enriched with the prompt technique.
|
|
9
9
|
"""
|
|
10
10
|
|
|
11
|
-
def __init__(self,
|
|
11
|
+
def __init__(self, prompt, label_fmt=None, **kwargs):
|
|
12
12
|
""" crop_window_size: int
|
|
13
13
|
crop window size for the original text.
|
|
14
14
|
prompt: str
|
|
@@ -17,12 +17,8 @@ class PromptedSampleRowProvider(CroppedSampleRowProvider):
|
|
|
17
17
|
text, s_ind, t_ind, s_val, t_val, label_uint
|
|
18
18
|
"""
|
|
19
19
|
assert(isinstance(prompt, str))
|
|
20
|
-
assert(isinstance(text_provider, BaseSingleTextProvider))
|
|
21
20
|
assert(isinstance(label_fmt, StringLabelsFormatter) or label_fmt is None)
|
|
22
|
-
|
|
23
|
-
super(PromptedSampleRowProvider, self).__init__(crop_window_size=crop_window_size,
|
|
24
|
-
label_scaler=label_scaler,
|
|
25
|
-
text_provider=text_provider)
|
|
21
|
+
super(PromptedSampleRowProvider, self).__init__(**kwargs)
|
|
26
22
|
|
|
27
23
|
self.__prompt = prompt
|
|
28
24
|
self.__labels_fmt = label_fmt
|
|
@@ -5,7 +5,7 @@ from arekit.common.data.input.terms_mapper import OpinionContainingTextTermsMapp
|
|
|
5
5
|
from arekit.contrib.bert.input.providers.text_pair import PairTextProvider
|
|
6
6
|
|
|
7
7
|
|
|
8
|
-
def create_sample_provider(label_scaler, text_terms_mapper, text_b_prompt=None):
|
|
8
|
+
def create_sample_provider(is_entity_func, label_scaler, text_terms_mapper, text_b_prompt=None):
|
|
9
9
|
assert(isinstance(text_terms_mapper, OpinionContainingTextTermsMapper))
|
|
10
10
|
|
|
11
11
|
text_provider = BaseSingleTextProvider(text_terms_mapper=text_terms_mapper) \
|
|
@@ -14,4 +14,6 @@ def create_sample_provider(label_scaler, text_terms_mapper, text_b_prompt=None):
|
|
|
14
14
|
|
|
15
15
|
label_provider = MultipleLabelProvider(label_scaler=label_scaler)
|
|
16
16
|
|
|
17
|
-
return BaseSampleRowProvider(text_provider=text_provider,
|
|
17
|
+
return BaseSampleRowProvider(text_provider=text_provider,
|
|
18
|
+
label_provider=label_provider,
|
|
19
|
+
is_entity_func=is_entity_func)
|
|
@@ -9,7 +9,7 @@ class AlgorithmBasedTextOpinionAnnotator(AlgorithmBasedOpinionAnnotator):
|
|
|
9
9
|
"""
|
|
10
10
|
|
|
11
11
|
def __init__(self, value_to_group_id_func, annot_algo, create_empty_collection_func,
|
|
12
|
-
get_doc_existed_opinions_func=None):
|
|
12
|
+
is_entity_func, get_doc_existed_opinions_func=None):
|
|
13
13
|
""" get_doc_existed_opinions_func: func or None
|
|
14
14
|
function that provides existed opinions for a document;
|
|
15
15
|
if None, then we consider an absence of the existed document-level opinions.
|
|
@@ -20,14 +20,17 @@ class AlgorithmBasedTextOpinionAnnotator(AlgorithmBasedOpinionAnnotator):
|
|
|
20
20
|
create_empty_collection_func=create_empty_collection_func,
|
|
21
21
|
get_doc_existed_opinions_func=get_doc_existed_opinions_func)
|
|
22
22
|
self.__value_to_group_id_func = value_to_group_id_func
|
|
23
|
+
self.__is_entity_func = is_entity_func
|
|
23
24
|
|
|
24
25
|
def __create_service(self, parsed_doc):
|
|
25
|
-
return ParsedDocumentService(
|
|
26
|
-
|
|
27
|
-
|
|
26
|
+
return ParsedDocumentService(
|
|
27
|
+
parsed_doc=parsed_doc,
|
|
28
|
+
providers=[TextOpinionPairsProvider(self.__value_to_group_id_func, entity_index_func=None)],
|
|
29
|
+
is_entity_func=self.__is_entity_func
|
|
30
|
+
)
|
|
28
31
|
|
|
29
32
|
def annotate_collection(self, parsed_doc):
|
|
30
|
-
service = self.__create_service(parsed_doc)
|
|
33
|
+
service = self.__create_service(parsed_doc=parsed_doc)
|
|
31
34
|
topp = service.get_provider(TextOpinionPairsProvider.NAME)
|
|
32
35
|
for opinion in super(AlgorithmBasedTextOpinionAnnotator, self).annotate_collection(parsed_doc):
|
|
33
36
|
for text_opinion in topp.iter_from_opinion(opinion):
|
|
@@ -12,7 +12,8 @@ from arekit.contrib.utils.pipelines.text_opinion.filters.base import TextOpinion
|
|
|
12
12
|
from arekit.contrib.utils.pipelines.text_opinion.filters.limitation import FrameworkLimitationsTextOpinionFilter
|
|
13
13
|
|
|
14
14
|
|
|
15
|
-
def __iter_text_opinion_linkages(parsed_doc, annotators,
|
|
15
|
+
def __iter_text_opinion_linkages(parsed_doc, annotators,
|
|
16
|
+
is_entity_func, entity_index_func,
|
|
16
17
|
text_opinion_filters, use_meta):
|
|
17
18
|
""" use_meta: bool
|
|
18
19
|
this is mainly for the progress-bar and other console parameters to stay up-to-date
|
|
@@ -27,7 +28,9 @@ def __iter_text_opinion_linkages(parsed_doc, annotators, entity_index_func,
|
|
|
27
28
|
def __to_id(text_opinion):
|
|
28
29
|
return "{}_{}".format(text_opinion.SourceId, text_opinion.TargetId)
|
|
29
30
|
|
|
30
|
-
service = ParsedDocumentService(parsed_doc=parsed_doc,
|
|
31
|
+
service = ParsedDocumentService(parsed_doc=parsed_doc,
|
|
32
|
+
providers=[EntityServiceProvider(entity_index_func=entity_index_func)],
|
|
33
|
+
is_entity_func=is_entity_func)
|
|
31
34
|
esp = service.get_provider(EntityServiceProvider.NAME)
|
|
32
35
|
|
|
33
36
|
predefined = set()
|
|
@@ -62,9 +65,12 @@ def __iter_text_opinion_linkages(parsed_doc, annotators, entity_index_func,
|
|
|
62
65
|
yield MetaEmptyLinkedDataWrapper(doc_id=parsed_doc.RelatedDocID)
|
|
63
66
|
|
|
64
67
|
|
|
65
|
-
def text_opinion_extraction_pipeline(pipeline_items, get_doc_by_id_func, annotators,
|
|
68
|
+
def text_opinion_extraction_pipeline(pipeline_items, get_doc_by_id_func, annotators,
|
|
69
|
+
is_entity_func, entity_index_func, batch_size,
|
|
66
70
|
text_opinion_filters=None, use_meta_between_docs=True):
|
|
67
71
|
assert(callable(get_doc_by_id_func))
|
|
72
|
+
assert(callable(is_entity_func))
|
|
73
|
+
assert(callable(entity_index_func))
|
|
68
74
|
assert(isinstance(annotators, list))
|
|
69
75
|
assert(isinstance(text_opinion_filters, list) or text_opinion_filters is None)
|
|
70
76
|
assert(isinstance(use_meta_between_docs, bool))
|
|
@@ -83,9 +89,10 @@ def text_opinion_extraction_pipeline(pipeline_items, get_doc_by_id_func, annotat
|
|
|
83
89
|
|
|
84
90
|
# (parsed_doc) -> (text_opinions)
|
|
85
91
|
MapPipelineItem(map_func=lambda parsed_doc: __iter_text_opinion_linkages(
|
|
86
|
-
annotators=annotators, parsed_doc=parsed_doc,
|
|
92
|
+
annotators=annotators, parsed_doc=parsed_doc,
|
|
93
|
+
is_entity_func=is_entity_func, entity_index_func=entity_index_func,
|
|
87
94
|
text_opinion_filters=actual_text_opinion_filters, use_meta=use_meta_between_docs)),
|
|
88
95
|
|
|
89
96
|
# linkages[] -> linkages
|
|
90
97
|
FlattenIterPipelineItem()
|
|
91
|
-
]
|
|
98
|
+
]
|
|
@@ -1,12 +1,13 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: arekit
|
|
3
|
-
Version: 0.25.
|
|
3
|
+
Version: 0.25.2
|
|
4
4
|
Summary: Document level Attitude and Relation Extraction toolkit (AREkit) for sampling and prompting mass-media news into datasets for ML-model training
|
|
5
5
|
Home-page: https://github.com/nicolay-r/AREkit
|
|
6
6
|
Author: Nicolay Rusnachenko
|
|
7
7
|
Author-email: rusnicolay@gmail.com
|
|
8
8
|
License: MIT License
|
|
9
9
|
Keywords: natural language processing,relation extraction,sentiment analysis
|
|
10
|
+
Platform: UNKNOWN
|
|
10
11
|
Classifier: Programming Language :: Python
|
|
11
12
|
Classifier: Programming Language :: Python :: 3.6
|
|
12
13
|
Classifier: Topic :: Software Development :: Libraries :: Python Modules
|
|
@@ -14,14 +15,14 @@ Classifier: Topic :: Scientific/Engineering :: Information Analysis
|
|
|
14
15
|
Classifier: Topic :: Text Processing :: Linguistic
|
|
15
16
|
Requires-Python: >=3.6
|
|
16
17
|
Description-Content-Type: text/markdown
|
|
17
|
-
|
|
18
|
+
Requires-Dist: enum34 (==1.1.10)
|
|
18
19
|
Requires-Dist: tqdm
|
|
19
|
-
Requires-Dist: enum34==1.1.10
|
|
20
|
-
Requires-Dist: numpy>=1.14.5
|
|
21
20
|
|
|
22
|
-
# AREkit 0.25.
|
|
21
|
+
# AREkit 0.25.2
|
|
23
22
|
|
|
24
23
|

|
|
24
|
+
[](https://pypistats.org/packages/arekit)
|
|
25
|
+
|
|
25
26
|
|
|
26
27
|
<p align="center">
|
|
27
28
|
<img src="logo.png"/>
|
|
@@ -59,7 +60,7 @@ for sentence level relations preparation (dubbed as contexts);
|
|
|
59
60
|
## Installation
|
|
60
61
|
|
|
61
62
|
```bash
|
|
62
|
-
pip install git+https://github.com/nicolay-r/AREkit.git@0.25.
|
|
63
|
+
pip install git+https://github.com/nicolay-r/AREkit.git@0.25.2-rc
|
|
63
64
|
```
|
|
64
65
|
|
|
65
66
|
## Usage
|
|
@@ -79,3 +80,5 @@ if you use or extend our work, please cite as follows:
|
|
|
79
80
|
organization={Springer}
|
|
80
81
|
}
|
|
81
82
|
```
|
|
83
|
+
|
|
84
|
+
|
|
@@ -4,7 +4,7 @@ arekit/common/bound.py,sha256=lPpHY6ct_CU9e4qXeYjhJfWbTj6Sb_NVtZ1CJheQPNE,1402
|
|
|
4
4
|
arekit/common/log_utils.py,sha256=OfEQxbExkuRAl9dxlgFEqcFhI4HHoMYT7WE8ud0IPOM,924
|
|
5
5
|
arekit/common/utils.py,sha256=N061ENJJgvsB338Q9cixc6RWyuikSPQq4Tc8mmgwy9s,2659
|
|
6
6
|
arekit/common/context/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
7
|
-
arekit/common/context/terms_mapper.py,sha256=
|
|
7
|
+
arekit/common/context/terms_mapper.py,sha256=tBs_dMettLjVrqwPwTMZg3Pgxo6PZJpu-Qh6ZOWWFJA,1532
|
|
8
8
|
arekit/common/context/token.py,sha256=CpWAlvprUnJfCtYvO8lwdfU_ofSKAOGOudXTwppyzSk,459
|
|
9
9
|
arekit/common/data/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
10
10
|
arekit/common/data/const.py,sha256=J74zim3CGJlLJp-AVn5z9TTuBfmttjiM_8sRW1Pc-iE,457
|
|
@@ -13,7 +13,7 @@ arekit/common/data/rows_fmt.py,sha256=klq9HdzSnhbRBhOw7O4ctp3PZ5L6ZVy-0eIV2vLLYY
|
|
|
13
13
|
arekit/common/data/rows_parser.py,sha256=qYSEETvhX_0_JuAqm0bjK_V28_53qq7OY9JAnBdRC78,1513
|
|
14
14
|
arekit/common/data/input/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
15
15
|
arekit/common/data/input/sample.py,sha256=6JeGxsLbEUXVKPWA1hIlkTDNOaYg4bHCJWw0ULrLByg,2143
|
|
16
|
-
arekit/common/data/input/terms_mapper.py,sha256=
|
|
16
|
+
arekit/common/data/input/terms_mapper.py,sha256=pOD8lGsdM-23maXr9nlHM1QMJ3hsx_5HGe6X3aQcq6k,3133
|
|
17
17
|
arekit/common/data/input/providers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
18
18
|
arekit/common/data/input/providers/const.py,sha256=GDvPkgP7hllHW3QiueMBQgQyu2CtNFI4JYNNja2Im6Q,187
|
|
19
19
|
arekit/common/data/input/providers/contents.py,sha256=jT1LJE_5Igw5H2e1jKsWWciHSbPVg649phT177SzhEA,261
|
|
@@ -30,38 +30,34 @@ arekit/common/data/input/providers/label/binary.py,sha256=jPD6Jn8DYMrdI3jN8ueoWv
|
|
|
30
30
|
arekit/common/data/input/providers/label/multiple.py,sha256=HWbHF_CwwbiLQbYm5dgvnXAm0b6tJOyFYFEUBxuWAqI,492
|
|
31
31
|
arekit/common/data/input/providers/rows/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
32
32
|
arekit/common/data/input/providers/rows/base.py,sha256=syH7ZEW3Agwfb1IR0G7n_Amy3Kkg0EZk2V7kH3r7ADg,2517
|
|
33
|
-
arekit/common/data/input/providers/rows/samples.py,sha256=
|
|
33
|
+
arekit/common/data/input/providers/rows/samples.py,sha256=iUBmKTnevAyfXDb4d6_Wntfw59wWASqSteXOhD5ez64,9334
|
|
34
34
|
arekit/common/data/input/providers/sample/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
35
|
-
arekit/common/data/input/providers/sample/cropped.py,sha256=
|
|
35
|
+
arekit/common/data/input/providers/sample/cropped.py,sha256=RSoDIoqIodANBW7zmj91ltgw4eYGISCWfl6zLuQXwFM,1831
|
|
36
36
|
arekit/common/data/input/providers/text/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
37
37
|
arekit/common/data/input/providers/text/single.py,sha256=vm3sShIYZcmses-hmZX9cOfveWXCYGwvKLgQ0qs3VXQ,1604
|
|
38
38
|
arekit/common/data/storages/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
39
|
-
arekit/common/data/storages/base.py,sha256=
|
|
39
|
+
arekit/common/data/storages/base.py,sha256=xMMfHhG68ZraERLbipCN_OhqpLBSDq_S56qAtxGsU7Y,2595
|
|
40
40
|
arekit/common/docs/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
41
41
|
arekit/common/docs/base.py,sha256=uXUOtpR9BEsDBfDHg4eLqOjfSVOV_o9VPii3nSxLZuY,734
|
|
42
|
-
arekit/common/docs/entities_grouping.py,sha256=
|
|
42
|
+
arekit/common/docs/entities_grouping.py,sha256=9Xr5NsrWD9_jjKLFE7HOqjkOibzjz840ef04CekkXNU,765
|
|
43
43
|
arekit/common/docs/entity.py,sha256=TxrZMdIEgjk-PgCyskCkVis2KAw_M7vTBp3ppP6G05M,662
|
|
44
44
|
arekit/common/docs/parser.py,sha256=dzWjpbbYt-C9UU9sSy_Holnm0kQxJqtz1_6va6kS_L4,1780
|
|
45
45
|
arekit/common/docs/sentence.py,sha256=nZCCFj2yk71POoXCBfEMN3pteM2qQdj60eEzxMVY_3k,302
|
|
46
46
|
arekit/common/docs/parsed/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
47
|
-
arekit/common/docs/parsed/base.py,sha256=
|
|
48
|
-
arekit/common/docs/parsed/service.py,sha256=
|
|
47
|
+
arekit/common/docs/parsed/base.py,sha256=e43kQyxeO-eaPKr3-5SyZ4N33QIDDePTE_CGmEliO7c,3168
|
|
48
|
+
arekit/common/docs/parsed/service.py,sha256=k_4k9EQ7iFq97bvAZHz6dtxCltiJQMd3Suv5W_t7MBE,1076
|
|
49
49
|
arekit/common/docs/parsed/term_position.py,sha256=H9eQQeanLxwP6og30TQUnpcXymGEPwXClRpaE8VnpLs,1040
|
|
50
50
|
arekit/common/docs/parsed/providers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
51
|
-
arekit/common/docs/parsed/providers/base.py,sha256=
|
|
51
|
+
arekit/common/docs/parsed/providers/base.py,sha256=9MPqxC8mTD4naXH_AoOH0bIPNR7wR9GkOL-Nm2D6Kdo,2543
|
|
52
52
|
arekit/common/docs/parsed/providers/base_pairs.py,sha256=RDYjspkENPQU2pn7Jp5mFrL9566eVWgXMEzWBQlMdRo,2195
|
|
53
|
-
arekit/common/docs/parsed/providers/entity_service.py,sha256=
|
|
54
|
-
arekit/common/docs/parsed/providers/opinion_pairs.py,sha256=
|
|
55
|
-
arekit/common/docs/parsed/providers/text_opinion_pairs.py,sha256=
|
|
53
|
+
arekit/common/docs/parsed/providers/entity_service.py,sha256=An_urYXU4r1PKIUNfhlGCjK6UNLwr3EkebkiaodBsRg,5895
|
|
54
|
+
arekit/common/docs/parsed/providers/opinion_pairs.py,sha256=dSd698VSbVefT0VbuQehaErquFixBfs42OAdX3BJH5M,693
|
|
55
|
+
arekit/common/docs/parsed/providers/text_opinion_pairs.py,sha256=MK1-m2_LJgjeis6AvY1hwT2N8rqHRCpIp7oWqXzgk9I,3215
|
|
56
56
|
arekit/common/entities/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
57
57
|
arekit/common/entities/base.py,sha256=kpJFo4pCRVBQX6T8PibLKspp9UwoIrkHDoFMTM9KkUs,1646
|
|
58
58
|
arekit/common/entities/collection.py,sha256=ySSriMYP6zzdto1mC0V9VPXmkAqyJN3mmGoqoNValGI,1931
|
|
59
59
|
arekit/common/entities/str_fmt.py,sha256=gAPeS8RXdhh8Px_u5eOAPbtLREiiyMueid0lQoa4EbQ,250
|
|
60
60
|
arekit/common/entities/types.py,sha256=pxFB0gsevdsmnduN_Ffk7_P2TRiMt6NAHyrutuKOFvs,145
|
|
61
|
-
arekit/common/experiment/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
62
|
-
arekit/common/experiment/data_type.py,sha256=DezUkfwLTf6XLYheqPiaWyx3ZwcldsJ8wDV8aNgJtDk,227
|
|
63
|
-
arekit/common/experiment/api/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
64
|
-
arekit/common/experiment/api/base_samples_io.py,sha256=SN8CnbEYaazE3SldvnENfjoNRHsTejtrg4jJfqfZLMs,516
|
|
65
61
|
arekit/common/frames/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
66
62
|
arekit/common/frames/text_variant.py,sha256=TlWR4jnuF7HW9BMHhOTKkr768V_Ub0wd0E5A4YTwD0c,875
|
|
67
63
|
arekit/common/frames/connotations/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
@@ -89,7 +85,7 @@ arekit/common/model/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSu
|
|
|
89
85
|
arekit/common/model/labeling/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
90
86
|
arekit/common/model/labeling/base.py,sha256=uj7_igCWEU23OjnzabNy0LyxoZ6S_qSfCA-ZaoL1erA,727
|
|
91
87
|
arekit/common/model/labeling/modes.py,sha256=DiwC6Aomke-ojwwpR2pcd4qgQSwmRdGCvQlyHHhN3YY,127
|
|
92
|
-
arekit/common/model/labeling/single.py,sha256=
|
|
88
|
+
arekit/common/model/labeling/single.py,sha256=HJMFffbxfmV6dKK8t-MKjD-bOx_wuWUs35zmcSWcUL0,878
|
|
93
89
|
arekit/common/opinions/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
94
90
|
arekit/common/opinions/base.py,sha256=eIx1RzsngCkpnF2Utju5i_Qp7gqF_rDIe_UDeMGXtmo,2112
|
|
95
91
|
arekit/common/opinions/collection.py,sha256=bdx-CIYYdE-DrjyB1mRTGtkLb-lrGPTSLl25xv5EHnM,4938
|
|
@@ -101,11 +97,11 @@ arekit/common/opinions/annot/algo_based.py,sha256=cvDGDmUoUaQ1Xcbyouxrjs0CkHRfRo
|
|
|
101
97
|
arekit/common/opinions/annot/base.py,sha256=IvwrwT8O3s6b2_R0arpMR4Uog7kuWQZUAyRP5cq_27A,382
|
|
102
98
|
arekit/common/opinions/annot/algo/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
103
99
|
arekit/common/opinions/annot/algo/base.py,sha256=ymll-4-SplCY7CLswjOZEC1vsVHIEzUP0JMYgvL8hbo,124
|
|
104
|
-
arekit/common/opinions/annot/algo/pair_based.py,sha256=
|
|
100
|
+
arekit/common/opinions/annot/algo/pair_based.py,sha256=0m0l-KEDvtARDEnl8Sr_MeEJp3yT1re_VsNAO2ZQQUM,4762
|
|
105
101
|
arekit/common/opinions/annot/algo/predefined.py,sha256=zU39SADPKnykHCNB-Bmn_0bvd6gYWWYmfgfi-68hHSs,741
|
|
106
102
|
arekit/common/pipeline/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
107
|
-
arekit/common/pipeline/base.py,sha256=
|
|
108
|
-
arekit/common/pipeline/batching.py,sha256=
|
|
103
|
+
arekit/common/pipeline/base.py,sha256=RHpZs4OT2t9wGTMUxtpBM7q-zCrNQbf3-BFDy9Bcz4M,839
|
|
104
|
+
arekit/common/pipeline/batching.py,sha256=zm1SLSJz8T9gXrBdiztzS2f7VSWb4uFcYkzEu5TIfrE,1119
|
|
109
105
|
arekit/common/pipeline/context.py,sha256=Fw25lBVakHNAXjtkdEqopR-Jh59cDKGWD2jCJxBrj7Y,1126
|
|
110
106
|
arekit/common/pipeline/conts.py,sha256=NAQNsHt1kK3HnxWv3M6yXi0c7C6Mx6ZZ6KZc0yE0eas,70
|
|
111
107
|
arekit/common/pipeline/utils.py,sha256=5VqH1LtRa4tYUbyiRvWdBmP4biFhTKq9vhr8QiRFFkY,882
|
|
@@ -130,15 +126,15 @@ arekit/contrib/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
|
130
126
|
arekit/contrib/bert/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
131
127
|
arekit/contrib/bert/input/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
132
128
|
arekit/contrib/bert/input/providers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
133
|
-
arekit/contrib/bert/input/providers/cropped_sample.py,sha256=
|
|
129
|
+
arekit/contrib/bert/input/providers/cropped_sample.py,sha256=WJNAzILJDMYYhGpxg1r1F3f1X71kVV30gDhkgwH59H0,755
|
|
134
130
|
arekit/contrib/bert/input/providers/text_pair.py,sha256=_1d-he0n42y3ksj8RjJlNHgHnaQUEq0aQhUdTPRMKgg,2817
|
|
135
131
|
arekit/contrib/bert/terms/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
136
|
-
arekit/contrib/bert/terms/mapper.py,sha256=
|
|
132
|
+
arekit/contrib/bert/terms/mapper.py,sha256=YMY1JasNc___83ihiV1KqzwGyC3qs3ZNN90NmHqBEZ0,976
|
|
137
133
|
arekit/contrib/prompt/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
138
|
-
arekit/contrib/prompt/sample.py,sha256=
|
|
134
|
+
arekit/contrib/prompt/sample.py,sha256=iDwe65pUBIrk0Hjh8v7o1XesRPxCVsJojw-dcASPmWc,2867
|
|
139
135
|
arekit/contrib/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
140
136
|
arekit/contrib/utils/bert/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
141
|
-
arekit/contrib/utils/bert/samplers.py,sha256=
|
|
137
|
+
arekit/contrib/utils/bert/samplers.py,sha256=vleluRLRFzDkGRZ_ReeHsY8IJAS-TxJgoTTro4mYrs4,1102
|
|
142
138
|
arekit/contrib/utils/data/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
143
139
|
arekit/contrib/utils/data/contents/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
144
140
|
arekit/contrib/utils/data/contents/opinions.py,sha256=MSV7NytEe15adKhhHCq5KiCj6ZBq31nV-u2rcSfFCgE,1738
|
|
@@ -147,7 +143,6 @@ arekit/contrib/utils/data/doc_provider/dict_based.py,sha256=zUOiiIbj5zby4xqMb0m9
|
|
|
147
143
|
arekit/contrib/utils/data/doc_provider/dir_based.py,sha256=FTw3kLV_CYtPoUoHl39IrP6RjLvTecCno9May95jVXw,1916
|
|
148
144
|
arekit/contrib/utils/data/storages/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
149
145
|
arekit/contrib/utils/data/storages/jsonl_based.py,sha256=dz8uizu9t1C215o0HEL8y4LiDKR4aC_-OwDu_xF0xIM,522
|
|
150
|
-
arekit/contrib/utils/data/storages/pandas_based.py,sha256=gMkWUFHZE9Oe1Uy04vEBcUfTIAdh46r5zpjlPAwwG2g,3842
|
|
151
146
|
arekit/contrib/utils/data/storages/row_cache.py,sha256=MRK0uJFvw6O99k2aFb3JLZhLUBo2JUO-WYQ4EeRRu6M,2051
|
|
152
147
|
arekit/contrib/utils/data/storages/sqlite_based.py,sha256=cIYAHyiB4CMftKgrgLqw-L4F1WnhbspjwWLSPqH5NHk,682
|
|
153
148
|
arekit/contrib/utils/data/writers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
@@ -155,7 +150,6 @@ arekit/contrib/utils/data/writers/base.py,sha256=JLwf5WVl_U319sdMev8YOn4OoCcrgNI
|
|
|
155
150
|
arekit/contrib/utils/entities/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
156
151
|
arekit/contrib/utils/entities/filter.py,sha256=aHTExIMFaMdy4QL8iYE23eiby3qLImAakXR6gNqG6fs,145
|
|
157
152
|
arekit/contrib/utils/entities/formatters/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
158
|
-
arekit/contrib/utils/entities/formatters/str_display.py,sha256=N8igv7EVaTFayvLXkyBGtm67KwHaeP-M-L8d7oqBG9Q,401
|
|
159
153
|
arekit/contrib/utils/entities/formatters/str_simple_sharp_prefixed_fmt.py,sha256=rEUIma9O3kOBWIguGtJ69JH-00Dhm0vUBOd5yNcKweY,653
|
|
160
154
|
arekit/contrib/utils/io_utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
161
155
|
arekit/contrib/utils/io_utils/utils.py,sha256=310SIJTsNLn2OZrGPer9W4ZP52PHkjBK3zsyqxVs3h0,537
|
|
@@ -163,24 +157,22 @@ arekit/contrib/utils/pipelines/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5
|
|
|
163
157
|
arekit/contrib/utils/pipelines/opinion_collections.py,sha256=y9-klVJGCN9mPd7t1ECllAiCnAb3MKVXC1PnYddp5sQ,3195
|
|
164
158
|
arekit/contrib/utils/pipelines/items/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
165
159
|
arekit/contrib/utils/pipelines/items/text/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
166
|
-
arekit/contrib/utils/pipelines/items/text/entities_default.py,sha256=vNx5ir2mf7a1gg_OeqUsf_p1Fu2k7QIFxVpe-CuwZ84,727
|
|
167
160
|
arekit/contrib/utils/pipelines/items/text/frames.py,sha256=pZQybYfgEQB1DM3PtmsgrtB2Xl0HejmP4rhT0nR_YKE,2586
|
|
168
161
|
arekit/contrib/utils/pipelines/text_opinion/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
169
|
-
arekit/contrib/utils/pipelines/text_opinion/extraction.py,sha256=
|
|
162
|
+
arekit/contrib/utils/pipelines/text_opinion/extraction.py,sha256=kKBQTvZxYYf9tBYmUv3Ipj9OOYKmHnYG0y5Gyjt27yA,4587
|
|
170
163
|
arekit/contrib/utils/pipelines/text_opinion/annot/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
171
|
-
arekit/contrib/utils/pipelines/text_opinion/annot/algo_based.py,sha256=
|
|
164
|
+
arekit/contrib/utils/pipelines/text_opinion/annot/algo_based.py,sha256=69xmuxqVmsYxBYpV2gYF7j3Z5iPk0ndjnOZe2Yy5WDA,1911
|
|
172
165
|
arekit/contrib/utils/pipelines/text_opinion/filters/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
173
166
|
arekit/contrib/utils/pipelines/text_opinion/filters/base.py,sha256=GnKnJB4MKqiMSJny3a9Na7l7Csm7abbt6GADBCY18Mw,143
|
|
174
167
|
arekit/contrib/utils/pipelines/text_opinion/filters/distance_based.py,sha256=3Pjq4IJJMT7dYpK266lN66WQJUnQO3P0rG6wcAvJOOA,649
|
|
175
168
|
arekit/contrib/utils/pipelines/text_opinion/filters/entity_based.py,sha256=pdWFJaKh4kKIsUuBNp3WNy5Rj80CjWEy2wp-0axFnrI,1254
|
|
176
169
|
arekit/contrib/utils/pipelines/text_opinion/filters/limitation.py,sha256=4AFS5zhocJuYphGO2ZMWmYTtIhGItKDTkB0--AmjgnA,1151
|
|
177
|
-
arekit/contrib/utils/processing/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
178
170
|
arekit/contrib/utils/synonyms/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
179
171
|
arekit/contrib/utils/synonyms/simple.py,sha256=ST9EwuWP88FzbyV8Gi0-biTPgGOsZ7OWyaBWHL_U_eo,557
|
|
180
172
|
arekit/contrib/utils/synonyms/stemmer_based.py,sha256=q19P_XOCWN2_JrBtybAt7ToMIr1ambw4ahr0fSEEHmQ,1400
|
|
181
|
-
arekit-0.25.
|
|
182
|
-
arekit-0.25.
|
|
183
|
-
arekit-0.25.
|
|
184
|
-
arekit-0.25.
|
|
185
|
-
arekit-0.25.
|
|
186
|
-
arekit-0.25.
|
|
173
|
+
arekit-0.25.2.data/data/logo.png,sha256=S8OZ4MGGD72Pf5co7ngYbXKkJH1EUhbErUXv1ZjUWiU,45718
|
|
174
|
+
arekit-0.25.2.dist-info/LICENSE,sha256=JO9tIbxAvhwDv73cX-gUStr9yA-TY7wusUeLHRx7JuY,1076
|
|
175
|
+
arekit-0.25.2.dist-info/METADATA,sha256=CsXviPZIM44LGhiyBRH-MK0DGOP7UAc4GHbvSaLcwxw,3252
|
|
176
|
+
arekit-0.25.2.dist-info/WHEEL,sha256=g4nMs7d-Xl9-xC9XovUrsDHGXt-FT0E17Yqo92DEfvY,92
|
|
177
|
+
arekit-0.25.2.dist-info/top_level.txt,sha256=4pXuFE8IE0lBsqi6ZsR7figx0H939VIX4_-76YIbkOQ,7
|
|
178
|
+
arekit-0.25.2.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
@@ -1,20 +0,0 @@
|
|
|
1
|
-
class BaseSamplesIO(object):
|
|
2
|
-
""" Represents base experiment utils for input/output for:
|
|
3
|
-
samples -- data that utilized for experiments;
|
|
4
|
-
results -- evaluation of experiments.
|
|
5
|
-
"""
|
|
6
|
-
|
|
7
|
-
@property
|
|
8
|
-
def Reader(self):
|
|
9
|
-
raise NotImplementedError()
|
|
10
|
-
|
|
11
|
-
@property
|
|
12
|
-
def Writer(self):
|
|
13
|
-
""" For serialization
|
|
14
|
-
"""
|
|
15
|
-
raise NotImplementedError()
|
|
16
|
-
|
|
17
|
-
def create_target(self, data_type):
|
|
18
|
-
""" Path for reaiding/viewing
|
|
19
|
-
"""
|
|
20
|
-
raise NotImplementedError()
|
|
@@ -1,108 +0,0 @@
|
|
|
1
|
-
import importlib
|
|
2
|
-
|
|
3
|
-
import numpy as np
|
|
4
|
-
|
|
5
|
-
from arekit.common.data.input.providers.columns.base import BaseColumnsProvider
|
|
6
|
-
from arekit.common.data.storages.base import BaseRowsStorage, logger
|
|
7
|
-
from arekit.common.utils import progress_bar_iter
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
class PandasBasedRowsStorage(BaseRowsStorage):
|
|
11
|
-
""" Storage Kernel functions implementation,
|
|
12
|
-
based on the pandas DataFrames.
|
|
13
|
-
"""
|
|
14
|
-
|
|
15
|
-
def __init__(self, df=None, **kwargs):
|
|
16
|
-
super(PandasBasedRowsStorage, self).__init__(**kwargs)
|
|
17
|
-
self._df = df
|
|
18
|
-
|
|
19
|
-
@property
|
|
20
|
-
def DataFrame(self):
|
|
21
|
-
# TODO. Temporary hack, however this should be removed in future.
|
|
22
|
-
return self._df
|
|
23
|
-
|
|
24
|
-
@staticmethod
|
|
25
|
-
def __create_empty(cols_with_types):
|
|
26
|
-
""" cols_with_types: list of pairs ("name", dtype)
|
|
27
|
-
"""
|
|
28
|
-
assert(isinstance(cols_with_types, list))
|
|
29
|
-
data = np.empty(0, dtype=np.dtype(cols_with_types))
|
|
30
|
-
pd = importlib.import_module("pandas")
|
|
31
|
-
return pd.DataFrame(data)
|
|
32
|
-
|
|
33
|
-
def __filter(self, column_name, value):
|
|
34
|
-
return self._df[self._df[column_name] == value]
|
|
35
|
-
|
|
36
|
-
@staticmethod
|
|
37
|
-
def __iter_rows_core(df):
|
|
38
|
-
for row_index, row in df.iterrows():
|
|
39
|
-
yield row_index, row
|
|
40
|
-
|
|
41
|
-
def __fill_with_blank_rows(self, row_id_column_name, rows_count):
|
|
42
|
-
assert(isinstance(row_id_column_name, str))
|
|
43
|
-
assert(isinstance(rows_count, int))
|
|
44
|
-
self._df[row_id_column_name] = list(range(rows_count))
|
|
45
|
-
self._df.set_index(row_id_column_name, inplace=True)
|
|
46
|
-
|
|
47
|
-
# region protected methods
|
|
48
|
-
|
|
49
|
-
def iter_column_names(self):
|
|
50
|
-
return iter(self._df.columns)
|
|
51
|
-
|
|
52
|
-
def iter_column_types(self):
|
|
53
|
-
return iter(self._df.dtypes)
|
|
54
|
-
|
|
55
|
-
def _set_row_value(self, row_ind, column, value):
|
|
56
|
-
self._df.at[row_ind, column] = value
|
|
57
|
-
|
|
58
|
-
def _iter_rows(self):
|
|
59
|
-
for row_index, row in self.__iter_rows_core(self._df):
|
|
60
|
-
yield row_index, row.to_dict()
|
|
61
|
-
|
|
62
|
-
def _get_rows_count(self):
|
|
63
|
-
return len(self._df)
|
|
64
|
-
|
|
65
|
-
# endregion
|
|
66
|
-
|
|
67
|
-
# region public methods
|
|
68
|
-
|
|
69
|
-
def fill(self, iter_rows_func, columns_provider, row_handler=None, rows_count=None, desc=""):
|
|
70
|
-
""" NOTE: We provide the rows counting which is required
|
|
71
|
-
in order to know an expected amount of rows in advace
|
|
72
|
-
due to the specifics of the pandas memory allocation
|
|
73
|
-
for the DataFrames.
|
|
74
|
-
The latter allows us avoid rows appending, which
|
|
75
|
-
may significantly affects on performance once the size
|
|
76
|
-
of DataFrame becomes relatively large.
|
|
77
|
-
"""
|
|
78
|
-
assert(isinstance(columns_provider, BaseColumnsProvider))
|
|
79
|
-
|
|
80
|
-
logger.info("Rows calculation process started. [Required by Pandas-Based storage kernel]")
|
|
81
|
-
logged_rows_it = progress_bar_iter(
|
|
82
|
-
iterable=iter_rows_func(True),
|
|
83
|
-
desc="Calculating rows count ({reason})".format(reason=desc),
|
|
84
|
-
unit="rows")
|
|
85
|
-
rows_count = sum(1 for _ in logged_rows_it)
|
|
86
|
-
|
|
87
|
-
logger.info("Filling with blank rows: {}".format(rows_count))
|
|
88
|
-
self.__fill_with_blank_rows(row_id_column_name=columns_provider.ROW_ID,
|
|
89
|
-
rows_count=rows_count)
|
|
90
|
-
logger.info("Completed!")
|
|
91
|
-
|
|
92
|
-
super(PandasBasedRowsStorage, self).fill(iter_rows_func=iter_rows_func,
|
|
93
|
-
row_handler=row_handler,
|
|
94
|
-
columns_provider=columns_provider,
|
|
95
|
-
rows_count=rows_count)
|
|
96
|
-
|
|
97
|
-
def get_row(self, row_index):
|
|
98
|
-
return self._df.iloc[row_index]
|
|
99
|
-
|
|
100
|
-
def init_empty(self, columns_provider):
|
|
101
|
-
cols_with_types = columns_provider.get_columns_list_with_types()
|
|
102
|
-
self._df = self.__create_empty(cols_with_types)
|
|
103
|
-
|
|
104
|
-
def free(self):
|
|
105
|
-
del self._df
|
|
106
|
-
super(PandasBasedRowsStorage, self).free()
|
|
107
|
-
|
|
108
|
-
# endregion
|
|
@@ -1,11 +0,0 @@
|
|
|
1
|
-
from arekit.common.entities.base import Entity
|
|
2
|
-
from arekit.common.entities.str_fmt import StringEntitiesFormatter
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
class StringEntitiesDisplayValueFormatter(StringEntitiesFormatter):
|
|
6
|
-
""" Provides the contents of the DisplayValue property.
|
|
7
|
-
"""
|
|
8
|
-
|
|
9
|
-
def to_string(self, original_value, entity_type):
|
|
10
|
-
assert(isinstance(original_value, Entity))
|
|
11
|
-
return original_value.DisplayValue
|
|
@@ -1,23 +0,0 @@
|
|
|
1
|
-
from arekit.common.entities.base import Entity
|
|
2
|
-
from arekit.common.pipeline.items.base import BasePipelineItem
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
class TextEntitiesParser(BasePipelineItem):
|
|
6
|
-
|
|
7
|
-
def __init__(self, **kwargs):
|
|
8
|
-
super(TextEntitiesParser, self).__init__(**kwargs)
|
|
9
|
-
|
|
10
|
-
@staticmethod
|
|
11
|
-
def __process_word(word):
|
|
12
|
-
assert(isinstance(word, str))
|
|
13
|
-
|
|
14
|
-
# If this is a special word which is related to the [entity] mention.
|
|
15
|
-
if word[0] == "[" and word[-1] == "]":
|
|
16
|
-
entity = Entity(value=word[1:-1], e_type=None)
|
|
17
|
-
return entity
|
|
18
|
-
|
|
19
|
-
return word
|
|
20
|
-
|
|
21
|
-
def apply_core(self, input_data, pipeline_ctx):
|
|
22
|
-
assert(isinstance(input_data, list))
|
|
23
|
-
return [self.__process_word(w) for w in input_data]
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|