arekit 0.23.1__py3-none-any.whl → 0.25.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- arekit/common/context/terms_mapper.py +2 -2
- arekit/common/data/const.py +5 -4
- arekit/common/{experiment/api/ops_doc.py → data/doc_provider.py} +1 -1
- arekit/common/data/input/providers/columns/sample.py +6 -1
- arekit/common/data/input/providers/instances/base.py +1 -1
- arekit/common/data/input/providers/rows/base.py +36 -13
- arekit/common/data/input/providers/rows/samples.py +57 -55
- arekit/common/data/input/providers/sample/cropped.py +2 -2
- arekit/common/data/input/sample.py +1 -1
- arekit/common/data/rows_fmt.py +82 -0
- arekit/common/data/rows_parser.py +43 -0
- arekit/common/data/storages/base.py +23 -18
- arekit/common/data/views/samples.py +2 -8
- arekit/common/{news → docs}/base.py +2 -2
- arekit/common/{news → docs}/entities_grouping.py +2 -1
- arekit/common/{news → docs}/entity.py +2 -1
- arekit/common/{news → docs}/parsed/base.py +5 -5
- arekit/common/docs/parsed/providers/base.py +68 -0
- arekit/common/{news → docs}/parsed/providers/base_pairs.py +2 -2
- arekit/common/{news → docs}/parsed/providers/entity_service.py +27 -22
- arekit/common/{news → docs}/parsed/providers/opinion_pairs.py +2 -2
- arekit/common/{news → docs}/parsed/providers/text_opinion_pairs.py +6 -6
- arekit/common/docs/parsed/service.py +31 -0
- arekit/common/docs/parser.py +66 -0
- arekit/common/{news → docs}/sentence.py +1 -1
- arekit/common/entities/base.py +11 -2
- arekit/common/experiment/api/base_samples_io.py +1 -1
- arekit/common/frames/variants/collection.py +2 -2
- arekit/common/linkage/base.py +2 -2
- arekit/common/linkage/meta.py +23 -0
- arekit/common/linkage/opinions.py +1 -1
- arekit/common/linkage/text_opinions.py +2 -2
- arekit/common/opinions/annot/algo/base.py +1 -1
- arekit/common/opinions/annot/algo/pair_based.py +15 -13
- arekit/common/opinions/annot/algo/predefined.py +4 -4
- arekit/common/opinions/annot/algo_based.py +5 -5
- arekit/common/opinions/annot/base.py +3 -3
- arekit/common/opinions/base.py +7 -7
- arekit/common/opinions/collection.py +3 -3
- arekit/common/pipeline/base.py +12 -16
- arekit/common/pipeline/batching.py +28 -0
- arekit/common/pipeline/context.py +5 -1
- arekit/common/pipeline/items/base.py +38 -1
- arekit/common/pipeline/items/flatten.py +5 -1
- arekit/common/pipeline/items/handle.py +2 -1
- arekit/common/pipeline/items/iter.py +2 -1
- arekit/common/pipeline/items/map.py +2 -1
- arekit/common/pipeline/items/map_nested.py +4 -0
- arekit/common/pipeline/utils.py +32 -0
- arekit/common/service/sqlite.py +36 -0
- arekit/common/synonyms/base.py +2 -2
- arekit/common/text/{partitioning/str.py → partitioning.py} +16 -11
- arekit/common/text_opinions/base.py +11 -11
- arekit/common/utils.py +33 -46
- arekit/contrib/networks/embedding.py +3 -3
- arekit/contrib/networks/embedding_io.py +5 -5
- arekit/contrib/networks/input/const.py +0 -2
- arekit/contrib/networks/input/providers/sample.py +15 -29
- arekit/contrib/networks/input/rows_parser.py +47 -134
- arekit/contrib/prompt/sample.py +18 -16
- arekit/contrib/utils/data/contents/opinions.py +17 -5
- arekit/contrib/utils/data/doc_provider/dict_based.py +13 -0
- arekit/contrib/utils/data/{doc_ops → doc_provider}/dir_based.py +7 -7
- arekit/contrib/utils/data/readers/base.py +3 -0
- arekit/contrib/utils/data/readers/csv_pd.py +10 -4
- arekit/contrib/utils/data/readers/jsonl.py +3 -0
- arekit/contrib/utils/data/readers/sqlite.py +14 -0
- arekit/contrib/utils/data/service/balance.py +0 -1
- arekit/contrib/utils/data/storages/pandas_based.py +3 -5
- arekit/contrib/utils/data/storages/row_cache.py +18 -6
- arekit/contrib/utils/data/storages/sqlite_based.py +17 -0
- arekit/contrib/utils/data/writers/base.py +5 -0
- arekit/contrib/utils/data/writers/csv_native.py +3 -0
- arekit/contrib/utils/data/writers/csv_pd.py +3 -0
- arekit/contrib/utils/data/writers/json_opennre.py +31 -13
- arekit/contrib/utils/data/writers/sqlite_native.py +114 -0
- arekit/contrib/utils/io_utils/embedding.py +25 -33
- arekit/contrib/utils/io_utils/utils.py +3 -24
- arekit/contrib/utils/pipelines/items/sampling/base.py +31 -26
- arekit/contrib/utils/pipelines/items/sampling/networks.py +7 -10
- arekit/contrib/utils/pipelines/items/text/entities_default.py +2 -2
- arekit/contrib/utils/pipelines/items/text/frames.py +2 -3
- arekit/contrib/utils/pipelines/items/text/frames_lemmatized.py +3 -3
- arekit/contrib/utils/pipelines/items/text/frames_negation.py +2 -1
- arekit/contrib/utils/pipelines/items/text/tokenizer.py +3 -5
- arekit/contrib/utils/pipelines/items/text/translator.py +136 -0
- arekit/contrib/utils/pipelines/opinion_collections.py +5 -5
- arekit/contrib/utils/pipelines/text_opinion/annot/algo_based.py +7 -7
- arekit/contrib/utils/pipelines/text_opinion/extraction.py +34 -22
- arekit/contrib/utils/pipelines/text_opinion/filters/base.py +1 -1
- arekit/contrib/utils/pipelines/text_opinion/filters/distance_based.py +1 -1
- arekit/contrib/utils/pipelines/text_opinion/filters/entity_based.py +3 -3
- arekit/contrib/utils/pipelines/text_opinion/filters/limitation.py +4 -4
- arekit/contrib/utils/serializer.py +4 -23
- arekit-0.25.0.data/data/logo.png +0 -0
- arekit-0.25.0.dist-info/METADATA +82 -0
- arekit-0.25.0.dist-info/RECORD +259 -0
- {arekit-0.23.1.dist-info → arekit-0.25.0.dist-info}/WHEEL +1 -1
- arekit/common/data/row_ids/base.py +0 -79
- arekit/common/data/row_ids/binary.py +0 -38
- arekit/common/data/row_ids/multiple.py +0 -14
- arekit/common/folding/base.py +0 -36
- arekit/common/folding/fixed.py +0 -42
- arekit/common/folding/nofold.py +0 -15
- arekit/common/folding/united.py +0 -46
- arekit/common/news/objects_parser.py +0 -37
- arekit/common/news/parsed/providers/base.py +0 -48
- arekit/common/news/parsed/service.py +0 -31
- arekit/common/news/parser.py +0 -34
- arekit/common/text/parser.py +0 -12
- arekit/common/text/partitioning/__init__.py +0 -0
- arekit/common/text/partitioning/base.py +0 -4
- arekit/common/text/partitioning/terms.py +0 -35
- arekit/contrib/source/__init__.py +0 -0
- arekit/contrib/source/brat/__init__.py +0 -0
- arekit/contrib/source/brat/annot.py +0 -83
- arekit/contrib/source/brat/entities/__init__.py +0 -0
- arekit/contrib/source/brat/entities/compound.py +0 -33
- arekit/contrib/source/brat/entities/entity.py +0 -42
- arekit/contrib/source/brat/entities/parser.py +0 -53
- arekit/contrib/source/brat/news.py +0 -28
- arekit/contrib/source/brat/opinions/__init__.py +0 -0
- arekit/contrib/source/brat/opinions/converter.py +0 -19
- arekit/contrib/source/brat/relation.py +0 -32
- arekit/contrib/source/brat/sentence.py +0 -69
- arekit/contrib/source/brat/sentences_reader.py +0 -128
- arekit/contrib/source/download.py +0 -41
- arekit/contrib/source/nerel/__init__.py +0 -0
- arekit/contrib/source/nerel/entities.py +0 -55
- arekit/contrib/source/nerel/folding/__init__.py +0 -0
- arekit/contrib/source/nerel/folding/fixed.py +0 -75
- arekit/contrib/source/nerel/io_utils.py +0 -62
- arekit/contrib/source/nerel/labels.py +0 -241
- arekit/contrib/source/nerel/reader.py +0 -46
- arekit/contrib/source/nerel/utils.py +0 -24
- arekit/contrib/source/nerel/versions.py +0 -12
- arekit/contrib/source/nerelbio/__init__.py +0 -0
- arekit/contrib/source/nerelbio/io_utils.py +0 -62
- arekit/contrib/source/nerelbio/labels.py +0 -265
- arekit/contrib/source/nerelbio/reader.py +0 -8
- arekit/contrib/source/nerelbio/versions.py +0 -8
- arekit/contrib/source/ruattitudes/__init__.py +0 -0
- arekit/contrib/source/ruattitudes/collection.py +0 -36
- arekit/contrib/source/ruattitudes/entity/__init__.py +0 -0
- arekit/contrib/source/ruattitudes/entity/parser.py +0 -7
- arekit/contrib/source/ruattitudes/io_utils.py +0 -56
- arekit/contrib/source/ruattitudes/labels_fmt.py +0 -12
- arekit/contrib/source/ruattitudes/news.py +0 -51
- arekit/contrib/source/ruattitudes/news_brat.py +0 -44
- arekit/contrib/source/ruattitudes/opinions/__init__.py +0 -0
- arekit/contrib/source/ruattitudes/opinions/base.py +0 -28
- arekit/contrib/source/ruattitudes/opinions/converter.py +0 -37
- arekit/contrib/source/ruattitudes/reader.py +0 -268
- arekit/contrib/source/ruattitudes/sentence.py +0 -73
- arekit/contrib/source/ruattitudes/synonyms.py +0 -17
- arekit/contrib/source/ruattitudes/text_object.py +0 -57
- arekit/contrib/source/rusentiframes/__init__.py +0 -0
- arekit/contrib/source/rusentiframes/collection.py +0 -157
- arekit/contrib/source/rusentiframes/effect.py +0 -24
- arekit/contrib/source/rusentiframes/io_utils.py +0 -19
- arekit/contrib/source/rusentiframes/labels_fmt.py +0 -22
- arekit/contrib/source/rusentiframes/polarity.py +0 -35
- arekit/contrib/source/rusentiframes/role.py +0 -15
- arekit/contrib/source/rusentiframes/state.py +0 -24
- arekit/contrib/source/rusentiframes/types.py +0 -42
- arekit/contrib/source/rusentiframes/value.py +0 -2
- arekit/contrib/source/rusentrel/__init__.py +0 -0
- arekit/contrib/source/rusentrel/const.py +0 -3
- arekit/contrib/source/rusentrel/entities.py +0 -26
- arekit/contrib/source/rusentrel/io_utils.py +0 -125
- arekit/contrib/source/rusentrel/labels_fmt.py +0 -12
- arekit/contrib/source/rusentrel/news_reader.py +0 -51
- arekit/contrib/source/rusentrel/opinions/__init__.py +0 -0
- arekit/contrib/source/rusentrel/opinions/collection.py +0 -30
- arekit/contrib/source/rusentrel/opinions/converter.py +0 -40
- arekit/contrib/source/rusentrel/opinions/provider.py +0 -54
- arekit/contrib/source/rusentrel/opinions/writer.py +0 -42
- arekit/contrib/source/rusentrel/synonyms.py +0 -17
- arekit/contrib/source/sentinerel/__init__.py +0 -0
- arekit/contrib/source/sentinerel/entities.py +0 -52
- arekit/contrib/source/sentinerel/folding/__init__.py +0 -0
- arekit/contrib/source/sentinerel/folding/factory.py +0 -32
- arekit/contrib/source/sentinerel/folding/fixed.py +0 -73
- arekit/contrib/source/sentinerel/io_utils.py +0 -87
- arekit/contrib/source/sentinerel/labels.py +0 -53
- arekit/contrib/source/sentinerel/labels_scaler.py +0 -30
- arekit/contrib/source/sentinerel/reader.py +0 -42
- arekit/contrib/source/synonyms/__init__.py +0 -0
- arekit/contrib/source/synonyms/utils.py +0 -19
- arekit/contrib/source/zip_utils.py +0 -47
- arekit/contrib/utils/bert/rows.py +0 -0
- arekit/contrib/utils/bert/text_b_rus.py +0 -18
- arekit/contrib/utils/connotations/__init__.py +0 -0
- arekit/contrib/utils/connotations/rusentiframes_sentiment.py +0 -23
- arekit/contrib/utils/cv/__init__.py +0 -0
- arekit/contrib/utils/cv/doc_stat/__init__.py +0 -0
- arekit/contrib/utils/cv/doc_stat/base.py +0 -37
- arekit/contrib/utils/cv/doc_stat/sentence.py +0 -12
- arekit/contrib/utils/cv/splitters/__init__.py +0 -0
- arekit/contrib/utils/cv/splitters/base.py +0 -4
- arekit/contrib/utils/cv/splitters/default.py +0 -53
- arekit/contrib/utils/cv/splitters/statistical.py +0 -57
- arekit/contrib/utils/cv/two_class.py +0 -77
- arekit/contrib/utils/data/doc_ops/__init__.py +0 -0
- arekit/contrib/utils/data/doc_ops/dict_based.py +0 -13
- arekit/contrib/utils/data/ext.py +0 -31
- arekit/contrib/utils/data/views/__init__.py +0 -0
- arekit/contrib/utils/data/views/linkages/__init__.py +0 -0
- arekit/contrib/utils/data/views/linkages/base.py +0 -58
- arekit/contrib/utils/data/views/linkages/multilabel.py +0 -48
- arekit/contrib/utils/data/views/linkages/utils.py +0 -24
- arekit/contrib/utils/data/views/opinions.py +0 -14
- arekit/contrib/utils/download.py +0 -78
- arekit/contrib/utils/entities/formatters/str_rus_cased_fmt.py +0 -78
- arekit/contrib/utils/entities/formatters/str_rus_nocased_fmt.py +0 -15
- arekit/contrib/utils/entities/formatters/str_simple_fmt.py +0 -24
- arekit/contrib/utils/entities/formatters/str_simple_uppercase_fmt.py +0 -21
- arekit/contrib/utils/io_utils/opinions.py +0 -39
- arekit/contrib/utils/io_utils/samples.py +0 -78
- arekit/contrib/utils/lexicons/__init__.py +0 -0
- arekit/contrib/utils/lexicons/lexicon.py +0 -43
- arekit/contrib/utils/lexicons/relation.py +0 -45
- arekit/contrib/utils/lexicons/rusentilex.py +0 -34
- arekit/contrib/utils/nn/__init__.py +0 -0
- arekit/contrib/utils/nn/rows.py +0 -83
- arekit/contrib/utils/pipelines/items/sampling/bert.py +0 -5
- arekit/contrib/utils/pipelines/items/text/terms_splitter.py +0 -10
- arekit/contrib/utils/pipelines/items/to_output.py +0 -101
- arekit/contrib/utils/pipelines/sources/__init__.py +0 -0
- arekit/contrib/utils/pipelines/sources/nerel/__init__.py +0 -0
- arekit/contrib/utils/pipelines/sources/nerel/doc_ops.py +0 -27
- arekit/contrib/utils/pipelines/sources/nerel/extract_text_relations.py +0 -59
- arekit/contrib/utils/pipelines/sources/nerel/labels_fmt.py +0 -60
- arekit/contrib/utils/pipelines/sources/nerel_bio/__init__.py +0 -0
- arekit/contrib/utils/pipelines/sources/nerel_bio/doc_ops.py +0 -29
- arekit/contrib/utils/pipelines/sources/nerel_bio/extrat_text_relations.py +0 -59
- arekit/contrib/utils/pipelines/sources/nerel_bio/labels_fmt.py +0 -79
- arekit/contrib/utils/pipelines/sources/ruattitudes/__init__.py +0 -0
- arekit/contrib/utils/pipelines/sources/ruattitudes/doc_ops.py +0 -56
- arekit/contrib/utils/pipelines/sources/ruattitudes/entity_filter.py +0 -19
- arekit/contrib/utils/pipelines/sources/ruattitudes/extract_text_opinions.py +0 -58
- arekit/contrib/utils/pipelines/sources/rusentrel/__init__.py +0 -0
- arekit/contrib/utils/pipelines/sources/rusentrel/doc_ops.py +0 -21
- arekit/contrib/utils/pipelines/sources/rusentrel/extract_text_opinions.py +0 -100
- arekit/contrib/utils/pipelines/sources/sentinerel/__init__.py +0 -0
- arekit/contrib/utils/pipelines/sources/sentinerel/doc_ops.py +0 -29
- arekit/contrib/utils/pipelines/sources/sentinerel/entity_filter.py +0 -62
- arekit/contrib/utils/pipelines/sources/sentinerel/extract_text_opinions.py +0 -175
- arekit/contrib/utils/pipelines/sources/sentinerel/labels_fmt.py +0 -50
- arekit/contrib/utils/pipelines/text_opinion/annot/predefined.py +0 -88
- arekit/contrib/utils/resources.py +0 -26
- arekit/contrib/utils/sources/__init__.py +0 -0
- arekit/contrib/utils/sources/sentinerel/__init__.py +0 -0
- arekit/contrib/utils/sources/sentinerel/text_opinion/__init__.py +0 -0
- arekit/contrib/utils/sources/sentinerel/text_opinion/prof_per_org_filter.py +0 -63
- arekit/contrib/utils/utils_folding.py +0 -19
- arekit/download_data.py +0 -11
- arekit-0.23.1.dist-info/METADATA +0 -23
- arekit-0.23.1.dist-info/RECORD +0 -403
- /arekit/common/{data/row_ids → docs}/__init__.py +0 -0
- /arekit/common/{folding → docs/parsed}/__init__.py +0 -0
- /arekit/common/{news → docs/parsed/providers}/__init__.py +0 -0
- /arekit/common/{news → docs}/parsed/term_position.py +0 -0
- /arekit/common/{news/parsed → service}/__init__.py +0 -0
- /arekit/{common/news/parsed/providers → contrib/utils/data/doc_provider}/__init__.py +0 -0
- {arekit-0.23.1.dist-info → arekit-0.25.0.dist-info}/LICENSE +0 -0
- {arekit-0.23.1.dist-info → arekit-0.25.0.dist-info}/top_level.txt +0 -0
arekit/common/pipeline/base.py
CHANGED
|
@@ -2,24 +2,20 @@ from arekit.common.pipeline.context import PipelineContext
|
|
|
2
2
|
from arekit.common.pipeline.items.base import BasePipelineItem
|
|
3
3
|
|
|
4
4
|
|
|
5
|
-
class
|
|
5
|
+
class BasePipelineLauncher:
|
|
6
6
|
|
|
7
|
-
|
|
7
|
+
@staticmethod
|
|
8
|
+
def run(pipeline, pipeline_ctx, src_key=None, has_input=True):
|
|
8
9
|
assert(isinstance(pipeline, list))
|
|
9
|
-
|
|
10
|
+
assert(isinstance(pipeline_ctx, PipelineContext))
|
|
11
|
+
assert(isinstance(src_key, str) or src_key is None)
|
|
10
12
|
|
|
11
|
-
|
|
12
|
-
assert(isinstance(params_dict, dict) or params_dict is None)
|
|
13
|
-
|
|
14
|
-
pipeline_ctx = PipelineContext(d=params_dict if params_dict is not None else dict(),
|
|
15
|
-
parent_ctx=parent_ctx)
|
|
16
|
-
|
|
17
|
-
for item in filter(lambda itm: itm is not None, self.__pipeline):
|
|
13
|
+
for ind, item in enumerate(filter(lambda itm: itm is not None, pipeline)):
|
|
18
14
|
assert(isinstance(item, BasePipelineItem))
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
15
|
+
do_force_key = src_key is not None and ind == 0
|
|
16
|
+
input_data = item.get_source(pipeline_ctx, force_key=src_key if do_force_key else None) \
|
|
17
|
+
if has_input or ind > 0 else None
|
|
18
|
+
item_result = item.apply(input_data=input_data, pipeline_ctx=pipeline_ctx)
|
|
19
|
+
pipeline_ctx.update(param=item.ResultKey, value=item_result, is_new_key=False)
|
|
22
20
|
|
|
23
|
-
|
|
24
|
-
assert(isinstance(item, BasePipelineItem))
|
|
25
|
-
self.__pipeline.append(item)
|
|
21
|
+
return pipeline_ctx
|
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
from arekit.common.pipeline.context import PipelineContext
|
|
2
|
+
from arekit.common.pipeline.items.base import BasePipelineItem
|
|
3
|
+
|
|
4
|
+
|
|
5
|
+
class BatchingPipelineLauncher:
    """ Runs pipeline items over batches: every item reads a batch from the
        pipeline context and writes a list of results back under its ResultKey.
    """

    @staticmethod
    def run(pipeline, pipeline_ctx, src_key=None):
        """ pipeline: list
                pipeline items; `None` entries are skipped.
            pipeline_ctx: PipelineContext
                context the items read from and write to; returned at the end.
            src_key: str or None
                when given, overrides the source key of the first non-None item.
        """
        assert(isinstance(pipeline, list))
        assert(isinstance(pipeline_ctx, PipelineContext))
        assert(src_key is None or isinstance(src_key, str))

        active_items = (entry for entry in pipeline if entry is not None)

        for position, item in enumerate(active_items):
            assert (isinstance(item, BasePipelineItem))

            # Only the very first item may have its source key overridden.
            forced_key = src_key if position == 0 else None

            # The source is requested without calling the item's source function,
            # because that function is applied to every element of the batch below.
            raw_batch = item.get_source(pipeline_ctx, call_func=False, force_key=forced_key)

            handled_batch = []
            for element in raw_batch:
                if item._src_func is not None:
                    element = item._src_func(element)
                handled_batch.append(element)

            if item.SupportBatching:
                # The item consumes the whole batch at once.
                batch_result = list(item.apply(input_data=handled_batch, pipeline_ctx=pipeline_ctx))
            else:
                # The item is applied to the batch element by element.
                batch_result = []
                for input_data in handled_batch:
                    batch_result.append(item.apply(input_data=input_data, pipeline_ctx=pipeline_ctx))

            pipeline_ctx.update(param=item.ResultKey, value=batch_result, is_new_key=False)

        return pipeline_ctx
|
|
@@ -13,6 +13,8 @@ class PipelineContext(object):
|
|
|
13
13
|
self._d[PARENT_CTX] = parent_ctx
|
|
14
14
|
|
|
15
15
|
def __provide(self, param):
|
|
16
|
+
if param not in self._d:
|
|
17
|
+
raise Exception(f"Key `{param}` is not in dictionary.\n{self._d}")
|
|
16
18
|
return self._d[param]
|
|
17
19
|
|
|
18
20
|
# region public
|
|
@@ -23,7 +25,9 @@ class PipelineContext(object):
|
|
|
23
25
|
def provide_or_none(self, param):
|
|
24
26
|
return self.__provide(param) if param in self._d else None
|
|
25
27
|
|
|
26
|
-
def update(self, param, value):
|
|
28
|
+
def update(self, param, value, is_new_key=False):
|
|
29
|
+
if is_new_key and param in self._d:
|
|
30
|
+
raise Exception(f"Key `{param}` is already presented in pipeline context dictionary.")
|
|
27
31
|
self._d[param] = value
|
|
28
32
|
|
|
29
33
|
# endregion
|
|
@@ -1,9 +1,46 @@
|
|
|
1
|
+
from arekit.common.pipeline.context import PipelineContext
|
|
2
|
+
|
|
3
|
+
|
|
1
4
|
class BasePipelineItem(object):
|
|
2
5
|
""" Single pipeline item that might be instatiated and embedded into pipeline.
|
|
3
6
|
"""
|
|
4
7
|
|
|
8
|
+
def __init__(self, src_key="result", result_key="result", src_func=None):
|
|
9
|
+
assert(isinstance(src_key, str) or src_key is None)
|
|
10
|
+
assert(callable(src_func) or src_func is None)
|
|
11
|
+
self.__src_key = src_key
|
|
12
|
+
self._src_func = src_func
|
|
13
|
+
self.__result_key = result_key
|
|
14
|
+
|
|
15
|
+
@property
|
|
16
|
+
def ResultKey(self):
|
|
17
|
+
return self.__result_key
|
|
18
|
+
|
|
19
|
+
@property
|
|
20
|
+
def SupportBatching(self):
|
|
21
|
+
""" By default pipeline item is not designed for batching.
|
|
22
|
+
"""
|
|
23
|
+
return False
|
|
24
|
+
|
|
25
|
+
def get_source(self, src_ctx, call_func=True, force_key=None):
|
|
26
|
+
""" Extract input element for processing.
|
|
27
|
+
"""
|
|
28
|
+
assert(isinstance(src_ctx, PipelineContext))
|
|
29
|
+
|
|
30
|
+
# If there is no information about key, then we consider absence of the source.
|
|
31
|
+
if self.__src_key is None:
|
|
32
|
+
return None
|
|
33
|
+
|
|
34
|
+
# Extracting actual source.
|
|
35
|
+
src_data = src_ctx.provide(self.__src_key if force_key is None else force_key)
|
|
36
|
+
if self._src_func is not None and call_func:
|
|
37
|
+
src_data = self._src_func(src_data)
|
|
38
|
+
|
|
39
|
+
return src_data
|
|
40
|
+
|
|
5
41
|
def apply_core(self, input_data, pipeline_ctx):
|
|
6
|
-
|
|
42
|
+
"""By default we do nothing."""
|
|
43
|
+
pass
|
|
7
44
|
|
|
8
45
|
def apply(self, input_data, pipeline_ctx=None):
|
|
9
46
|
""" Performs input processing an update it for a further pipeline items.
|
|
@@ -5,10 +5,14 @@ class FlattenIterPipelineItem(BasePipelineItem):
|
|
|
5
5
|
""" Considered to flat iterations of items that represent iterations.
|
|
6
6
|
"""
|
|
7
7
|
|
|
8
|
+
def __init__(self, **kwargs):
|
|
9
|
+
super(FlattenIterPipelineItem, self).__init__(**kwargs)
|
|
10
|
+
pass
|
|
11
|
+
|
|
8
12
|
def __flat_iter(self, iter_data):
|
|
9
13
|
for iter_item in iter_data:
|
|
10
14
|
for item in iter_item:
|
|
11
15
|
yield item
|
|
12
16
|
|
|
13
17
|
def apply_core(self, input_data, pipeline_ctx):
|
|
14
|
-
return self.__flat_iter(input_data)
|
|
18
|
+
return self.__flat_iter(input_data)
|
|
@@ -3,8 +3,9 @@ from arekit.common.pipeline.items.base import BasePipelineItem
|
|
|
3
3
|
|
|
4
4
|
class HandleIterPipelineItem(BasePipelineItem):
|
|
5
5
|
|
|
6
|
-
def __init__(self, handle_func=None):
|
|
6
|
+
def __init__(self, handle_func=None, **kwargs):
|
|
7
7
|
assert(callable(handle_func))
|
|
8
|
+
super(HandleIterPipelineItem, self).__init__(**kwargs)
|
|
8
9
|
self.__handle_func = handle_func
|
|
9
10
|
|
|
10
11
|
def __updated_data(self, items_iter):
|
|
@@ -3,8 +3,9 @@ from arekit.common.pipeline.items.base import BasePipelineItem
|
|
|
3
3
|
|
|
4
4
|
class FilterPipelineItem(BasePipelineItem):
|
|
5
5
|
|
|
6
|
-
def __init__(self, filter_func=None):
|
|
6
|
+
def __init__(self, filter_func=None, **kwargs):
|
|
7
7
|
assert(callable(filter_func))
|
|
8
|
+
super(FilterPipelineItem, self).__init__(**kwargs)
|
|
8
9
|
self.__filter_func = filter_func
|
|
9
10
|
|
|
10
11
|
def apply_core(self, input_data, pipeline_ctx):
|
|
@@ -3,8 +3,9 @@ from arekit.common.pipeline.items.base import BasePipelineItem
|
|
|
3
3
|
|
|
4
4
|
class MapPipelineItem(BasePipelineItem):
|
|
5
5
|
|
|
6
|
-
def __init__(self, map_func=None):
|
|
6
|
+
def __init__(self, map_func=None, **kwargs):
|
|
7
7
|
assert(callable(map_func))
|
|
8
|
+
super(MapPipelineItem, self).__init__(**kwargs)
|
|
8
9
|
self._map_func = map_func
|
|
9
10
|
|
|
10
11
|
def apply_core(self, input_data, pipeline_ctx):
|
|
@@ -9,5 +9,9 @@ class MapNestedPipelineItem(MapPipelineItem):
|
|
|
9
9
|
suppose to be mapped with the passed pipeline context.
|
|
10
10
|
"""
|
|
11
11
|
|
|
12
|
+
def __init__(self, **kwargs):
|
|
13
|
+
super(MapNestedPipelineItem, self).__init__(**kwargs)
|
|
14
|
+
pass
|
|
15
|
+
|
|
12
16
|
def apply_core(self, input_data, pipeline_ctx):
|
|
13
17
|
return map(lambda item: self._map_func(item, pipeline_ctx), input_data)
|
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
class BatchIterator:
    """ Iterator that groups the elements of `data_iter` into lists
        of at most `batch_size` elements, preserving their order.

        data_iter: iterable
            source of elements; any iterable or iterator is accepted.
        batch_size: int
            positive amount of elements per batch; the last batch may be shorter.
        end_value: callable or None
            when None, iteration stops once the source is exhausted.
            Otherwise, every `next()` call made after exhaustion returns
            `end_value()` instead of raising StopIteration, so a plain
            `for` loop over such an iterator does not terminate by itself.
    """

    def __init__(self, data_iter, batch_size, end_value=None):
        assert(isinstance(batch_size, int) and batch_size > 0)
        assert(callable(end_value) or end_value is None)
        # iter() returns an iterator unchanged, and allows lists, tuples,
        # ranges, etc. to be passed directly as well.
        self.__data_iter = iter(data_iter)
        # Amount of non-empty batches emitted so far.
        self.__index = 0
        self.__batch_size = batch_size
        self.__end_value = end_value

    def __iter__(self):
        return self

    def __next__(self):
        # Collect up to `batch_size` elements; the loop also ends
        # silently when the source has nothing left.
        buffer = []
        for data in self.__data_iter:
            buffer.append(data)
            if len(buffer) == self.__batch_size:
                break

        if buffer:
            self.__index += 1
            return buffer

        # The source is exhausted and there is nothing to emit.
        if self.__end_value is None:
            raise StopIteration

        return self.__end_value()
|
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
import sqlite3
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
class SQLiteProvider(object):
    """ Helper for writing keyed rows into a SQLite table.
    """

    @staticmethod
    def write(data_it, target, data2col_func, table_name, columns, sqlite3_column_types,
              id_column_name="id", id_column_type="TEXT"):
        """ Append rows into `table_name` of the SQLite database at `target`,
            skipping the rows whose id is already stored in the table.

            data_it: iterable
                pairs of (uid, data).
            target: str
                database location passed to `sqlite3.connect`.
            data2col_func: callable
                func(data) -> sequence of values, one per entry of `columns`.
            table_name: str
                table to create (if missing) and to fill.
            columns: list
                names of the non-id columns.
            sqlite3_column_types: list
                SQLite types of `columns`, of the same length.
            id_column_name: str
                name of the id column, placed first in the table.
            id_column_type: str
                SQLite type of the id column.

            NOTE: `table_name`, column names and types are embedded into the SQL
            text as is (identifiers cannot be bound as parameters), so they must
            come from a trusted source. Row ids and values are always bound.
        """
        assert(callable(data2col_func))
        assert(isinstance(columns, list))
        assert(isinstance(sqlite3_column_types, list))
        assert(len(columns) == len(sqlite3_column_types))

        # Provide the ID column.
        all_columns = [id_column_name] + columns
        all_types = [id_column_type] + sqlite3_column_types

        # Compose the statements once, since they do not depend on the rows.
        content = ", ".join(" ".join(pair) for pair in zip(all_columns, all_types))
        placeholders = ", ".join(["?"] * len(all_columns))
        exists_query = f"SELECT EXISTS(SELECT 1 FROM {table_name} WHERE {id_column_name}=?);"
        insert_query = f"INSERT INTO {table_name} VALUES ({placeholders})"

        con = sqlite3.connect(target)
        try:
            # The connection context manager only commits / rolls back the
            # transaction; closing is performed explicitly in `finally`.
            with con:
                cur = con.cursor()
                try:
                    cur.execute(f"CREATE TABLE IF NOT EXISTS {table_name}({content})")
                    # NOTE(review): the index name is fixed, so it is created only for
                    # the first table written into a given database -- confirm intent.
                    cur.execute(f"CREATE INDEX IF NOT EXISTS i_id ON {table_name}({id_column_name})")

                    for uid, data in data_it:
                        row_id = str(uid)

                        # Bound parameter: ids with quotes or SQL fragments are safe.
                        already_stored = cur.execute(exists_query, (row_id,)).fetchone()[0]
                        if already_stored == 1:
                            continue

                        cur.execute(insert_query, [row_id] + list(data2col_func(data)))
                        # Persist every row right away, so that an interrupted
                        # run keeps everything that has been written so far.
                        con.commit()
                finally:
                    cur.close()
        finally:
            con.close()
|
arekit/common/synonyms/base.py
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import
|
|
1
|
+
from collections.abc import Iterable
|
|
2
2
|
|
|
3
3
|
from arekit.common import log_utils
|
|
4
4
|
|
|
@@ -12,7 +12,7 @@ class SynonymsCollection(object):
|
|
|
12
12
|
debug: bool
|
|
13
13
|
utilized for logging the salient information during usage.
|
|
14
14
|
"""
|
|
15
|
-
assert(isinstance(iter_group_values_lists,
|
|
15
|
+
assert(isinstance(iter_group_values_lists, Iterable) or iter_group_values_lists is None)
|
|
16
16
|
assert(isinstance(is_read_only, bool))
|
|
17
17
|
assert(isinstance(debug, bool))
|
|
18
18
|
|
|
@@ -1,28 +1,34 @@
|
|
|
1
|
-
import
|
|
1
|
+
from collections.abc import Iterable
|
|
2
2
|
|
|
3
3
|
from arekit.common.bound import Bound
|
|
4
|
-
from arekit.common.text.partitioning.base import BasePartitioning
|
|
5
4
|
|
|
6
5
|
|
|
7
|
-
class
|
|
6
|
+
class Partitioning(object):
|
|
8
7
|
""" NOTE: considering that provided parts
|
|
9
8
|
has no intersections between each other
|
|
10
9
|
"""
|
|
11
10
|
|
|
11
|
+
list_reg_types = {
|
|
12
|
+
"str": lambda p, item: p.append(item),
|
|
13
|
+
"list": lambda p, item: p.extend(item)
|
|
14
|
+
}
|
|
15
|
+
|
|
16
|
+
def __init__(self, text_fmt):
|
|
17
|
+
assert(isinstance(text_fmt, str) and text_fmt in self.list_reg_types)
|
|
18
|
+
self.__reg_part = self.list_reg_types[text_fmt]
|
|
19
|
+
|
|
12
20
|
def provide(self, text, parts_it):
|
|
13
|
-
assert(isinstance(
|
|
14
|
-
assert(isinstance(parts_it, collections.Iterable))
|
|
21
|
+
assert(isinstance(parts_it, Iterable))
|
|
15
22
|
|
|
16
|
-
start = 0
|
|
17
23
|
parts = []
|
|
24
|
+
start = 0
|
|
25
|
+
|
|
18
26
|
for value, bound in parts_it:
|
|
19
27
|
assert(isinstance(bound, Bound))
|
|
20
28
|
assert(bound.Position >= start)
|
|
21
29
|
|
|
22
30
|
# Release everything till the current value position.
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
parts.append(part)
|
|
31
|
+
self.__reg_part(p=parts, item=text[start:bound.Position])
|
|
26
32
|
|
|
27
33
|
# Release the entity value.
|
|
28
34
|
parts.extend([value])
|
|
@@ -30,7 +36,6 @@ class StringPartitioning(BasePartitioning):
|
|
|
30
36
|
start = bound.Position + bound.Length
|
|
31
37
|
|
|
32
38
|
# Release everything after the last entity.
|
|
33
|
-
|
|
34
|
-
parts.extend([last_part])
|
|
39
|
+
self.__reg_part(p=parts, item=text[start:len(text)])
|
|
35
40
|
|
|
36
41
|
return parts
|
|
@@ -3,7 +3,7 @@ from arekit.common.labels.base import Label
|
|
|
3
3
|
|
|
4
4
|
class TextOpinion(object):
|
|
5
5
|
"""
|
|
6
|
-
Represents a relation which were found in
|
|
6
|
+
Represents a relation which were found in doc article
|
|
7
7
|
and composed between two named entities
|
|
8
8
|
(it was found especially by Opinion with predefined label)
|
|
9
9
|
allows to modify label using set_label
|
|
@@ -31,28 +31,28 @@ class TextOpinion(object):
|
|
|
31
31
|
return cls.__try_create_copy_core(other=other, keep_text_opinion_id=keep_text_opinion_id)
|
|
32
32
|
|
|
33
33
|
@staticmethod
|
|
34
|
-
def try_convert(other,
|
|
34
|
+
def try_convert(other, convert_entity_id_func):
|
|
35
35
|
""" Creates a copy of `other` opinion with different id of opinion participants.
|
|
36
|
-
Use cases: required for
|
|
36
|
+
Use cases: required for BaseParsedDocumentServiceProvider, when we decided to bring the outside
|
|
37
37
|
opinion into one which is based on DocumentEntities.
|
|
38
38
|
"""
|
|
39
39
|
assert(isinstance(other, TextOpinion))
|
|
40
|
-
assert(callable(
|
|
40
|
+
assert(callable(convert_entity_id_func))
|
|
41
41
|
return TextOpinion.__try_create_copy_core(other=other,
|
|
42
|
-
|
|
42
|
+
convert_entity_id_func=convert_entity_id_func,
|
|
43
43
|
keep_text_opinion_id=False)
|
|
44
44
|
|
|
45
45
|
@staticmethod
|
|
46
|
-
def __try_create_copy_core(other,
|
|
46
|
+
def __try_create_copy_core(other, convert_entity_id_func=lambda part_id: part_id, keep_text_opinion_id=True):
|
|
47
47
|
""" Tries to compose a copy by considering an optional id conversion,
|
|
48
48
|
and identification keeping.
|
|
49
49
|
convert_id:
|
|
50
50
|
func(id) -> id
|
|
51
51
|
"""
|
|
52
|
-
assert(callable(
|
|
52
|
+
assert(callable(convert_entity_id_func))
|
|
53
53
|
|
|
54
|
-
source_id =
|
|
55
|
-
target_id =
|
|
54
|
+
source_id = convert_entity_id_func(other.SourceId)
|
|
55
|
+
target_id = convert_entity_id_func(other.TargetId)
|
|
56
56
|
|
|
57
57
|
if source_id is None or target_id is None:
|
|
58
58
|
return None
|
|
@@ -61,7 +61,7 @@ class TextOpinion(object):
|
|
|
61
61
|
text_opinion_id=other.__text_opinion_id if keep_text_opinion_id else None,
|
|
62
62
|
source_id=source_id,
|
|
63
63
|
target_id=target_id,
|
|
64
|
-
label=other.
|
|
64
|
+
label=other.Label)
|
|
65
65
|
|
|
66
66
|
def __set_label_core(self, label):
|
|
67
67
|
assert(isinstance(label, Label))
|
|
@@ -72,7 +72,7 @@ class TextOpinion(object):
|
|
|
72
72
|
# region properties
|
|
73
73
|
|
|
74
74
|
@property
|
|
75
|
-
def
|
|
75
|
+
def Label(self):
|
|
76
76
|
return self.__modifiable_label
|
|
77
77
|
|
|
78
78
|
@property
|
arekit/common/utils.py
CHANGED
|
@@ -1,6 +1,5 @@
|
|
|
1
1
|
import sys
|
|
2
2
|
import os
|
|
3
|
-
import requests
|
|
4
3
|
from tqdm import tqdm
|
|
5
4
|
|
|
6
5
|
|
|
@@ -35,7 +34,38 @@ def progress_bar(iterable, total, desc="", unit="it"):
|
|
|
35
34
|
return progress_bar_iter(iterable=iterable, desc=desc, unit=unit)
|
|
36
35
|
|
|
37
36
|
|
|
38
|
-
def
|
|
37
|
+
def progress_bar_conditional(iterable, condition_func, total, postfix_func=None, desc="", unit="it"):
    """ Yields every item of `iterable`, while the progress bar advances
        only for the items for which `condition_func(item)` is truthy.

        postfix_func: callable or None
            func(item) -> value passed to `set_postfix` of the bar object
            after the item has been yielded.
    """
    assert(callable(condition_func))
    assert(postfix_func is None or callable(postfix_func))

    def _endless_zeros():
        # The bar is advanced manually via next(), so the values are irrelevant.
        while True:
            yield 0

    bar = progress_bar(iterable=_endless_zeros(), total=total, desc=desc, unit=unit)
    ticker = iter(bar)

    # Consume the first placeholder to initialize the bar.
    next(ticker)

    show_postfix = postfix_func is not None

    for item in iterable:

        # Advance the bar only when the condition holds.
        if condition_func(item):
            next(ticker)

        yield item

        # Optionally provide meta-information.
        if show_postfix:
            bar.set_postfix(postfix_func(item))
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
def progress_bar_defined(iterable, total, miniters=200, desc="", unit="it"):
|
|
39
69
|
return tqdm(iterable=iterable,
|
|
40
70
|
total=total,
|
|
41
71
|
desc=desc,
|
|
@@ -43,7 +73,7 @@ def progress_bar_defined(iterable, total, desc="", unit="it"):
|
|
|
43
73
|
position=0,
|
|
44
74
|
leave=True,
|
|
45
75
|
unit=unit,
|
|
46
|
-
miniters=total /
|
|
76
|
+
miniters=total / miniters if total is not None else total)
|
|
47
77
|
|
|
48
78
|
|
|
49
79
|
def progress_bar_iter(iterable, desc="", unit='it'):
|
|
@@ -53,46 +83,3 @@ def progress_bar_iter(iterable, desc="", unit='it'):
|
|
|
53
83
|
leave=True,
|
|
54
84
|
ncols=120,
|
|
55
85
|
unit=unit)
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
def get_default_download_dir():
|
|
59
|
-
""" Refered to NLTK toolkit approach
|
|
60
|
-
https://github.com/nltk/nltk/blob/8e771679cee1b4a9540633cc3ea17f4421ffd6c0/nltk/downloader.py#L1051
|
|
61
|
-
"""
|
|
62
|
-
|
|
63
|
-
# On Windows, use %APPDATA%
|
|
64
|
-
if sys.platform == "win32" and "APPDATA" in os.environ:
|
|
65
|
-
homedir = os.environ["APPDATA"]
|
|
66
|
-
|
|
67
|
-
# Otherwise, install in the user's home directory.
|
|
68
|
-
else:
|
|
69
|
-
homedir = os.path.expanduser("~/")
|
|
70
|
-
if homedir == "~/":
|
|
71
|
-
raise ValueError("Could not find a default download directory")
|
|
72
|
-
|
|
73
|
-
return os.path.join(homedir, ".arekit")
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
def download(dest_file_path, source_url):
|
|
77
|
-
""" Refered to https://github.com/nicolay-r/ner-bilstm-crf-tensorflow/blob/master/ner/utils.py
|
|
78
|
-
Simple http file downloader
|
|
79
|
-
"""
|
|
80
|
-
print(('Downloading from {src} to {dest}'.format(src=source_url, dest=dest_file_path)))
|
|
81
|
-
|
|
82
|
-
sys.stdout.flush()
|
|
83
|
-
datapath = os.path.dirname(dest_file_path)
|
|
84
|
-
|
|
85
|
-
if not os.path.exists(datapath):
|
|
86
|
-
os.makedirs(datapath, mode=0o755)
|
|
87
|
-
|
|
88
|
-
dest_file_path = os.path.abspath(dest_file_path)
|
|
89
|
-
|
|
90
|
-
r = requests.get(source_url, stream=True)
|
|
91
|
-
total_length = int(r.headers.get('content-length', 0))
|
|
92
|
-
|
|
93
|
-
with open(dest_file_path, 'wb') as f:
|
|
94
|
-
pbar = tqdm(total=total_length, unit='B', unit_scale=True)
|
|
95
|
-
for chunk in r.iter_content(chunk_size=32 * 1024):
|
|
96
|
-
if chunk: # filter out keep-alive new chunks
|
|
97
|
-
pbar.update(len(chunk))
|
|
98
|
-
f.write(chunk)
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import
|
|
1
|
+
from collections.abc import Iterable
|
|
2
2
|
import numpy as np
|
|
3
3
|
|
|
4
4
|
|
|
@@ -31,7 +31,7 @@ class Embedding(object):
|
|
|
31
31
|
|
|
32
32
|
@classmethod
|
|
33
33
|
def from_word_embedding_pairs_iter(cls, word_embedding_pairs):
|
|
34
|
-
assert(isinstance(word_embedding_pairs,
|
|
34
|
+
assert(isinstance(word_embedding_pairs, Iterable))
|
|
35
35
|
|
|
36
36
|
matrix = []
|
|
37
37
|
words = []
|
|
@@ -51,7 +51,7 @@ class Embedding(object):
|
|
|
51
51
|
|
|
52
52
|
@classmethod
|
|
53
53
|
def from_list_with_embedding_func(cls, words_iter, embedding_func):
|
|
54
|
-
assert(isinstance(words_iter,
|
|
54
|
+
assert(isinstance(words_iter, Iterable))
|
|
55
55
|
assert(callable(embedding_func))
|
|
56
56
|
|
|
57
57
|
matrix = []
|
|
@@ -2,17 +2,17 @@ class BaseEmbeddingIO(object):
|
|
|
2
2
|
""" API for loading and saving embedding and vocabulary related data.
|
|
3
3
|
"""
|
|
4
4
|
|
|
5
|
-
def save_vocab(self, data
|
|
5
|
+
def save_vocab(self, data):
|
|
6
6
|
raise NotImplementedError()
|
|
7
7
|
|
|
8
|
-
def load_vocab(self,
|
|
8
|
+
def load_vocab(self,):
|
|
9
9
|
raise NotImplementedError()
|
|
10
10
|
|
|
11
|
-
def save_embedding(self, data
|
|
11
|
+
def save_embedding(self, data):
|
|
12
12
|
raise NotImplementedError()
|
|
13
13
|
|
|
14
|
-
def load_embedding(self
|
|
14
|
+
def load_embedding(self):
|
|
15
15
|
raise NotImplementedError()
|
|
16
16
|
|
|
17
|
-
def check_targets_existed(self
|
|
17
|
+
def check_targets_existed(self):
|
|
18
18
|
raise NotImplementedError()
|
|
@@ -5,10 +5,11 @@ from arekit.common.data.input.providers.rows.samples import BaseSampleRowProvide
|
|
|
5
5
|
from arekit.common.entities.base import Entity
|
|
6
6
|
from arekit.common.frames.text_variant import TextFrameVariant
|
|
7
7
|
from arekit.common.labels.scaler.sentiment import SentimentLabelScaler
|
|
8
|
-
from arekit.common.
|
|
8
|
+
from arekit.common.docs.parsed.base import ParsedDocument
|
|
9
9
|
from arekit.contrib.networks.input.formatters.pos_mapper import PosTermsMapper
|
|
10
10
|
from arekit.contrib.networks.input import const
|
|
11
11
|
from arekit.contrib.networks.input.providers.term_connotation import extract_uint_frame_variant_connotation
|
|
12
|
+
from arekit.contrib.networks.input.rows_parser import create_nn_val_writer_fmt
|
|
12
13
|
|
|
13
14
|
|
|
14
15
|
class NetworkSampleRowProvider(BaseSampleRowProvider):
|
|
@@ -36,6 +37,7 @@ class NetworkSampleRowProvider(BaseSampleRowProvider):
|
|
|
36
37
|
self.__frame_role_label_scaler = frame_role_label_scaler
|
|
37
38
|
self.__pos_terms_mapper = pos_terms_mapper
|
|
38
39
|
self.__term_embedding_pairs = term_embedding_pairs
|
|
40
|
+
self.__nn_val_fmt = create_nn_val_writer_fmt(fmt_type="writer")
|
|
39
41
|
|
|
40
42
|
@property
|
|
41
43
|
def HasEmbeddingPairs(self):
|
|
@@ -57,21 +59,21 @@ class NetworkSampleRowProvider(BaseSampleRowProvider):
|
|
|
57
59
|
self.__term_embedding_pairs.clear()
|
|
58
60
|
|
|
59
61
|
def _fill_row_core(self, row, text_opinion_linkage, index_in_linked, etalon_label,
|
|
60
|
-
|
|
61
|
-
assert(isinstance(
|
|
62
|
+
parsed_doc, sentence_ind, s_ind, t_ind):
|
|
63
|
+
assert(isinstance(parsed_doc, ParsedDocument))
|
|
62
64
|
|
|
63
65
|
super(NetworkSampleRowProvider, self)._fill_row_core(
|
|
64
66
|
row=row,
|
|
65
67
|
text_opinion_linkage=text_opinion_linkage,
|
|
66
68
|
index_in_linked=index_in_linked,
|
|
67
69
|
etalon_label=etalon_label,
|
|
68
|
-
|
|
70
|
+
parsed_doc=parsed_doc,
|
|
69
71
|
sentence_ind=sentence_ind,
|
|
70
72
|
s_ind=s_ind, t_ind=t_ind)
|
|
71
73
|
|
|
72
74
|
# Extracting list of terms, utilized in further.
|
|
73
75
|
terms_iter, actual_s_ind, actual_t_ind = self._provide_sentence_terms(
|
|
74
|
-
|
|
76
|
+
parsed_doc=parsed_doc, sentence_ind=sentence_ind, s_ind=s_ind, t_ind=t_ind)
|
|
75
77
|
terms = list(terms_iter)
|
|
76
78
|
|
|
77
79
|
# Compose frame indices.
|
|
@@ -85,30 +87,18 @@ class NetworkSampleRowProvider(BaseSampleRowProvider):
|
|
|
85
87
|
three_label_scaler=self.__frame_role_label_scaler),
|
|
86
88
|
[terms[frame_ind] for frame_ind in uint_frame_inds]))
|
|
87
89
|
|
|
88
|
-
|
|
89
|
-
|
|
90
|
+
vm = {
|
|
91
|
+
const.FrameVariantIndices: uint_frame_inds,
|
|
92
|
+
const.FrameConnotations: uint_frame_connotations,
|
|
93
|
+
const.SynonymSubject: self.__create_synonyms_set(terms=terms, term_ind=actual_s_ind),
|
|
94
|
+
const.SynonymObject: self.__create_synonyms_set(terms=terms, term_ind=actual_t_ind),
|
|
95
|
+
const.PosTags: None if self.__pos_terms_mapper is None else [int(pos_tag) for pos_tag in self.__pos_terms_mapper.iter_mapped(terms)]
|
|
96
|
+
}
|
|
90
97
|
|
|
91
|
-
|
|
92
|
-
uint_syn_t_inds = self.__create_synonyms_set(terms=terms, term_ind=actual_t_ind)
|
|
93
|
-
|
|
94
|
-
# Part of speech tags
|
|
95
|
-
pos_int_tags = None if self.__pos_terms_mapper is None \
|
|
96
|
-
else [int(pos_tag) for pos_tag in self.__pos_terms_mapper.iter_mapped(terms)]
|
|
97
|
-
|
|
98
|
-
# Saving.
|
|
99
|
-
row[const.FrameVariantIndices] = self.__to_arg(uint_frame_inds)
|
|
100
|
-
row[const.FrameConnotations] = self.__to_arg(uint_frame_connotations)
|
|
101
|
-
row[const.SynonymSubject] = self.__to_arg(uint_syn_s_inds)
|
|
102
|
-
row[const.SynonymObject] = self.__to_arg(uint_syn_t_inds)
|
|
103
|
-
if pos_int_tags is not None:
|
|
104
|
-
row[const.PosTags] = self.__to_arg(pos_int_tags)
|
|
98
|
+
self._apply_row_data(row=row, vm=vm, val_fmt=self.__nn_val_fmt)
|
|
105
99
|
|
|
106
100
|
# region private methods
|
|
107
101
|
|
|
108
|
-
@staticmethod
|
|
109
|
-
def __is_entity(t):
|
|
110
|
-
return isinstance(t, Entity)
|
|
111
|
-
|
|
112
102
|
def __create_synonyms_set(self, terms, term_ind):
|
|
113
103
|
entity = terms[term_ind]
|
|
114
104
|
assert(isinstance(entity, Entity))
|
|
@@ -136,8 +126,4 @@ class NetworkSampleRowProvider(BaseSampleRowProvider):
|
|
|
136
126
|
return False
|
|
137
127
|
return term.GroupIndex == group_ind
|
|
138
128
|
|
|
139
|
-
@staticmethod
|
|
140
|
-
def __to_arg(inds_iter):
|
|
141
|
-
return const.ArgsSep.join([str(i) for i in inds_iter])
|
|
142
|
-
|
|
143
129
|
# endregion
|