arekit 0.24.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- arekit/__init__.py +0 -0
- arekit/common/__init__.py +0 -0
- arekit/common/bound.py +48 -0
- arekit/common/context/__init__.py +0 -0
- arekit/common/context/terms_mapper.py +51 -0
- arekit/common/context/token.py +16 -0
- arekit/common/data/__init__.py +0 -0
- arekit/common/data/const.py +21 -0
- arekit/common/data/doc_provider.py +6 -0
- arekit/common/data/input/__init__.py +0 -0
- arekit/common/data/input/providers/__init__.py +0 -0
- arekit/common/data/input/providers/columns/__init__.py +0 -0
- arekit/common/data/input/providers/columns/base.py +9 -0
- arekit/common/data/input/providers/columns/sample.py +59 -0
- arekit/common/data/input/providers/const.py +3 -0
- arekit/common/data/input/providers/contents.py +9 -0
- arekit/common/data/input/providers/instances/__init__.py +0 -0
- arekit/common/data/input/providers/instances/base.py +14 -0
- arekit/common/data/input/providers/instances/multiple.py +27 -0
- arekit/common/data/input/providers/instances/single.py +8 -0
- arekit/common/data/input/providers/label/__init__.py +0 -0
- arekit/common/data/input/providers/label/base.py +24 -0
- arekit/common/data/input/providers/label/binary.py +11 -0
- arekit/common/data/input/providers/label/multiple.py +15 -0
- arekit/common/data/input/providers/rows/__init__.py +0 -0
- arekit/common/data/input/providers/rows/base.py +64 -0
- arekit/common/data/input/providers/rows/samples.py +227 -0
- arekit/common/data/input/providers/sample/__init__.py +0 -0
- arekit/common/data/input/providers/sample/cropped.py +43 -0
- arekit/common/data/input/providers/text/__init__.py +0 -0
- arekit/common/data/input/providers/text/single.py +49 -0
- arekit/common/data/input/repositories/__init__.py +0 -0
- arekit/common/data/input/repositories/base.py +68 -0
- arekit/common/data/input/repositories/sample.py +22 -0
- arekit/common/data/input/sample.py +66 -0
- arekit/common/data/input/terms_mapper.py +88 -0
- arekit/common/data/rows_fmt.py +82 -0
- arekit/common/data/rows_parser.py +43 -0
- arekit/common/data/storages/__init__.py +0 -0
- arekit/common/data/storages/base.py +109 -0
- arekit/common/data/views/__init__.py +0 -0
- arekit/common/data/views/samples.py +26 -0
- arekit/common/docs/__init__.py +0 -0
- arekit/common/docs/base.py +30 -0
- arekit/common/docs/entities_grouping.py +16 -0
- arekit/common/docs/entity.py +18 -0
- arekit/common/docs/objects_parser.py +37 -0
- arekit/common/docs/parsed/__init__.py +0 -0
- arekit/common/docs/parsed/base.py +101 -0
- arekit/common/docs/parsed/providers/__init__.py +0 -0
- arekit/common/docs/parsed/providers/base.py +68 -0
- arekit/common/docs/parsed/providers/base_pairs.py +51 -0
- arekit/common/docs/parsed/providers/entity_service.py +175 -0
- arekit/common/docs/parsed/providers/opinion_pairs.py +20 -0
- arekit/common/docs/parsed/providers/text_opinion_pairs.py +78 -0
- arekit/common/docs/parsed/service.py +31 -0
- arekit/common/docs/parsed/term_position.py +42 -0
- arekit/common/docs/parser.py +34 -0
- arekit/common/docs/sentence.py +14 -0
- arekit/common/entities/__init__.py +0 -0
- arekit/common/entities/base.py +51 -0
- arekit/common/entities/collection.py +72 -0
- arekit/common/entities/str_fmt.py +8 -0
- arekit/common/entities/types.py +9 -0
- arekit/common/experiment/__init__.py +0 -0
- arekit/common/experiment/api/__init__.py +0 -0
- arekit/common/experiment/api/base_samples_io.py +20 -0
- arekit/common/experiment/data_type.py +17 -0
- arekit/common/frames/__init__.py +0 -0
- arekit/common/frames/connotations/__init__.py +0 -0
- arekit/common/frames/connotations/descriptor.py +17 -0
- arekit/common/frames/connotations/provider.py +4 -0
- arekit/common/frames/text_variant.py +43 -0
- arekit/common/frames/variants/__init__.py +0 -0
- arekit/common/frames/variants/base.py +21 -0
- arekit/common/frames/variants/collection.py +60 -0
- arekit/common/labels/__init__.py +0 -0
- arekit/common/labels/base.py +19 -0
- arekit/common/labels/provider/__init__.py +0 -0
- arekit/common/labels/provider/base.py +7 -0
- arekit/common/labels/provider/constant.py +14 -0
- arekit/common/labels/scaler/__init__.py +0 -0
- arekit/common/labels/scaler/base.py +85 -0
- arekit/common/labels/scaler/sentiment.py +7 -0
- arekit/common/labels/scaler/single.py +10 -0
- arekit/common/labels/str_fmt.py +55 -0
- arekit/common/linkage/__init__.py +0 -0
- arekit/common/linkage/base.py +44 -0
- arekit/common/linkage/meta.py +23 -0
- arekit/common/linkage/opinions.py +9 -0
- arekit/common/linkage/text_opinions.py +22 -0
- arekit/common/log_utils.py +29 -0
- arekit/common/model/__init__.py +0 -0
- arekit/common/model/labeling/__init__.py +0 -0
- arekit/common/model/labeling/base.py +24 -0
- arekit/common/model/labeling/modes.py +8 -0
- arekit/common/model/labeling/single.py +24 -0
- arekit/common/opinions/__init__.py +0 -0
- arekit/common/opinions/annot/__init__.py +0 -0
- arekit/common/opinions/annot/algo/__init__.py +0 -0
- arekit/common/opinions/annot/algo/base.py +4 -0
- arekit/common/opinions/annot/algo/pair_based.py +99 -0
- arekit/common/opinions/annot/algo/predefined.py +16 -0
- arekit/common/opinions/annot/algo_based.py +55 -0
- arekit/common/opinions/annot/base.py +15 -0
- arekit/common/opinions/base.py +74 -0
- arekit/common/opinions/collection.py +150 -0
- arekit/common/opinions/enums.py +6 -0
- arekit/common/opinions/provider.py +4 -0
- arekit/common/opinions/writer.py +4 -0
- arekit/common/pipeline/__init__.py +0 -0
- arekit/common/pipeline/base.py +25 -0
- arekit/common/pipeline/context.py +36 -0
- arekit/common/pipeline/conts.py +2 -0
- arekit/common/pipeline/items/__init__.py +0 -0
- arekit/common/pipeline/items/base.py +12 -0
- arekit/common/pipeline/items/flatten.py +14 -0
- arekit/common/pipeline/items/handle.py +17 -0
- arekit/common/pipeline/items/iter.py +11 -0
- arekit/common/pipeline/items/map.py +11 -0
- arekit/common/pipeline/items/map_nested.py +13 -0
- arekit/common/synonyms/__init__.py +0 -0
- arekit/common/synonyms/base.py +151 -0
- arekit/common/synonyms/grouping.py +21 -0
- arekit/common/text/__init__.py +0 -0
- arekit/common/text/enums.py +12 -0
- arekit/common/text/parsed.py +42 -0
- arekit/common/text/parser.py +12 -0
- arekit/common/text/partitioning/__init__.py +0 -0
- arekit/common/text/partitioning/base.py +4 -0
- arekit/common/text/partitioning/str.py +36 -0
- arekit/common/text/partitioning/terms.py +35 -0
- arekit/common/text/stemmer.py +16 -0
- arekit/common/text_opinions/__init__.py +0 -0
- arekit/common/text_opinions/base.py +105 -0
- arekit/common/utils.py +129 -0
- arekit/contrib/__init__.py +0 -0
- arekit/contrib/bert/__init__.py +0 -0
- arekit/contrib/bert/input/__init__.py +0 -0
- arekit/contrib/bert/input/providers/__init__.py +0 -0
- arekit/contrib/bert/input/providers/cropped_sample.py +17 -0
- arekit/contrib/bert/input/providers/text_pair.py +62 -0
- arekit/contrib/bert/terms/__init__.py +0 -0
- arekit/contrib/bert/terms/mapper.py +20 -0
- arekit/contrib/networks/__init__.py +0 -0
- arekit/contrib/networks/embedding.py +149 -0
- arekit/contrib/networks/embedding_io.py +18 -0
- arekit/contrib/networks/input/__init__.py +0 -0
- arekit/contrib/networks/input/const.py +6 -0
- arekit/contrib/networks/input/ctx_serialization.py +28 -0
- arekit/contrib/networks/input/embedding/__init__.py +0 -0
- arekit/contrib/networks/input/embedding/matrix.py +29 -0
- arekit/contrib/networks/input/embedding/offsets.py +55 -0
- arekit/contrib/networks/input/formatters/__init__.py +0 -0
- arekit/contrib/networks/input/formatters/pos_mapper.py +22 -0
- arekit/contrib/networks/input/providers/__init__.py +0 -0
- arekit/contrib/networks/input/providers/sample.py +129 -0
- arekit/contrib/networks/input/providers/term_connotation.py +23 -0
- arekit/contrib/networks/input/providers/text.py +24 -0
- arekit/contrib/networks/input/rows_parser.py +47 -0
- arekit/contrib/networks/input/term_types.py +13 -0
- arekit/contrib/networks/input/terms_mapping.py +60 -0
- arekit/contrib/networks/vectorizer.py +6 -0
- arekit/contrib/prompt/__init__.py +0 -0
- arekit/contrib/prompt/sample.py +61 -0
- arekit/contrib/source/__init__.py +0 -0
- arekit/contrib/source/brat/__init__.py +0 -0
- arekit/contrib/source/brat/annot.py +84 -0
- arekit/contrib/source/brat/doc.py +28 -0
- arekit/contrib/source/brat/entities/__init__.py +0 -0
- arekit/contrib/source/brat/entities/compound.py +13 -0
- arekit/contrib/source/brat/entities/entity.py +42 -0
- arekit/contrib/source/brat/entities/parser.py +53 -0
- arekit/contrib/source/brat/opinions/__init__.py +0 -0
- arekit/contrib/source/brat/opinions/converter.py +19 -0
- arekit/contrib/source/brat/relation.py +32 -0
- arekit/contrib/source/brat/sentence.py +69 -0
- arekit/contrib/source/brat/sentences_reader.py +128 -0
- arekit/contrib/source/download.py +41 -0
- arekit/contrib/source/nerel/__init__.py +0 -0
- arekit/contrib/source/nerel/entities.py +55 -0
- arekit/contrib/source/nerel/folding/__init__.py +0 -0
- arekit/contrib/source/nerel/folding/fixed.py +74 -0
- arekit/contrib/source/nerel/io_utils.py +62 -0
- arekit/contrib/source/nerel/labels.py +241 -0
- arekit/contrib/source/nerel/reader.py +46 -0
- arekit/contrib/source/nerel/utils.py +24 -0
- arekit/contrib/source/nerel/versions.py +12 -0
- arekit/contrib/source/nerelbio/__init__.py +0 -0
- arekit/contrib/source/nerelbio/io_utils.py +62 -0
- arekit/contrib/source/nerelbio/labels.py +265 -0
- arekit/contrib/source/nerelbio/reader.py +8 -0
- arekit/contrib/source/nerelbio/versions.py +8 -0
- arekit/contrib/source/ruattitudes/__init__.py +0 -0
- arekit/contrib/source/ruattitudes/collection.py +36 -0
- arekit/contrib/source/ruattitudes/doc.py +51 -0
- arekit/contrib/source/ruattitudes/doc_brat.py +44 -0
- arekit/contrib/source/ruattitudes/entity/__init__.py +0 -0
- arekit/contrib/source/ruattitudes/entity/parser.py +7 -0
- arekit/contrib/source/ruattitudes/io_utils.py +56 -0
- arekit/contrib/source/ruattitudes/labels_fmt.py +12 -0
- arekit/contrib/source/ruattitudes/opinions/__init__.py +0 -0
- arekit/contrib/source/ruattitudes/opinions/base.py +28 -0
- arekit/contrib/source/ruattitudes/opinions/converter.py +37 -0
- arekit/contrib/source/ruattitudes/reader.py +268 -0
- arekit/contrib/source/ruattitudes/sentence.py +73 -0
- arekit/contrib/source/ruattitudes/synonyms.py +17 -0
- arekit/contrib/source/ruattitudes/text_object.py +59 -0
- arekit/contrib/source/rusentiframes/__init__.py +0 -0
- arekit/contrib/source/rusentiframes/collection.py +157 -0
- arekit/contrib/source/rusentiframes/effect.py +24 -0
- arekit/contrib/source/rusentiframes/io_utils.py +19 -0
- arekit/contrib/source/rusentiframes/labels_fmt.py +22 -0
- arekit/contrib/source/rusentiframes/polarity.py +35 -0
- arekit/contrib/source/rusentiframes/role.py +15 -0
- arekit/contrib/source/rusentiframes/state.py +24 -0
- arekit/contrib/source/rusentiframes/types.py +42 -0
- arekit/contrib/source/rusentiframes/value.py +2 -0
- arekit/contrib/source/rusentrel/__init__.py +0 -0
- arekit/contrib/source/rusentrel/const.py +3 -0
- arekit/contrib/source/rusentrel/docs_reader.py +51 -0
- arekit/contrib/source/rusentrel/entities.py +26 -0
- arekit/contrib/source/rusentrel/io_utils.py +125 -0
- arekit/contrib/source/rusentrel/labels_fmt.py +12 -0
- arekit/contrib/source/rusentrel/opinions/__init__.py +0 -0
- arekit/contrib/source/rusentrel/opinions/collection.py +30 -0
- arekit/contrib/source/rusentrel/opinions/converter.py +40 -0
- arekit/contrib/source/rusentrel/opinions/provider.py +54 -0
- arekit/contrib/source/rusentrel/opinions/writer.py +42 -0
- arekit/contrib/source/rusentrel/synonyms.py +17 -0
- arekit/contrib/source/sentinerel/__init__.py +0 -0
- arekit/contrib/source/sentinerel/entities.py +52 -0
- arekit/contrib/source/sentinerel/folding/__init__.py +0 -0
- arekit/contrib/source/sentinerel/folding/factory.py +31 -0
- arekit/contrib/source/sentinerel/folding/fixed.py +70 -0
- arekit/contrib/source/sentinerel/io_utils.py +87 -0
- arekit/contrib/source/sentinerel/labels.py +53 -0
- arekit/contrib/source/sentinerel/labels_scaler.py +30 -0
- arekit/contrib/source/sentinerel/reader.py +42 -0
- arekit/contrib/source/synonyms/__init__.py +0 -0
- arekit/contrib/source/synonyms/utils.py +19 -0
- arekit/contrib/source/zip_utils.py +47 -0
- arekit/contrib/utils/__init__.py +0 -0
- arekit/contrib/utils/bert/__init__.py +0 -0
- arekit/contrib/utils/bert/samplers.py +17 -0
- arekit/contrib/utils/connotations/__init__.py +0 -0
- arekit/contrib/utils/connotations/rusentiframes_sentiment.py +23 -0
- arekit/contrib/utils/data/__init__.py +0 -0
- arekit/contrib/utils/data/contents/__init__.py +0 -0
- arekit/contrib/utils/data/contents/opinions.py +37 -0
- arekit/contrib/utils/data/doc_provider/__init__.py +0 -0
- arekit/contrib/utils/data/doc_provider/dict_based.py +13 -0
- arekit/contrib/utils/data/doc_provider/dir_based.py +53 -0
- arekit/contrib/utils/data/readers/__init__.py +0 -0
- arekit/contrib/utils/data/readers/base.py +7 -0
- arekit/contrib/utils/data/readers/csv_pd.py +38 -0
- arekit/contrib/utils/data/readers/jsonl.py +15 -0
- arekit/contrib/utils/data/service/__init__.py +0 -0
- arekit/contrib/utils/data/service/balance.py +50 -0
- arekit/contrib/utils/data/storages/__init__.py +0 -0
- arekit/contrib/utils/data/storages/jsonl_based.py +18 -0
- arekit/contrib/utils/data/storages/pandas_based.py +123 -0
- arekit/contrib/utils/data/storages/row_cache.py +48 -0
- arekit/contrib/utils/data/writers/__init__.py +0 -0
- arekit/contrib/utils/data/writers/base.py +27 -0
- arekit/contrib/utils/data/writers/csv_native.py +63 -0
- arekit/contrib/utils/data/writers/csv_pd.py +40 -0
- arekit/contrib/utils/data/writers/json_opennre.py +132 -0
- arekit/contrib/utils/data/writers/sqlite_native.py +110 -0
- arekit/contrib/utils/download.py +77 -0
- arekit/contrib/utils/embeddings/__init__.py +0 -0
- arekit/contrib/utils/embeddings/rusvectores.py +58 -0
- arekit/contrib/utils/embeddings/tokens.py +30 -0
- arekit/contrib/utils/entities/__init__.py +0 -0
- arekit/contrib/utils/entities/filter.py +7 -0
- arekit/contrib/utils/entities/formatters/__init__.py +0 -0
- arekit/contrib/utils/entities/formatters/str_display.py +11 -0
- arekit/contrib/utils/entities/formatters/str_simple_sharp_prefixed_fmt.py +15 -0
- arekit/contrib/utils/io_utils/__init__.py +0 -0
- arekit/contrib/utils/io_utils/embedding.py +72 -0
- arekit/contrib/utils/io_utils/opinions.py +37 -0
- arekit/contrib/utils/io_utils/samples.py +79 -0
- arekit/contrib/utils/io_utils/utils.py +39 -0
- arekit/contrib/utils/lexicons/__init__.py +0 -0
- arekit/contrib/utils/lexicons/lexicon.py +41 -0
- arekit/contrib/utils/lexicons/relation.py +42 -0
- arekit/contrib/utils/lexicons/rusentilex.py +37 -0
- arekit/contrib/utils/nn/__init__.py +0 -0
- arekit/contrib/utils/nn/rows.py +83 -0
- arekit/contrib/utils/np_utils/__init__.py +0 -0
- arekit/contrib/utils/np_utils/embedding.py +22 -0
- arekit/contrib/utils/np_utils/npz_utils.py +13 -0
- arekit/contrib/utils/np_utils/vocab.py +20 -0
- arekit/contrib/utils/pipelines/__init__.py +0 -0
- arekit/contrib/utils/pipelines/items/__init__.py +0 -0
- arekit/contrib/utils/pipelines/items/sampling/__init__.py +0 -0
- arekit/contrib/utils/pipelines/items/sampling/base.py +99 -0
- arekit/contrib/utils/pipelines/items/sampling/networks.py +54 -0
- arekit/contrib/utils/pipelines/items/text/__init__.py +0 -0
- arekit/contrib/utils/pipelines/items/text/entities_default.py +23 -0
- arekit/contrib/utils/pipelines/items/text/frames.py +86 -0
- arekit/contrib/utils/pipelines/items/text/frames_lemmatized.py +36 -0
- arekit/contrib/utils/pipelines/items/text/frames_negation.py +32 -0
- arekit/contrib/utils/pipelines/items/text/terms_splitter.py +10 -0
- arekit/contrib/utils/pipelines/items/text/tokenizer.py +107 -0
- arekit/contrib/utils/pipelines/items/text/translator.py +135 -0
- arekit/contrib/utils/pipelines/opinion_collections.py +85 -0
- arekit/contrib/utils/pipelines/sources/__init__.py +0 -0
- arekit/contrib/utils/pipelines/sources/nerel/__init__.py +0 -0
- arekit/contrib/utils/pipelines/sources/nerel/doc_provider.py +27 -0
- arekit/contrib/utils/pipelines/sources/nerel/extract_text_relations.py +65 -0
- arekit/contrib/utils/pipelines/sources/nerel/labels_fmt.py +60 -0
- arekit/contrib/utils/pipelines/sources/nerel_bio/__init__.py +0 -0
- arekit/contrib/utils/pipelines/sources/nerel_bio/doc_provider.py +29 -0
- arekit/contrib/utils/pipelines/sources/nerel_bio/extrat_text_relations.py +64 -0
- arekit/contrib/utils/pipelines/sources/nerel_bio/labels_fmt.py +79 -0
- arekit/contrib/utils/pipelines/sources/ruattitudes/__init__.py +0 -0
- arekit/contrib/utils/pipelines/sources/ruattitudes/doc_provider.py +56 -0
- arekit/contrib/utils/pipelines/sources/ruattitudes/entity_filter.py +20 -0
- arekit/contrib/utils/pipelines/sources/ruattitudes/extract_text_opinions.py +65 -0
- arekit/contrib/utils/pipelines/sources/rusentrel/__init__.py +0 -0
- arekit/contrib/utils/pipelines/sources/rusentrel/doc_provider.py +21 -0
- arekit/contrib/utils/pipelines/sources/rusentrel/extract_text_opinions.py +107 -0
- arekit/contrib/utils/pipelines/sources/sentinerel/__init__.py +0 -0
- arekit/contrib/utils/pipelines/sources/sentinerel/doc_provider.py +29 -0
- arekit/contrib/utils/pipelines/sources/sentinerel/entity_filter.py +62 -0
- arekit/contrib/utils/pipelines/sources/sentinerel/extract_text_opinions.py +180 -0
- arekit/contrib/utils/pipelines/sources/sentinerel/labels_fmt.py +50 -0
- arekit/contrib/utils/pipelines/text_opinion/__init__.py +0 -0
- arekit/contrib/utils/pipelines/text_opinion/annot/__init__.py +0 -0
- arekit/contrib/utils/pipelines/text_opinion/annot/algo_based.py +34 -0
- arekit/contrib/utils/pipelines/text_opinion/annot/predefined.py +88 -0
- arekit/contrib/utils/pipelines/text_opinion/extraction.py +93 -0
- arekit/contrib/utils/pipelines/text_opinion/filters/__init__.py +0 -0
- arekit/contrib/utils/pipelines/text_opinion/filters/base.py +4 -0
- arekit/contrib/utils/pipelines/text_opinion/filters/distance_based.py +16 -0
- arekit/contrib/utils/pipelines/text_opinion/filters/entity_based.py +29 -0
- arekit/contrib/utils/pipelines/text_opinion/filters/limitation.py +26 -0
- arekit/contrib/utils/processing/__init__.py +0 -0
- arekit/contrib/utils/processing/languages/__init__.py +0 -0
- arekit/contrib/utils/processing/languages/mods.py +12 -0
- arekit/contrib/utils/processing/languages/pos.py +23 -0
- arekit/contrib/utils/processing/languages/ru/__init__.py +0 -0
- arekit/contrib/utils/processing/languages/ru/cases.py +78 -0
- arekit/contrib/utils/processing/languages/ru/constants.py +6 -0
- arekit/contrib/utils/processing/languages/ru/mods.py +13 -0
- arekit/contrib/utils/processing/languages/ru/number.py +23 -0
- arekit/contrib/utils/processing/languages/ru/pos_service.py +36 -0
- arekit/contrib/utils/processing/lemmatization/__init__.py +0 -0
- arekit/contrib/utils/processing/lemmatization/mystem.py +51 -0
- arekit/contrib/utils/processing/pos/__init__.py +0 -0
- arekit/contrib/utils/processing/pos/base.py +12 -0
- arekit/contrib/utils/processing/pos/mystem_wrap.py +134 -0
- arekit/contrib/utils/processing/pos/russian.py +10 -0
- arekit/contrib/utils/processing/text/__init__.py +0 -0
- arekit/contrib/utils/processing/text/tokens.py +127 -0
- arekit/contrib/utils/resources.py +25 -0
- arekit/contrib/utils/serializer.py +43 -0
- arekit/contrib/utils/sources/__init__.py +0 -0
- arekit/contrib/utils/sources/sentinerel/__init__.py +0 -0
- arekit/contrib/utils/sources/sentinerel/text_opinion/__init__.py +0 -0
- arekit/contrib/utils/sources/sentinerel/text_opinion/prof_per_org_filter.py +63 -0
- arekit/contrib/utils/synonyms/__init__.py +0 -0
- arekit/contrib/utils/synonyms/simple.py +15 -0
- arekit/contrib/utils/synonyms/stemmer_based.py +38 -0
- arekit/contrib/utils/vectorizers/__init__.py +0 -0
- arekit/contrib/utils/vectorizers/bpe.py +93 -0
- arekit/contrib/utils/vectorizers/random_norm.py +39 -0
- arekit/download_data.py +11 -0
- arekit-0.24.0.dist-info/LICENSE +21 -0
- arekit-0.24.0.dist-info/METADATA +23 -0
- arekit-0.24.0.dist-info/RECORD +374 -0
- arekit-0.24.0.dist-info/WHEEL +5 -0
- arekit-0.24.0.dist-info/top_level.txt +1 -0
arekit/__init__.py
ADDED
|
File without changes
|
|
File without changes
|
arekit/common/bound.py
ADDED
|
@@ -0,0 +1,48 @@
|
|
|
1
|
+
class Bound:
    """ Integer span described by a start position and a length.

    The span covers indices ``Position .. Position + Length - 1``,
    i.e. the end (``Position + Length``) is exclusive.
    """

    def __init__(self, pos, length):
        """
        pos: int
            index of the first covered element.
        length: int
            amount of covered elements.
        """
        assert(isinstance(pos, int))
        assert(isinstance(length, int))
        self.__pos = pos
        self.__length = length

    # region properties

    @property
    def Position(self):
        """ Start index of the span. """
        return self.__pos

    @property
    def Length(self):
        """ Amount of elements covered by the span. """
        return self.__length

    # endregion

    def itersects_with(self, other):
        """ Check whether this span and `other` share at least one index.

        NOTE: the method name is misspelled; it is kept as is for existing
        callers. New code should prefer the `intersects_with` alias.

        other: Bound
        returns: bool
        """
        begin = self.__pos
        end = self.__pos + self.__length
        other_begin = other.Position
        # Index of the last element of `other` (inclusive end).
        other_end_included = other.Position + other.Length - 1

        # `other` starts inside this span.
        if end > other_begin >= begin:
            return True
        # `other` ends inside this span.
        if end > other_end_included >= begin:
            return True
        # `other` starts before this span and lasts beyond its end.
        if other_begin < begin and end <= other_end_included:
            return True
        return False

    # Correctly spelled, backward-compatible alias of `itersects_with`.
    intersects_with = itersects_with

    def intersect(self, other):
        """ Build a new Bound that covers both this span and `other`.

        NOTE: despite the name, the result is NOT the overlap of two spans:
        it starts at the minimal start position and ends at the maximal end
        position of both spans (any gap between them is included as well).

        other: Bound
        returns: Bound
        """
        begin = self.__pos
        end = self.__pos + self.__length
        other_begin = other.Position
        other_end = other.Position + other.Length
        actual_begin = min(begin, other_begin)
        actual_length = max(end, other_end) - actual_begin
        return Bound(pos=actual_begin, length=actual_length)

    def contains(self, other):
        """ Check whether `other` lies entirely within this span.

        other: Bound
        returns: bool
        """
        begin = self.__pos
        end = self.__pos + self.__length
        other_begin = other.Position
        other_end = other.Position + other.Length
        return begin <= other_begin and end >= other_end
|
|
File without changes
|
|
@@ -0,0 +1,51 @@
|
|
|
1
|
+
from collections.abc import Iterable
|
|
2
|
+
|
|
3
|
+
from arekit.common.context.token import Token
|
|
4
|
+
from arekit.common.entities.base import Entity
|
|
5
|
+
from arekit.common.frames.text_variant import TextFrameVariant
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
class TextTermsMapper(object):
    """ Base mapper over a sequence of text terms.

    Every term is dispatched, by its type, to one of the `map_*` hooks
    which are expected to be implemented by the derived classes.
    """

    def iter_mapped(self, terms):
        """ Lazily yield the mapped form of every term of the sequence.

        Terms whose mapping result is None are dropped from the output.
        `_before_mapping` is called ahead of the first term and
        `_after_mapping` once the whole sequence has been processed.
        """
        assert isinstance(terms, Iterable)

        self._before_mapping()

        for position, item in enumerate(terms):
            mapped = self.__map_term(position, item)
            if mapped is None:
                continue
            yield mapped

        self._after_mapping()

    def __map_term(self, position, item):
        """ Pick the `map_*` hook that corresponds to the type of the item.
        """
        if isinstance(item, str):
            return self.map_word(position, item)
        if isinstance(item, Token):
            return self.map_token(position, item)
        if isinstance(item, TextFrameVariant):
            return self.map_text_frame_variant(position, item)
        if isinstance(item, Entity):
            return self.map_entity(position, item)
        raise Exception("Unsupported type {}".format(item))

    def _before_mapping(self):
        """ Hook called before the first term is mapped; no-op by default.
        """
        pass

    def _after_mapping(self):
        """ Hook called after the last term is mapped; no-op by default.
        """
        pass

    def map_word(self, w_ind, word):
        """ Map a `str` term located at index `w_ind`. """
        raise NotImplementedError()

    def map_token(self, t_ind, token):
        """ Map a `Token` term located at index `t_ind`. """
        raise NotImplementedError()

    def map_text_frame_variant(self, fv_ind, text_frame_variant):
        """ Map a `TextFrameVariant` term located at index `fv_ind`. """
        raise NotImplementedError()

    def map_entity(self, e_ind, entity):
        """ Map an `Entity` term located at index `e_ind`. """
        raise NotImplementedError()
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
class Token:
    """
    Keeps both the original term and the token value assigned to it,
    i.e.: term=',', token_value='<[COMMA]>'
    """

    def __init__(self, term, token_value):
        """
        term: str
            original value (returned by `get_meta_value`).
        token_value: str
            resulted token value (returned by `get_token_value`).
        """
        assert isinstance(term, str)
        assert isinstance(token_value, str)
        self.__meta_value = term
        self.__token_value = token_value

    def get_meta_value(self):
        """ Original term passed to the constructor. """
        return self.__meta_value

    def get_token_value(self):
        """ Token value passed to the constructor. """
        return self.__token_value
|
|
File without changes
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
# Common column names.
ID = 'id'
DOC_ID = 'doc_id'
TEXT = 'text_a'
LABEL_UINT = 'label_uint'
LABEL_STR = 'label_str'

# Global identifier of the opinion in the sampled data.
OPINION_ID = 'opinion_id'
OPINION_LINKAGE_ID = 'linkage_id'

# Corresponds to fields with attitude ends. (indices, INT)
S_IND = 's_ind'
T_IND = 't_ind'

# Provide sentence index.
SENT_IND = 'sent_ind'

# Entity parameters
ENTITY_VALUES = 'entity_values'
ENTITY_TYPES = 'entity_types'
ENTITIES = 'entities'
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
@@ -0,0 +1,59 @@
|
|
|
1
|
+
from arekit.common.data import const
|
|
2
|
+
from arekit.common.data.input.providers.columns.base import BaseColumnsProvider
|
|
3
|
+
|
|
4
|
+
|
|
5
|
+
class SampleColumnsProvider(BaseColumnsProvider):
    """
    Describes columns of the samples table.
    Label columns are present only when `store_labels` is set.
    """

    def __init__(self, store_labels):
        super().__init__()
        self.__store_labels = store_labels
        # Has to be assigned via `set_text_column_names` before
        # `get_columns_list_with_types` is called.
        self.__text_column_names = None

    # region properties

    @property
    def StoreLabels(self):
        """ Flag passed to the constructor. """
        return self.__store_labels

    @property
    def TextColumnNames(self):
        """ Names assigned by `set_text_column_names` (None until then). """
        return self.__text_column_names

    # endregion

    def get_columns_list_with_types(self):
        """
        Extends the list of (column_name, type) pairs of the base provider
        with, in order: ID, DOC_ID, [LABEL_UINT, LABEL_STR],
        text columns, S_IND, T_IND, OPINION_ID, OPINION_LINKAGE_ID.
        """
        columns = super().get_columns_list_with_types()
        add = columns.append

        add((const.ID, str))
        add((const.DOC_ID, str))

        # Labels are optional.
        if self.__store_labels:
            add((const.LABEL_UINT, 'int32'))
            add((const.LABEL_STR, str))

        # Text columns.
        for text_column in self.__text_column_names:
            add((text_column, str))

        # Indices.
        add((const.S_IND, 'int32'))
        add((const.T_IND, 'int32'))

        # Opinion-extraction task related fields.
        add((const.OPINION_ID, 'int32'))
        add((const.OPINION_LINKAGE_ID, 'int32'))

        return columns

    def set_text_column_names(self, text_column_names):
        """ Assign names (list) of the columns that keep text contents. """
        assert isinstance(text_column_names, list)
        self.__text_column_names = text_column_names
|
|
File without changes
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
from arekit.common.linkage.base import LinkedDataWrapper
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
class BaseLinkedDataInstancesProvider:
    """ Base provider of the instances produced out of the linked data.
    """

    def iter_instances(self, linked_data):
        """ Expected to be implemented by the derived classes. """
        raise NotImplementedError()

    @staticmethod
    def provide_label(linked_data):
        """ Label of the first element of the linkage.
        """
        assert isinstance(linked_data, LinkedDataWrapper)
        first_item = linked_data.First
        return first_item.Label
|
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
from arekit.common.data.input.providers.instances.base import BaseLinkedDataInstancesProvider
|
|
2
|
+
from arekit.common.linkage.text_opinions import TextOpinionsLinkage
|
|
3
|
+
from arekit.common.text_opinions.base import TextOpinion
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
class MultipleInstancesLinkedTextOpinionsProvider(BaseLinkedDataInstancesProvider):
    """ Produces one linkage instance per every supported label.
    """

    def __init__(self, supported_labels):
        assert isinstance(supported_labels, list)
        self.__supported_labels = supported_labels

    def iter_instances(self, linked_data):
        """ For every supported label, yield a copy of the linkage
            whose first text opinion carries that label.
        """
        for candidate_label in self.__supported_labels:
            yield self.__modify_first_and_copy_linked_wrap(linked_data, candidate_label)

    @staticmethod
    def __modify_first_and_copy_linked_wrap(text_opinions_linkage, label):
        """ Compose a new linkage: the first text opinion is replaced with
            its relabeled copy, the remaining ones are reused as they are.
        """
        assert isinstance(text_opinions_linkage, TextOpinionsLinkage)

        opinions = list(text_opinions_linkage)

        relabeled_first = TextOpinion.create_copy(other=opinions[0])
        relabeled_first.set_label(label=label)

        return TextOpinionsLinkage(linked_data=[relabeled_first] + opinions[1:])
|
|
File without changes
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
from arekit.common.labels.scaler.base import BaseLabelScaler
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
class LabelProvider:
    """ Base provider of the output labels; wraps a label scaler.
    """

    def __init__(self, label_scaler):
        assert isinstance(label_scaler, BaseLabelScaler)
        self.__label_scaler = label_scaler

    @property
    def LabelScaler(self):
        """ Scaler passed to the constructor. """
        return self.__label_scaler

    @property
    def SupportedLabels(self):
        """ Result of `ordered_suppoted_labels()` of the wrapped scaler. """
        scaler = self.__label_scaler
        return scaler.ordered_suppoted_labels()

    @property
    def OutputLabelsUint(self):
        """ Expected to be implemented by the derived classes. """
        raise NotImplementedError()

    def calculate_output_uint_label(self, expected_uint_label, etalon_uint_label):
        """ Expected to be implemented by the derived classes. """
        raise NotImplementedError()
|
|
24
|
+
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
from arekit.common.data.input.providers.label.base import LabelProvider
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
class BinaryLabelProvider(LabelProvider):
    """ Label provider with two output labels: 0 and 1.
    """

    @property
    def OutputLabelsUint(self):
        return [0, 1]

    def calculate_output_uint_label(self, expected_uint_label, etalon_uint_label):
        """ 1 when the expected label equals the etalon one, otherwise 0.
        """
        if expected_uint_label == etalon_uint_label:
            return 1
        return 0
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
from arekit.common.data.input.providers.label.base import LabelProvider
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
class MultipleLabelProvider(LabelProvider):
    """ Label provider whose output labels are the uint values
        of all the supported labels.
    """

    def __init__(self, label_scaler):
        super().__init__(label_scaler=label_scaler)

    @property
    def OutputLabelsUint(self):
        """ Uint value of every supported label, in the order of `SupportedLabels`.
        """
        uint_labels = []
        for supported_label in self.SupportedLabels:
            uint_labels.append(self.LabelScaler.label_to_uint(supported_label))
        return uint_labels

    def calculate_output_uint_label(self, expected_uint_label, etalon_uint_label):
        """ The expected label is returned as is; the etalon one is not used.
        """
        return expected_uint_label
|
|
15
|
+
|
|
File without changes
|
|
@@ -0,0 +1,64 @@
|
|
|
1
|
+
from collections import Counter
|
|
2
|
+
from collections.abc import Iterable
|
|
3
|
+
import logging
|
|
4
|
+
|
|
5
|
+
from arekit.common.data.input.providers.contents import ContentsProvider
|
|
6
|
+
from arekit.common.linkage.base import LinkedDataWrapper
|
|
7
|
+
from arekit.common.docs.parsed.providers.entity_service import EntityServiceProvider
|
|
8
|
+
from arekit.common.linkage.meta import MetaEmptyLinkedDataWrapper
|
|
9
|
+
|
|
10
|
+
logger = logging.getLogger(__name__)
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
class BaseRowProvider(object):
    """ Base provider of the rows that are supposed to be filled into BaseRowsStorage.
    """

    def __init__(self):
        # Holds a Counter while `iter_by_rows` is being iterated; it is put
        # back to None once that iteration runs to completion.
        self.__rows_counter = None

    # region protected methods

    # TODO. This might be also generalized.
    # TODO. Idle-mode is also an implementation and task specific parameter, i.e. might be removed from here.
    def _provide_rows(self, parsed_doc, entity_service, text_opinion_linkage, idle_mode):
        """ Produce the rows for a single linked data instance.

            Not implemented here: always raises NotImplementedError.
        """
        raise NotImplementedError()

    def _count_row(self):
        """ Register one more iterated row.

            returns: int
                the value of the "rows_iterated" counter before the increment.
        """
        counter = self.__rows_counter
        current = counter["rows_iterated"]
        counter["rows_iterated"] = current + 1
        return current

    # endregion

    def __iter_rows(self, linked_data, idle_mode):
        """ Delegate to `_provide_rows`, taking the parsed document and the
            entity service from the service kept in `linked_data.Tag`.
        """
        doc_service = linked_data.Tag
        return self._provide_rows(
            parsed_doc=doc_service.ParsedDocument,
            entity_service=doc_service.get_provider(EntityServiceProvider.NAME),
            text_opinion_linkage=linked_data,
            idle_mode=idle_mode)

    def iter_by_rows(self, contents_provider, doc_ids_iter, idle_mode):
        """ Iterate over the data of every document provided by `contents_provider`.

            contents_provider: ContentsProvider
                source of the linked data, queried via `from_doc_ids`.
            doc_ids_iter: Iterable
                document identifiers passed to the contents provider.
            idle_mode:
                passed through to the contents provider and to `_provide_rows`.

            yields: pairs of (linked_data.RelatedDocID, data), where data is
                an item of `_provide_rows` for a regular linked data, or the
                linked data itself for a MetaEmptyLinkedDataWrapper out of
                the idle mode.
        """
        assert isinstance(contents_provider, ContentsProvider)
        assert isinstance(doc_ids_iter, Iterable)

        self.__rows_counter = Counter()

        linked_data_iter = contents_provider.from_doc_ids(doc_ids=doc_ids_iter, idle_mode=idle_mode)

        for linked_data in linked_data_iter:
            assert isinstance(linked_data, LinkedDataWrapper)

            if not isinstance(linked_data, MetaEmptyLinkedDataWrapper):
                # Regular case: the rows of the related linked data.
                entries = self.__iter_rows(linked_data=linked_data, idle_mode=idle_mode)
            elif idle_mode:
                # The meta-data is not considered in the IDLE mode.
                entries = []
            else:
                # The meta linked data instance is passed through as is.
                entries = [linked_data]

            for entry in entries:
                yield linked_data.RelatedDocID, entry

        self.__rows_counter = None
|
|
@@ -0,0 +1,227 @@
|
|
|
1
|
+
from collections import OrderedDict
|
|
2
|
+
|
|
3
|
+
from arekit.common.data import const
|
|
4
|
+
from arekit.common.data.input.providers.instances.multiple import MultipleInstancesLinkedTextOpinionsProvider
|
|
5
|
+
from arekit.common.data.input.providers.instances.single import SingleInstanceLinkedDataProvider
|
|
6
|
+
from arekit.common.data.input.providers.label.base import LabelProvider
|
|
7
|
+
from arekit.common.data.input.providers.label.binary import BinaryLabelProvider
|
|
8
|
+
from arekit.common.data.input.providers.label.multiple import MultipleLabelProvider
|
|
9
|
+
from arekit.common.data.input.providers.rows.base import BaseRowProvider
|
|
10
|
+
from arekit.common.data.input.providers.text.single import BaseSingleTextProvider
|
|
11
|
+
from arekit.common.data.rows_fmt import create_base_column_fmt
|
|
12
|
+
from arekit.common.entities.base import Entity
|
|
13
|
+
from arekit.common.labels.base import Label
|
|
14
|
+
|
|
15
|
+
from arekit.common.linkage.text_opinions import TextOpinionsLinkage
|
|
16
|
+
from arekit.common.docs.parsed.base import ParsedDocument
|
|
17
|
+
from arekit.common.docs.parsed.providers.entity_service import EntityEndType, EntityServiceProvider
|
|
18
|
+
from arekit.common.docs.parsed.term_position import TermPositionTypes
|
|
19
|
+
from arekit.common.text_opinions.base import TextOpinion
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
# TODO. This is actually a text-opinion related sampler.
|
|
23
|
+
# TODO. Here we may expose all the text-opinion related params.
|
|
24
|
+
# TODO. With more generalized API in base class.
|
|
25
|
+
class BaseSampleRowProvider(BaseRowProvider):
    """ Provider of the rows meant for a samples storage.

        A row is composed for a text opinion of a linkage; the label columns
        are filled only when that is switched on via `set_store_labels`.
    """

    def __init__(self, label_provider, text_provider):
        assert isinstance(label_provider, LabelProvider)
        assert isinstance(text_provider, BaseSingleTextProvider)
        super().__init__()

        self._label_provider = label_provider
        self._val_fmt = create_base_column_fmt(fmt_type="writer")
        self.__text_provider = text_provider
        self.__instances_provider = self.__create_instances_provider(label_provider)
        # Stays None until `set_store_labels` is called; `_fill_row_core`
        # asserts that a bool has been assigned by then.
        self.__store_labels = None

    # region properties

    @property
    def LabelProvider(self):
        return self._label_provider

    @property
    def TextProvider(self):
        return self.__text_provider

    # endregion

    # region protected methods

    def _provide_sentence_terms(self, parsed_doc, sentence_ind, s_ind, t_ind):
        """ Collect the terms of the sentence with the given index.

            returns: tuple
                (list of the sentence terms, s_ind, t_ind); both indices are
                returned unchanged by this implementation.
        """
        terms = list(parsed_doc.iter_sentence_terms(sentence_index=sentence_ind, return_id=False))
        return terms, s_ind, t_ind

    # TODO. This is a very task-specific description, too many data provided.
    # TODO. Switch this API to dict of params
    def _fill_row_core(self, row, text_opinion_linkage, index_in_linked, etalon_label,
                       parsed_doc, sentence_ind, s_ind, t_ind):
        """ Fill `row` with the column values of a single sample.

            The values are first gathered into a column -> value mapping,
            which the text provider may extend through the setter callback,
            and then written to `row` by `_apply_row_data`. The label columns
            keep None values (and therefore are not written) unless label
            storing is switched on.
        """
        assert isinstance(self.__store_labels, bool)

        terms, src_ind, tgt_ind = self._provide_sentence_terms(
            parsed_doc=parsed_doc, sentence_ind=sentence_ind, s_ind=s_ind, t_ind=t_ind)

        # Entities of the related context and their positions among the terms.
        entities = []
        entity_positions = []
        for position, term in enumerate(terms):
            if isinstance(term, Entity):
                entities.append(term)
                entity_positions.append(str(position))

        row_index = self._count_row()
        first_opinion = text_opinion_linkage.First

        # Values mapping; the label columns are reserved here so that they
        # precede whatever the text provider assigns afterwards.
        vm = {
            const.ID: row_index,
            const.OPINION_ID: first_opinion.TextOpinionID,
            const.OPINION_LINKAGE_ID: index_in_linked,
            const.DOC_ID: first_opinion.DocID,
            const.SENT_IND: sentence_ind,
            const.ENTITY_VALUES: entities,
            const.ENTITY_TYPES: entities,
            const.ENTITIES: entity_positions,
            const.S_IND: src_ind,
            const.T_IND: tgt_ind,
            const.LABEL_UINT: None,
            const.LABEL_STR: None,
        }

        # Setter handed over to the text provider to compose the text value.
        def set_column(column, value):
            vm[column] = value

        expected_label = text_opinion_linkage.get_linked_label()

        self.__text_provider.add_text_in_row(
            set_text_func=set_column,
            sentence_terms=terms,
            s_ind=src_ind,
            t_ind=tgt_ind,
            expected_label=expected_label)

        if self.__store_labels:
            scaler = self._label_provider.LabelScaler
            uint_label = self._label_provider.calculate_output_uint_label(
                expected_uint_label=scaler.label_to_uint(expected_label),
                etalon_uint_label=scaler.label_to_uint(etalon_label))
            vm[const.LABEL_UINT] = uint_label
            # The string label is the class name of the resulting label.
            vm[const.LABEL_STR] = type(scaler.uint_to_label(uint_label)).__name__

        self._apply_row_data(row=row, vm=vm, val_fmt=self._val_fmt)

    @staticmethod
    def _apply_row_data(row, vm, val_fmt):
        """ Copy the non-None values of `vm` into `row`, passing a value
            through `val_fmt[column]` when such a formatter is registered.
        """
        for column, value in vm.items():
            if value is not None:
                row[column] = val_fmt[column](value) if column in val_fmt else value

    def _provide_rows(self, parsed_doc, entity_service, text_opinion_linkage, idle_mode):
        """ Yield the rows for every index of the given linkage.

            A single OrderedDict instance is shared between all the rows.
        """
        assert isinstance(idle_mode, bool)

        shared_row = OrderedDict()

        for index_in_linked in range(len(text_opinion_linkage)):
            yield from self.__provide_rows(
                parsed_doc=parsed_doc,
                entity_service=entity_service,
                row_dict=shared_row,
                text_opinion_linkage=text_opinion_linkage,
                index_in_linked=index_in_linked,
                idle_mode=idle_mode)

    # endregion

    # region private methods

    @staticmethod
    def __create_instances_provider(label_provider):
        """ Select the instances provider by the type of the label provider.

            returns: instances provider, or None when the label provider is
                neither a BinaryLabelProvider nor a MultipleLabelProvider.
        """
        # TODO. #473 related: these label providers are based on text opinion extraction task!
        if isinstance(label_provider, BinaryLabelProvider):
            return MultipleInstancesLinkedTextOpinionsProvider(label_provider.SupportedLabels)
        elif isinstance(label_provider, MultipleLabelProvider):
            return SingleInstanceLinkedDataProvider()
        return None

    def __provide_rows(self, row_dict, parsed_doc, entity_service,
                       text_opinion_linkage, index_in_linked, idle_mode):
        """ Yield a row per instance that the instances provider derives
            from the given linkage.
        """
        assert isinstance(parsed_doc, ParsedDocument)
        assert isinstance(row_dict, OrderedDict)
        assert isinstance(text_opinion_linkage, TextOpinionsLinkage)

        provider = self.__instances_provider
        etalon_label = provider.provide_label(text_opinion_linkage)

        for linkage_instance in provider.iter_instances(text_opinion_linkage):
            # TODO. provide uint_label
            yield self.__create_row(
                row=row_dict,
                row_id=0,
                parsed_doc=parsed_doc,
                entity_service=entity_service,
                text_opinions_linkage=linkage_instance,
                index_in_linked=index_in_linked,
                etalon_label=etalon_label,
                idle_mode=idle_mode)

    def __create_row(self, row, row_id, parsed_doc, entity_service, text_opinions_linkage,
                     index_in_linked, etalon_label, idle_mode):
        """ Compose the row for the text opinion at `index_in_linked`.

            `row_id` is accepted but not used by this method.

            returns: OrderedDict or None
                the passed `row`, cleared and refilled, or None in idle mode.

            raises: Exception
                when the source and the target ends of the text opinion
                belong to different sentences.
        """
        assert isinstance(row, OrderedDict)
        assert isinstance(text_opinions_linkage, TextOpinionsLinkage)
        assert isinstance(index_in_linked, int)
        assert isinstance(etalon_label, Label)
        assert isinstance(idle_mode, bool)

        if idle_mode:
            return None

        text_opinion = text_opinions_linkage[index_in_linked]
        s_ind, t_ind = self.__get_opinion_end_indices(entity_service, text_opinion)

        row.clear()

        def sentence_index_of(end_type):
            return entity_service.extract_entity_position(
                text_opinion=text_opinion, end_type=end_type,
                position_type=TermPositionTypes.SentenceIndex)

        source_sentence = sentence_index_of(EntityEndType.Source)
        target_sentence = sentence_index_of(EntityEndType.Target)

        if target_sentence != source_sentence:
            raise Exception("Limitation: Multi-Sentence text_opinions are not supported.")

        self._fill_row_core(row=row,
                            parsed_doc=parsed_doc,
                            sentence_ind=source_sentence,
                            text_opinion_linkage=text_opinions_linkage,
                            index_in_linked=index_in_linked,
                            etalon_label=etalon_label,
                            s_ind=s_ind,
                            t_ind=t_ind)
        return row

    @staticmethod
    def __get_opinion_end_indices(service, text_opinion):
        """ Return the in-sentence indices of the source and the target
            entities of the text opinion, in that order.
        """
        assert isinstance(service, EntityServiceProvider)
        assert isinstance(text_opinion, TextOpinion)

        def index_in_sentence(entity_id):
            position = service.get_entity_position(entity_id)
            return position.get_index(position_type=TermPositionTypes.IndexInSentence)

        return index_in_sentence(text_opinion.SourceId), index_in_sentence(text_opinion.TargetId)

    # endregion

    def set_store_labels(self, store_labels):
        """ Switch the filling of the label columns on or off.
        """
        assert isinstance(store_labels, bool)
        self.__store_labels = store_labels
|
|
File without changes
|