arekit 0.24.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- arekit/__init__.py +0 -0
- arekit/common/__init__.py +0 -0
- arekit/common/bound.py +48 -0
- arekit/common/context/__init__.py +0 -0
- arekit/common/context/terms_mapper.py +51 -0
- arekit/common/context/token.py +16 -0
- arekit/common/data/__init__.py +0 -0
- arekit/common/data/const.py +21 -0
- arekit/common/data/doc_provider.py +6 -0
- arekit/common/data/input/__init__.py +0 -0
- arekit/common/data/input/providers/__init__.py +0 -0
- arekit/common/data/input/providers/columns/__init__.py +0 -0
- arekit/common/data/input/providers/columns/base.py +9 -0
- arekit/common/data/input/providers/columns/sample.py +59 -0
- arekit/common/data/input/providers/const.py +3 -0
- arekit/common/data/input/providers/contents.py +9 -0
- arekit/common/data/input/providers/instances/__init__.py +0 -0
- arekit/common/data/input/providers/instances/base.py +14 -0
- arekit/common/data/input/providers/instances/multiple.py +27 -0
- arekit/common/data/input/providers/instances/single.py +8 -0
- arekit/common/data/input/providers/label/__init__.py +0 -0
- arekit/common/data/input/providers/label/base.py +24 -0
- arekit/common/data/input/providers/label/binary.py +11 -0
- arekit/common/data/input/providers/label/multiple.py +15 -0
- arekit/common/data/input/providers/rows/__init__.py +0 -0
- arekit/common/data/input/providers/rows/base.py +64 -0
- arekit/common/data/input/providers/rows/samples.py +227 -0
- arekit/common/data/input/providers/sample/__init__.py +0 -0
- arekit/common/data/input/providers/sample/cropped.py +43 -0
- arekit/common/data/input/providers/text/__init__.py +0 -0
- arekit/common/data/input/providers/text/single.py +49 -0
- arekit/common/data/input/repositories/__init__.py +0 -0
- arekit/common/data/input/repositories/base.py +68 -0
- arekit/common/data/input/repositories/sample.py +22 -0
- arekit/common/data/input/sample.py +66 -0
- arekit/common/data/input/terms_mapper.py +88 -0
- arekit/common/data/rows_fmt.py +82 -0
- arekit/common/data/rows_parser.py +43 -0
- arekit/common/data/storages/__init__.py +0 -0
- arekit/common/data/storages/base.py +109 -0
- arekit/common/data/views/__init__.py +0 -0
- arekit/common/data/views/samples.py +26 -0
- arekit/common/docs/__init__.py +0 -0
- arekit/common/docs/base.py +30 -0
- arekit/common/docs/entities_grouping.py +16 -0
- arekit/common/docs/entity.py +18 -0
- arekit/common/docs/objects_parser.py +37 -0
- arekit/common/docs/parsed/__init__.py +0 -0
- arekit/common/docs/parsed/base.py +101 -0
- arekit/common/docs/parsed/providers/__init__.py +0 -0
- arekit/common/docs/parsed/providers/base.py +68 -0
- arekit/common/docs/parsed/providers/base_pairs.py +51 -0
- arekit/common/docs/parsed/providers/entity_service.py +175 -0
- arekit/common/docs/parsed/providers/opinion_pairs.py +20 -0
- arekit/common/docs/parsed/providers/text_opinion_pairs.py +78 -0
- arekit/common/docs/parsed/service.py +31 -0
- arekit/common/docs/parsed/term_position.py +42 -0
- arekit/common/docs/parser.py +34 -0
- arekit/common/docs/sentence.py +14 -0
- arekit/common/entities/__init__.py +0 -0
- arekit/common/entities/base.py +51 -0
- arekit/common/entities/collection.py +72 -0
- arekit/common/entities/str_fmt.py +8 -0
- arekit/common/entities/types.py +9 -0
- arekit/common/experiment/__init__.py +0 -0
- arekit/common/experiment/api/__init__.py +0 -0
- arekit/common/experiment/api/base_samples_io.py +20 -0
- arekit/common/experiment/data_type.py +17 -0
- arekit/common/frames/__init__.py +0 -0
- arekit/common/frames/connotations/__init__.py +0 -0
- arekit/common/frames/connotations/descriptor.py +17 -0
- arekit/common/frames/connotations/provider.py +4 -0
- arekit/common/frames/text_variant.py +43 -0
- arekit/common/frames/variants/__init__.py +0 -0
- arekit/common/frames/variants/base.py +21 -0
- arekit/common/frames/variants/collection.py +60 -0
- arekit/common/labels/__init__.py +0 -0
- arekit/common/labels/base.py +19 -0
- arekit/common/labels/provider/__init__.py +0 -0
- arekit/common/labels/provider/base.py +7 -0
- arekit/common/labels/provider/constant.py +14 -0
- arekit/common/labels/scaler/__init__.py +0 -0
- arekit/common/labels/scaler/base.py +85 -0
- arekit/common/labels/scaler/sentiment.py +7 -0
- arekit/common/labels/scaler/single.py +10 -0
- arekit/common/labels/str_fmt.py +55 -0
- arekit/common/linkage/__init__.py +0 -0
- arekit/common/linkage/base.py +44 -0
- arekit/common/linkage/meta.py +23 -0
- arekit/common/linkage/opinions.py +9 -0
- arekit/common/linkage/text_opinions.py +22 -0
- arekit/common/log_utils.py +29 -0
- arekit/common/model/__init__.py +0 -0
- arekit/common/model/labeling/__init__.py +0 -0
- arekit/common/model/labeling/base.py +24 -0
- arekit/common/model/labeling/modes.py +8 -0
- arekit/common/model/labeling/single.py +24 -0
- arekit/common/opinions/__init__.py +0 -0
- arekit/common/opinions/annot/__init__.py +0 -0
- arekit/common/opinions/annot/algo/__init__.py +0 -0
- arekit/common/opinions/annot/algo/base.py +4 -0
- arekit/common/opinions/annot/algo/pair_based.py +99 -0
- arekit/common/opinions/annot/algo/predefined.py +16 -0
- arekit/common/opinions/annot/algo_based.py +55 -0
- arekit/common/opinions/annot/base.py +15 -0
- arekit/common/opinions/base.py +74 -0
- arekit/common/opinions/collection.py +150 -0
- arekit/common/opinions/enums.py +6 -0
- arekit/common/opinions/provider.py +4 -0
- arekit/common/opinions/writer.py +4 -0
- arekit/common/pipeline/__init__.py +0 -0
- arekit/common/pipeline/base.py +25 -0
- arekit/common/pipeline/context.py +36 -0
- arekit/common/pipeline/conts.py +2 -0
- arekit/common/pipeline/items/__init__.py +0 -0
- arekit/common/pipeline/items/base.py +12 -0
- arekit/common/pipeline/items/flatten.py +14 -0
- arekit/common/pipeline/items/handle.py +17 -0
- arekit/common/pipeline/items/iter.py +11 -0
- arekit/common/pipeline/items/map.py +11 -0
- arekit/common/pipeline/items/map_nested.py +13 -0
- arekit/common/synonyms/__init__.py +0 -0
- arekit/common/synonyms/base.py +151 -0
- arekit/common/synonyms/grouping.py +21 -0
- arekit/common/text/__init__.py +0 -0
- arekit/common/text/enums.py +12 -0
- arekit/common/text/parsed.py +42 -0
- arekit/common/text/parser.py +12 -0
- arekit/common/text/partitioning/__init__.py +0 -0
- arekit/common/text/partitioning/base.py +4 -0
- arekit/common/text/partitioning/str.py +36 -0
- arekit/common/text/partitioning/terms.py +35 -0
- arekit/common/text/stemmer.py +16 -0
- arekit/common/text_opinions/__init__.py +0 -0
- arekit/common/text_opinions/base.py +105 -0
- arekit/common/utils.py +129 -0
- arekit/contrib/__init__.py +0 -0
- arekit/contrib/bert/__init__.py +0 -0
- arekit/contrib/bert/input/__init__.py +0 -0
- arekit/contrib/bert/input/providers/__init__.py +0 -0
- arekit/contrib/bert/input/providers/cropped_sample.py +17 -0
- arekit/contrib/bert/input/providers/text_pair.py +62 -0
- arekit/contrib/bert/terms/__init__.py +0 -0
- arekit/contrib/bert/terms/mapper.py +20 -0
- arekit/contrib/networks/__init__.py +0 -0
- arekit/contrib/networks/embedding.py +149 -0
- arekit/contrib/networks/embedding_io.py +18 -0
- arekit/contrib/networks/input/__init__.py +0 -0
- arekit/contrib/networks/input/const.py +6 -0
- arekit/contrib/networks/input/ctx_serialization.py +28 -0
- arekit/contrib/networks/input/embedding/__init__.py +0 -0
- arekit/contrib/networks/input/embedding/matrix.py +29 -0
- arekit/contrib/networks/input/embedding/offsets.py +55 -0
- arekit/contrib/networks/input/formatters/__init__.py +0 -0
- arekit/contrib/networks/input/formatters/pos_mapper.py +22 -0
- arekit/contrib/networks/input/providers/__init__.py +0 -0
- arekit/contrib/networks/input/providers/sample.py +129 -0
- arekit/contrib/networks/input/providers/term_connotation.py +23 -0
- arekit/contrib/networks/input/providers/text.py +24 -0
- arekit/contrib/networks/input/rows_parser.py +47 -0
- arekit/contrib/networks/input/term_types.py +13 -0
- arekit/contrib/networks/input/terms_mapping.py +60 -0
- arekit/contrib/networks/vectorizer.py +6 -0
- arekit/contrib/prompt/__init__.py +0 -0
- arekit/contrib/prompt/sample.py +61 -0
- arekit/contrib/source/__init__.py +0 -0
- arekit/contrib/source/brat/__init__.py +0 -0
- arekit/contrib/source/brat/annot.py +84 -0
- arekit/contrib/source/brat/doc.py +28 -0
- arekit/contrib/source/brat/entities/__init__.py +0 -0
- arekit/contrib/source/brat/entities/compound.py +13 -0
- arekit/contrib/source/brat/entities/entity.py +42 -0
- arekit/contrib/source/brat/entities/parser.py +53 -0
- arekit/contrib/source/brat/opinions/__init__.py +0 -0
- arekit/contrib/source/brat/opinions/converter.py +19 -0
- arekit/contrib/source/brat/relation.py +32 -0
- arekit/contrib/source/brat/sentence.py +69 -0
- arekit/contrib/source/brat/sentences_reader.py +128 -0
- arekit/contrib/source/download.py +41 -0
- arekit/contrib/source/nerel/__init__.py +0 -0
- arekit/contrib/source/nerel/entities.py +55 -0
- arekit/contrib/source/nerel/folding/__init__.py +0 -0
- arekit/contrib/source/nerel/folding/fixed.py +74 -0
- arekit/contrib/source/nerel/io_utils.py +62 -0
- arekit/contrib/source/nerel/labels.py +241 -0
- arekit/contrib/source/nerel/reader.py +46 -0
- arekit/contrib/source/nerel/utils.py +24 -0
- arekit/contrib/source/nerel/versions.py +12 -0
- arekit/contrib/source/nerelbio/__init__.py +0 -0
- arekit/contrib/source/nerelbio/io_utils.py +62 -0
- arekit/contrib/source/nerelbio/labels.py +265 -0
- arekit/contrib/source/nerelbio/reader.py +8 -0
- arekit/contrib/source/nerelbio/versions.py +8 -0
- arekit/contrib/source/ruattitudes/__init__.py +0 -0
- arekit/contrib/source/ruattitudes/collection.py +36 -0
- arekit/contrib/source/ruattitudes/doc.py +51 -0
- arekit/contrib/source/ruattitudes/doc_brat.py +44 -0
- arekit/contrib/source/ruattitudes/entity/__init__.py +0 -0
- arekit/contrib/source/ruattitudes/entity/parser.py +7 -0
- arekit/contrib/source/ruattitudes/io_utils.py +56 -0
- arekit/contrib/source/ruattitudes/labels_fmt.py +12 -0
- arekit/contrib/source/ruattitudes/opinions/__init__.py +0 -0
- arekit/contrib/source/ruattitudes/opinions/base.py +28 -0
- arekit/contrib/source/ruattitudes/opinions/converter.py +37 -0
- arekit/contrib/source/ruattitudes/reader.py +268 -0
- arekit/contrib/source/ruattitudes/sentence.py +73 -0
- arekit/contrib/source/ruattitudes/synonyms.py +17 -0
- arekit/contrib/source/ruattitudes/text_object.py +59 -0
- arekit/contrib/source/rusentiframes/__init__.py +0 -0
- arekit/contrib/source/rusentiframes/collection.py +157 -0
- arekit/contrib/source/rusentiframes/effect.py +24 -0
- arekit/contrib/source/rusentiframes/io_utils.py +19 -0
- arekit/contrib/source/rusentiframes/labels_fmt.py +22 -0
- arekit/contrib/source/rusentiframes/polarity.py +35 -0
- arekit/contrib/source/rusentiframes/role.py +15 -0
- arekit/contrib/source/rusentiframes/state.py +24 -0
- arekit/contrib/source/rusentiframes/types.py +42 -0
- arekit/contrib/source/rusentiframes/value.py +2 -0
- arekit/contrib/source/rusentrel/__init__.py +0 -0
- arekit/contrib/source/rusentrel/const.py +3 -0
- arekit/contrib/source/rusentrel/docs_reader.py +51 -0
- arekit/contrib/source/rusentrel/entities.py +26 -0
- arekit/contrib/source/rusentrel/io_utils.py +125 -0
- arekit/contrib/source/rusentrel/labels_fmt.py +12 -0
- arekit/contrib/source/rusentrel/opinions/__init__.py +0 -0
- arekit/contrib/source/rusentrel/opinions/collection.py +30 -0
- arekit/contrib/source/rusentrel/opinions/converter.py +40 -0
- arekit/contrib/source/rusentrel/opinions/provider.py +54 -0
- arekit/contrib/source/rusentrel/opinions/writer.py +42 -0
- arekit/contrib/source/rusentrel/synonyms.py +17 -0
- arekit/contrib/source/sentinerel/__init__.py +0 -0
- arekit/contrib/source/sentinerel/entities.py +52 -0
- arekit/contrib/source/sentinerel/folding/__init__.py +0 -0
- arekit/contrib/source/sentinerel/folding/factory.py +31 -0
- arekit/contrib/source/sentinerel/folding/fixed.py +70 -0
- arekit/contrib/source/sentinerel/io_utils.py +87 -0
- arekit/contrib/source/sentinerel/labels.py +53 -0
- arekit/contrib/source/sentinerel/labels_scaler.py +30 -0
- arekit/contrib/source/sentinerel/reader.py +42 -0
- arekit/contrib/source/synonyms/__init__.py +0 -0
- arekit/contrib/source/synonyms/utils.py +19 -0
- arekit/contrib/source/zip_utils.py +47 -0
- arekit/contrib/utils/__init__.py +0 -0
- arekit/contrib/utils/bert/__init__.py +0 -0
- arekit/contrib/utils/bert/samplers.py +17 -0
- arekit/contrib/utils/connotations/__init__.py +0 -0
- arekit/contrib/utils/connotations/rusentiframes_sentiment.py +23 -0
- arekit/contrib/utils/data/__init__.py +0 -0
- arekit/contrib/utils/data/contents/__init__.py +0 -0
- arekit/contrib/utils/data/contents/opinions.py +37 -0
- arekit/contrib/utils/data/doc_provider/__init__.py +0 -0
- arekit/contrib/utils/data/doc_provider/dict_based.py +13 -0
- arekit/contrib/utils/data/doc_provider/dir_based.py +53 -0
- arekit/contrib/utils/data/readers/__init__.py +0 -0
- arekit/contrib/utils/data/readers/base.py +7 -0
- arekit/contrib/utils/data/readers/csv_pd.py +38 -0
- arekit/contrib/utils/data/readers/jsonl.py +15 -0
- arekit/contrib/utils/data/service/__init__.py +0 -0
- arekit/contrib/utils/data/service/balance.py +50 -0
- arekit/contrib/utils/data/storages/__init__.py +0 -0
- arekit/contrib/utils/data/storages/jsonl_based.py +18 -0
- arekit/contrib/utils/data/storages/pandas_based.py +123 -0
- arekit/contrib/utils/data/storages/row_cache.py +48 -0
- arekit/contrib/utils/data/writers/__init__.py +0 -0
- arekit/contrib/utils/data/writers/base.py +27 -0
- arekit/contrib/utils/data/writers/csv_native.py +63 -0
- arekit/contrib/utils/data/writers/csv_pd.py +40 -0
- arekit/contrib/utils/data/writers/json_opennre.py +132 -0
- arekit/contrib/utils/data/writers/sqlite_native.py +110 -0
- arekit/contrib/utils/download.py +77 -0
- arekit/contrib/utils/embeddings/__init__.py +0 -0
- arekit/contrib/utils/embeddings/rusvectores.py +58 -0
- arekit/contrib/utils/embeddings/tokens.py +30 -0
- arekit/contrib/utils/entities/__init__.py +0 -0
- arekit/contrib/utils/entities/filter.py +7 -0
- arekit/contrib/utils/entities/formatters/__init__.py +0 -0
- arekit/contrib/utils/entities/formatters/str_display.py +11 -0
- arekit/contrib/utils/entities/formatters/str_simple_sharp_prefixed_fmt.py +15 -0
- arekit/contrib/utils/io_utils/__init__.py +0 -0
- arekit/contrib/utils/io_utils/embedding.py +72 -0
- arekit/contrib/utils/io_utils/opinions.py +37 -0
- arekit/contrib/utils/io_utils/samples.py +79 -0
- arekit/contrib/utils/io_utils/utils.py +39 -0
- arekit/contrib/utils/lexicons/__init__.py +0 -0
- arekit/contrib/utils/lexicons/lexicon.py +41 -0
- arekit/contrib/utils/lexicons/relation.py +42 -0
- arekit/contrib/utils/lexicons/rusentilex.py +37 -0
- arekit/contrib/utils/nn/__init__.py +0 -0
- arekit/contrib/utils/nn/rows.py +83 -0
- arekit/contrib/utils/np_utils/__init__.py +0 -0
- arekit/contrib/utils/np_utils/embedding.py +22 -0
- arekit/contrib/utils/np_utils/npz_utils.py +13 -0
- arekit/contrib/utils/np_utils/vocab.py +20 -0
- arekit/contrib/utils/pipelines/__init__.py +0 -0
- arekit/contrib/utils/pipelines/items/__init__.py +0 -0
- arekit/contrib/utils/pipelines/items/sampling/__init__.py +0 -0
- arekit/contrib/utils/pipelines/items/sampling/base.py +99 -0
- arekit/contrib/utils/pipelines/items/sampling/networks.py +54 -0
- arekit/contrib/utils/pipelines/items/text/__init__.py +0 -0
- arekit/contrib/utils/pipelines/items/text/entities_default.py +23 -0
- arekit/contrib/utils/pipelines/items/text/frames.py +86 -0
- arekit/contrib/utils/pipelines/items/text/frames_lemmatized.py +36 -0
- arekit/contrib/utils/pipelines/items/text/frames_negation.py +32 -0
- arekit/contrib/utils/pipelines/items/text/terms_splitter.py +10 -0
- arekit/contrib/utils/pipelines/items/text/tokenizer.py +107 -0
- arekit/contrib/utils/pipelines/items/text/translator.py +135 -0
- arekit/contrib/utils/pipelines/opinion_collections.py +85 -0
- arekit/contrib/utils/pipelines/sources/__init__.py +0 -0
- arekit/contrib/utils/pipelines/sources/nerel/__init__.py +0 -0
- arekit/contrib/utils/pipelines/sources/nerel/doc_provider.py +27 -0
- arekit/contrib/utils/pipelines/sources/nerel/extract_text_relations.py +65 -0
- arekit/contrib/utils/pipelines/sources/nerel/labels_fmt.py +60 -0
- arekit/contrib/utils/pipelines/sources/nerel_bio/__init__.py +0 -0
- arekit/contrib/utils/pipelines/sources/nerel_bio/doc_provider.py +29 -0
- arekit/contrib/utils/pipelines/sources/nerel_bio/extrat_text_relations.py +64 -0
- arekit/contrib/utils/pipelines/sources/nerel_bio/labels_fmt.py +79 -0
- arekit/contrib/utils/pipelines/sources/ruattitudes/__init__.py +0 -0
- arekit/contrib/utils/pipelines/sources/ruattitudes/doc_provider.py +56 -0
- arekit/contrib/utils/pipelines/sources/ruattitudes/entity_filter.py +20 -0
- arekit/contrib/utils/pipelines/sources/ruattitudes/extract_text_opinions.py +65 -0
- arekit/contrib/utils/pipelines/sources/rusentrel/__init__.py +0 -0
- arekit/contrib/utils/pipelines/sources/rusentrel/doc_provider.py +21 -0
- arekit/contrib/utils/pipelines/sources/rusentrel/extract_text_opinions.py +107 -0
- arekit/contrib/utils/pipelines/sources/sentinerel/__init__.py +0 -0
- arekit/contrib/utils/pipelines/sources/sentinerel/doc_provider.py +29 -0
- arekit/contrib/utils/pipelines/sources/sentinerel/entity_filter.py +62 -0
- arekit/contrib/utils/pipelines/sources/sentinerel/extract_text_opinions.py +180 -0
- arekit/contrib/utils/pipelines/sources/sentinerel/labels_fmt.py +50 -0
- arekit/contrib/utils/pipelines/text_opinion/__init__.py +0 -0
- arekit/contrib/utils/pipelines/text_opinion/annot/__init__.py +0 -0
- arekit/contrib/utils/pipelines/text_opinion/annot/algo_based.py +34 -0
- arekit/contrib/utils/pipelines/text_opinion/annot/predefined.py +88 -0
- arekit/contrib/utils/pipelines/text_opinion/extraction.py +93 -0
- arekit/contrib/utils/pipelines/text_opinion/filters/__init__.py +0 -0
- arekit/contrib/utils/pipelines/text_opinion/filters/base.py +4 -0
- arekit/contrib/utils/pipelines/text_opinion/filters/distance_based.py +16 -0
- arekit/contrib/utils/pipelines/text_opinion/filters/entity_based.py +29 -0
- arekit/contrib/utils/pipelines/text_opinion/filters/limitation.py +26 -0
- arekit/contrib/utils/processing/__init__.py +0 -0
- arekit/contrib/utils/processing/languages/__init__.py +0 -0
- arekit/contrib/utils/processing/languages/mods.py +12 -0
- arekit/contrib/utils/processing/languages/pos.py +23 -0
- arekit/contrib/utils/processing/languages/ru/__init__.py +0 -0
- arekit/contrib/utils/processing/languages/ru/cases.py +78 -0
- arekit/contrib/utils/processing/languages/ru/constants.py +6 -0
- arekit/contrib/utils/processing/languages/ru/mods.py +13 -0
- arekit/contrib/utils/processing/languages/ru/number.py +23 -0
- arekit/contrib/utils/processing/languages/ru/pos_service.py +36 -0
- arekit/contrib/utils/processing/lemmatization/__init__.py +0 -0
- arekit/contrib/utils/processing/lemmatization/mystem.py +51 -0
- arekit/contrib/utils/processing/pos/__init__.py +0 -0
- arekit/contrib/utils/processing/pos/base.py +12 -0
- arekit/contrib/utils/processing/pos/mystem_wrap.py +134 -0
- arekit/contrib/utils/processing/pos/russian.py +10 -0
- arekit/contrib/utils/processing/text/__init__.py +0 -0
- arekit/contrib/utils/processing/text/tokens.py +127 -0
- arekit/contrib/utils/resources.py +25 -0
- arekit/contrib/utils/serializer.py +43 -0
- arekit/contrib/utils/sources/__init__.py +0 -0
- arekit/contrib/utils/sources/sentinerel/__init__.py +0 -0
- arekit/contrib/utils/sources/sentinerel/text_opinion/__init__.py +0 -0
- arekit/contrib/utils/sources/sentinerel/text_opinion/prof_per_org_filter.py +63 -0
- arekit/contrib/utils/synonyms/__init__.py +0 -0
- arekit/contrib/utils/synonyms/simple.py +15 -0
- arekit/contrib/utils/synonyms/stemmer_based.py +38 -0
- arekit/contrib/utils/vectorizers/__init__.py +0 -0
- arekit/contrib/utils/vectorizers/bpe.py +93 -0
- arekit/contrib/utils/vectorizers/random_norm.py +39 -0
- arekit/download_data.py +11 -0
- arekit-0.24.0.dist-info/LICENSE +21 -0
- arekit-0.24.0.dist-info/METADATA +23 -0
- arekit-0.24.0.dist-info/RECORD +374 -0
- arekit-0.24.0.dist-info/WHEEL +5 -0
- arekit-0.24.0.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,268 @@
|
|
|
1
|
+
from arekit.common.utils import split_by_whitespaces
|
|
2
|
+
from arekit.contrib.source.ruattitudes.doc import RuAttitudesDocument
|
|
3
|
+
from arekit.contrib.source.ruattitudes.opinions.base import SentenceOpinion
|
|
4
|
+
from arekit.contrib.source.ruattitudes.sentence import RuAttitudesSentence
|
|
5
|
+
from arekit.contrib.source.ruattitudes.text_object import TextObject
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
class RuAttitudesFormatReader(object):
    """Parser of the line-oriented RuAttitudes collection format.

    The input is scanned line by line and every line is dispatched by the key
    it contains (see the ``*_KEY`` constants below). Annotation lines
    (``Object:``, ``Attitude:``) are accumulated and attached to the sentence
    created for the next ``Title:`` / ``Text:`` line; a ``DOC_SEP_KEY`` line
    closes the current document.
    """

    DOC_SEP_KEY = '--------'
    FILE_KEY = "File:"
    OBJ_KEY = "Object:"
    TITLE_KEY = "Title:"
    SINDEX_KEY = "Sentence:"
    OPINION_KEY = "Attitude:"
    STEXT_KEY = "Text:"
    TERMS_IN_TITLE = "TermsInTitle:"
    TERMS_IN_TEXT = "TermsInText:"
    FRAMEVAR_TITLE = "FrameVariant:"

    # Marker that may follow an object description (see `__parse_object`).
    AUTH_LABEL = '<AUTH>'

    def __iter__(self):
        # Stub: instances provide no iteration of their own; use the static
        # `iter_docs` / `iter_docs_inds` generators instead.
        pass

    # region public methods

    @staticmethod
    def iter_docs_inds(input_file, get_doc_index_func):
        """Yield the index of every document in `input_file` without parsing it.

        input_file: binary file-like object (lines are decoded as UTF-8).
        get_doc_index_func: callable mapping the zero-based local document
            position to the index that is yielded.

        An index is yielded for every separator that closes a titled document,
        plus one more for trailing content that is not followed by a separator.
        """
        assert callable(get_doc_index_func)

        title = None
        local_doc_ind = 0
        # True while the document being scanned has produced at least one
        # sentence (title or text) that has not been emitted yet. It mirrors
        # the `len(sentences) > 0` condition of `iter_docs`.
        has_sentences = False

        for line in RuAttitudesFormatReader.__iter_lines(input_file):

            if RuAttitudesFormatReader.__check_is_title(line):
                # We use a placeholder, there is no need in actual value out there.
                title = "title"
                has_sentences = True

            if line.startswith(RuAttitudesFormatReader.STEXT_KEY):
                has_sentences = True

            if RuAttitudesFormatReader.__check_is_doc_sep(line=line, title=title):
                yield RuAttitudesFormatReader.__assign_doc_index(
                    doc_index_func=get_doc_index_func,
                    local_index=local_doc_ind)
                local_doc_ind += 1
                title = None
                # The document has been emitted: without this reset an input
                # ending with a separator produced one extra, non-existent
                # document index.
                has_sentences = False

        if has_sentences:
            yield RuAttitudesFormatReader.__assign_doc_index(
                doc_index_func=get_doc_index_func,
                local_index=local_doc_ind)

    @staticmethod
    def iter_docs(input_file, get_doc_index_func):
        """Parse `input_file` and yield one `RuAttitudesDocument` per document.

        input_file: binary file-like object (lines are decoded as UTF-8).
        get_doc_index_func: callable mapping the zero-based local document
            position to the `doc_index` passed to the document.

        The checks below are independent `if` statements (not `elif`), so a
        single line may trigger several of them; their order is significant.
        Lines carrying `FILE_KEY` or `FRAMEVAR_TITLE` are recognised by the
        format but ignored here (TODO: frame variant information is omitted).
        """
        assert callable(get_doc_index_func)

        reset = False
        title = None
        title_terms_count = None
        text_terms_count = None
        sentences = []
        opinions_list = []
        objects_list = []
        s_index = 0
        local_doc_ind = 0

        for line in RuAttitudesFormatReader.__iter_lines(input_file):

            if RuAttitudesFormatReader.OBJ_KEY in line:
                text_object = RuAttitudesFormatReader.__parse_object(line)
                objects_list.append(text_object)

            if RuAttitudesFormatReader.OPINION_KEY in line:
                sentence_opin = RuAttitudesFormatReader.__parse_sentence_opin(line)
                opinions_list.append(sentence_opin)

            if RuAttitudesFormatReader.TERMS_IN_TITLE in line:
                title_terms_count = RuAttitudesFormatReader.__parse_terms_in_title_count(line)

            if RuAttitudesFormatReader.SINDEX_KEY in line:
                s_index = RuAttitudesFormatReader.__parse_sentence_index(line)

            if RuAttitudesFormatReader.__check_is_title(line):
                # The title is stored as a sentence with the index -1.
                title = RuAttitudesSentence(
                    is_title=True,
                    text=RuAttitudesFormatReader.__parse_sentence(line, True),
                    sentence_opins=opinions_list,
                    objects_list=objects_list,
                    sentence_index=-1)
                sentences.append(title)
                t_len = RuAttitudesFormatReader.__calculate_terms_in_line(line)
                assert title_terms_count == t_len or title_terms_count is None
                reset = True

            # Only lines that *start* with the key are sentences; this keeps
            # "TermsInText:" lines (which contain "Text:") out of this branch.
            if line.startswith(RuAttitudesFormatReader.STEXT_KEY):
                sentence = RuAttitudesSentence(
                    is_title=False,
                    text=RuAttitudesFormatReader.__parse_sentence(line, False),
                    sentence_opins=opinions_list,
                    objects_list=objects_list,
                    sentence_index=s_index)
                sentences.append(sentence)
                t_len = RuAttitudesFormatReader.__calculate_terms_in_line(line)
                assert text_terms_count == t_len or text_terms_count is None
                reset = True

            # NOTE(review): `title` is never cleared here, so after the first
            # titled document every separator line closes a document.
            if RuAttitudesFormatReader.__check_is_doc_sep(line=line, title=title):
                doc_index = RuAttitudesFormatReader.__assign_doc_index(
                    doc_index_func=get_doc_index_func,
                    local_index=local_doc_ind)
                yield RuAttitudesDocument(sentences=sentences,
                                          doc_index=doc_index)
                local_doc_ind += 1
                sentences = []
                reset = True

            if RuAttitudesFormatReader.TERMS_IN_TEXT in line:
                text_terms_count = RuAttitudesFormatReader.__parse_terms_in_text_count(line)

            if reset:
                # Fresh lists: the previous ones are now owned by a sentence.
                # NOTE(review): `text_terms_count` is intentionally left as is
                # (the original code does not reset it) -- confirm.
                opinions_list = []
                objects_list = []
                title_terms_count = None
                reset = False

        # Trailing document that is not terminated by a separator line.
        if len(sentences) > 0:
            doc_index = RuAttitudesFormatReader.__assign_doc_index(
                doc_index_func=get_doc_index_func,
                local_index=local_doc_ind)
            yield RuAttitudesDocument(sentences=sentences,
                                      doc_index=doc_index)

    # endregion

    # region private methods

    @staticmethod
    def __assign_doc_index(doc_index_func, local_index):
        """Return `doc_index_func(local_index)`."""
        assert callable(doc_index_func)
        return doc_index_func(local_index)

    @staticmethod
    def __check_is_doc_sep(line, title):
        """Return True for a separator line seen after a title was registered."""
        return RuAttitudesFormatReader.DOC_SEP_KEY in line and title is not None

    @staticmethod
    def __check_is_title(line):
        """Return True when `line` contains `TITLE_KEY`.

        NOTE(review): this is a substring test, hence a "TermsInTitle:" line
        matches as well; confirm whether such lines occur in real collections.
        """
        return RuAttitudesFormatReader.TITLE_KEY in line

    @staticmethod
    def __iter_lines(input_file):
        """Yield every line of the binary `input_file` decoded as UTF-8."""
        for line in input_file.readlines():
            yield line.decode('utf-8')

    @staticmethod
    def __calculate_terms_in_line(line):
        """Return the amount of whitespace-separated parts of the whole `line`."""
        assert isinstance(line, str)
        return len(split_by_whitespaces(line))

    @staticmethod
    def __parse_sentence(line, is_title):
        """Return `line` without its leading key, stripped of outer whitespace.

        The key is `TITLE_KEY` for titles and `STEXT_KEY` otherwise; exactly
        `len(key)` leading characters are cut off, i.e. the line is assumed to
        start with the key.
        """
        assert isinstance(is_title, bool)

        if is_title:
            key = RuAttitudesFormatReader.TITLE_KEY
        else:
            key = RuAttitudesFormatReader.STEXT_KEY

        return line[len(key):].strip()

    @staticmethod
    def __parse_sentence_opin(line):
        """Build a `SentenceOpinion` from an ``Attitude:`` line.

        Fields read from the line: ``b:(<int label>)``,
        ``oi:[<source id>,<target id>]`` and ``si:{<tag>}``.
        Raises ValueError when a field is absent or not convertible to int.
        """
        line = line[len(RuAttitudesFormatReader.OPINION_KEY):]

        label_from = line.index('b:(')
        label_to = line.index(')', label_from)
        label = int(line[label_from + 3:label_to])

        ids_from = line.index('oi:[')
        ids_to = line.index(']', ids_from)
        source_id, target_id = line[ids_from + 4:ids_to].split(',')

        tag_from = line.index('si:{')
        tag_to = line.index('}', tag_from)
        opinion_key = line[tag_from + 4:tag_to]

        return SentenceOpinion(source_id=int(source_id),
                               target_id=int(target_id),
                               label_int=label,
                               tag=opinion_key)

    @staticmethod
    def __parse_object(line):
        """Build a `TextObject` from an ``Object:`` line.

        Fields read from the line: ``oi:[<id in sentence>]``, the value
        between the first pair of single quotes,
        ``b:(<term index>,<terms count>)``, ``si:{<synonym group index>}``,
        an optional type tag (see `__try_get_type`) and the optional
        `AUTH_LABEL` marker.
        Raises ValueError when a mandatory field is absent or malformed.
        """
        assert isinstance(line, str)

        line = line[len(RuAttitudesFormatReader.OBJ_KEY):]

        obj_ind_begin = line.index('oi:[', 0)
        obj_ind_end = line.index(']', obj_ind_begin + 1)

        o_begin = line.index("'", 0)
        o_end = line.index("'", o_begin + 1)

        b_from = line.index('b:(')
        b_to = line.index(')', b_from)

        id_in_sentence = int(line[obj_ind_begin + 4:obj_ind_end])
        term_index, length = line[b_from + 3:b_to].split(',')
        value = line[o_begin + 1:o_end]

        obj_type = RuAttitudesFormatReader.__try_get_type(line)

        sg_from = line.index('si:{')
        sg_to = line.index('}', sg_from)
        group_index = int(line[sg_from + 4:sg_to])

        is_auth = RuAttitudesFormatReader.AUTH_LABEL in line

        return TextObject(id_in_sentence=id_in_sentence,
                          value=value,
                          obj_type=obj_type,
                          position=int(term_index),
                          terms_count=int(length),
                          syn_group_index=group_index,
                          is_auth=is_auth)

    @staticmethod
    def __parse_terms_in_title_count(line):
        """Return the integer that follows `TERMS_IN_TITLE` in `line`."""
        return int(line[len(RuAttitudesFormatReader.TERMS_IN_TITLE):])

    @staticmethod
    def __parse_terms_in_text_count(line):
        """Return the integer that follows `TERMS_IN_TEXT` in `line`."""
        return int(line[len(RuAttitudesFormatReader.TERMS_IN_TEXT):])

    @staticmethod
    def __parse_sentence_index(line):
        """Return the integer that follows `SINDEX_KEY` in `line`."""
        return int(line[len(RuAttitudesFormatReader.SINDEX_KEY):])

    @staticmethod
    def __try_get_type(line):
        """Return the object type written in `line`, or None when there is none."""

        # Tag, utilized in RuAttitudes-2.0 format.
        template = 'type:'
        if template in line:
            is_auth = RuAttitudesFormatReader.AUTH_LABEL in line
            t_from = line.index(template)
            # The type runs up to the opening character of the <AUTH> marker
            # when the marker is present, otherwise to the end of the line.
            if is_auth:
                t_to = line.index(RuAttitudesFormatReader.AUTH_LABEL[0], t_from)
            else:
                t_to = len(line)
            return line[t_from + len(template):t_to].strip()

        # Tag, utilized in RuAttitudes-1.* format.
        template = 't:['
        if template in line:
            t_from = line.index(template)
            t_to = line.index(']', t_from)
            return line[t_from + len(template):t_to].strip()

        return None

    # endregion
|
|
@@ -0,0 +1,73 @@
|
|
|
1
|
+
from arekit.common.docs.sentence import BaseDocumentSentence
|
|
2
|
+
from arekit.contrib.source.ruattitudes.opinions.base import SentenceOpinion
|
|
3
|
+
|
|
4
|
+
|
|
5
|
+
class RuAttitudesSentence(BaseDocumentSentence):
    """Sentence (or title) of a RuAttitudes document.

    Besides the text handled by the base class, the sentence keeps the
    opinions and text objects it was created with, its index, and a
    reference to the owner assigned later through `set_owner`.
    """

    def __init__(self, is_title, text, sentence_opins, objects_list, sentence_index):
        assert isinstance(is_title, bool)
        assert isinstance(sentence_opins, list)
        assert isinstance(objects_list, list)
        assert isinstance(sentence_index, int)
        super(RuAttitudesSentence, self).__init__(text)

        self.__owner = None
        self.__is_title = is_title
        self.__sentence_index = sentence_index
        self.__sentence_opins = sentence_opins
        self.__objects = objects_list

    # region properties

    @property
    def SentenceIndex(self):
        """Index passed to the constructor as `sentence_index`."""
        return self.__sentence_index

    @property
    def IsTitle(self):
        """Flag passed to the constructor as `is_title`."""
        return self.__is_title

    @property
    def Owner(self):
        """Owner assigned with `set_owner`, or None when not assigned yet."""
        return self.__owner

    @property
    def ObjectsCount(self):
        """Amount of text objects kept by the sentence."""
        return len(self.__objects)

    # endregion

    # region public methods

    def set_owner(self, owner):
        """Assign the owner once; a second assignment raises Exception."""
        already_declared = self.__owner is not None
        if already_declared:
            raise Exception("Owner is already declared")
        self.__owner = owner

    def get_objects(self, sentence_opin):
        """Return the (source, target) objects referred to by `sentence_opin`.

        `SourceID` and `TargetID` of the opinion are used as positions in
        the objects list of this sentence.
        """
        assert isinstance(sentence_opin, SentenceOpinion)
        return (self.__objects[sentence_opin.SourceID],
                self.__objects[sentence_opin.TargetID])

    def get_doc_level_text_object_id(self, text_object_ind):
        """Shift a sentence-level object index by the amount the owner reports
        via `get_objects_declared_before` for this sentence index.

        The owner has to be assigned (see `set_owner`) before calling this.
        """
        declared_before = self.__owner.get_objects_declared_before(self.SentenceIndex)
        return text_object_ind + declared_before

    def iter_objects(self):
        """Iterate over the text objects of the sentence in stored order."""
        yield from self.__objects

    def find_sentence_opin_by_key(self, key):
        """Return the first opinion whose `Tag` equals `key`, otherwise None."""
        assert key is not None
        matching = (opin for opin in self.__sentence_opins if opin.Tag == key)
        return next(matching, None)

    def iter_sentence_opins(self):
        """Iterate over the opinions of the sentence in stored order."""
        yield from self.__sentence_opins

    # endregion
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
from arekit.contrib.source.ruattitudes.io_utils import RuAttitudesIOUtils
|
|
2
|
+
from arekit.contrib.source.synonyms.utils import iter_synonym_groups
|
|
3
|
+
|
|
4
|
+
|
|
5
|
+
class RuAttitudesSynonymsCollectionHelper(object):
    """ Helper for iterating synonym groups of the RuAttitudes collection. """

    @staticmethod
    def iter_groups(version):
        """ Lazily yield the groups that RuAttitudesIOUtils.iter_from_zip
            produces for the synonyms inner path of the given version.

            Every opened file is handed over to iter_synonym_groups.
        """

        def load_groups(input_file):
            return iter_synonym_groups(
                input_file,
                desc="Loading RuAttitudes SynonymsCollection")

        yield from RuAttitudesIOUtils.iter_from_zip(
            inner_path=RuAttitudesIOUtils.get_synonyms_innerpath(),
            process_func=load_groups,
            version=version)
|
|
@@ -0,0 +1,59 @@
|
|
|
1
|
+
from arekit.common.bound import Bound
|
|
2
|
+
from arekit.contrib.source.brat.entities.entity import BratEntity
|
|
3
|
+
|
|
4
|
+
|
|
5
|
+
class TextObject(object):
    """
    Considering any part of text, labeled by 'position', and 'type'
    The latter is used to emphasize the entity type.
    """

    def __init__(self, id_in_sentence, value, obj_type, position, terms_count, syn_group_index, is_auth):
        """ id_in_sentence: int
            value: str
            obj_type: str or None
            position: int
                becomes the position of the object bound.
            terms_count: int
                strictly positive; becomes the length of the object bound.
            syn_group_index: int
            is_auth: bool
        """
        assert isinstance(id_in_sentence, int)
        assert isinstance(value, str)
        assert isinstance(position, int)
        assert isinstance(terms_count, int) and terms_count > 0
        assert isinstance(obj_type, str) or obj_type is None
        assert isinstance(syn_group_index, int)
        assert isinstance(is_auth, bool)
        self.__id_in_sentence = id_in_sentence
        self.__value = value
        self.__type = obj_type
        self.__bound = Bound(pos=position, length=terms_count)
        self.__syn_group_index = syn_group_index
        self.__is_auth = is_auth

    def to_entity(self, to_doc_id_func):
        """ Compose a BratEntity from this object.

            to_doc_id_func: callable
                receives the sentence-level id; its result is used as the
                document-level id of the entity.
        """
        assert callable(to_doc_id_func)
        doc_level_id = to_doc_id_func(self.__id_in_sentence)
        # An empty value is substituted with a placeholder.
        entity_value = '[empty]' if not self.__value else self.__value
        begin = self.__bound.Position
        end = begin + self.__bound.Length
        return BratEntity(id_in_doc=doc_level_id,
                          value=entity_value,
                          e_type=self.__type,
                          index_begin=begin,
                          index_end=end,
                          group_index=self.__syn_group_index,
                          # In the case of RuAttitudes collection we do not support childs.
                          childs=None)

    # region properties

    @property
    def Value(self):
        """ Value passed to the initializer. """
        return self.__value

    @property
    def Type(self):
        """ Object type passed to the initializer (might be None). """
        return self.__type

    @property
    def IdInSentence(self):
        """ Sentence-level identifier passed to the initializer. """
        return self.__id_in_sentence

    @property
    def Bound(self):
        """ Bound built from the position and terms count. """
        return self.__bound

    @property
    def IsAuthorized(self):
        """ The is_auth flag passed to the initializer. """
        return self.__is_auth

    # endregion
|
|
File without changes
|
|
@@ -0,0 +1,157 @@
|
|
|
1
|
+
import json
|
|
2
|
+
|
|
3
|
+
from arekit.common.labels.str_fmt import StringLabelsFormatter
|
|
4
|
+
from arekit.contrib.source.rusentiframes.effect import FrameEffect
|
|
5
|
+
from arekit.contrib.source.rusentiframes.io_utils import RuSentiFramesIOUtils
|
|
6
|
+
from arekit.contrib.source.rusentiframes.types import RuSentiFramesVersions
|
|
7
|
+
from arekit.contrib.source.rusentiframes.labels_fmt import RuSentiFramesLabelsFormatter, \
|
|
8
|
+
RuSentiFramesEffectLabelsFormatter
|
|
9
|
+
from arekit.contrib.source.rusentiframes.polarity import RuSentiFramesFramePolarity
|
|
10
|
+
from arekit.contrib.source.rusentiframes.role import FrameRole
|
|
11
|
+
from arekit.contrib.source.rusentiframes.state import FrameState
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
class RuSentiFramesCollection(object):
    """ Accessor over the frames data: a dict that maps a frame id onto
        the description of the related frame.
    """

    __frames_key = "frames"
    __polarity_key = "polarity"
    __state_key = "state"
    __effect_key = "effect"
    __variants_key = "variants"

    def __init__(self, data, labels_fmt, effect_labels_fmt, lowercase_variants=True):
        """ data: dict
                Maps frame id onto the frame description. The methods of
                this class read the following entries of a description:
                "variants", "title", "roles" and "frames"; the latter may
                contain "polarity", "state" and "effect" lists.
            labels_fmt: StringLabelsFormatter
                converts label strings of polarities and states.
            effect_labels_fmt: StringLabelsFormatter
                converts label strings of effects.
            lowercase_variants: bool
                If 'True', forcely treat frame-variants as case-insensitive (lowercased)
                or avoiding lowercasing operation in case of 'False'.
                NOTE: lowercasing replaces the variants lists inside of
                the passed `data`, i.e. the argument is modified in place.
        """
        assert isinstance(data, dict)
        assert isinstance(labels_fmt, StringLabelsFormatter)
        assert isinstance(effect_labels_fmt, StringLabelsFormatter)
        self.__data = data
        self.__labels_fmt = labels_fmt
        self.__effect_labels_fmt = effect_labels_fmt

        if not lowercase_variants:
            return

        for frame in self.__data.values():
            frame[self.__variants_key] = [text.lower() for text in frame[self.__variants_key]]

    # region classmethods

    @classmethod
    def read(cls, version, labels_fmt, effect_labels_fmt):
        """ Read the collection of the given version by means of
            RuSentiFramesIOUtils.read_from_zip.
        """
        assert isinstance(version, RuSentiFramesVersions)
        assert isinstance(labels_fmt, RuSentiFramesLabelsFormatter)
        assert isinstance(effect_labels_fmt, RuSentiFramesEffectLabelsFormatter)

        def parse(input_file):
            return cls.__from_json(input_file=input_file,
                                   labels_fmt=labels_fmt,
                                   effect_labels_fmt=effect_labels_fmt)

        return RuSentiFramesIOUtils.read_from_zip(
            inner_path=RuSentiFramesIOUtils.get_collection_filepath(),
            process_func=parse,
            version=version)

    @classmethod
    def __from_json(cls, input_file, labels_fmt, effect_labels_fmt):
        """ Create the collection from the JSON contents of `input_file`. """
        contents = json.load(input_file)
        return cls(data=contents,
                   labels_fmt=labels_fmt,
                   effect_labels_fmt=effect_labels_fmt)

    # endregion

    # region public 'try get' methods

    def try_get_frame_polarity(self, frame_id, role_src, role_dest):
        """ Return the first polarity of the frame declared for the pair
            (role_src, role_dest), or None when the frame has no
            polarities or none of them matches the pair.
        """
        assert isinstance(role_src, str)
        assert isinstance(role_dest, str)

        if not self.__check_has_frame_polarity_key(frame_id):
            return None

        entries = self.__data[frame_id][self.__frames_key][self.__polarity_key]
        found = next((entry for entry in entries
                      if entry[0] == role_src and entry[1] == role_dest),
                     None)
        if found is None:
            return None
        return self.__frame_polarity_from_args(found)

    # endregion

    # region public 'get' methods

    def get_frame_roles(self, frame_id):
        """ Return FrameRole instances, one per item of the frame "roles". """
        assert isinstance(frame_id, str)
        roles = self.__data[frame_id]["roles"]
        return [FrameRole(source=source, description=description)
                for source, description in roles.items()]

    def get_frame_polarities(self, frame_id):
        """ Return the polarities of the frame; empty list if none declared. """
        assert isinstance(frame_id, str)

        if self.__check_has_frame_polarity_key(frame_id):
            entries = self.__data[frame_id][self.__frames_key][self.__polarity_key]
            return [self.__frame_polarity_from_args(entry) for entry in entries]

        return []

    def get_frame_states(self, frame_id):
        """ Return the states of the frame; empty list if none declared.

            Every entry is read as (role, label string, prob).
        """
        assert isinstance(frame_id, str)

        if self.__state_key in self.__data[frame_id][self.__frames_key]:
            entries = self.__data[frame_id][self.__frames_key][self.__state_key]
            return [FrameState(role=entry[0],
                               label=self.__labels_fmt.str_to_label(entry[1]),
                               prob=entry[2])
                    for entry in entries]

        return []

    def get_frame_titles(self, frame_id):
        """ Return the "title" entry of the frame. """
        assert isinstance(frame_id, str)
        frame = self.__data[frame_id]
        return frame["title"]

    def get_frame_variants(self, frame_id):
        """ Return the variants list of the frame. """
        frame = self.__data[frame_id]
        return frame[self.__variants_key]

    def get_frame_values(self, frame_id):
        """ Placeholder: always returns None. """
        assert isinstance(frame_id, str)
        # TODO. Not implemented yet.
        return None

    def get_frame_effects(self, frame_id):
        """ Return the effects of the frame; empty list if none declared.

            Every entry is read as (role, label string, prob).
        """
        assert isinstance(frame_id, str)

        if self.__effect_key in self.__data[frame_id][self.__frames_key]:
            entries = self.__data[frame_id][self.__frames_key][self.__effect_key]
            return [FrameEffect(role=entry[0],
                                label=self.__effect_labels_fmt.str_to_label(entry[1]),
                                prob=entry[2])
                    for entry in entries]

        return []

    # endregion

    # region public 'iter' methods

    def iter_frames_ids(self):
        """ Iterate over the ids of all the frames. """
        yield from self.__data.keys()

    def iter_frame_id_and_variants(self):
        """ Iterate over (frame id, variant) pairs of all the frames. """
        for frame_id, frame in self.__data.items():
            for variant in frame[self.__variants_key]:
                yield frame_id, variant

    # endregion

    # region private methods

    def __check_has_frame_polarity_key(self, frame_id):
        """ Check whether the "frames" entry of the frame has polarities. """
        frames = self.__data[frame_id][self.__frames_key]
        return self.__polarity_key in frames

    def __frame_polarity_from_args(self, args):
        """ Compose a polarity from (role_src, role_dest, label string, prob). """
        label = self.__labels_fmt.str_to_label(args[2])
        return RuSentiFramesFramePolarity(role_src=args[0],
                                          role_dest=args[1],
                                          label=label,
                                          prob=args[3])

    # endregion
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
from arekit.common.labels.base import Label
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
class FrameEffect(object):
    """ Immutable triplet (role, label, prob) that describes a frame effect. """

    def __init__(self, role, label, prob):
        """ role: str
            label: Label
            prob: float
        """
        assert isinstance(role, str)
        assert isinstance(label, Label)
        assert isinstance(prob, float)
        self.__role, self.__label, self.__prob = role, label, prob

    @property
    def Role(self):
        """ Role passed to the initializer. """
        return self.__role

    @property
    def Label(self):
        """ Label passed to the initializer. """
        return self.__label

    @property
    def Prob(self):
        """ Probability value passed to the initializer. """
        return self.__prob
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
from os import path
|
|
2
|
+
|
|
3
|
+
from arekit.contrib.source.zip_utils import ZipArchiveUtils
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
class RuSentiFramesIOUtils(ZipArchiveUtils):
    """ Paths related to the RuSentiFrames archive. """

    # region internal methods

    @staticmethod
    def get_archive_filepath(version):
        """ Path of the archive of the given version (str),
            located in the data root directory.
        """
        assert isinstance(version, str)
        data_root = RuSentiFramesIOUtils.get_data_root()
        archive_name = "rusentiframes-{version}.zip".format(version=version)
        return path.join(data_root, archive_name)

    @staticmethod
    def get_collection_filepath():
        """ Name of the collection file inside of the archive. """
        collection_filename = "frames.json"
        return collection_filename

    # endregion
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
from arekit.common.labels.base import Label
|
|
2
|
+
from arekit.common.labels.str_fmt import StringLabelsFormatter
|
|
3
|
+
|
|
4
|
+
|
|
5
|
+
class RuSentiFramesLabelsFormatter(StringLabelsFormatter):
    """ Labels formatter which utilizes 'neg' and 'pos' strings. """

    def __init__(self, pos_label_type, neg_label_type):
        """ pos_label_type: subclass of Label, assigned to 'pos'
            neg_label_type: subclass of Label, assigned to 'neg'
        """
        assert issubclass(pos_label_type, Label)
        assert issubclass(neg_label_type, Label)
        string_to_label_type = dict()
        string_to_label_type['neg'] = neg_label_type
        string_to_label_type['pos'] = pos_label_type
        super().__init__(stol=string_to_label_type)
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
class RuSentiFramesEffectLabelsFormatter(StringLabelsFormatter):
    """ Effect formatter utilizes '-' and '+' signs.
    """

    def __init__(self, pos_label_type, neg_label_type):
        """ pos_label_type: subclass of Label, assigned to '+'
            neg_label_type: subclass of Label, assigned to '-'
        """
        assert issubclass(pos_label_type, Label)
        assert issubclass(neg_label_type, Label)
        sign_to_label_type = dict()
        sign_to_label_type['-'] = neg_label_type
        sign_to_label_type['+'] = pos_label_type
        super().__init__(stol=sign_to_label_type)
|
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
from arekit.common.frames.connotations.descriptor import FrameConnotationDescriptor
|
|
2
|
+
from arekit.common.labels.base import Label
|
|
3
|
+
|
|
4
|
+
|
|
5
|
+
class RuSentiFramesFramePolarity(FrameConnotationDescriptor):
    """
    Polarity description between source (Agent) towards dest (Theme)
    The latter are related to roles of frame polarity.
    """

    def __init__(self, role_src, role_dest, label, prob):
        """ role_src: str
            role_dest: str
            label: Label
            prob: float
        """
        assert isinstance(role_src, str)
        assert isinstance(role_dest, str)
        assert isinstance(label, Label)
        assert isinstance(prob, float)
        self.__role_src, self.__role_dest = role_src, role_dest
        self.__label, self.__prob = label, prob

    @property
    def Source(self):
        """ Source role passed to the initializer. """
        return self.__role_src

    @property
    def Destination(self):
        """ Destination role passed to the initializer. """
        return self.__role_dest

    @property
    def Label(self):
        """ Label passed to the initializer. """
        return self.__label

    @property
    def Prob(self):
        """ Probability value passed to the initializer. """
        return self.__prob
|