arekit 0.24.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- arekit/__init__.py +0 -0
- arekit/common/__init__.py +0 -0
- arekit/common/bound.py +48 -0
- arekit/common/context/__init__.py +0 -0
- arekit/common/context/terms_mapper.py +51 -0
- arekit/common/context/token.py +16 -0
- arekit/common/data/__init__.py +0 -0
- arekit/common/data/const.py +21 -0
- arekit/common/data/doc_provider.py +6 -0
- arekit/common/data/input/__init__.py +0 -0
- arekit/common/data/input/providers/__init__.py +0 -0
- arekit/common/data/input/providers/columns/__init__.py +0 -0
- arekit/common/data/input/providers/columns/base.py +9 -0
- arekit/common/data/input/providers/columns/sample.py +59 -0
- arekit/common/data/input/providers/const.py +3 -0
- arekit/common/data/input/providers/contents.py +9 -0
- arekit/common/data/input/providers/instances/__init__.py +0 -0
- arekit/common/data/input/providers/instances/base.py +14 -0
- arekit/common/data/input/providers/instances/multiple.py +27 -0
- arekit/common/data/input/providers/instances/single.py +8 -0
- arekit/common/data/input/providers/label/__init__.py +0 -0
- arekit/common/data/input/providers/label/base.py +24 -0
- arekit/common/data/input/providers/label/binary.py +11 -0
- arekit/common/data/input/providers/label/multiple.py +15 -0
- arekit/common/data/input/providers/rows/__init__.py +0 -0
- arekit/common/data/input/providers/rows/base.py +64 -0
- arekit/common/data/input/providers/rows/samples.py +227 -0
- arekit/common/data/input/providers/sample/__init__.py +0 -0
- arekit/common/data/input/providers/sample/cropped.py +43 -0
- arekit/common/data/input/providers/text/__init__.py +0 -0
- arekit/common/data/input/providers/text/single.py +49 -0
- arekit/common/data/input/repositories/__init__.py +0 -0
- arekit/common/data/input/repositories/base.py +68 -0
- arekit/common/data/input/repositories/sample.py +22 -0
- arekit/common/data/input/sample.py +66 -0
- arekit/common/data/input/terms_mapper.py +88 -0
- arekit/common/data/rows_fmt.py +82 -0
- arekit/common/data/rows_parser.py +43 -0
- arekit/common/data/storages/__init__.py +0 -0
- arekit/common/data/storages/base.py +109 -0
- arekit/common/data/views/__init__.py +0 -0
- arekit/common/data/views/samples.py +26 -0
- arekit/common/docs/__init__.py +0 -0
- arekit/common/docs/base.py +30 -0
- arekit/common/docs/entities_grouping.py +16 -0
- arekit/common/docs/entity.py +18 -0
- arekit/common/docs/objects_parser.py +37 -0
- arekit/common/docs/parsed/__init__.py +0 -0
- arekit/common/docs/parsed/base.py +101 -0
- arekit/common/docs/parsed/providers/__init__.py +0 -0
- arekit/common/docs/parsed/providers/base.py +68 -0
- arekit/common/docs/parsed/providers/base_pairs.py +51 -0
- arekit/common/docs/parsed/providers/entity_service.py +175 -0
- arekit/common/docs/parsed/providers/opinion_pairs.py +20 -0
- arekit/common/docs/parsed/providers/text_opinion_pairs.py +78 -0
- arekit/common/docs/parsed/service.py +31 -0
- arekit/common/docs/parsed/term_position.py +42 -0
- arekit/common/docs/parser.py +34 -0
- arekit/common/docs/sentence.py +14 -0
- arekit/common/entities/__init__.py +0 -0
- arekit/common/entities/base.py +51 -0
- arekit/common/entities/collection.py +72 -0
- arekit/common/entities/str_fmt.py +8 -0
- arekit/common/entities/types.py +9 -0
- arekit/common/experiment/__init__.py +0 -0
- arekit/common/experiment/api/__init__.py +0 -0
- arekit/common/experiment/api/base_samples_io.py +20 -0
- arekit/common/experiment/data_type.py +17 -0
- arekit/common/frames/__init__.py +0 -0
- arekit/common/frames/connotations/__init__.py +0 -0
- arekit/common/frames/connotations/descriptor.py +17 -0
- arekit/common/frames/connotations/provider.py +4 -0
- arekit/common/frames/text_variant.py +43 -0
- arekit/common/frames/variants/__init__.py +0 -0
- arekit/common/frames/variants/base.py +21 -0
- arekit/common/frames/variants/collection.py +60 -0
- arekit/common/labels/__init__.py +0 -0
- arekit/common/labels/base.py +19 -0
- arekit/common/labels/provider/__init__.py +0 -0
- arekit/common/labels/provider/base.py +7 -0
- arekit/common/labels/provider/constant.py +14 -0
- arekit/common/labels/scaler/__init__.py +0 -0
- arekit/common/labels/scaler/base.py +85 -0
- arekit/common/labels/scaler/sentiment.py +7 -0
- arekit/common/labels/scaler/single.py +10 -0
- arekit/common/labels/str_fmt.py +55 -0
- arekit/common/linkage/__init__.py +0 -0
- arekit/common/linkage/base.py +44 -0
- arekit/common/linkage/meta.py +23 -0
- arekit/common/linkage/opinions.py +9 -0
- arekit/common/linkage/text_opinions.py +22 -0
- arekit/common/log_utils.py +29 -0
- arekit/common/model/__init__.py +0 -0
- arekit/common/model/labeling/__init__.py +0 -0
- arekit/common/model/labeling/base.py +24 -0
- arekit/common/model/labeling/modes.py +8 -0
- arekit/common/model/labeling/single.py +24 -0
- arekit/common/opinions/__init__.py +0 -0
- arekit/common/opinions/annot/__init__.py +0 -0
- arekit/common/opinions/annot/algo/__init__.py +0 -0
- arekit/common/opinions/annot/algo/base.py +4 -0
- arekit/common/opinions/annot/algo/pair_based.py +99 -0
- arekit/common/opinions/annot/algo/predefined.py +16 -0
- arekit/common/opinions/annot/algo_based.py +55 -0
- arekit/common/opinions/annot/base.py +15 -0
- arekit/common/opinions/base.py +74 -0
- arekit/common/opinions/collection.py +150 -0
- arekit/common/opinions/enums.py +6 -0
- arekit/common/opinions/provider.py +4 -0
- arekit/common/opinions/writer.py +4 -0
- arekit/common/pipeline/__init__.py +0 -0
- arekit/common/pipeline/base.py +25 -0
- arekit/common/pipeline/context.py +36 -0
- arekit/common/pipeline/conts.py +2 -0
- arekit/common/pipeline/items/__init__.py +0 -0
- arekit/common/pipeline/items/base.py +12 -0
- arekit/common/pipeline/items/flatten.py +14 -0
- arekit/common/pipeline/items/handle.py +17 -0
- arekit/common/pipeline/items/iter.py +11 -0
- arekit/common/pipeline/items/map.py +11 -0
- arekit/common/pipeline/items/map_nested.py +13 -0
- arekit/common/synonyms/__init__.py +0 -0
- arekit/common/synonyms/base.py +151 -0
- arekit/common/synonyms/grouping.py +21 -0
- arekit/common/text/__init__.py +0 -0
- arekit/common/text/enums.py +12 -0
- arekit/common/text/parsed.py +42 -0
- arekit/common/text/parser.py +12 -0
- arekit/common/text/partitioning/__init__.py +0 -0
- arekit/common/text/partitioning/base.py +4 -0
- arekit/common/text/partitioning/str.py +36 -0
- arekit/common/text/partitioning/terms.py +35 -0
- arekit/common/text/stemmer.py +16 -0
- arekit/common/text_opinions/__init__.py +0 -0
- arekit/common/text_opinions/base.py +105 -0
- arekit/common/utils.py +129 -0
- arekit/contrib/__init__.py +0 -0
- arekit/contrib/bert/__init__.py +0 -0
- arekit/contrib/bert/input/__init__.py +0 -0
- arekit/contrib/bert/input/providers/__init__.py +0 -0
- arekit/contrib/bert/input/providers/cropped_sample.py +17 -0
- arekit/contrib/bert/input/providers/text_pair.py +62 -0
- arekit/contrib/bert/terms/__init__.py +0 -0
- arekit/contrib/bert/terms/mapper.py +20 -0
- arekit/contrib/networks/__init__.py +0 -0
- arekit/contrib/networks/embedding.py +149 -0
- arekit/contrib/networks/embedding_io.py +18 -0
- arekit/contrib/networks/input/__init__.py +0 -0
- arekit/contrib/networks/input/const.py +6 -0
- arekit/contrib/networks/input/ctx_serialization.py +28 -0
- arekit/contrib/networks/input/embedding/__init__.py +0 -0
- arekit/contrib/networks/input/embedding/matrix.py +29 -0
- arekit/contrib/networks/input/embedding/offsets.py +55 -0
- arekit/contrib/networks/input/formatters/__init__.py +0 -0
- arekit/contrib/networks/input/formatters/pos_mapper.py +22 -0
- arekit/contrib/networks/input/providers/__init__.py +0 -0
- arekit/contrib/networks/input/providers/sample.py +129 -0
- arekit/contrib/networks/input/providers/term_connotation.py +23 -0
- arekit/contrib/networks/input/providers/text.py +24 -0
- arekit/contrib/networks/input/rows_parser.py +47 -0
- arekit/contrib/networks/input/term_types.py +13 -0
- arekit/contrib/networks/input/terms_mapping.py +60 -0
- arekit/contrib/networks/vectorizer.py +6 -0
- arekit/contrib/prompt/__init__.py +0 -0
- arekit/contrib/prompt/sample.py +61 -0
- arekit/contrib/source/__init__.py +0 -0
- arekit/contrib/source/brat/__init__.py +0 -0
- arekit/contrib/source/brat/annot.py +84 -0
- arekit/contrib/source/brat/doc.py +28 -0
- arekit/contrib/source/brat/entities/__init__.py +0 -0
- arekit/contrib/source/brat/entities/compound.py +13 -0
- arekit/contrib/source/brat/entities/entity.py +42 -0
- arekit/contrib/source/brat/entities/parser.py +53 -0
- arekit/contrib/source/brat/opinions/__init__.py +0 -0
- arekit/contrib/source/brat/opinions/converter.py +19 -0
- arekit/contrib/source/brat/relation.py +32 -0
- arekit/contrib/source/brat/sentence.py +69 -0
- arekit/contrib/source/brat/sentences_reader.py +128 -0
- arekit/contrib/source/download.py +41 -0
- arekit/contrib/source/nerel/__init__.py +0 -0
- arekit/contrib/source/nerel/entities.py +55 -0
- arekit/contrib/source/nerel/folding/__init__.py +0 -0
- arekit/contrib/source/nerel/folding/fixed.py +74 -0
- arekit/contrib/source/nerel/io_utils.py +62 -0
- arekit/contrib/source/nerel/labels.py +241 -0
- arekit/contrib/source/nerel/reader.py +46 -0
- arekit/contrib/source/nerel/utils.py +24 -0
- arekit/contrib/source/nerel/versions.py +12 -0
- arekit/contrib/source/nerelbio/__init__.py +0 -0
- arekit/contrib/source/nerelbio/io_utils.py +62 -0
- arekit/contrib/source/nerelbio/labels.py +265 -0
- arekit/contrib/source/nerelbio/reader.py +8 -0
- arekit/contrib/source/nerelbio/versions.py +8 -0
- arekit/contrib/source/ruattitudes/__init__.py +0 -0
- arekit/contrib/source/ruattitudes/collection.py +36 -0
- arekit/contrib/source/ruattitudes/doc.py +51 -0
- arekit/contrib/source/ruattitudes/doc_brat.py +44 -0
- arekit/contrib/source/ruattitudes/entity/__init__.py +0 -0
- arekit/contrib/source/ruattitudes/entity/parser.py +7 -0
- arekit/contrib/source/ruattitudes/io_utils.py +56 -0
- arekit/contrib/source/ruattitudes/labels_fmt.py +12 -0
- arekit/contrib/source/ruattitudes/opinions/__init__.py +0 -0
- arekit/contrib/source/ruattitudes/opinions/base.py +28 -0
- arekit/contrib/source/ruattitudes/opinions/converter.py +37 -0
- arekit/contrib/source/ruattitudes/reader.py +268 -0
- arekit/contrib/source/ruattitudes/sentence.py +73 -0
- arekit/contrib/source/ruattitudes/synonyms.py +17 -0
- arekit/contrib/source/ruattitudes/text_object.py +59 -0
- arekit/contrib/source/rusentiframes/__init__.py +0 -0
- arekit/contrib/source/rusentiframes/collection.py +157 -0
- arekit/contrib/source/rusentiframes/effect.py +24 -0
- arekit/contrib/source/rusentiframes/io_utils.py +19 -0
- arekit/contrib/source/rusentiframes/labels_fmt.py +22 -0
- arekit/contrib/source/rusentiframes/polarity.py +35 -0
- arekit/contrib/source/rusentiframes/role.py +15 -0
- arekit/contrib/source/rusentiframes/state.py +24 -0
- arekit/contrib/source/rusentiframes/types.py +42 -0
- arekit/contrib/source/rusentiframes/value.py +2 -0
- arekit/contrib/source/rusentrel/__init__.py +0 -0
- arekit/contrib/source/rusentrel/const.py +3 -0
- arekit/contrib/source/rusentrel/docs_reader.py +51 -0
- arekit/contrib/source/rusentrel/entities.py +26 -0
- arekit/contrib/source/rusentrel/io_utils.py +125 -0
- arekit/contrib/source/rusentrel/labels_fmt.py +12 -0
- arekit/contrib/source/rusentrel/opinions/__init__.py +0 -0
- arekit/contrib/source/rusentrel/opinions/collection.py +30 -0
- arekit/contrib/source/rusentrel/opinions/converter.py +40 -0
- arekit/contrib/source/rusentrel/opinions/provider.py +54 -0
- arekit/contrib/source/rusentrel/opinions/writer.py +42 -0
- arekit/contrib/source/rusentrel/synonyms.py +17 -0
- arekit/contrib/source/sentinerel/__init__.py +0 -0
- arekit/contrib/source/sentinerel/entities.py +52 -0
- arekit/contrib/source/sentinerel/folding/__init__.py +0 -0
- arekit/contrib/source/sentinerel/folding/factory.py +31 -0
- arekit/contrib/source/sentinerel/folding/fixed.py +70 -0
- arekit/contrib/source/sentinerel/io_utils.py +87 -0
- arekit/contrib/source/sentinerel/labels.py +53 -0
- arekit/contrib/source/sentinerel/labels_scaler.py +30 -0
- arekit/contrib/source/sentinerel/reader.py +42 -0
- arekit/contrib/source/synonyms/__init__.py +0 -0
- arekit/contrib/source/synonyms/utils.py +19 -0
- arekit/contrib/source/zip_utils.py +47 -0
- arekit/contrib/utils/__init__.py +0 -0
- arekit/contrib/utils/bert/__init__.py +0 -0
- arekit/contrib/utils/bert/samplers.py +17 -0
- arekit/contrib/utils/connotations/__init__.py +0 -0
- arekit/contrib/utils/connotations/rusentiframes_sentiment.py +23 -0
- arekit/contrib/utils/data/__init__.py +0 -0
- arekit/contrib/utils/data/contents/__init__.py +0 -0
- arekit/contrib/utils/data/contents/opinions.py +37 -0
- arekit/contrib/utils/data/doc_provider/__init__.py +0 -0
- arekit/contrib/utils/data/doc_provider/dict_based.py +13 -0
- arekit/contrib/utils/data/doc_provider/dir_based.py +53 -0
- arekit/contrib/utils/data/readers/__init__.py +0 -0
- arekit/contrib/utils/data/readers/base.py +7 -0
- arekit/contrib/utils/data/readers/csv_pd.py +38 -0
- arekit/contrib/utils/data/readers/jsonl.py +15 -0
- arekit/contrib/utils/data/service/__init__.py +0 -0
- arekit/contrib/utils/data/service/balance.py +50 -0
- arekit/contrib/utils/data/storages/__init__.py +0 -0
- arekit/contrib/utils/data/storages/jsonl_based.py +18 -0
- arekit/contrib/utils/data/storages/pandas_based.py +123 -0
- arekit/contrib/utils/data/storages/row_cache.py +48 -0
- arekit/contrib/utils/data/writers/__init__.py +0 -0
- arekit/contrib/utils/data/writers/base.py +27 -0
- arekit/contrib/utils/data/writers/csv_native.py +63 -0
- arekit/contrib/utils/data/writers/csv_pd.py +40 -0
- arekit/contrib/utils/data/writers/json_opennre.py +132 -0
- arekit/contrib/utils/data/writers/sqlite_native.py +110 -0
- arekit/contrib/utils/download.py +77 -0
- arekit/contrib/utils/embeddings/__init__.py +0 -0
- arekit/contrib/utils/embeddings/rusvectores.py +58 -0
- arekit/contrib/utils/embeddings/tokens.py +30 -0
- arekit/contrib/utils/entities/__init__.py +0 -0
- arekit/contrib/utils/entities/filter.py +7 -0
- arekit/contrib/utils/entities/formatters/__init__.py +0 -0
- arekit/contrib/utils/entities/formatters/str_display.py +11 -0
- arekit/contrib/utils/entities/formatters/str_simple_sharp_prefixed_fmt.py +15 -0
- arekit/contrib/utils/io_utils/__init__.py +0 -0
- arekit/contrib/utils/io_utils/embedding.py +72 -0
- arekit/contrib/utils/io_utils/opinions.py +37 -0
- arekit/contrib/utils/io_utils/samples.py +79 -0
- arekit/contrib/utils/io_utils/utils.py +39 -0
- arekit/contrib/utils/lexicons/__init__.py +0 -0
- arekit/contrib/utils/lexicons/lexicon.py +41 -0
- arekit/contrib/utils/lexicons/relation.py +42 -0
- arekit/contrib/utils/lexicons/rusentilex.py +37 -0
- arekit/contrib/utils/nn/__init__.py +0 -0
- arekit/contrib/utils/nn/rows.py +83 -0
- arekit/contrib/utils/np_utils/__init__.py +0 -0
- arekit/contrib/utils/np_utils/embedding.py +22 -0
- arekit/contrib/utils/np_utils/npz_utils.py +13 -0
- arekit/contrib/utils/np_utils/vocab.py +20 -0
- arekit/contrib/utils/pipelines/__init__.py +0 -0
- arekit/contrib/utils/pipelines/items/__init__.py +0 -0
- arekit/contrib/utils/pipelines/items/sampling/__init__.py +0 -0
- arekit/contrib/utils/pipelines/items/sampling/base.py +99 -0
- arekit/contrib/utils/pipelines/items/sampling/networks.py +54 -0
- arekit/contrib/utils/pipelines/items/text/__init__.py +0 -0
- arekit/contrib/utils/pipelines/items/text/entities_default.py +23 -0
- arekit/contrib/utils/pipelines/items/text/frames.py +86 -0
- arekit/contrib/utils/pipelines/items/text/frames_lemmatized.py +36 -0
- arekit/contrib/utils/pipelines/items/text/frames_negation.py +32 -0
- arekit/contrib/utils/pipelines/items/text/terms_splitter.py +10 -0
- arekit/contrib/utils/pipelines/items/text/tokenizer.py +107 -0
- arekit/contrib/utils/pipelines/items/text/translator.py +135 -0
- arekit/contrib/utils/pipelines/opinion_collections.py +85 -0
- arekit/contrib/utils/pipelines/sources/__init__.py +0 -0
- arekit/contrib/utils/pipelines/sources/nerel/__init__.py +0 -0
- arekit/contrib/utils/pipelines/sources/nerel/doc_provider.py +27 -0
- arekit/contrib/utils/pipelines/sources/nerel/extract_text_relations.py +65 -0
- arekit/contrib/utils/pipelines/sources/nerel/labels_fmt.py +60 -0
- arekit/contrib/utils/pipelines/sources/nerel_bio/__init__.py +0 -0
- arekit/contrib/utils/pipelines/sources/nerel_bio/doc_provider.py +29 -0
- arekit/contrib/utils/pipelines/sources/nerel_bio/extrat_text_relations.py +64 -0
- arekit/contrib/utils/pipelines/sources/nerel_bio/labels_fmt.py +79 -0
- arekit/contrib/utils/pipelines/sources/ruattitudes/__init__.py +0 -0
- arekit/contrib/utils/pipelines/sources/ruattitudes/doc_provider.py +56 -0
- arekit/contrib/utils/pipelines/sources/ruattitudes/entity_filter.py +20 -0
- arekit/contrib/utils/pipelines/sources/ruattitudes/extract_text_opinions.py +65 -0
- arekit/contrib/utils/pipelines/sources/rusentrel/__init__.py +0 -0
- arekit/contrib/utils/pipelines/sources/rusentrel/doc_provider.py +21 -0
- arekit/contrib/utils/pipelines/sources/rusentrel/extract_text_opinions.py +107 -0
- arekit/contrib/utils/pipelines/sources/sentinerel/__init__.py +0 -0
- arekit/contrib/utils/pipelines/sources/sentinerel/doc_provider.py +29 -0
- arekit/contrib/utils/pipelines/sources/sentinerel/entity_filter.py +62 -0
- arekit/contrib/utils/pipelines/sources/sentinerel/extract_text_opinions.py +180 -0
- arekit/contrib/utils/pipelines/sources/sentinerel/labels_fmt.py +50 -0
- arekit/contrib/utils/pipelines/text_opinion/__init__.py +0 -0
- arekit/contrib/utils/pipelines/text_opinion/annot/__init__.py +0 -0
- arekit/contrib/utils/pipelines/text_opinion/annot/algo_based.py +34 -0
- arekit/contrib/utils/pipelines/text_opinion/annot/predefined.py +88 -0
- arekit/contrib/utils/pipelines/text_opinion/extraction.py +93 -0
- arekit/contrib/utils/pipelines/text_opinion/filters/__init__.py +0 -0
- arekit/contrib/utils/pipelines/text_opinion/filters/base.py +4 -0
- arekit/contrib/utils/pipelines/text_opinion/filters/distance_based.py +16 -0
- arekit/contrib/utils/pipelines/text_opinion/filters/entity_based.py +29 -0
- arekit/contrib/utils/pipelines/text_opinion/filters/limitation.py +26 -0
- arekit/contrib/utils/processing/__init__.py +0 -0
- arekit/contrib/utils/processing/languages/__init__.py +0 -0
- arekit/contrib/utils/processing/languages/mods.py +12 -0
- arekit/contrib/utils/processing/languages/pos.py +23 -0
- arekit/contrib/utils/processing/languages/ru/__init__.py +0 -0
- arekit/contrib/utils/processing/languages/ru/cases.py +78 -0
- arekit/contrib/utils/processing/languages/ru/constants.py +6 -0
- arekit/contrib/utils/processing/languages/ru/mods.py +13 -0
- arekit/contrib/utils/processing/languages/ru/number.py +23 -0
- arekit/contrib/utils/processing/languages/ru/pos_service.py +36 -0
- arekit/contrib/utils/processing/lemmatization/__init__.py +0 -0
- arekit/contrib/utils/processing/lemmatization/mystem.py +51 -0
- arekit/contrib/utils/processing/pos/__init__.py +0 -0
- arekit/contrib/utils/processing/pos/base.py +12 -0
- arekit/contrib/utils/processing/pos/mystem_wrap.py +134 -0
- arekit/contrib/utils/processing/pos/russian.py +10 -0
- arekit/contrib/utils/processing/text/__init__.py +0 -0
- arekit/contrib/utils/processing/text/tokens.py +127 -0
- arekit/contrib/utils/resources.py +25 -0
- arekit/contrib/utils/serializer.py +43 -0
- arekit/contrib/utils/sources/__init__.py +0 -0
- arekit/contrib/utils/sources/sentinerel/__init__.py +0 -0
- arekit/contrib/utils/sources/sentinerel/text_opinion/__init__.py +0 -0
- arekit/contrib/utils/sources/sentinerel/text_opinion/prof_per_org_filter.py +63 -0
- arekit/contrib/utils/synonyms/__init__.py +0 -0
- arekit/contrib/utils/synonyms/simple.py +15 -0
- arekit/contrib/utils/synonyms/stemmer_based.py +38 -0
- arekit/contrib/utils/vectorizers/__init__.py +0 -0
- arekit/contrib/utils/vectorizers/bpe.py +93 -0
- arekit/contrib/utils/vectorizers/random_norm.py +39 -0
- arekit/download_data.py +11 -0
- arekit-0.24.0.dist-info/LICENSE +21 -0
- arekit-0.24.0.dist-info/METADATA +23 -0
- arekit-0.24.0.dist-info/RECORD +374 -0
- arekit-0.24.0.dist-info/WHEEL +5 -0
- arekit-0.24.0.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
|
|
3
|
+
logger = logging.getLogger(__name__)
|
|
4
|
+
logging.basicConfig(level=logging.DEBUG)
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
def log_synonym_existed(value):
    """ Writes an INFO record telling that `value` is already present
        in the collection and therefore was skipped.

        value:
            the synonym value to mention in the record (expected to be text).
    """
    # The value is substituted as text. Encoding it to UTF-8 first (a Python 2
    # leftover) makes Python 3 print a bytes repr such as b'\xd0...' in the
    # record instead of the readable value.
    logger.info("Collection already has a value '{}'. Skipped".format(value))
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
def log_synonym_for_entity_does_not_exist(entity_value, end_type, raise_exception):
    """ Reports a value that is missing in a read-only SynonymsCollection.

        entity_value:
            the value that was looked up; substituted into the message.
        end_type:
            the opinion end the value relates to; substituted into the message.
        raise_exception: bool
            when truthy the message is raised as an Exception,
            otherwise it is written to the module logger at INFO level.
    """
    template = "'{s}' for end {e} does not exist in read-only SynonymsCollection"
    text = template.format(s=entity_value, e=end_type)

    if not raise_exception:
        logger.info(text)
        return

    raise Exception(text)
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
def log_opinion_already_exist(opinion, raise_exception, display_log):
    """ Reports that an opinion with the same source and target values
        already exists in a collection.

        opinion:
            object that exposes `SourceValue` and `TargetValue`.
        raise_exception: bool
            when truthy the message is raised as an Exception.
        display_log: bool
            consulted only when `raise_exception` is falsy; when truthy the
            message, suffixed with ' [REJECTED]', is logged at INFO level.
    """
    # The message stays a `str`. Encoding it to UTF-8 (a Python 2 leftover)
    # turns it into `bytes` on Python 3, which makes the concatenation with
    # ' [REJECTED]' below fail with TypeError and puts a bytes repr into
    # the raised exception.
    message = "'{s}->{t}' already exists in collection".format(
        s=opinion.SourceValue,
        t=opinion.TargetValue)

    if raise_exception:
        raise Exception(message)

    if display_log:
        logger.info(message + ' [REJECTED]')
|
|
File without changes
|
|
File without changes
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
from arekit.common.labels.scaler.base import BaseLabelScaler
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
class LabelsHelper(object):
    """ Facade over a label scaler.
        Conversions are forwarded to the scaler; label aggregation and
        opinion composition are left for the derived classes.
    """

    def __init__(self, label_scaler):
        """ label_scaler: BaseLabelScaler
                scaler that serves every conversion of this helper.
        """
        assert isinstance(label_scaler, BaseLabelScaler)
        self._label_scaler = label_scaler

    def label_from_uint(self, value):
        """ Forwards `value` to `uint_to_label` of the scaler.
        """
        scaler = self._label_scaler
        return scaler.uint_to_label(value=value)

    def label_to_uint(self, label):
        """ Forwards `label` to `label_to_uint` of the scaler.
        """
        scaler = self._label_scaler
        return scaler.label_to_uint(label=label)

    def get_classes_count(self):
        """ Amount of labels listed by `ordered_suppoted_labels` of the scaler.
        """
        supported = self._label_scaler.ordered_suppoted_labels()
        return len(supported)

    def aggregate_labels(self, labels_list, label_calc_mode):
        """ Not implemented at this level.
        """
        raise NotImplementedError()

    @staticmethod
    def compose_opinion(text_opinion, label):
        """ Not implemented at this level.
        """
        raise NotImplementedError()
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
import numpy as np
|
|
2
|
+
|
|
3
|
+
from arekit.common.model.labeling.base import LabelsHelper
|
|
4
|
+
from arekit.common.model.labeling.modes import LabelCalculationMode
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
class SingleLabelsHelper(LabelsHelper):
    """ Labels helper that reduces a list of labels to a single label.
    """

    def aggregate_labels(self, labels_list, label_calc_mode):
        """ labels_list: list
                labels to be reduced.
            label_calc_mode: LabelCalculationMode
                FIRST_APPEARED -- the first element of the list is returned;
                AVERAGE -- labels are converted to ints by the scaler, summed up,
                and the sign of the sum is converted back into a label.
            returns: the resulting label, or None for any other mode.
        """
        assert isinstance(labels_list, list)
        assert isinstance(label_calc_mode, LabelCalculationMode)

        if label_calc_mode == LabelCalculationMode.FIRST_APPEARED:
            return labels_list[0]

        if label_calc_mode == LabelCalculationMode.AVERAGE:
            scaler = self._label_scaler
            total = sum(map(scaler.label_to_int, labels_list))
            return scaler.int_to_label(int(np.sign(total)))

        return None
|
|
24
|
+
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
@@ -0,0 +1,99 @@
|
|
|
1
|
+
from arekit.common.entities.types import OpinionEntityType
|
|
2
|
+
from arekit.common.labels.provider.base import BasePairLabelProvider
|
|
3
|
+
from arekit.common.docs.entity import DocumentEntity
|
|
4
|
+
from arekit.common.docs.parsed.base import ParsedDocument
|
|
5
|
+
from arekit.common.docs.parsed.providers.entity_service import EntityServiceProvider, DistanceType
|
|
6
|
+
from arekit.common.docs.parsed.providers.opinion_pairs import OpinionPairsProvider
|
|
7
|
+
from arekit.common.opinions.annot.algo.base import BaseOpinionAnnotationAlgorithm
|
|
8
|
+
from arekit.common.opinions.base import Opinion
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
class PairBasedOpinionAnnotationAlgorithm(BaseOpinionAnnotationAlgorithm):
    """ Pair-based annotation algorithm: opinions are requested for the
        source-target entity pairs of a parsed document that pass the filter of this class.
        This is a default annotator which found its application in Sentiment Attitude Extraction task [1].

        References:
            [1] Extracting Sentiment Attitudes from Analytical Texts https://arxiv.org/pdf/1808.08932.pdf
    """

    def __init__(self, dist_in_terms_bound, label_provider, entity_index_func, dist_in_sents=0,
                 is_entity_ignored_func=None):
        """ dist_in_terms_bound: int or None
                bound for the distance in terms between the pair ends: a pair is
                rejected when its distance is greater than this value;
                None switches this check off.
            label_provider: BasePairLabelProvider
                provides a label for a (source, target) pair.
            entity_index_func: callable
                passed as is to the providers created in `iter_opinions`.
            dist_in_sents: int
                bound for the distance in sentences between the pair ends: a pair
                is rejected when its distance is greater than this value.
            is_entity_ignored_func: callable or None
                entity, type -> bool
        """
        assert dist_in_terms_bound is None or isinstance(dist_in_terms_bound, int)
        assert isinstance(label_provider, BasePairLabelProvider)
        assert callable(entity_index_func)
        assert isinstance(dist_in_sents, int)
        assert is_entity_ignored_func is None or callable(is_entity_ignored_func)

        self.__label_provider = label_provider
        self.__dist_in_terms_bound = dist_in_terms_bound
        self.__dist_in_sents = dist_in_sents
        self.__is_entity_ignored_func = is_entity_ignored_func
        self.__entity_index_func = entity_index_func

    # region private methods

    @staticmethod
    def __create_key_by_entity_pair(e1, e2):
        """ Key of the pair: document-level ids of both entities joined by an underscore.
        """
        assert isinstance(e1, DocumentEntity)
        assert isinstance(e2, DocumentEntity)
        return "{}_{}".format(e1.IdInDocument, e2.IdInDocument)

    def __try_create_pair_key(self, entity_service, e1, e2, existed_opinions):
        """ Returns the key of the (e1, e2) pair when the pair passes every check,
            and None otherwise.
        """
        assert isinstance(entity_service, EntityServiceProvider)
        assert isinstance(e1, DocumentEntity)
        assert isinstance(e2, DocumentEntity)

        # An entity is not paired with itself.
        if e1.IdInDocument == e2.IdInDocument:
            return None

        # Optional user filter: e1 is checked as a subject, e2 as an object.
        ignored = self.__is_entity_ignored_func
        if ignored is not None and (ignored(e1, OpinionEntityType.Subject) or
                                    ignored(e2, OpinionEntityType.Object)):
            return None

        sentence_distance = entity_service.calc_dist_between_entities(
            e1=e1, e2=e2, distance_type=DistanceType.InSentences)
        if sentence_distance > self.__dist_in_sents:
            return None

        term_distance = entity_service.calc_dist_between_entities(
            e1=e1, e2=e2, distance_type=DistanceType.InTerms)
        terms_bound = self.__dist_in_terms_bound
        if terms_bound is not None and term_distance > terms_bound:
            return None

        # Pairs with a synonymous opinion among the existed ones are rejected.
        if existed_opinions is not None:
            candidate = Opinion(source_value=e1.Value,
                                target_value=e2.Value,
                                label=self.__label_provider.provide(source=e1, target=e2))
            if existed_opinions.has_synonymous_opinion(opinion=candidate):
                return None

        return self.__create_key_by_entity_pair(e1=e1, e2=e2)

    # endregion

    def iter_opinions(self, parsed_doc, existed_opinions=None):
        """ parsed_doc: ParsedDocument
                document both providers are initialized with.
            existed_opinions:
                optional collection queried via `has_synonymous_opinion`;
                None disables that check.
            returns: the result of `OpinionPairsProvider.iter_from_all`
                called with the pair filter of this algorithm.
        """
        assert isinstance(parsed_doc, ParsedDocument)

        def is_pair_accepted(e1, e2):
            pair_key = self.__try_create_pair_key(entity_service=distance_service,
                                                  e1=e1, e2=e2,
                                                  existed_opinions=existed_opinions)
            return pair_key is not None

        # Initialize providers.
        index_func = self.__entity_index_func
        pairs_provider = OpinionPairsProvider(entity_index_func=index_func)
        distance_service = EntityServiceProvider(entity_index_func=index_func)
        pairs_provider.init_parsed_doc(parsed_doc)
        distance_service.init_parsed_doc(parsed_doc)

        return pairs_provider.iter_from_all(label_provider=self.__label_provider,
                                            filter_func=is_pair_accepted)
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
from arekit.common.docs.parsed.base import ParsedDocument
|
|
2
|
+
from arekit.common.opinions.annot.algo.base import BaseOpinionAnnotationAlgorithm
|
|
3
|
+
|
|
4
|
+
|
|
5
|
+
class PredefinedOpinionAnnotationAlgorithm(BaseOpinionAnnotationAlgorithm):
    """ A placeholder algorithm that annotates nothing on its own:
        opinions are obtained from a callback by the id of the document.
    """

    def __init__(self, get_opinions_by_doc_id_func):
        """ get_opinions_by_doc_id_func: callable
                doc_id -> opinions
        """
        assert callable(get_opinions_by_doc_id_func)
        self.__get_opinions_by_doc_id_func = get_opinions_by_doc_id_func

    def iter_opinions(self, parsed_doc, existed_opinions=None):
        """ Returns the callback result for `parsed_doc.RelatedDocID`;
            `existed_opinions` is not used.
        """
        assert isinstance(parsed_doc, ParsedDocument)
        doc_id = parsed_doc.RelatedDocID
        return self.__get_opinions_by_doc_id_func(doc_id)
|
|
@@ -0,0 +1,55 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
|
|
3
|
+
from arekit.common.docs.parsed.base import ParsedDocument
|
|
4
|
+
from arekit.common.opinions.annot.algo.base import BaseOpinionAnnotationAlgorithm
|
|
5
|
+
from arekit.common.opinions.annot.base import BaseOpinionAnnotator
|
|
6
|
+
from arekit.common.opinions.collection import OpinionCollection
|
|
7
|
+
|
|
8
|
+
logger = logging.getLogger(__name__)
|
|
9
|
+
logging.basicConfig(level=logging.INFO)
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
class AlgorithmBasedOpinionAnnotator(BaseOpinionAnnotator):
    """ Annotator that fills an opinion collection with
        the opinions produced by an annotation algorithm.
    """

    def __init__(self, annot_algo, create_empty_collection_func, get_doc_existed_opinions_func=None):
        """ annot_algo: BaseOpinionAnnotationAlgorithm
                algorithm that yields the opinions for a parsed document.
            create_empty_collection_func: func
                function that creates an empty OpinionCollection
            get_doc_existed_opinions_func: func or None
                function that provides existed opinions for a document (by document id);
                if None, the document is treated as having no existed opinions.
        """
        assert isinstance(annot_algo, BaseOpinionAnnotationAlgorithm)
        assert get_doc_existed_opinions_func is None or callable(get_doc_existed_opinions_func)
        super().__init__()

        def no_existed_opinions(_):
            return None

        if get_doc_existed_opinions_func is None:
            existed_opinions_func = no_existed_opinions
        else:
            existed_opinions_func = get_doc_existed_opinions_func

        self.__annot_algo = annot_algo
        self.__create_empty_collection_func = create_empty_collection_func
        self.__get_existed_opinions_func = existed_opinions_func

    # region private methods

    def _annot_collection_core(self, parsed_doc):
        """ Runs the algorithm over `parsed_doc` and returns a new collection
            with the produced opinions, synonymous duplicates being dropped.
        """
        assert isinstance(parsed_doc, ParsedDocument)

        existed = self.__get_existed_opinions_func(parsed_doc.RelatedDocID)
        assert existed is None or isinstance(existed, OpinionCollection)

        produced = self.__annot_algo.iter_opinions(parsed_doc=parsed_doc,
                                                   existed_opinions=existed)

        result = self.__create_empty_collection_func()
        assert isinstance(result, OpinionCollection)

        # Filling. Only the first opinion among the synonymous ones is kept.
        for candidate in produced:
            if not result.has_synonymous_opinion(candidate):
                result.add_opinion(candidate)

        return result

    # endregion
|
|
55
|
+
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
class BaseOpinionAnnotator(object):
    """ Base annotator.
        The public `annotate_collection` forwards the parsed document to the
        `_annot_collection_core` hook, which derived classes have to implement.
    """

    # region public methods

    def annotate_collection(self, parsed_doc):
        """ Returns the result of `_annot_collection_core` for `parsed_doc`.
        """
        annotated = self._annot_collection_core(parsed_doc=parsed_doc)
        return annotated

    # endregion

    def _annot_collection_core(self, parsed_doc):
        """ Hook with the actual annotation; not implemented at this level.
        """
        raise NotImplementedError()
|
|
@@ -0,0 +1,74 @@
|
|
|
1
|
+
from arekit.common.labels.base import Label
|
|
2
|
+
from arekit.common.opinions.enums import OpinionEndTypes
|
|
3
|
+
from arekit.common.synonyms.base import SynonymsCollection
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
class Opinion(object):
    """ Labeled opinion that connects a source value with a target value.
        An arbitrary tag might be attached to it afterwards via `set_tag`.
    """

    def __init__(self, source_value, target_value, label):
        """
        source_value: str
            value of the opinion source end
        target_value: str
            value of the opinion target end
        label: Label
            label assigned to the opinion
        """
        assert(isinstance(source_value, str))
        assert(isinstance(target_value, str))
        assert(isinstance(label, Label))
        self.__source_value, self.__target_value = source_value, target_value
        self.__label = label
        # No tag until set_tag is called.
        self.__tag = None

    # region properties

    @property
    def SourceValue(self):
        return self.__source_value

    @property
    def TargetValue(self):
        return self.__target_value

    @property
    def Label(self):
        return self.__label

    @property
    def Tag(self):
        return self.__tag

    # endregion

    def __end_group_indices(self, synonyms):
        """ Synonym group indices of the (source, target) values, in this order.
        """
        return (synonyms.get_synonym_group_index(self.__source_value),
                synonyms.get_synonym_group_index(self.__target_value))

    # region public methods

    def get_value(self, end_type):
        """ Provides the value of the requested opinion end.

            end_type: OpinionEndTypes
            raises: Exception for an end type other than Source or Target.
        """
        assert(isinstance(end_type, OpinionEndTypes))

        if end_type == OpinionEndTypes.Source:
            value = self.SourceValue
        elif end_type == OpinionEndTypes.Target:
            value = self.TargetValue
        else:
            raise Exception("Unknown end_type='{e_type}'".format(e_type=end_type))

        return value

    def set_tag(self, value):
        """ Replaces the tag of the opinion with `value`.
        """
        self.__tag = value

    def is_loop(self, synonyms):
        """ Checks whether source and target values share
            the same synonym group index in `synonyms`.
        """
        source_group, target_group = self.__end_group_indices(synonyms)
        return source_group == target_group

    def create_synonym_id(self, synonyms):
        """ Composes a string identifier out of the synonym group
            indices of the source and target values.
        """
        assert(isinstance(synonyms, SynonymsCollection))
        return "{}_{}".format(*self.__end_group_indices(synonyms))

    def has_synonym_for_end(self, synonyms, end_type):
        """ Checks whether `synonyms` contains the value of the given end.
        """
        assert(isinstance(synonyms, SynonymsCollection))
        assert(isinstance(end_type, OpinionEndTypes))
        end_value = self.get_value(end_type)
        return synonyms.contains_synonym_value(end_value)

    # endregion
|
|
@@ -0,0 +1,150 @@
|
|
|
1
|
+
from collections.abc import Iterable
|
|
2
|
+
|
|
3
|
+
from arekit.common import log_utils
|
|
4
|
+
from arekit.common.labels.base import Label
|
|
5
|
+
from arekit.common.opinions.base import Opinion
|
|
6
|
+
from arekit.common.opinions.enums import OpinionEndTypes
|
|
7
|
+
from arekit.common.synonyms.base import SynonymsCollection
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
class OpinionCollection(object):
    """
    Document-level collection of labeled opinions.
    Opinions are keyed by their synonym identifier and are
    iterated in the order of their registration.
    """

    def __init__(self, synonyms,
                 opinions=None,
                 error_on_duplicates=True,
                 error_on_synonym_end_missed=True):
        """
        synonyms: SynonymsCollection
            synonyms used to identify opinions; when it is not read-only,
            missed opinion end values are registered in it.
        opinions: iterable or None
            opinions to fill the collection with; None means empty collection.
        error_on_duplicates: bool
            passed as `raise_exception` to the log helper once an opinion
            from `opinions` is already presented in collection.
        error_on_synonym_end_missed: bool
            passed as `raise_exception` to the log helper once an end value
            of an opinion from `opinions` is missed in read-only synonyms.
        """
        assert(opinions is None or isinstance(opinions, Iterable))
        assert(isinstance(synonyms, SynonymsCollection))
        assert(isinstance(error_on_duplicates, bool))
        assert(isinstance(error_on_synonym_end_missed, bool))

        self.__by_synonyms = {}
        self.__ordered_opinion_keys = []
        self.__synonyms = synonyms
        self.__error_on_duplicates = error_on_duplicates
        self.__error_on_synonym_end_missed = error_on_synonym_end_missed

        if opinions is not None:
            for opinion in opinions:
                self.__register_opinion(
                    opinion=opinion,
                    error_on_existence=error_on_duplicates,
                    error_on_synonym_end_missed=error_on_synonym_end_missed)

    # region public methods

    def try_get_synonyms_opinion(self, opinion, label=None):
        """ Returns the registered opinion which is synonymous to `opinion`
            (and has the given `label`, when the latter is not None);
            otherwise None.
        """
        return self.__try_get_synonyms_opinion(opinion=opinion, label=label)

    def has_synonymous_opinion(self, opinion, label=None):
        """ Same lookup as `try_get_synonyms_opinion`, reported as bool.
        """
        found = self.__try_get_synonyms_opinion(opinion=opinion, label=label)
        return found is not None

    def add_opinion(self, opinion):
        """ Registers the opinion. Both error flags are always passed as True
            here, whatever values were given to the constructor.
        """
        assert(isinstance(opinion, Opinion))
        self.__register_opinion(opinion=opinion,
                                error_on_existence=True,
                                error_on_synonym_end_missed=True)

    # endregion

    # region private methods

    def __try_get_synonyms_opinion(self, opinion, label=None):
        assert(isinstance(opinion, Opinion))
        assert(label is None or isinstance(label, Label))

        # Every end of the opinion has to be known by synonyms.
        ends_known = all(
            opinion.has_synonym_for_end(synonyms=self.__synonyms, end_type=end_type)
            for end_type in OpinionEndTypes)
        if not ends_known:
            return None

        key = opinion.create_synonym_id(self.__synonyms)
        if key not in self.__by_synonyms:
            return None

        found = self.__by_synonyms[key]
        if label is None or found.Label == label:
            return found

        return None

    def __register_opinion(self, opinion,
                           error_on_existence,
                           error_on_synonym_end_missed,
                           show_duplications=False):
        """ Attempts to register the opinion.
            returns: bool
                True when the opinion has been registered; False when it
                has been rejected (missed end value in read-only synonyms,
                loop, or already existed synonymous opinion).
        """
        assert(isinstance(error_on_existence, bool))
        assert(isinstance(error_on_synonym_end_missed, bool))

        for end_type in OpinionEndTypes:
            end_value = opinion.get_value(end_type)

            if opinion.has_synonym_for_end(synonyms=self.__synonyms, end_type=end_type):
                # OK. The value is already known.
                pass
            elif not self.__synonyms.IsReadOnly:
                # OK. Registering new synonyms as it is possible.
                self.__synonyms.add_synonym_value(end_value)
            else:
                log_utils.log_synonym_for_entity_does_not_exist(
                    entity_value=end_value,
                    end_type=end_type,
                    raise_exception=error_on_synonym_end_missed)
                # Rejecting.
                return False

        # Ignoring loops.
        if opinion.is_loop(self.__synonyms):
            return False

        key = opinion.create_synonym_id(self.__synonyms)
        assert(isinstance(key, str))

        if key in self.__by_synonyms:
            log_utils.log_opinion_already_exist(opinion=opinion,
                                                raise_exception=error_on_existence,
                                                display_log=show_duplications)
            # Rejecting.
            return False

        # Perform registration.
        self.__by_synonyms[key] = opinion
        self.__ordered_opinion_keys.append(key)

        return True

    # endregion

    # region base methods

    def __len__(self):
        return len(self.__by_synonyms)

    def __iter__(self):
        for ordered_key in self.__ordered_opinion_keys:
            yield self.__by_synonyms[ordered_key]

    def __getitem__(self, item):
        assert(isinstance(item, int))
        return self.__by_synonyms[self.__ordered_opinion_keys[item]]

    # endregion
|
|
File without changes
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
from arekit.common.pipeline.context import PipelineContext
|
|
2
|
+
from arekit.common.pipeline.items.base import BasePipelineItem
|
|
3
|
+
|
|
4
|
+
|
|
5
|
+
class BasePipeline(object):
    """ Sequence of pipeline items that are applied one after another.
    """

    def __init__(self, pipeline):
        """
        pipeline: list
            pipeline items in the order of their application; None entries
            are allowed and skipped by `run`. The list is kept by reference
            (not copied) and is the one extended by `append`.
        """
        assert(isinstance(pipeline, list))
        self.__pipeline = pipeline

    def run(self, input_data, params_dict=None, parent_ctx=None):
        """ Passes `input_data` through every non-None item of the pipeline.

            input_data: any
                data for the first item; every next item receives
                the result of the previous one.
            params_dict: dict or None
                parameters of the pipeline context shared between items;
                the dictionary itself is left untouched.
            parent_ctx: PipelineContext or None
                context of the parent pipeline, passed to the created context.
            returns:
                result of the last applied item, or `input_data` as is
                when there are no items to apply.
        """
        assert(isinstance(params_dict, dict) or params_dict is None)

        # PipelineContext stores the PARENT_CTX entry right in the dictionary
        # it receives and asserts that this entry is absent. Handing over a
        # shallow copy keeps the caller's dictionary clean, so the same
        # dictionary may be utilized for several runs.
        ctx_params = params_dict.copy() if params_dict is not None else dict()
        pipeline_ctx = PipelineContext(d=ctx_params, parent_ctx=parent_ctx)

        for item in self.__pipeline:
            if item is None:
                continue
            assert(isinstance(item, BasePipelineItem))
            input_data = item.apply(input_data=input_data, pipeline_ctx=pipeline_ctx)

        return input_data

    def append(self, item):
        """ Adds the item to the end of the pipeline.
        """
        assert(isinstance(item, BasePipelineItem))
        self.__pipeline.append(item)
|
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
from arekit.common.pipeline.conts import PARENT_CTX
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
class PipelineContext(object):
    """ Dictionary-based storage of the parameters utilized in pipeline.

        NOTE: the dictionary passed to the constructor is kept by reference
        and gets the PARENT_CTX entry written into it.
    """

    def __init__(self, d, parent_ctx=None):
        """
        d: dict
            parameters of the context; must not contain the PARENT_CTX key.
        parent_ctx: PipelineContext or None
            parent context, stored under the PARENT_CTX key.
        """
        assert(isinstance(d, dict))
        assert(parent_ctx is None or isinstance(parent_ctx, PipelineContext))
        assert(PARENT_CTX not in d)
        self._d = d
        self._d[PARENT_CTX] = parent_ctx

    # region public

    def provide(self, param):
        """ Value of the parameter; KeyError when it is missed.
        """
        return self._d[param]

    def provide_or_none(self, param):
        """ Value of the parameter, or None when it is missed.
        """
        if param in self._d:
            return self._d[param]
        return None

    def update(self, param, value):
        """ Sets (or replaces) the value of the parameter.
        """
        self._d[param] = value

    # endregion

    # region base methods

    def __contains__(self, item):
        return item in self._d

    # endregion
|
|
File without changes
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
class BasePipelineItem(object):
    """ Single processing step that might be instantiated and embedded into pipeline.
        Subclasses implement `apply_core`; the pipeline calls `apply`.
    """

    def apply(self, input_data, pipeline_ctx=None):
        """ Processes the input and provides the data for further pipeline items:
            the result of `apply_core`, or `input_data` itself when
            `apply_core` returns None.
        """
        result = self.apply_core(input_data=input_data, pipeline_ctx=pipeline_ctx)
        if result is None:
            return input_data
        return result

    def apply_core(self, input_data, pipeline_ctx):
        """ Processing hook. Not implemented in the base class.
        """
        raise NotImplementedError()
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
from arekit.common.pipeline.items.base import BasePipelineItem
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
class FlattenIterPipelineItem(BasePipelineItem):
    """ Flattens an iteration of iterable items
        into a single iteration of their elements.
    """

    def apply_core(self, input_data, pipeline_ctx):
        """ Lazily yields the elements of every item of `input_data`,
            item by item. `pipeline_ctx` is not utilized.
        """
        for nested in input_data:
            for element in nested:
                yield element
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
from arekit.common.pipeline.items.base import BasePipelineItem
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
class HandleIterPipelineItem(BasePipelineItem):
    """ Calls a handler for every item of the input iteration
        and passes the items further as they are.
    """

    def __init__(self, handle_func=None):
        """
        handle_func: callable
            function called with every item; its result is ignored.
            Despite the None default, a callable is expected (asserted).
        """
        assert(callable(handle_func))
        self.__handle_func = handle_func

    def apply_core(self, input_data, pipeline_ctx):
        """ Returns a lazy iteration over `input_data`; the handler is
            called for an item right before the item is yielded.
        """
        return self.__iter_handled(input_data)

    def __iter_handled(self, items):
        for element in items:
            # Perform item handling
            self.__handle_func(element)
            yield element
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
from arekit.common.pipeline.items.base import BasePipelineItem
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
class FilterPipelineItem(BasePipelineItem):
    """ Keeps only those items of the input iteration
        for which the filter function returns a true value.
    """

    def __init__(self, filter_func=None):
        """
        filter_func: callable
            predicate called with every item.
            Despite the None default, a callable is expected (asserted).
        """
        assert(callable(filter_func))
        self.__filter_func = filter_func

    def apply_core(self, input_data, pipeline_ctx):
        """ Returns a lazy `filter` object over `input_data`.
            `pipeline_ctx` is not utilized.
        """
        predicate = self.__filter_func
        return filter(predicate, input_data)
|