PyPI - arekit - Versions diffs - 0.25.1__py3-none-any.whl → 0.25.2__py3-none-any.whl - Mend

arekit 0.25.1 py3-none-any.whl → 0.25.2 py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (36) hide show

arekit/common/context/terms_mapper.py CHANGED Viewed

@@ -1,12 +1,15 @@
 from collections.abc import Iterable
 from arekit.common.context.token import Token
-from arekit.common.entities.base import Entity
 from arekit.common.frames.text_variant import TextFrameVariant
 class TextTermsMapper(object):
+    def __init__(self, is_entity_func):
+        assert(callable(is_entity_func))
+        self.__is_entity_func = is_entity_func
     def iter_mapped(self, terms):
         """ Performs mapping operation of each terms in a sequence
         """
@@ -22,7 +25,7 @@ class TextTermsMapper(object):
                 m_term = self.map_token(i, term)
             elif isinstance(term, TextFrameVariant):
                 m_term = self.map_text_frame_variant(i, term)
-            elif isinstance(term, Entity):
+            elif self.__is_entity_func(term):
                 m_term = self.map_entity(i, term)
             else:
                 raise Exception("Unsupported type {}".format(term))

arekit/common/data/input/providers/rows/samples.py CHANGED Viewed

@@ -9,13 +9,11 @@ from arekit.common.data.input.providers.label.multiple import MultipleLabelProvi
 from arekit.common.data.input.providers.rows.base import BaseRowProvider
 from arekit.common.data.input.providers.text.single import BaseSingleTextProvider
 from arekit.common.data.rows_fmt import create_base_column_fmt
-from arekit.common.entities.base import Entity
-from arekit.common.labels.base import Label
-from arekit.common.linkage.text_opinions import TextOpinionsLinkage
 from arekit.common.docs.parsed.base import ParsedDocument
 from arekit.common.docs.parsed.providers.entity_service import EntityEndType, EntityServiceProvider
 from arekit.common.docs.parsed.term_position import TermPositionTypes
+from arekit.common.labels.base import Label
+from arekit.common.linkage.text_opinions import TextOpinionsLinkage
 from arekit.common.text_opinions.base import TextOpinion
@@ -26,13 +24,15 @@ class BaseSampleRowProvider(BaseRowProvider):
     """ Rows provider for samples storage.
     """
-    def __init__(self, label_provider, text_provider):
+    def __init__(self, is_entity_func, label_provider, text_provider):
+        assert(callable(is_entity_func))
         assert(isinstance(label_provider, LabelProvider))
         assert(isinstance(text_provider, BaseSingleTextProvider))
         super(BaseSampleRowProvider, self).__init__()
         self._label_provider = label_provider
         self.__text_provider = text_provider
+        self.__is_entity_func = is_entity_func
         self.__instances_provider = self.__create_instances_provider(label_provider)
         self.__store_labels = None
         self._val_fmt = create_base_column_fmt(fmt_type="writer")
@@ -65,7 +65,7 @@ class BaseSampleRowProvider(BaseRowProvider):
             parsed_doc=parsed_doc, sentence_ind=sentence_ind, s_ind=s_ind, t_ind=t_ind)
         # Entity indices from the related context.
-        entities = list(filter(lambda term: isinstance(term, Entity), sentence_terms))
+        entities = list(filter(self.__is_entity_func, sentence_terms))
         # Values mapping.
         vm = {
@@ -76,7 +76,7 @@ class BaseSampleRowProvider(BaseRowProvider):
             const.SENT_IND: sentence_ind,
             const.ENTITY_VALUES: entities,
             const.ENTITY_TYPES: entities,
-            const.ENTITIES: [str(i) for i, t in enumerate(sentence_terms) if isinstance(t, Entity)],
+            const.ENTITIES: [str(i) for i, t in enumerate(sentence_terms) if self.__is_entity_func(t)],
             const.S_IND: actual_s_ind,
             const.T_IND: actual_t_ind,
             const.LABEL_UINT: None,
@@ -143,9 +143,6 @@ class BaseSampleRowProvider(BaseRowProvider):
     def __provide_rows(self, row_dict, parsed_doc, entity_service,
                        text_opinion_linkage, index_in_linked, idle_mode):
-        """
-        Providing Rows depending on row_id_formatter type
-        """
         assert(isinstance(parsed_doc, ParsedDocument))
         assert(isinstance(row_dict, OrderedDict))
         assert(isinstance(text_opinion_linkage, TextOpinionsLinkage))
@@ -153,7 +150,6 @@ class BaseSampleRowProvider(BaseRowProvider):
         etalon_label = self.__instances_provider.provide_label(text_opinion_linkage)
         for instance in self.__instances_provider.iter_instances(text_opinion_linkage):
             yield self.__create_row(row=row_dict,
-                                    row_id=0,
                                     parsed_doc=parsed_doc,
                                     entity_service=entity_service,
                                     text_opinions_linkage=instance,
@@ -162,7 +158,7 @@ class BaseSampleRowProvider(BaseRowProvider):
                                     etalon_label=etalon_label,
                                     idle_mode=idle_mode)
-    def __create_row(self, row, row_id, parsed_doc, entity_service, text_opinions_linkage,
+    def __create_row(self, row, parsed_doc, entity_service, text_opinions_linkage,
                      index_in_linked, etalon_label, idle_mode):
         """
         Composing row in following format:

arekit/common/data/input/providers/sample/cropped.py CHANGED Viewed

@@ -8,10 +8,11 @@ class CroppedSampleRowProvider(BaseSampleRowProvider):
         attitude inside.
     """
-    def __init__(self, crop_window_size, label_scaler, text_provider):
+    def __init__(self, crop_window_size, label_scaler, **kwargs):
         assert(isinstance(crop_window_size, int) and crop_window_size > 0)
-        super(CroppedSampleRowProvider, self).__init__(label_provider=MultipleLabelProvider(label_scaler),
-                                                       text_provider=text_provider)
+        super(CroppedSampleRowProvider, self).__init__(
+            label_provider=MultipleLabelProvider(label_scaler),
+            **kwargs)
         self.__crop_window_size = crop_window_size
     @staticmethod

arekit/common/data/input/terms_mapper.py CHANGED Viewed

@@ -1,6 +1,5 @@
 from arekit.common.context.terms_mapper import TextTermsMapper
 from arekit.common.context.token import Token
-from arekit.common.entities.base import Entity
 from arekit.common.entities.str_fmt import StringEntitiesFormatter
 from arekit.common.entities.types import OpinionEntityType
 from arekit.common.frames.text_variant import TextFrameVariant
@@ -12,9 +11,12 @@ class OpinionContainingTextTermsMapper(TextTermsMapper):
     The latter might be utilized with synonyms collection
     """
-    def __init__(self, entity_formatter):
+    def __init__(self, entity_formatter, entity_group_ind_func, **kwargs):
         assert(isinstance(entity_formatter, StringEntitiesFormatter))
+        assert(callable(entity_group_ind_func))
+        super(OpinionContainingTextTermsMapper, self).__init__(**kwargs)
         self.__entities_formatter = entity_formatter
+        self.__syn_group = entity_group_ind_func
         self.__s_ind = None
         self.__t_ind = None
         self.__s_group = None
@@ -24,12 +26,6 @@ class OpinionContainingTextTermsMapper(TextTermsMapper):
     def StringEntitiesFormatter(self):
         return self.__entities_formatter
-    def __syn_group(self, entity):
-        """ Note: here we guarantee that entity has GroupIndex.
-        """
-        assert(isinstance(entity, Entity))
-        return entity.GroupIndex if entity is not None else None
     def set_s_ind(self, s_ind):
         assert(isinstance(s_ind, int))
         self.__s_ind = s_ind

arekit/common/data/storages/base.py CHANGED Viewed

@@ -34,9 +34,6 @@ class BaseRowsStorage(object):
     def _get_rows_count(self):
         raise NotImplemented()
-    def get_row(self, row_index):
-        raise NotImplemented()
     def init_empty(self, columns_provider):
         raise NotImplemented()

arekit/common/docs/entities_grouping.py CHANGED Viewed

@@ -1,17 +1,19 @@
-from arekit.common.entities.base import Entity
 from arekit.common.pipeline.items.base import BasePipelineItem
 class EntitiesGroupingPipelineItem(BasePipelineItem):
-    def __init__(self, value_to_group_id_func, **kwargs):
+    def __init__(self, value_to_group_id_func, is_entity_func, **kwargs):
         assert(callable(value_to_group_id_func))
+        assert(callable(is_entity_func))
         super(EntitiesGroupingPipelineItem, self).__init__(**kwargs)
         self.__value_to_group_id_func = value_to_group_id_func
+        self.__is_entity_func = is_entity_func
     def apply_core(self, input_data, pipeline_ctx):
         assert(isinstance(input_data, list))
-        for entity in filter(lambda term: isinstance(term, Entity), input_data):
+        for entity in filter(lambda term: self.__is_entity_func(term), input_data):
             group_index = self.__value_to_group_id_func(entity.Value)
             entity.set_group_index(group_index)

arekit/common/docs/parsed/base.py CHANGED Viewed

@@ -1,6 +1,5 @@
 from collections.abc import Iterable
-from arekit.common.entities.base import Entity
 from arekit.common.text.enums import TermFormat
 from arekit.common.text.parsed import BaseParsedText
@@ -73,8 +72,9 @@ class ParsedDocument(object):
         assert(isinstance(s_ind, int))
         return self.__parsed_sentences[s_ind]
-    def iter_entities(self):
-        for entity in self.__iter_all_raw_terms(term_only=True, filter_func=lambda t: isinstance(t, Entity)):
+    def iter_entities(self, is_entity_func):
+        assert(callable(is_entity_func))
+        for entity in self.__iter_all_raw_terms(term_only=True, filter_func=is_entity_func):
             yield entity
     def iter_terms(self, filter_func=None, term_only=True):

arekit/common/docs/parsed/providers/base.py CHANGED Viewed

@@ -1,4 +1,3 @@
-from arekit.common.entities.base import Entity
 from arekit.common.docs.entity import DocumentEntity
 from arekit.common.docs.parsed.base import ParsedDocument
@@ -6,7 +5,7 @@ from arekit.common.docs.parsed.base import ParsedDocument
 class BaseParsedDocumentServiceProvider(object):
     def __init__(self, entity_index_func=None):
-        """ Outside enity indexing function
+        """ Outside entity indexing function
             entity_index_func: provides id for a given entity, i.e.
                 func(entity) -> int (id)
         """
@@ -19,7 +18,7 @@ class BaseParsedDocumentServiceProvider(object):
     def Name(self):
         raise NotImplementedError()
-    def init_parsed_doc(self, parsed_doc):
+    def init_parsed_doc(self, parsed_doc, is_entity_func):
         assert(isinstance(parsed_doc, ParsedDocument))
         def __iter_childs_and_root_node(entity):
@@ -37,7 +36,7 @@ class BaseParsedDocumentServiceProvider(object):
         self.__entity_map.clear()
         current_id = 0
-        for _, entity in enumerate(parsed_doc.iter_entities()):
+        for _, entity in enumerate(parsed_doc.iter_entities(is_entity_func=is_entity_func)):
             child_doc_entities = []
             for tree_entity, is_child in __iter_childs_and_root_node(entity):
@@ -61,7 +60,6 @@ class BaseParsedDocumentServiceProvider(object):
     def get_document_entity(self, entity):
         """ Maps entity to the related one with DocumentEntity type
         """
-        assert(isinstance(entity, Entity))
         return self.__entity_map[self.__entity_index_func(entity)]
     def contains_entity(self, entity):

arekit/common/docs/parsed/providers/entity_service.py CHANGED Viewed

@@ -1,8 +1,6 @@
 from enum import Enum
-from arekit.common.entities.base import Entity
 from arekit.common.docs.entity import DocumentEntity
-from arekit.common.docs.parsed.base import ParsedDocument
 from arekit.common.docs.parsed.providers.base import BaseParsedDocumentServiceProvider
 from arekit.common.docs.parsed.term_position import TermPositionTypes, TermPosition
 from arekit.common.text_opinions.base import TextOpinion
@@ -41,9 +39,8 @@ class EntityServiceProvider(BaseParsedDocumentServiceProvider):
     NAME = "entity-service-provider"
-    def __init__(self, entity_index_func):
-        assert(callable(entity_index_func))
-        super(EntityServiceProvider, self).__init__(entity_index_func=entity_index_func)
+    def __init__(self, **kwargs):
+        super(EntityServiceProvider, self).__init__(**kwargs)
         # Initialize API.
         self.__iter_raw_terms_func = None
         # Initialize entity positions.
@@ -53,24 +50,16 @@ class EntityServiceProvider(BaseParsedDocumentServiceProvider):
     def Name(self):
         return self.NAME
-    def init_parsed_doc(self, parsed_doc):
-        super(EntityServiceProvider, self).init_parsed_doc(parsed_doc)
-        assert(isinstance(parsed_doc, ParsedDocument))
+    def init_parsed_doc(self, parsed_doc, is_entity_func):
+        super(EntityServiceProvider, self).init_parsed_doc(parsed_doc=parsed_doc, is_entity_func=is_entity_func)
         self.__iter_raw_terms_func = lambda: parsed_doc.iter_terms(filter_func=None, term_only=False)
-        self.__entity_positions = self.__calculate_entity_positions()
-    # region public 'extract' methods
-    def extract_entity_value(self, text_opinion, end_type):
-        return self.__extract_entity_value(text_opinion=text_opinion, end_type=end_type)
+        self.__entity_positions = self.__calculate_entity_positions(is_entity_func=is_entity_func)
     def extract_entity_position(self, text_opinion, end_type, position_type=None):
         return self.__get_entity_position(text_opinion=text_opinion,
                                           end_type=end_type,
                                           position_type=position_type)
-    # endregion
     # region public 'calculate' methods
     @staticmethod
@@ -112,20 +101,10 @@ class EntityServiceProvider(BaseParsedDocumentServiceProvider):
         return e_pos.get_index(position_type)
-    def get_entity_value(self, id_in_document):
-        entity = self._doc_entities[id_in_document]
-        assert(isinstance(entity, Entity))
-        return entity.Value
     # endregion
     # region private methods
-    def __extract_entity_value(self, text_opinion, end_type):
-        assert(isinstance(text_opinion, TextOpinion))
-        end_id = self.__get_end_id(text_opinion=text_opinion, end_type=end_type)
-        return self.get_entity_value(end_id)
     def __get_entity_position(self, text_opinion, end_type, position_type=None):
         assert(isinstance(text_opinion, TextOpinion))
         end_id = self.__get_end_id(text_opinion=text_opinion, end_type=end_type)
@@ -147,7 +126,7 @@ class EntityServiceProvider(BaseParsedDocumentServiceProvider):
         assert(end_type == EntityEndType.Source or end_type == EntityEndType.Target)
         return text_opinion.SourceId if end_type == EntityEndType.Source else text_opinion.TargetId
-    def __calculate_entity_positions(self):
+    def __calculate_entity_positions(self, is_entity_func):
         """ Note: here we consider the same order as in self._entities.
         """
         t_ind_in_doc = -1
@@ -157,7 +136,7 @@ class EntityServiceProvider(BaseParsedDocumentServiceProvider):
             t_ind_in_doc += 1
-            if not isinstance(term, Entity):
+            if not is_entity_func(term):
                 continue
             # We consider that entities within a single tree has the same positions.

arekit/common/docs/parsed/providers/opinion_pairs.py CHANGED Viewed

@@ -1,4 +1,3 @@
-from arekit.common.entities.base import Entity
 from arekit.common.docs.parsed.providers.base_pairs import BasePairProvider
 from arekit.common.opinions.base import Opinion
@@ -7,14 +6,15 @@ class OpinionPairsProvider(BasePairProvider):
     NAME = "opinion-pairs-provider"
+    def __init__(self, entity_value_func, **kwargs):
+        super(OpinionPairsProvider, self).__init__(**kwargs)
+        self.__entity_value_func = entity_value_func
     @property
     def Name(self):
         return self.NAME
     def _create_pair(self, source_entity, target_entity, label):
-        assert(isinstance(source_entity, Entity))
-        assert(isinstance(target_entity, Entity))
-        return Opinion(source_value=source_entity.Value,
-                       target_value=target_entity.Value,
+        return Opinion(source_value=self.__entity_value_func(source_entity),
+                       target_value=self.__entity_value_func(target_entity),
                        label=label)

arekit/common/docs/parsed/providers/text_opinion_pairs.py CHANGED Viewed

@@ -16,8 +16,8 @@ class TextOpinionPairsProvider(BasePairProvider):
     NAME = "text-opinion-pairs-provider"
-    def __init__(self, value_to_group_id_func):
-        super(TextOpinionPairsProvider, self).__init__()
+    def __init__(self, value_to_group_id_func, **kwargs):
+        super(TextOpinionPairsProvider, self).__init__(**kwargs)
         self.__value_to_group_id_func = value_to_group_id_func
         self.__doc_id = None
         self.__entities_collection = None
@@ -36,8 +36,8 @@ class TextOpinionPairsProvider(BasePairProvider):
                            label=label,
                            text_opinion_id=None)
-    def init_parsed_doc(self, parsed_doc):
-        super(TextOpinionPairsProvider, self).init_parsed_doc(parsed_doc)
+    def init_parsed_doc(self, parsed_doc, is_entity_func):
+        super(TextOpinionPairsProvider, self).init_parsed_doc(parsed_doc=parsed_doc, is_entity_func=is_entity_func)
         self.__doc_id = parsed_doc.RelatedDocID
         self.__entities_collection = EntityCollection(
             entities=list(self._doc_entities),

arekit/common/docs/parsed/service.py CHANGED Viewed

@@ -6,7 +6,7 @@ class ParsedDocumentService(object):
     """ Represents a collection of providers, combined with the parsed doc.
     """
-    def __init__(self, parsed_doc, providers):
+    def __init__(self, parsed_doc, providers, is_entity_func):
         assert(isinstance(parsed_doc, ParsedDocument))
         assert(isinstance(providers, list))
         self.__parsed_doc = parsed_doc
@@ -20,7 +20,7 @@ class ParsedDocumentService(object):
             self.__providers[provider.Name] = provider
             # Post initialize with the related parsed doc.
-            provider.init_parsed_doc(self.__parsed_doc)
+            provider.init_parsed_doc(self.__parsed_doc, is_entity_func=is_entity_func)
     @property

arekit/common/model/labeling/single.py CHANGED Viewed

@@ -1,11 +1,15 @@
-import numpy as np
 from arekit.common.model.labeling.base import LabelsHelper
 from arekit.common.model.labeling.modes import LabelCalculationMode
 class SingleLabelsHelper(LabelsHelper):
+    @staticmethod
+    def __sign(x):
+        if x == 0:
+            return 0
+        return -1 if x < 0 else 1
     def aggregate_labels(self, labels_list, label_calc_mode):
         assert(isinstance(labels_list, list))
         assert(isinstance(label_calc_mode, LabelCalculationMode))
@@ -18,7 +22,7 @@ class SingleLabelsHelper(LabelsHelper):
         if label_calc_mode == LabelCalculationMode.AVERAGE:
             int_labels = [self._label_scaler.label_to_int(label)
                           for label in labels_list]
-            label = self._label_scaler.int_to_label(int(np.sign(sum(int_labels))))
+            label = self._label_scaler.int_to_label(SingleLabelsHelper.__sign(sum(int_labels)))
         return label

arekit/common/opinions/annot/algo/pair_based.py CHANGED Viewed

@@ -16,8 +16,9 @@ class PairBasedOpinionAnnotationAlgorithm(BaseOpinionAnnotationAlgorithm):
             [1] Extracting Sentiment Attitudes from Analytical Texts https://arxiv.org/pdf/1808.08932.pdf
     """
-    def __init__(self, dist_in_terms_bound, label_provider, entity_index_func, dist_in_sents=0,
-                 is_entity_ignored_func=None):
+    def __init__(self, dist_in_terms_bound, label_provider,
+                 is_entity_func, entity_index_func, entity_value_func,
+                 dist_in_sents=0, is_entity_ignored_func=None):
         """
         dist_in_terms_bound: int
             max allowed distance in term (less than passed value)
@@ -34,7 +35,9 @@ class PairBasedOpinionAnnotationAlgorithm(BaseOpinionAnnotationAlgorithm):
         self.__dist_in_terms_bound = dist_in_terms_bound
         self.__dist_in_sents = dist_in_sents
         self.__is_entity_ignored_func = is_entity_ignored_func
+        self.__is_entity_func = is_entity_func
         self.__entity_index_func = entity_index_func
+        self.__entity_value_func = entity_value_func
     # region private methods
@@ -90,10 +93,11 @@ class PairBasedOpinionAnnotationAlgorithm(BaseOpinionAnnotationAlgorithm):
             return key is not None
         # Initialize providers.
-        opinions_provider = OpinionPairsProvider(entity_index_func=self.__entity_index_func)
+        opinions_provider = OpinionPairsProvider(entity_index_func=self.__entity_index_func,
+                                                 entity_value_func=self.__entity_value_func)
         entity_service_provider = EntityServiceProvider(entity_index_func=self.__entity_index_func)
-        opinions_provider.init_parsed_doc(parsed_doc)
-        entity_service_provider.init_parsed_doc(parsed_doc)
+        opinions_provider.init_parsed_doc(parsed_doc=parsed_doc, is_entity_func=self.__is_entity_func)
+        entity_service_provider.init_parsed_doc(parsed_doc=parsed_doc, is_entity_func=self.__is_entity_func)
         return opinions_provider.iter_from_all(label_provider=self.__label_provider,
                                                filter_func=__filter_pair_func)

arekit/common/pipeline/base.py CHANGED Viewed

@@ -1,5 +1,4 @@
 from arekit.common.pipeline.context import PipelineContext
-from arekit.common.pipeline.items.base import BasePipelineItem
 class BasePipelineLauncher:
@@ -11,7 +10,6 @@ class BasePipelineLauncher:
         assert(isinstance(src_key, str) or src_key is None)
         for ind, item in enumerate(filter(lambda itm: itm is not None, pipeline)):
-            assert(isinstance(item, BasePipelineItem))
             do_force_key = src_key is not None and ind == 0
             input_data = item.get_source(pipeline_ctx, force_key=src_key if do_force_key else None) \
                 if has_input or ind > 0 else None

arekit/common/pipeline/batching.py CHANGED Viewed

@@ -1,5 +1,4 @@
 from arekit.common.pipeline.context import PipelineContext
-from arekit.common.pipeline.items.base import BasePipelineItem
 class BatchingPipelineLauncher:
@@ -11,8 +10,6 @@ class BatchingPipelineLauncher:
         assert(isinstance(src_key, str) or src_key is None)
         for ind, item in enumerate(filter(lambda itm: itm is not None, pipeline)):
-            assert (isinstance(item, BasePipelineItem))
             # Handle the content of the batch or batch itself.
             content = item.get_source(pipeline_ctx, call_func=False, force_key=src_key if ind == 0 else None)
             handled_batch = [item._src_func(i) if item._src_func is not None else i for i in content]

arekit/contrib/bert/input/providers/cropped_sample.py CHANGED Viewed

@@ -5,13 +5,10 @@ from arekit.contrib.bert.input.providers.text_pair import PairTextProvider
 class CroppedBertSampleRowProvider(CroppedSampleRowProvider):
-    def __init__(self, crop_window_size, label_scaler, text_terms_mapper, text_b_template):
+    def __init__(self, text_b_template, text_terms_mapper, **kwargs):
         text_provider = BaseSingleTextProvider(text_terms_mapper=text_terms_mapper) \
             if text_b_template is None else PairTextProvider(text_b_prompt=text_b_template,
                                                              text_terms_mapper=text_terms_mapper)
-        super(CroppedBertSampleRowProvider, self).__init__(
-            crop_window_size=crop_window_size,
-            label_scaler=label_scaler,
-            text_provider=text_provider)
+        super(CroppedBertSampleRowProvider, self).__init__(text_provider=text_provider, **kwargs)

arekit/contrib/bert/terms/mapper.py CHANGED Viewed

@@ -7,11 +7,11 @@ class BertDefaultStringTextTermsMapper(OpinionContainingTextTermsMapper):
         a base class assumes to provide an orginal frame variant value.
     """
-    def __init__(self, entity_formatter, word_separator=' '):
+    def __init__(self, word_separator=' ', **kwargs):
         """ See https://github.com/nicolay-r/AREkit/issues/377
             for a greater details.
         """
-        super(BertDefaultStringTextTermsMapper, self).__init__(entity_formatter)
+        super(BertDefaultStringTextTermsMapper, self).__init__(**kwargs)
         self.__word_separator = word_separator
     def map_entity(self, e_ind, entity):

arekit/contrib/prompt/sample.py CHANGED Viewed

@@ -8,7 +8,7 @@ class PromptedSampleRowProvider(CroppedSampleRowProvider):
     """ Sample, enriched with the prompt technique.
     """
-    def __init__(self, crop_window_size, label_scaler, text_provider, prompt, label_fmt=None):
+    def __init__(self, prompt, label_fmt=None, **kwargs):
         """ crop_window_size: int
                 crop window size for the original text.
             prompt: str
@@ -17,12 +17,8 @@ class PromptedSampleRowProvider(CroppedSampleRowProvider):
                     text, s_ind, t_ind, s_val, t_val, label_uint
         """
         assert(isinstance(prompt, str))
-        assert(isinstance(text_provider, BaseSingleTextProvider))
         assert(isinstance(label_fmt, StringLabelsFormatter) or label_fmt is None)
-        super(PromptedSampleRowProvider, self).__init__(crop_window_size=crop_window_size,
-                                                        label_scaler=label_scaler,
-                                                        text_provider=text_provider)
+        super(PromptedSampleRowProvider, self).__init__(**kwargs)
         self.__prompt = prompt
         self.__labels_fmt = label_fmt

arekit/contrib/utils/bert/samplers.py CHANGED Viewed

@@ -5,7 +5,7 @@ from arekit.common.data.input.terms_mapper import OpinionContainingTextTermsMapp
 from arekit.contrib.bert.input.providers.text_pair import PairTextProvider
-def create_sample_provider(label_scaler, text_terms_mapper, text_b_prompt=None):
+def create_sample_provider(is_entity_func, label_scaler, text_terms_mapper, text_b_prompt=None):
     assert(isinstance(text_terms_mapper, OpinionContainingTextTermsMapper))
     text_provider = BaseSingleTextProvider(text_terms_mapper=text_terms_mapper) \
@@ -14,4 +14,6 @@ def create_sample_provider(label_scaler, text_terms_mapper, text_b_prompt=None):
     label_provider = MultipleLabelProvider(label_scaler=label_scaler)
-    return BaseSampleRowProvider(text_provider=text_provider, label_provider=label_provider)
+    return BaseSampleRowProvider(text_provider=text_provider,
+                                 label_provider=label_provider,
+                                 is_entity_func=is_entity_func)

arekit/contrib/utils/pipelines/text_opinion/annot/algo_based.py CHANGED Viewed

@@ -9,7 +9,7 @@ class AlgorithmBasedTextOpinionAnnotator(AlgorithmBasedOpinionAnnotator):
     """
     def __init__(self, value_to_group_id_func, annot_algo, create_empty_collection_func,
-                 get_doc_existed_opinions_func=None):
+                 is_entity_func, get_doc_existed_opinions_func=None):
         """ get_doc_existed_opinions_func: func or None
                 function that provides existed opinions for a document;
                 if None, then we consider an absence of the existed document-level opinions.
@@ -20,14 +20,17 @@ class AlgorithmBasedTextOpinionAnnotator(AlgorithmBasedOpinionAnnotator):
             create_empty_collection_func=create_empty_collection_func,
             get_doc_existed_opinions_func=get_doc_existed_opinions_func)
         self.__value_to_group_id_func = value_to_group_id_func
+        self.__is_entity_func = is_entity_func
     def __create_service(self, parsed_doc):
-        return ParsedDocumentService(parsed_doc=parsed_doc, providers=[
-            TextOpinionPairsProvider(self.__value_to_group_id_func)
-        ])
+        return ParsedDocumentService(
+            parsed_doc=parsed_doc,
+            providers=[TextOpinionPairsProvider(self.__value_to_group_id_func, entity_index_func=None)],
+            is_entity_func=self.__is_entity_func
+        )
     def annotate_collection(self, parsed_doc):
-        service = self.__create_service(parsed_doc)
+        service = self.__create_service(parsed_doc=parsed_doc)
         topp = service.get_provider(TextOpinionPairsProvider.NAME)
         for opinion in super(AlgorithmBasedTextOpinionAnnotator, self).annotate_collection(parsed_doc):
             for text_opinion in topp.iter_from_opinion(opinion):

arekit/contrib/utils/pipelines/text_opinion/extraction.py CHANGED Viewed

@@ -12,7 +12,8 @@ from arekit.contrib.utils.pipelines.text_opinion.filters.base import TextOpinion
 from arekit.contrib.utils.pipelines.text_opinion.filters.limitation import FrameworkLimitationsTextOpinionFilter
-def __iter_text_opinion_linkages(parsed_doc, annotators, entity_index_func,
+def __iter_text_opinion_linkages(parsed_doc, annotators,
+                                 is_entity_func, entity_index_func,
                                  text_opinion_filters, use_meta):
     """ use_meta: bool
             this is mainly for the progress-bar and other console parameters to stay up-to-date
@@ -27,7 +28,9 @@ def __iter_text_opinion_linkages(parsed_doc, annotators, entity_index_func,
     def __to_id(text_opinion):
         return "{}_{}".format(text_opinion.SourceId, text_opinion.TargetId)
-    service = ParsedDocumentService(parsed_doc=parsed_doc, providers=[EntityServiceProvider(entity_index_func)])
+    service = ParsedDocumentService(parsed_doc=parsed_doc,
+                                    providers=[EntityServiceProvider(entity_index_func=entity_index_func)],
+                                    is_entity_func=is_entity_func)
     esp = service.get_provider(EntityServiceProvider.NAME)
     predefined = set()
@@ -62,9 +65,12 @@ def __iter_text_opinion_linkages(parsed_doc, annotators, entity_index_func,
         yield MetaEmptyLinkedDataWrapper(doc_id=parsed_doc.RelatedDocID)
-def text_opinion_extraction_pipeline(pipeline_items, get_doc_by_id_func, annotators, entity_index_func, batch_size,
+def text_opinion_extraction_pipeline(pipeline_items, get_doc_by_id_func, annotators,
+                                     is_entity_func, entity_index_func, batch_size,
                                      text_opinion_filters=None, use_meta_between_docs=True):
     assert(callable(get_doc_by_id_func))
+    assert(callable(is_entity_func))
+    assert(callable(entity_index_func))
     assert(isinstance(annotators, list))
     assert(isinstance(text_opinion_filters, list) or text_opinion_filters is None)
     assert(isinstance(use_meta_between_docs, bool))
@@ -83,9 +89,10 @@ def text_opinion_extraction_pipeline(pipeline_items, get_doc_by_id_func, annotat
         # (parsed_doc) -> (text_opinions)
         MapPipelineItem(map_func=lambda parsed_doc: __iter_text_opinion_linkages(
-            annotators=annotators, parsed_doc=parsed_doc, entity_index_func=entity_index_func,
+            annotators=annotators, parsed_doc=parsed_doc,
+            is_entity_func=is_entity_func, entity_index_func=entity_index_func,
             text_opinion_filters=actual_text_opinion_filters, use_meta=use_meta_between_docs)),
         # linkages[] -> linkages
         FlattenIterPipelineItem()
-    ]
+    ]

{arekit-0.25.1.dist-info → arekit-0.25.2.dist-info}/METADATA RENAMED Viewed

@@ -1,12 +1,13 @@
 Metadata-Version: 2.1
 Name: arekit
-Version: 0.25.1
+Version: 0.25.2
 Summary: Document level Attitude and Relation Extraction toolkit (AREkit) for sampling and prompting mass-media news into datasets for ML-model training
 Home-page: https://github.com/nicolay-r/AREkit
 Author: Nicolay Rusnachenko
 Author-email: rusnicolay@gmail.com
 License: MIT License
 Keywords: natural language processing,relation extraction,sentiment analysis
+Platform: UNKNOWN
 Classifier: Programming Language :: Python
 Classifier: Programming Language :: Python :: 3.6
 Classifier: Topic :: Software Development :: Libraries :: Python Modules
@@ -14,14 +15,14 @@ Classifier: Topic :: Scientific/Engineering :: Information Analysis
 Classifier: Topic :: Text Processing :: Linguistic
 Requires-Python: >=3.6
 Description-Content-Type: text/markdown
-License-File: LICENSE
+Requires-Dist: enum34 (==1.1.10)
 Requires-Dist: tqdm
-Requires-Dist: enum34==1.1.10
-Requires-Dist: numpy>=1.14.5
-# AREkit 0.25.1
+# AREkit 0.25.2
 ![](https://img.shields.io/badge/Python-3.9+-brightgreen.svg)
+[![PyPI downloads](https://img.shields.io/pypi/dm/arekit.svg)](https://pypistats.org/packages/arekit)
 <p align="center">
     <img src="logo.png"/>
@@ -59,7 +60,7 @@ for sentence level relations preparation (dubbed as contexts);
 ## Installation
 ```bash
-pip install git+https://github.com/nicolay-r/AREkit.git@0.25.1-rc
+pip install git+https://github.com/nicolay-r/AREkit.git@0.25.2-rc
 ```
 ## Usage
@@ -79,3 +80,5 @@ if you use or extend our work, please cite as follows:
   organization={Springer}
 }
 ```

{arekit-0.25.1.dist-info → arekit-0.25.2.dist-info}/RECORD RENAMED Viewed

@@ -4,7 +4,7 @@ arekit/common/bound.py,sha256=lPpHY6ct_CU9e4qXeYjhJfWbTj6Sb_NVtZ1CJheQPNE,1402
 arekit/common/log_utils.py,sha256=OfEQxbExkuRAl9dxlgFEqcFhI4HHoMYT7WE8ud0IPOM,924
 arekit/common/utils.py,sha256=N061ENJJgvsB338Q9cixc6RWyuikSPQq4Tc8mmgwy9s,2659
 arekit/common/context/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-arekit/common/context/terms_mapper.py,sha256=QA02Cv7D2JKTlXkez_0w0J8HuvNziNF2vrqLgy4Bwc8,1447
+arekit/common/context/terms_mapper.py,sha256=tBs_dMettLjVrqwPwTMZg3Pgxo6PZJpu-Qh6ZOWWFJA,1532
 arekit/common/context/token.py,sha256=CpWAlvprUnJfCtYvO8lwdfU_ofSKAOGOudXTwppyzSk,459
 arekit/common/data/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 arekit/common/data/const.py,sha256=J74zim3CGJlLJp-AVn5z9TTuBfmttjiM_8sRW1Pc-iE,457
@@ -13,7 +13,7 @@ arekit/common/data/rows_fmt.py,sha256=klq9HdzSnhbRBhOw7O4ctp3PZ5L6ZVy-0eIV2vLLYY
 arekit/common/data/rows_parser.py,sha256=qYSEETvhX_0_JuAqm0bjK_V28_53qq7OY9JAnBdRC78,1513
 arekit/common/data/input/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 arekit/common/data/input/sample.py,sha256=6JeGxsLbEUXVKPWA1hIlkTDNOaYg4bHCJWw0ULrLByg,2143
-arekit/common/data/input/terms_mapper.py,sha256=DUOMbGwiQETY7qhztoU8uU30d1cQPsIsgNLldpjcufg,3197
+arekit/common/data/input/terms_mapper.py,sha256=pOD8lGsdM-23maXr9nlHM1QMJ3hsx_5HGe6X3aQcq6k,3133
 arekit/common/data/input/providers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 arekit/common/data/input/providers/const.py,sha256=GDvPkgP7hllHW3QiueMBQgQyu2CtNFI4JYNNja2Im6Q,187
 arekit/common/data/input/providers/contents.py,sha256=jT1LJE_5Igw5H2e1jKsWWciHSbPVg649phT177SzhEA,261
@@ -30,38 +30,34 @@ arekit/common/data/input/providers/label/binary.py,sha256=jPD6Jn8DYMrdI3jN8ueoWv
 arekit/common/data/input/providers/label/multiple.py,sha256=HWbHF_CwwbiLQbYm5dgvnXAm0b6tJOyFYFEUBxuWAqI,492
 arekit/common/data/input/providers/rows/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 arekit/common/data/input/providers/rows/base.py,sha256=syH7ZEW3Agwfb1IR0G7n_Amy3Kkg0EZk2V7kH3r7ADg,2517
-arekit/common/data/input/providers/rows/samples.py,sha256=uqLTP8fnz-0wC7ALLlIDUYtXTG4OpnRqp70Fgv_1Iiw,9427
+arekit/common/data/input/providers/rows/samples.py,sha256=iUBmKTnevAyfXDb4d6_Wntfw59wWASqSteXOhD5ez64,9334
 arekit/common/data/input/providers/sample/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-arekit/common/data/input/providers/sample/cropped.py,sha256=jJSos4Si-qy-wb-QmomXxxgURR1UhJnvY0tZoowlfVc,1885
+arekit/common/data/input/providers/sample/cropped.py,sha256=RSoDIoqIodANBW7zmj91ltgw4eYGISCWfl6zLuQXwFM,1831
 arekit/common/data/input/providers/text/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 arekit/common/data/input/providers/text/single.py,sha256=vm3sShIYZcmses-hmZX9cOfveWXCYGwvKLgQ0qs3VXQ,1604
 arekit/common/data/storages/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-arekit/common/data/storages/base.py,sha256=psxo5uIc3hUDi5Cgf4j3Cm-935Fy1VQBYzcBzCcCFZE,2661
+arekit/common/data/storages/base.py,sha256=xMMfHhG68ZraERLbipCN_OhqpLBSDq_S56qAtxGsU7Y,2595
 arekit/common/docs/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 arekit/common/docs/base.py,sha256=uXUOtpR9BEsDBfDHg4eLqOjfSVOV_o9VPii3nSxLZuY,734
-arekit/common/docs/entities_grouping.py,sha256=_r254fNr0j6BjHuLZBLjj21yWm4_k__5aOcBXcAaQUQ,704
+arekit/common/docs/entities_grouping.py,sha256=9Xr5NsrWD9_jjKLFE7HOqjkOibzjz840ef04CekkXNU,765
 arekit/common/docs/entity.py,sha256=TxrZMdIEgjk-PgCyskCkVis2KAw_M7vTBp3ppP6G05M,662
 arekit/common/docs/parser.py,sha256=dzWjpbbYt-C9UU9sSy_Holnm0kQxJqtz1_6va6kS_L4,1780
 arekit/common/docs/sentence.py,sha256=nZCCFj2yk71POoXCBfEMN3pteM2qQdj60eEzxMVY_3k,302
 arekit/common/docs/parsed/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-arekit/common/docs/parsed/base.py,sha256=WPstqOpBuLKjtz6UO_bI0DpOPF3Sm0wYEVwjtldbPXE,3175
-arekit/common/docs/parsed/service.py,sha256=fSzwtRcSvmvlW8LyK6XPf7wJAx66GWlbRgH_3oQf-BU,1029
+arekit/common/docs/parsed/base.py,sha256=e43kQyxeO-eaPKr3-5SyZ4N33QIDDePTE_CGmEliO7c,3168
+arekit/common/docs/parsed/service.py,sha256=k_4k9EQ7iFq97bvAZHz6dtxCltiJQMd3Suv5W_t7MBE,1076
 arekit/common/docs/parsed/term_position.py,sha256=H9eQQeanLxwP6og30TQUnpcXymGEPwXClRpaE8VnpLs,1040
 arekit/common/docs/parsed/providers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-arekit/common/docs/parsed/providers/base.py,sha256=IjnG7c6Q78cYYAPTrwuZCOiMQDfMaujDQ6U0gK7JCcw,2587
+arekit/common/docs/parsed/providers/base.py,sha256=9MPqxC8mTD4naXH_AoOH0bIPNR7wR9GkOL-Nm2D6Kdo,2543
 arekit/common/docs/parsed/providers/base_pairs.py,sha256=RDYjspkENPQU2pn7Jp5mFrL9566eVWgXMEzWBQlMdRo,2195
-arekit/common/docs/parsed/providers/entity_service.py,sha256=oaBfferpkDXfAFL17vpecSZUsV1Pjvq6lqgHDHsIEZY,6657
-arekit/common/docs/parsed/providers/opinion_pairs.py,sha256=ibeFmvpMBBARtqQ3EKEocIOulgzavv0DeYxePGQK5-U,633
-arekit/common/docs/parsed/providers/text_opinion_pairs.py,sha256=BC4uVgFxy3oZTkCq9VgOlqoqhODia2Z3anoGyGoy0ao,3139
+arekit/common/docs/parsed/providers/entity_service.py,sha256=An_urYXU4r1PKIUNfhlGCjK6UNLwr3EkebkiaodBsRg,5895
+arekit/common/docs/parsed/providers/opinion_pairs.py,sha256=dSd698VSbVefT0VbuQehaErquFixBfs42OAdX3BJH5M,693
+arekit/common/docs/parsed/providers/text_opinion_pairs.py,sha256=MK1-m2_LJgjeis6AvY1hwT2N8rqHRCpIp7oWqXzgk9I,3215
 arekit/common/entities/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 arekit/common/entities/base.py,sha256=kpJFo4pCRVBQX6T8PibLKspp9UwoIrkHDoFMTM9KkUs,1646
 arekit/common/entities/collection.py,sha256=ySSriMYP6zzdto1mC0V9VPXmkAqyJN3mmGoqoNValGI,1931
 arekit/common/entities/str_fmt.py,sha256=gAPeS8RXdhh8Px_u5eOAPbtLREiiyMueid0lQoa4EbQ,250
 arekit/common/entities/types.py,sha256=pxFB0gsevdsmnduN_Ffk7_P2TRiMt6NAHyrutuKOFvs,145
-arekit/common/experiment/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-arekit/common/experiment/data_type.py,sha256=DezUkfwLTf6XLYheqPiaWyx3ZwcldsJ8wDV8aNgJtDk,227
-arekit/common/experiment/api/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-arekit/common/experiment/api/base_samples_io.py,sha256=SN8CnbEYaazE3SldvnENfjoNRHsTejtrg4jJfqfZLMs,516
 arekit/common/frames/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 arekit/common/frames/text_variant.py,sha256=TlWR4jnuF7HW9BMHhOTKkr768V_Ub0wd0E5A4YTwD0c,875
 arekit/common/frames/connotations/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -89,7 +85,7 @@ arekit/common/model/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSu
 arekit/common/model/labeling/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 arekit/common/model/labeling/base.py,sha256=uj7_igCWEU23OjnzabNy0LyxoZ6S_qSfCA-ZaoL1erA,727
 arekit/common/model/labeling/modes.py,sha256=DiwC6Aomke-ojwwpR2pcd4qgQSwmRdGCvQlyHHhN3YY,127
-arekit/common/model/labeling/single.py,sha256=Eggi0obocjiT9ofv_U0zLiFoEIeUQhaMCqjCWn14Fh8,773
+arekit/common/model/labeling/single.py,sha256=HJMFffbxfmV6dKK8t-MKjD-bOx_wuWUs35zmcSWcUL0,878
 arekit/common/opinions/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 arekit/common/opinions/base.py,sha256=eIx1RzsngCkpnF2Utju5i_Qp7gqF_rDIe_UDeMGXtmo,2112
 arekit/common/opinions/collection.py,sha256=bdx-CIYYdE-DrjyB1mRTGtkLb-lrGPTSLl25xv5EHnM,4938
@@ -101,11 +97,11 @@ arekit/common/opinions/annot/algo_based.py,sha256=cvDGDmUoUaQ1Xcbyouxrjs0CkHRfRo
 arekit/common/opinions/annot/base.py,sha256=IvwrwT8O3s6b2_R0arpMR4Uog7kuWQZUAyRP5cq_27A,382
 arekit/common/opinions/annot/algo/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 arekit/common/opinions/annot/algo/base.py,sha256=ymll-4-SplCY7CLswjOZEC1vsVHIEzUP0JMYgvL8hbo,124
-arekit/common/opinions/annot/algo/pair_based.py,sha256=HbYn1mAsn5g11NiC9pfrMqNtJn_GzvqPFGpafMqqB2o,4419
+arekit/common/opinions/annot/algo/pair_based.py,sha256=0m0l-KEDvtARDEnl8Sr_MeEJp3yT1re_VsNAO2ZQQUM,4762
 arekit/common/opinions/annot/algo/predefined.py,sha256=zU39SADPKnykHCNB-Bmn_0bvd6gYWWYmfgfi-68hHSs,741
 arekit/common/pipeline/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-arekit/common/pipeline/base.py,sha256=8TgWNy5QrnKEp1bq3lhyGSgIfYe5ZIZU3c-DYBJ9LPA,957
-arekit/common/pipeline/batching.py,sha256=DdOvOladOo2aEv3JZ8NQnCvsNGcWk4TFzENrZqTGyXk,1239
+arekit/common/pipeline/base.py,sha256=RHpZs4OT2t9wGTMUxtpBM7q-zCrNQbf3-BFDy9Bcz4M,839
+arekit/common/pipeline/batching.py,sha256=zm1SLSJz8T9gXrBdiztzS2f7VSWb4uFcYkzEu5TIfrE,1119
 arekit/common/pipeline/context.py,sha256=Fw25lBVakHNAXjtkdEqopR-Jh59cDKGWD2jCJxBrj7Y,1126
 arekit/common/pipeline/conts.py,sha256=NAQNsHt1kK3HnxWv3M6yXi0c7C6Mx6ZZ6KZc0yE0eas,70
 arekit/common/pipeline/utils.py,sha256=5VqH1LtRa4tYUbyiRvWdBmP4biFhTKq9vhr8QiRFFkY,882
@@ -130,15 +126,15 @@ arekit/contrib/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 arekit/contrib/bert/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 arekit/contrib/bert/input/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 arekit/contrib/bert/input/providers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-arekit/contrib/bert/input/providers/cropped_sample.py,sha256=46uHHhAe8cGxV2JlfO3thog5XV6T2niUIflFghfUSBM,866
+arekit/contrib/bert/input/providers/cropped_sample.py,sha256=WJNAzILJDMYYhGpxg1r1F3f1X71kVV30gDhkgwH59H0,755
 arekit/contrib/bert/input/providers/text_pair.py,sha256=_1d-he0n42y3ksj8RjJlNHgHnaQUEq0aQhUdTPRMKgg,2817
 arekit/contrib/bert/terms/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-arekit/contrib/bert/terms/mapper.py,sha256=oHX-lsaZYjBFLjngzSKT5z_JPJCHbclUsEe4i4fup_8,992
+arekit/contrib/bert/terms/mapper.py,sha256=YMY1JasNc___83ihiV1KqzwGyC3qs3ZNN90NmHqBEZ0,976
 arekit/contrib/prompt/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-arekit/contrib/prompt/sample.py,sha256=MxpbDR0ww7WmdtuPu74B8R6QKVXeuzO0CKGOJIYwbRk,3164
+arekit/contrib/prompt/sample.py,sha256=iDwe65pUBIrk0Hjh8v7o1XesRPxCVsJojw-dcASPmWc,2867
 arekit/contrib/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 arekit/contrib/utils/bert/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-arekit/contrib/utils/bert/samplers.py,sha256=ZVe3rbUAH0Jw1xR_yHE1DoUJf3CI0pDgbBQQzlLWevc,989
+arekit/contrib/utils/bert/samplers.py,sha256=vleluRLRFzDkGRZ_ReeHsY8IJAS-TxJgoTTro4mYrs4,1102
 arekit/contrib/utils/data/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 arekit/contrib/utils/data/contents/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 arekit/contrib/utils/data/contents/opinions.py,sha256=MSV7NytEe15adKhhHCq5KiCj6ZBq31nV-u2rcSfFCgE,1738
@@ -147,7 +143,6 @@ arekit/contrib/utils/data/doc_provider/dict_based.py,sha256=zUOiiIbj5zby4xqMb0m9
 arekit/contrib/utils/data/doc_provider/dir_based.py,sha256=FTw3kLV_CYtPoUoHl39IrP6RjLvTecCno9May95jVXw,1916
 arekit/contrib/utils/data/storages/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 arekit/contrib/utils/data/storages/jsonl_based.py,sha256=dz8uizu9t1C215o0HEL8y4LiDKR4aC_-OwDu_xF0xIM,522
-arekit/contrib/utils/data/storages/pandas_based.py,sha256=gMkWUFHZE9Oe1Uy04vEBcUfTIAdh46r5zpjlPAwwG2g,3842
 arekit/contrib/utils/data/storages/row_cache.py,sha256=MRK0uJFvw6O99k2aFb3JLZhLUBo2JUO-WYQ4EeRRu6M,2051
 arekit/contrib/utils/data/storages/sqlite_based.py,sha256=cIYAHyiB4CMftKgrgLqw-L4F1WnhbspjwWLSPqH5NHk,682
 arekit/contrib/utils/data/writers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -155,7 +150,6 @@ arekit/contrib/utils/data/writers/base.py,sha256=JLwf5WVl_U319sdMev8YOn4OoCcrgNI
 arekit/contrib/utils/entities/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 arekit/contrib/utils/entities/filter.py,sha256=aHTExIMFaMdy4QL8iYE23eiby3qLImAakXR6gNqG6fs,145
 arekit/contrib/utils/entities/formatters/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-arekit/contrib/utils/entities/formatters/str_display.py,sha256=N8igv7EVaTFayvLXkyBGtm67KwHaeP-M-L8d7oqBG9Q,401
 arekit/contrib/utils/entities/formatters/str_simple_sharp_prefixed_fmt.py,sha256=rEUIma9O3kOBWIguGtJ69JH-00Dhm0vUBOd5yNcKweY,653
 arekit/contrib/utils/io_utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 arekit/contrib/utils/io_utils/utils.py,sha256=310SIJTsNLn2OZrGPer9W4ZP52PHkjBK3zsyqxVs3h0,537
@@ -163,24 +157,22 @@ arekit/contrib/utils/pipelines/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5
 arekit/contrib/utils/pipelines/opinion_collections.py,sha256=y9-klVJGCN9mPd7t1ECllAiCnAb3MKVXC1PnYddp5sQ,3195
 arekit/contrib/utils/pipelines/items/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 arekit/contrib/utils/pipelines/items/text/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-arekit/contrib/utils/pipelines/items/text/entities_default.py,sha256=vNx5ir2mf7a1gg_OeqUsf_p1Fu2k7QIFxVpe-CuwZ84,727
 arekit/contrib/utils/pipelines/items/text/frames.py,sha256=pZQybYfgEQB1DM3PtmsgrtB2Xl0HejmP4rhT0nR_YKE,2586
 arekit/contrib/utils/pipelines/text_opinion/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-arekit/contrib/utils/pipelines/text_opinion/extraction.py,sha256=MT1WMlvVI25JRL0g7W83bV8BGUr7_MNOQBj7ZAHgrnU,4245
+arekit/contrib/utils/pipelines/text_opinion/extraction.py,sha256=kKBQTvZxYYf9tBYmUv3Ipj9OOYKmHnYG0y5Gyjt27yA,4587
 arekit/contrib/utils/pipelines/text_opinion/annot/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-arekit/contrib/utils/pipelines/text_opinion/annot/algo_based.py,sha256=bwS-UR2x3rgp_xqnf6z-73T-eIZE_kltRSGYxgd_WpU,1751
+arekit/contrib/utils/pipelines/text_opinion/annot/algo_based.py,sha256=69xmuxqVmsYxBYpV2gYF7j3Z5iPk0ndjnOZe2Yy5WDA,1911
 arekit/contrib/utils/pipelines/text_opinion/filters/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 arekit/contrib/utils/pipelines/text_opinion/filters/base.py,sha256=GnKnJB4MKqiMSJny3a9Na7l7Csm7abbt6GADBCY18Mw,143
 arekit/contrib/utils/pipelines/text_opinion/filters/distance_based.py,sha256=3Pjq4IJJMT7dYpK266lN66WQJUnQO3P0rG6wcAvJOOA,649
 arekit/contrib/utils/pipelines/text_opinion/filters/entity_based.py,sha256=pdWFJaKh4kKIsUuBNp3WNy5Rj80CjWEy2wp-0axFnrI,1254
 arekit/contrib/utils/pipelines/text_opinion/filters/limitation.py,sha256=4AFS5zhocJuYphGO2ZMWmYTtIhGItKDTkB0--AmjgnA,1151
-arekit/contrib/utils/processing/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 arekit/contrib/utils/synonyms/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 arekit/contrib/utils/synonyms/simple.py,sha256=ST9EwuWP88FzbyV8Gi0-biTPgGOsZ7OWyaBWHL_U_eo,557
 arekit/contrib/utils/synonyms/stemmer_based.py,sha256=q19P_XOCWN2_JrBtybAt7ToMIr1ambw4ahr0fSEEHmQ,1400
-arekit-0.25.1.data/data/logo.png,sha256=S8OZ4MGGD72Pf5co7ngYbXKkJH1EUhbErUXv1ZjUWiU,45718
-arekit-0.25.1.dist-info/LICENSE,sha256=JO9tIbxAvhwDv73cX-gUStr9yA-TY7wusUeLHRx7JuY,1076
-arekit-0.25.1.dist-info/METADATA,sha256=ryWGTL4fYqR36z2qh1UuYBg6UIU6n7_U9Y09KPRS6xk,3177
-arekit-0.25.1.dist-info/WHEEL,sha256=eOLhNAGa2EW3wWl_TU484h7q1UNgy0JXjjoqKoxAAQc,92
-arekit-0.25.1.dist-info/top_level.txt,sha256=4pXuFE8IE0lBsqi6ZsR7figx0H939VIX4_-76YIbkOQ,7
-arekit-0.25.1.dist-info/RECORD,,
+arekit-0.25.2.data/data/logo.png,sha256=S8OZ4MGGD72Pf5co7ngYbXKkJH1EUhbErUXv1ZjUWiU,45718
+arekit-0.25.2.dist-info/LICENSE,sha256=JO9tIbxAvhwDv73cX-gUStr9yA-TY7wusUeLHRx7JuY,1076
+arekit-0.25.2.dist-info/METADATA,sha256=CsXviPZIM44LGhiyBRH-MK0DGOP7UAc4GHbvSaLcwxw,3252
+arekit-0.25.2.dist-info/WHEEL,sha256=g4nMs7d-Xl9-xC9XovUrsDHGXt-FT0E17Yqo92DEfvY,92
+arekit-0.25.2.dist-info/top_level.txt,sha256=4pXuFE8IE0lBsqi6ZsR7figx0H939VIX4_-76YIbkOQ,7
+arekit-0.25.2.dist-info/RECORD,,

{arekit-0.25.1.dist-info → arekit-0.25.2.dist-info}/WHEEL RENAMED Viewed

@@ -1,5 +1,5 @@
 Wheel-Version: 1.0
-Generator: bdist_wheel (0.44.0)
+Generator: bdist_wheel (0.34.2)
 Root-Is-Purelib: true
 Tag: py3-none-any

arekit/common/experiment/__init__.py DELETED Viewed

File without changes

arekit/common/experiment/api/__init__.py DELETED Viewed

File without changes

arekit/common/experiment/api/base_samples_io.py DELETED Viewed

@@ -1,20 +0,0 @@
-class BaseSamplesIO(object):
-    """ Represents base experiment utils for input/output for:
-        samples -- data that utilized for experiments;
-        results -- evaluation of experiments.
-    """
-    @property
-    def Reader(self):
-        raise NotImplementedError()
-    @property
-    def Writer(self):
-        """ For serialization
-        """
-        raise NotImplementedError()
-    def create_target(self, data_type):
-        """ Path for reaiding/viewing
-        """
-        raise NotImplementedError()

arekit/common/experiment/data_type.py DELETED Viewed

@@ -1,17 +0,0 @@
-from enum import Enum
-class DataType(Enum):
-    """
-    Describes collection types that supportes in
-    current implementation, and provides by collections.
-    """
-    Train = 1
-    Test = 2
-    Dev = 3
-    Etalon = 4

arekit/contrib/utils/data/storages/pandas_based.py DELETED Viewed

@@ -1,108 +0,0 @@
-import importlib
-import numpy as np
-from arekit.common.data.input.providers.columns.base import BaseColumnsProvider
-from arekit.common.data.storages.base import BaseRowsStorage, logger
-from arekit.common.utils import progress_bar_iter
-class PandasBasedRowsStorage(BaseRowsStorage):
-    """ Storage Kernel functions implementation,
-        based on the pandas DataFrames.
-    """
-    def __init__(self, df=None, **kwargs):
-        super(PandasBasedRowsStorage, self).__init__(**kwargs)
-        self._df = df
-    @property
-    def DataFrame(self):
-        # TODO. Temporary hack, however this should be removed in future.
-        return self._df
-    @staticmethod
-    def __create_empty(cols_with_types):
-        """ cols_with_types: list of pairs ("name", dtype)
-        """
-        assert(isinstance(cols_with_types, list))
-        data = np.empty(0, dtype=np.dtype(cols_with_types))
-        pd = importlib.import_module("pandas")
-        return pd.DataFrame(data)
-    def __filter(self, column_name, value):
-        return self._df[self._df[column_name] == value]
-    @staticmethod
-    def __iter_rows_core(df):
-        for row_index, row in df.iterrows():
-            yield row_index, row
-    def __fill_with_blank_rows(self, row_id_column_name, rows_count):
-        assert(isinstance(row_id_column_name, str))
-        assert(isinstance(rows_count, int))
-        self._df[row_id_column_name] = list(range(rows_count))
-        self._df.set_index(row_id_column_name, inplace=True)
-    # region protected methods
-    def iter_column_names(self):
-        return iter(self._df.columns)
-    def iter_column_types(self):
-        return iter(self._df.dtypes)
-    def _set_row_value(self, row_ind, column, value):
-        self._df.at[row_ind, column] = value
-    def _iter_rows(self):
-        for row_index, row in self.__iter_rows_core(self._df):
-            yield row_index, row.to_dict()
-    def _get_rows_count(self):
-        return len(self._df)
-    # endregion
-    # region public methods
-    def fill(self, iter_rows_func, columns_provider, row_handler=None, rows_count=None, desc=""):
-        """ NOTE: We provide the rows counting which is required
-            in order to know an expected amount of rows in advace
-            due to the specifics of the pandas memory allocation
-            for the DataFrames.
-            The latter allows us avoid rows appending, which
-            may significantly affects on performance once the size
-            of DataFrame becomes relatively large.
-        """
-        assert(isinstance(columns_provider, BaseColumnsProvider))
-        logger.info("Rows calculation process started. [Required by Pandas-Based storage kernel]")
-        logged_rows_it = progress_bar_iter(
-            iterable=iter_rows_func(True),
-            desc="Calculating rows count ({reason})".format(reason=desc),
-            unit="rows")
-        rows_count = sum(1 for _ in logged_rows_it)
-        logger.info("Filling with blank rows: {}".format(rows_count))
-        self.__fill_with_blank_rows(row_id_column_name=columns_provider.ROW_ID,
-                                    rows_count=rows_count)
-        logger.info("Completed!")
-        super(PandasBasedRowsStorage, self).fill(iter_rows_func=iter_rows_func,
-                                                 row_handler=row_handler,
-                                                 columns_provider=columns_provider,
-                                                 rows_count=rows_count)
-    def get_row(self, row_index):
-        return self._df.iloc[row_index]
-    def init_empty(self, columns_provider):
-        cols_with_types = columns_provider.get_columns_list_with_types()
-        self._df = self.__create_empty(cols_with_types)
-    def free(self):
-        del self._df
-        super(PandasBasedRowsStorage, self).free()
-    # endregion

arekit/contrib/utils/entities/formatters/str_display.py DELETED Viewed

@@ -1,11 +0,0 @@
-from arekit.common.entities.base import Entity
-from arekit.common.entities.str_fmt import StringEntitiesFormatter
-class StringEntitiesDisplayValueFormatter(StringEntitiesFormatter):
-    """ Provides the contents of the DisplayValue property.
-    """
-    def to_string(self, original_value, entity_type):
-        assert(isinstance(original_value, Entity))
-        return original_value.DisplayValue

arekit/contrib/utils/pipelines/items/text/entities_default.py DELETED Viewed

@@ -1,23 +0,0 @@
-from arekit.common.entities.base import Entity
-from arekit.common.pipeline.items.base import BasePipelineItem
-class TextEntitiesParser(BasePipelineItem):
-    def __init__(self, **kwargs):
-        super(TextEntitiesParser, self).__init__(**kwargs)
-    @staticmethod
-    def __process_word(word):
-        assert(isinstance(word, str))
-        # If this is a special word which is related to the [entity] mention.
-        if word[0] == "[" and word[-1] == "]":
-            entity = Entity(value=word[1:-1], e_type=None)
-            return entity
-        return word
-    def apply_core(self, input_data, pipeline_ctx):
-        assert(isinstance(input_data, list))
-        return [self.__process_word(w) for w in input_data]

arekit/contrib/utils/processing/__init__.py DELETED Viewed

File without changes

{arekit-0.25.1.data → arekit-0.25.2.data}/data/logo.png RENAMED Viewed

File without changes

{arekit-0.25.1.dist-info → arekit-0.25.2.dist-info}/LICENSE RENAMED Viewed

File without changes

{arekit-0.25.1.dist-info → arekit-0.25.2.dist-info}/top_level.txt RENAMED Viewed

File without changes

arekit 0.25.1__py3-none-any.whl → 0.25.2__py3-none-any.whl

arekit 0.25.1__py3-none-any.whl → 0.25.2__py3-none-any.whl