arekit 0.25.0__py3-none-any.whl → 0.25.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (115) hide show
  1. arekit/common/context/terms_mapper.py +5 -2
  2. arekit/common/data/input/providers/rows/samples.py +8 -12
  3. arekit/common/data/input/providers/sample/cropped.py +4 -3
  4. arekit/common/data/input/terms_mapper.py +4 -8
  5. arekit/common/data/storages/base.py +4 -18
  6. arekit/common/docs/entities_grouping.py +5 -3
  7. arekit/common/docs/parsed/base.py +3 -3
  8. arekit/common/docs/parsed/providers/base.py +3 -5
  9. arekit/common/docs/parsed/providers/entity_service.py +7 -28
  10. arekit/common/docs/parsed/providers/opinion_pairs.py +6 -6
  11. arekit/common/docs/parsed/providers/text_opinion_pairs.py +4 -4
  12. arekit/common/docs/parsed/service.py +2 -2
  13. arekit/common/docs/parser.py +3 -30
  14. arekit/common/model/labeling/single.py +7 -3
  15. arekit/common/opinions/annot/algo/pair_based.py +9 -5
  16. arekit/common/pipeline/base.py +0 -2
  17. arekit/common/pipeline/batching.py +0 -3
  18. arekit/common/pipeline/items/base.py +1 -1
  19. arekit/common/utils.py +11 -8
  20. arekit/contrib/bert/input/providers/cropped_sample.py +2 -5
  21. arekit/contrib/bert/terms/mapper.py +2 -2
  22. arekit/contrib/prompt/sample.py +2 -6
  23. arekit/contrib/utils/bert/samplers.py +4 -2
  24. arekit/contrib/utils/data/storages/jsonl_based.py +2 -1
  25. arekit/contrib/utils/data/storages/row_cache.py +2 -1
  26. arekit/contrib/utils/data/storages/sqlite_based.py +2 -1
  27. arekit/contrib/utils/pipelines/text_opinion/annot/algo_based.py +8 -5
  28. arekit/contrib/utils/pipelines/text_opinion/extraction.py +16 -8
  29. {arekit-0.25.0.dist-info → arekit-0.25.2.dist-info}/METADATA +10 -8
  30. {arekit-0.25.0.dist-info → arekit-0.25.2.dist-info}/RECORD +34 -115
  31. {arekit-0.25.0.dist-info → arekit-0.25.2.dist-info}/WHEEL +1 -1
  32. arekit/common/data/input/repositories/__init__.py +0 -0
  33. arekit/common/data/input/repositories/base.py +0 -68
  34. arekit/common/data/input/repositories/sample.py +0 -22
  35. arekit/common/data/views/__init__.py +0 -0
  36. arekit/common/data/views/samples.py +0 -26
  37. arekit/common/experiment/__init__.py +0 -0
  38. arekit/common/experiment/api/__init__.py +0 -0
  39. arekit/common/experiment/api/base_samples_io.py +0 -20
  40. arekit/common/experiment/data_type.py +0 -17
  41. arekit/common/service/__init__.py +0 -0
  42. arekit/common/service/sqlite.py +0 -36
  43. arekit/contrib/networks/__init__.py +0 -0
  44. arekit/contrib/networks/embedding.py +0 -149
  45. arekit/contrib/networks/embedding_io.py +0 -18
  46. arekit/contrib/networks/input/__init__.py +0 -0
  47. arekit/contrib/networks/input/const.py +0 -6
  48. arekit/contrib/networks/input/ctx_serialization.py +0 -28
  49. arekit/contrib/networks/input/embedding/__init__.py +0 -0
  50. arekit/contrib/networks/input/embedding/matrix.py +0 -29
  51. arekit/contrib/networks/input/embedding/offsets.py +0 -55
  52. arekit/contrib/networks/input/formatters/__init__.py +0 -0
  53. arekit/contrib/networks/input/formatters/pos_mapper.py +0 -22
  54. arekit/contrib/networks/input/providers/__init__.py +0 -0
  55. arekit/contrib/networks/input/providers/sample.py +0 -129
  56. arekit/contrib/networks/input/providers/term_connotation.py +0 -23
  57. arekit/contrib/networks/input/providers/text.py +0 -24
  58. arekit/contrib/networks/input/rows_parser.py +0 -47
  59. arekit/contrib/networks/input/term_types.py +0 -13
  60. arekit/contrib/networks/input/terms_mapping.py +0 -60
  61. arekit/contrib/networks/vectorizer.py +0 -6
  62. arekit/contrib/utils/data/readers/__init__.py +0 -0
  63. arekit/contrib/utils/data/readers/base.py +0 -7
  64. arekit/contrib/utils/data/readers/csv_pd.py +0 -38
  65. arekit/contrib/utils/data/readers/jsonl.py +0 -15
  66. arekit/contrib/utils/data/readers/sqlite.py +0 -14
  67. arekit/contrib/utils/data/service/__init__.py +0 -0
  68. arekit/contrib/utils/data/service/balance.py +0 -50
  69. arekit/contrib/utils/data/storages/pandas_based.py +0 -123
  70. arekit/contrib/utils/data/writers/csv_native.py +0 -63
  71. arekit/contrib/utils/data/writers/csv_pd.py +0 -40
  72. arekit/contrib/utils/data/writers/json_opennre.py +0 -132
  73. arekit/contrib/utils/data/writers/sqlite_native.py +0 -114
  74. arekit/contrib/utils/embeddings/__init__.py +0 -0
  75. arekit/contrib/utils/embeddings/rusvectores.py +0 -58
  76. arekit/contrib/utils/embeddings/tokens.py +0 -30
  77. arekit/contrib/utils/entities/formatters/str_display.py +0 -11
  78. arekit/contrib/utils/io_utils/embedding.py +0 -72
  79. arekit/contrib/utils/np_utils/__init__.py +0 -0
  80. arekit/contrib/utils/np_utils/embedding.py +0 -22
  81. arekit/contrib/utils/np_utils/npz_utils.py +0 -13
  82. arekit/contrib/utils/np_utils/vocab.py +0 -20
  83. arekit/contrib/utils/pipelines/items/sampling/__init__.py +0 -0
  84. arekit/contrib/utils/pipelines/items/sampling/base.py +0 -94
  85. arekit/contrib/utils/pipelines/items/sampling/networks.py +0 -55
  86. arekit/contrib/utils/pipelines/items/text/entities_default.py +0 -23
  87. arekit/contrib/utils/pipelines/items/text/frames_lemmatized.py +0 -36
  88. arekit/contrib/utils/pipelines/items/text/frames_negation.py +0 -33
  89. arekit/contrib/utils/pipelines/items/text/tokenizer.py +0 -105
  90. arekit/contrib/utils/pipelines/items/text/translator.py +0 -136
  91. arekit/contrib/utils/processing/__init__.py +0 -0
  92. arekit/contrib/utils/processing/languages/__init__.py +0 -0
  93. arekit/contrib/utils/processing/languages/mods.py +0 -12
  94. arekit/contrib/utils/processing/languages/pos.py +0 -23
  95. arekit/contrib/utils/processing/languages/ru/__init__.py +0 -0
  96. arekit/contrib/utils/processing/languages/ru/cases.py +0 -78
  97. arekit/contrib/utils/processing/languages/ru/constants.py +0 -6
  98. arekit/contrib/utils/processing/languages/ru/mods.py +0 -13
  99. arekit/contrib/utils/processing/languages/ru/number.py +0 -23
  100. arekit/contrib/utils/processing/languages/ru/pos_service.py +0 -36
  101. arekit/contrib/utils/processing/lemmatization/__init__.py +0 -0
  102. arekit/contrib/utils/processing/lemmatization/mystem.py +0 -51
  103. arekit/contrib/utils/processing/pos/__init__.py +0 -0
  104. arekit/contrib/utils/processing/pos/base.py +0 -12
  105. arekit/contrib/utils/processing/pos/mystem_wrap.py +0 -134
  106. arekit/contrib/utils/processing/pos/russian.py +0 -10
  107. arekit/contrib/utils/processing/text/__init__.py +0 -0
  108. arekit/contrib/utils/processing/text/tokens.py +0 -127
  109. arekit/contrib/utils/serializer.py +0 -42
  110. arekit/contrib/utils/vectorizers/__init__.py +0 -0
  111. arekit/contrib/utils/vectorizers/bpe.py +0 -93
  112. arekit/contrib/utils/vectorizers/random_norm.py +0 -39
  113. {arekit-0.25.0.data → arekit-0.25.2.data}/data/logo.png +0 -0
  114. {arekit-0.25.0.dist-info → arekit-0.25.2.dist-info}/LICENSE +0 -0
  115. {arekit-0.25.0.dist-info → arekit-0.25.2.dist-info}/top_level.txt +0 -0
@@ -5,13 +5,10 @@ from arekit.contrib.bert.input.providers.text_pair import PairTextProvider
5
5
 
6
6
  class CroppedBertSampleRowProvider(CroppedSampleRowProvider):
7
7
 
8
- def __init__(self, crop_window_size, label_scaler, text_terms_mapper, text_b_template):
8
+ def __init__(self, text_b_template, text_terms_mapper, **kwargs):
9
9
 
10
10
  text_provider = BaseSingleTextProvider(text_terms_mapper=text_terms_mapper) \
11
11
  if text_b_template is None else PairTextProvider(text_b_prompt=text_b_template,
12
12
  text_terms_mapper=text_terms_mapper)
13
13
 
14
- super(CroppedBertSampleRowProvider, self).__init__(
15
- crop_window_size=crop_window_size,
16
- label_scaler=label_scaler,
17
- text_provider=text_provider)
14
+ super(CroppedBertSampleRowProvider, self).__init__(text_provider=text_provider, **kwargs)
@@ -7,11 +7,11 @@ class BertDefaultStringTextTermsMapper(OpinionContainingTextTermsMapper):
7
7
  a base class assumes to provide an original frame variant value.
8
8
  """
9
9
 
10
- def __init__(self, entity_formatter, word_separator=' '):
10
+ def __init__(self, word_separator=' ', **kwargs):
11
11
  """ See https://github.com/nicolay-r/AREkit/issues/377
12
12
  for more details.
13
13
  """
14
- super(BertDefaultStringTextTermsMapper, self).__init__(entity_formatter)
14
+ super(BertDefaultStringTextTermsMapper, self).__init__(**kwargs)
15
15
  self.__word_separator = word_separator
16
16
 
17
17
  def map_entity(self, e_ind, entity):
@@ -8,7 +8,7 @@ class PromptedSampleRowProvider(CroppedSampleRowProvider):
8
8
  """ Sample, enriched with the prompt technique.
9
9
  """
10
10
 
11
- def __init__(self, crop_window_size, label_scaler, text_provider, prompt, label_fmt=None):
11
+ def __init__(self, prompt, label_fmt=None, **kwargs):
12
12
  """ crop_window_size: int
13
13
  crop window size for the original text.
14
14
  prompt: str
@@ -17,12 +17,8 @@ class PromptedSampleRowProvider(CroppedSampleRowProvider):
17
17
  text, s_ind, t_ind, s_val, t_val, label_uint
18
18
  """
19
19
  assert(isinstance(prompt, str))
20
- assert(isinstance(text_provider, BaseSingleTextProvider))
21
20
  assert(isinstance(label_fmt, StringLabelsFormatter) or label_fmt is None)
22
-
23
- super(PromptedSampleRowProvider, self).__init__(crop_window_size=crop_window_size,
24
- label_scaler=label_scaler,
25
- text_provider=text_provider)
21
+ super(PromptedSampleRowProvider, self).__init__(**kwargs)
26
22
 
27
23
  self.__prompt = prompt
28
24
  self.__labels_fmt = label_fmt
@@ -5,7 +5,7 @@ from arekit.common.data.input.terms_mapper import OpinionContainingTextTermsMapp
5
5
  from arekit.contrib.bert.input.providers.text_pair import PairTextProvider
6
6
 
7
7
 
8
- def create_sample_provider(label_scaler, text_terms_mapper, text_b_prompt=None):
8
+ def create_sample_provider(is_entity_func, label_scaler, text_terms_mapper, text_b_prompt=None):
9
9
  assert(isinstance(text_terms_mapper, OpinionContainingTextTermsMapper))
10
10
 
11
11
  text_provider = BaseSingleTextProvider(text_terms_mapper=text_terms_mapper) \
@@ -14,4 +14,6 @@ def create_sample_provider(label_scaler, text_terms_mapper, text_b_prompt=None):
14
14
 
15
15
  label_provider = MultipleLabelProvider(label_scaler=label_scaler)
16
16
 
17
- return BaseSampleRowProvider(text_provider=text_provider, label_provider=label_provider)
17
+ return BaseSampleRowProvider(text_provider=text_provider,
18
+ label_provider=label_provider,
19
+ is_entity_func=is_entity_func)
@@ -5,8 +5,9 @@ from arekit.common.data.storages.base import BaseRowsStorage
5
5
 
6
6
  class JsonlBasedRowsStorage(BaseRowsStorage):
7
7
 
8
- def __init__(self, rows):
8
+ def __init__(self, rows, **kwargs):
9
9
  assert(isinstance(rows, list))
10
+ super(JsonlBasedRowsStorage, self).__init__(**kwargs)
10
11
  self.__rows = rows
11
12
 
12
13
  def _iter_rows(self):
@@ -6,13 +6,14 @@ class RowCacheStorage(BaseRowsStorage):
6
6
  """ Row Caching storage kernel, based on python dictionary.
7
7
  """
8
8
 
9
- def __init__(self, force_collect_columns=None):
9
+ def __init__(self, force_collect_columns=None, **kwargs):
10
10
  """ This is a particular/related solution for the following issue:
11
11
  https://github.com/nicolay-r/AREkit/issues/464
12
12
  force_collect_columns: list
13
13
  columns that are supposed to be additionally considered in output.
14
14
  """
15
15
  assert(isinstance(force_collect_columns, list) or force_collect_columns is None)
16
+ super(RowCacheStorage, self).__init__(**kwargs)
16
17
  self.__f = None
17
18
  self.__row_cache = {}
18
19
  self.__column_names = []
@@ -4,7 +4,8 @@ from arekit.common.data.storages.base import BaseRowsStorage
4
4
 
5
5
  class SQliteBasedRowsStorage(BaseRowsStorage):
6
6
 
7
- def __init__(self, path, table_name):
7
+ def __init__(self, path, table_name, **kwargs):
8
+ super(SQliteBasedRowsStorage, self).__init__(**kwargs)
8
9
  self.__path = path
9
10
  self.__table_name = table_name
10
11
  self.__conn = None
@@ -9,7 +9,7 @@ class AlgorithmBasedTextOpinionAnnotator(AlgorithmBasedOpinionAnnotator):
9
9
  """
10
10
 
11
11
  def __init__(self, value_to_group_id_func, annot_algo, create_empty_collection_func,
12
- get_doc_existed_opinions_func=None):
12
+ is_entity_func, get_doc_existed_opinions_func=None):
13
13
  """ get_doc_existed_opinions_func: func or None
14
14
  function that provides existed opinions for a document;
15
15
  if None, then we consider an absence of the existed document-level opinions.
@@ -20,14 +20,17 @@ class AlgorithmBasedTextOpinionAnnotator(AlgorithmBasedOpinionAnnotator):
20
20
  create_empty_collection_func=create_empty_collection_func,
21
21
  get_doc_existed_opinions_func=get_doc_existed_opinions_func)
22
22
  self.__value_to_group_id_func = value_to_group_id_func
23
+ self.__is_entity_func = is_entity_func
23
24
 
24
25
  def __create_service(self, parsed_doc):
25
- return ParsedDocumentService(parsed_doc=parsed_doc, providers=[
26
- TextOpinionPairsProvider(self.__value_to_group_id_func)
27
- ])
26
+ return ParsedDocumentService(
27
+ parsed_doc=parsed_doc,
28
+ providers=[TextOpinionPairsProvider(self.__value_to_group_id_func, entity_index_func=None)],
29
+ is_entity_func=self.__is_entity_func
30
+ )
28
31
 
29
32
  def annotate_collection(self, parsed_doc):
30
- service = self.__create_service(parsed_doc)
33
+ service = self.__create_service(parsed_doc=parsed_doc)
31
34
  topp = service.get_provider(TextOpinionPairsProvider.NAME)
32
35
  for opinion in super(AlgorithmBasedTextOpinionAnnotator, self).annotate_collection(parsed_doc):
33
36
  for text_opinion in topp.iter_from_opinion(opinion):
@@ -12,10 +12,11 @@ from arekit.contrib.utils.pipelines.text_opinion.filters.base import TextOpinion
12
12
  from arekit.contrib.utils.pipelines.text_opinion.filters.limitation import FrameworkLimitationsTextOpinionFilter
13
13
 
14
14
 
15
- def __iter_text_opinion_linkages(parsed_doc, annotators, entity_index_func,
15
+ def __iter_text_opinion_linkages(parsed_doc, annotators,
16
+ is_entity_func, entity_index_func,
16
17
  text_opinion_filters, use_meta):
17
18
  """ use_meta: bool
18
- this is mainly for tqdm and other console parameters to stay up-to-date
19
+ this is mainly for the progress-bar and other console parameters to stay up-to-date
19
20
  with the state in case we do not have many output results
20
21
  across a large number of documents.
21
22
  """
@@ -27,7 +28,9 @@ def __iter_text_opinion_linkages(parsed_doc, annotators, entity_index_func,
27
28
  def __to_id(text_opinion):
28
29
  return "{}_{}".format(text_opinion.SourceId, text_opinion.TargetId)
29
30
 
30
- service = ParsedDocumentService(parsed_doc=parsed_doc, providers=[EntityServiceProvider(entity_index_func)])
31
+ service = ParsedDocumentService(parsed_doc=parsed_doc,
32
+ providers=[EntityServiceProvider(entity_index_func=entity_index_func)],
33
+ is_entity_func=is_entity_func)
31
34
  esp = service.get_provider(EntityServiceProvider.NAME)
32
35
 
33
36
  predefined = set()
@@ -62,12 +65,16 @@ def __iter_text_opinion_linkages(parsed_doc, annotators, entity_index_func,
62
65
  yield MetaEmptyLinkedDataWrapper(doc_id=parsed_doc.RelatedDocID)
63
66
 
64
67
 
65
- def text_opinion_extraction_pipeline(pipeline_items, get_doc_by_id_func, annotators, entity_index_func,
68
+ def text_opinion_extraction_pipeline(pipeline_items, get_doc_by_id_func, annotators,
69
+ is_entity_func, entity_index_func, batch_size,
66
70
  text_opinion_filters=None, use_meta_between_docs=True):
67
71
  assert(callable(get_doc_by_id_func))
72
+ assert(callable(is_entity_func))
73
+ assert(callable(entity_index_func))
68
74
  assert(isinstance(annotators, list))
69
75
  assert(isinstance(text_opinion_filters, list) or text_opinion_filters is None)
70
76
  assert(isinstance(use_meta_between_docs, bool))
77
+ assert(isinstance(batch_size, int) and batch_size > 0)
71
78
 
72
79
  extra_filters = [] if text_opinion_filters is None else text_opinion_filters
73
80
  actual_text_opinion_filters = [FrameworkLimitationsTextOpinionFilter()] + extra_filters
@@ -77,14 +84,15 @@ def text_opinion_extraction_pipeline(pipeline_items, get_doc_by_id_func, annotat
77
84
  MapPipelineItem(map_func=lambda doc_id: get_doc_by_id_func(doc_id)),
78
85
 
79
86
  # (doc, ppl_ctx) -> (parsed_doc)
80
- MapNestedPipelineItem(map_func=lambda doc, ppl_ctx: DocumentParsers.parse(
81
- doc=doc, pipeline_items=pipeline_items, parent_ppl_ctx=ppl_ctx)),
87
+ MapNestedPipelineItem(map_func=lambda doc, ppl_ctx: DocumentParsers.parse_batch(
88
+ doc=doc, pipeline_items=pipeline_items, parent_ppl_ctx=ppl_ctx, batch_size=batch_size)),
82
89
 
83
90
  # (parsed_doc) -> (text_opinions)
84
91
  MapPipelineItem(map_func=lambda parsed_doc: __iter_text_opinion_linkages(
85
- annotators=annotators, parsed_doc=parsed_doc, entity_index_func=entity_index_func,
92
+ annotators=annotators, parsed_doc=parsed_doc,
93
+ is_entity_func=is_entity_func, entity_index_func=entity_index_func,
86
94
  text_opinion_filters=actual_text_opinion_filters, use_meta=use_meta_between_docs)),
87
95
 
88
96
  # linkages[] -> linkages
89
97
  FlattenIterPipelineItem()
90
- ]
98
+ ]
@@ -1,12 +1,13 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: arekit
3
- Version: 0.25.0
3
+ Version: 0.25.2
4
4
  Summary: Document level Attitude and Relation Extraction toolkit (AREkit) for sampling and prompting mass-media news into datasets for ML-model training
5
5
  Home-page: https://github.com/nicolay-r/AREkit
6
6
  Author: Nicolay Rusnachenko
7
7
  Author-email: rusnicolay@gmail.com
8
8
  License: MIT License
9
9
  Keywords: natural language processing,relation extraction,sentiment analysis
10
+ Platform: UNKNOWN
10
11
  Classifier: Programming Language :: Python
11
12
  Classifier: Programming Language :: Python :: 3.6
12
13
  Classifier: Topic :: Software Development :: Libraries :: Python Modules
@@ -14,15 +15,14 @@ Classifier: Topic :: Scientific/Engineering :: Information Analysis
14
15
  Classifier: Topic :: Text Processing :: Linguistic
15
16
  Requires-Python: >=3.6
16
17
  Description-Content-Type: text/markdown
17
- License-File: LICENSE
18
+ Requires-Dist: enum34 (==1.1.10)
18
19
  Requires-Dist: tqdm
19
- Requires-Dist: enum34==1.1.10
20
- Requires-Dist: numpy>=1.14.5
21
- Requires-Dist: pymystem3==0.2.0
22
20
 
23
- # AREkit 0.25.0
21
+ # AREkit 0.25.2
24
22
 
25
23
  ![](https://img.shields.io/badge/Python-3.9+-brightgreen.svg)
24
+ [![PyPI downloads](https://img.shields.io/pypi/dm/arekit.svg)](https://pypistats.org/packages/arekit)
25
+
26
26
 
27
27
  <p align="center">
28
28
  <img src="logo.png"/>
@@ -34,7 +34,7 @@ is a python toolkit, devoted to document level Attitude and Relation Extraction
34
34
  ## Description
35
35
 
36
36
 
37
- This toolkit aims at memory-effective data processing in Relation Extraction (RE) related tasks.
37
+ This toolkit aims at memory-effective data processing in [Relation Extraction (RE)](https://nlpprogress.com/english/relationship_extraction.html) related tasks.
38
38
 
39
39
  <p align="center">
40
40
  <img src="docs/arekit-pipeline-concept.png"/>
@@ -60,7 +60,7 @@ for sentence level relations preparation (dubbed as contexts);
60
60
  ## Installation
61
61
 
62
62
  ```bash
63
- pip install git+https://github.com/nicolay-r/AREkit.git@0.25.0-rc
63
+ pip install git+https://github.com/nicolay-r/AREkit.git@0.25.2-rc
64
64
  ```
65
65
 
66
66
  ## Usage
@@ -80,3 +80,5 @@ if you use or extend our work, please cite as follows:
80
80
  organization={Springer}
81
81
  }
82
82
  ```
83
+
84
+
@@ -2,9 +2,9 @@ arekit/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
2
2
  arekit/common/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
3
3
  arekit/common/bound.py,sha256=lPpHY6ct_CU9e4qXeYjhJfWbTj6Sb_NVtZ1CJheQPNE,1402
4
4
  arekit/common/log_utils.py,sha256=OfEQxbExkuRAl9dxlgFEqcFhI4HHoMYT7WE8ud0IPOM,924
5
- arekit/common/utils.py,sha256=eVRGhRy882ow-63Glncc3pJ-_43KSI0ukBePjC8ogAY,2394
5
+ arekit/common/utils.py,sha256=N061ENJJgvsB338Q9cixc6RWyuikSPQq4Tc8mmgwy9s,2659
6
6
  arekit/common/context/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
7
- arekit/common/context/terms_mapper.py,sha256=QA02Cv7D2JKTlXkez_0w0J8HuvNziNF2vrqLgy4Bwc8,1447
7
+ arekit/common/context/terms_mapper.py,sha256=tBs_dMettLjVrqwPwTMZg3Pgxo6PZJpu-Qh6ZOWWFJA,1532
8
8
  arekit/common/context/token.py,sha256=CpWAlvprUnJfCtYvO8lwdfU_ofSKAOGOudXTwppyzSk,459
9
9
  arekit/common/data/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
10
10
  arekit/common/data/const.py,sha256=J74zim3CGJlLJp-AVn5z9TTuBfmttjiM_8sRW1Pc-iE,457
@@ -13,7 +13,7 @@ arekit/common/data/rows_fmt.py,sha256=klq9HdzSnhbRBhOw7O4ctp3PZ5L6ZVy-0eIV2vLLYY
13
13
  arekit/common/data/rows_parser.py,sha256=qYSEETvhX_0_JuAqm0bjK_V28_53qq7OY9JAnBdRC78,1513
14
14
  arekit/common/data/input/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
15
15
  arekit/common/data/input/sample.py,sha256=6JeGxsLbEUXVKPWA1hIlkTDNOaYg4bHCJWw0ULrLByg,2143
16
- arekit/common/data/input/terms_mapper.py,sha256=DUOMbGwiQETY7qhztoU8uU30d1cQPsIsgNLldpjcufg,3197
16
+ arekit/common/data/input/terms_mapper.py,sha256=pOD8lGsdM-23maXr9nlHM1QMJ3hsx_5HGe6X3aQcq6k,3133
17
17
  arekit/common/data/input/providers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
18
18
  arekit/common/data/input/providers/const.py,sha256=GDvPkgP7hllHW3QiueMBQgQyu2CtNFI4JYNNja2Im6Q,187
19
19
  arekit/common/data/input/providers/contents.py,sha256=jT1LJE_5Igw5H2e1jKsWWciHSbPVg649phT177SzhEA,261
@@ -30,43 +30,34 @@ arekit/common/data/input/providers/label/binary.py,sha256=jPD6Jn8DYMrdI3jN8ueoWv
30
30
  arekit/common/data/input/providers/label/multiple.py,sha256=HWbHF_CwwbiLQbYm5dgvnXAm0b6tJOyFYFEUBxuWAqI,492
31
31
  arekit/common/data/input/providers/rows/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
32
32
  arekit/common/data/input/providers/rows/base.py,sha256=syH7ZEW3Agwfb1IR0G7n_Amy3Kkg0EZk2V7kH3r7ADg,2517
33
- arekit/common/data/input/providers/rows/samples.py,sha256=uqLTP8fnz-0wC7ALLlIDUYtXTG4OpnRqp70Fgv_1Iiw,9427
33
+ arekit/common/data/input/providers/rows/samples.py,sha256=iUBmKTnevAyfXDb4d6_Wntfw59wWASqSteXOhD5ez64,9334
34
34
  arekit/common/data/input/providers/sample/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
35
- arekit/common/data/input/providers/sample/cropped.py,sha256=jJSos4Si-qy-wb-QmomXxxgURR1UhJnvY0tZoowlfVc,1885
35
+ arekit/common/data/input/providers/sample/cropped.py,sha256=RSoDIoqIodANBW7zmj91ltgw4eYGISCWfl6zLuQXwFM,1831
36
36
  arekit/common/data/input/providers/text/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
37
37
  arekit/common/data/input/providers/text/single.py,sha256=vm3sShIYZcmses-hmZX9cOfveWXCYGwvKLgQ0qs3VXQ,1604
38
- arekit/common/data/input/repositories/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
39
- arekit/common/data/input/repositories/base.py,sha256=4DmLVORc85gu6bxtXVZgxi176NxnIaqHz2tVebMyGZ8,2557
40
- arekit/common/data/input/repositories/sample.py,sha256=LAdpaA1N_nq1iInLwkWQVvL6HGH64JYWSJ9tywU0llY,784
41
38
  arekit/common/data/storages/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
42
- arekit/common/data/storages/base.py,sha256=L9OLpVOZwlAXZION0YP1T6ZN1t_dfQpnAPAU4ztSs48,2956
43
- arekit/common/data/views/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
44
- arekit/common/data/views/samples.py,sha256=LDqUDqArGt90ujRB4kDFgDHLmR2_AQoUnzhxpXYWYaM,882
39
+ arekit/common/data/storages/base.py,sha256=xMMfHhG68ZraERLbipCN_OhqpLBSDq_S56qAtxGsU7Y,2595
45
40
  arekit/common/docs/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
46
41
  arekit/common/docs/base.py,sha256=uXUOtpR9BEsDBfDHg4eLqOjfSVOV_o9VPii3nSxLZuY,734
47
- arekit/common/docs/entities_grouping.py,sha256=_r254fNr0j6BjHuLZBLjj21yWm4_k__5aOcBXcAaQUQ,704
42
+ arekit/common/docs/entities_grouping.py,sha256=9Xr5NsrWD9_jjKLFE7HOqjkOibzjz840ef04CekkXNU,765
48
43
  arekit/common/docs/entity.py,sha256=TxrZMdIEgjk-PgCyskCkVis2KAw_M7vTBp3ppP6G05M,662
49
- arekit/common/docs/parser.py,sha256=514lQNrZiwU_mxgyuWBkDhqjS5SVAvcIHx9GQUTuVG8,2883
44
+ arekit/common/docs/parser.py,sha256=dzWjpbbYt-C9UU9sSy_Holnm0kQxJqtz1_6va6kS_L4,1780
50
45
  arekit/common/docs/sentence.py,sha256=nZCCFj2yk71POoXCBfEMN3pteM2qQdj60eEzxMVY_3k,302
51
46
  arekit/common/docs/parsed/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
52
- arekit/common/docs/parsed/base.py,sha256=WPstqOpBuLKjtz6UO_bI0DpOPF3Sm0wYEVwjtldbPXE,3175
53
- arekit/common/docs/parsed/service.py,sha256=fSzwtRcSvmvlW8LyK6XPf7wJAx66GWlbRgH_3oQf-BU,1029
47
+ arekit/common/docs/parsed/base.py,sha256=e43kQyxeO-eaPKr3-5SyZ4N33QIDDePTE_CGmEliO7c,3168
48
+ arekit/common/docs/parsed/service.py,sha256=k_4k9EQ7iFq97bvAZHz6dtxCltiJQMd3Suv5W_t7MBE,1076
54
49
  arekit/common/docs/parsed/term_position.py,sha256=H9eQQeanLxwP6og30TQUnpcXymGEPwXClRpaE8VnpLs,1040
55
50
  arekit/common/docs/parsed/providers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
56
- arekit/common/docs/parsed/providers/base.py,sha256=IjnG7c6Q78cYYAPTrwuZCOiMQDfMaujDQ6U0gK7JCcw,2587
51
+ arekit/common/docs/parsed/providers/base.py,sha256=9MPqxC8mTD4naXH_AoOH0bIPNR7wR9GkOL-Nm2D6Kdo,2543
57
52
  arekit/common/docs/parsed/providers/base_pairs.py,sha256=RDYjspkENPQU2pn7Jp5mFrL9566eVWgXMEzWBQlMdRo,2195
58
- arekit/common/docs/parsed/providers/entity_service.py,sha256=oaBfferpkDXfAFL17vpecSZUsV1Pjvq6lqgHDHsIEZY,6657
59
- arekit/common/docs/parsed/providers/opinion_pairs.py,sha256=ibeFmvpMBBARtqQ3EKEocIOulgzavv0DeYxePGQK5-U,633
60
- arekit/common/docs/parsed/providers/text_opinion_pairs.py,sha256=BC4uVgFxy3oZTkCq9VgOlqoqhODia2Z3anoGyGoy0ao,3139
53
+ arekit/common/docs/parsed/providers/entity_service.py,sha256=An_urYXU4r1PKIUNfhlGCjK6UNLwr3EkebkiaodBsRg,5895
54
+ arekit/common/docs/parsed/providers/opinion_pairs.py,sha256=dSd698VSbVefT0VbuQehaErquFixBfs42OAdX3BJH5M,693
55
+ arekit/common/docs/parsed/providers/text_opinion_pairs.py,sha256=MK1-m2_LJgjeis6AvY1hwT2N8rqHRCpIp7oWqXzgk9I,3215
61
56
  arekit/common/entities/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
62
57
  arekit/common/entities/base.py,sha256=kpJFo4pCRVBQX6T8PibLKspp9UwoIrkHDoFMTM9KkUs,1646
63
58
  arekit/common/entities/collection.py,sha256=ySSriMYP6zzdto1mC0V9VPXmkAqyJN3mmGoqoNValGI,1931
64
59
  arekit/common/entities/str_fmt.py,sha256=gAPeS8RXdhh8Px_u5eOAPbtLREiiyMueid0lQoa4EbQ,250
65
60
  arekit/common/entities/types.py,sha256=pxFB0gsevdsmnduN_Ffk7_P2TRiMt6NAHyrutuKOFvs,145
66
- arekit/common/experiment/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
67
- arekit/common/experiment/data_type.py,sha256=DezUkfwLTf6XLYheqPiaWyx3ZwcldsJ8wDV8aNgJtDk,227
68
- arekit/common/experiment/api/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
69
- arekit/common/experiment/api/base_samples_io.py,sha256=SN8CnbEYaazE3SldvnENfjoNRHsTejtrg4jJfqfZLMs,516
70
61
  arekit/common/frames/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
71
62
  arekit/common/frames/text_variant.py,sha256=TlWR4jnuF7HW9BMHhOTKkr768V_Ub0wd0E5A4YTwD0c,875
72
63
  arekit/common/frames/connotations/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -94,7 +85,7 @@ arekit/common/model/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSu
94
85
  arekit/common/model/labeling/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
95
86
  arekit/common/model/labeling/base.py,sha256=uj7_igCWEU23OjnzabNy0LyxoZ6S_qSfCA-ZaoL1erA,727
96
87
  arekit/common/model/labeling/modes.py,sha256=DiwC6Aomke-ojwwpR2pcd4qgQSwmRdGCvQlyHHhN3YY,127
97
- arekit/common/model/labeling/single.py,sha256=Eggi0obocjiT9ofv_U0zLiFoEIeUQhaMCqjCWn14Fh8,773
88
+ arekit/common/model/labeling/single.py,sha256=HJMFffbxfmV6dKK8t-MKjD-bOx_wuWUs35zmcSWcUL0,878
98
89
  arekit/common/opinions/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
99
90
  arekit/common/opinions/base.py,sha256=eIx1RzsngCkpnF2Utju5i_Qp7gqF_rDIe_UDeMGXtmo,2112
100
91
  arekit/common/opinions/collection.py,sha256=bdx-CIYYdE-DrjyB1mRTGtkLb-lrGPTSLl25xv5EHnM,4938
@@ -106,23 +97,21 @@ arekit/common/opinions/annot/algo_based.py,sha256=cvDGDmUoUaQ1Xcbyouxrjs0CkHRfRo
106
97
  arekit/common/opinions/annot/base.py,sha256=IvwrwT8O3s6b2_R0arpMR4Uog7kuWQZUAyRP5cq_27A,382
107
98
  arekit/common/opinions/annot/algo/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
108
99
  arekit/common/opinions/annot/algo/base.py,sha256=ymll-4-SplCY7CLswjOZEC1vsVHIEzUP0JMYgvL8hbo,124
109
- arekit/common/opinions/annot/algo/pair_based.py,sha256=HbYn1mAsn5g11NiC9pfrMqNtJn_GzvqPFGpafMqqB2o,4419
100
+ arekit/common/opinions/annot/algo/pair_based.py,sha256=0m0l-KEDvtARDEnl8Sr_MeEJp3yT1re_VsNAO2ZQQUM,4762
110
101
  arekit/common/opinions/annot/algo/predefined.py,sha256=zU39SADPKnykHCNB-Bmn_0bvd6gYWWYmfgfi-68hHSs,741
111
102
  arekit/common/pipeline/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
112
- arekit/common/pipeline/base.py,sha256=8TgWNy5QrnKEp1bq3lhyGSgIfYe5ZIZU3c-DYBJ9LPA,957
113
- arekit/common/pipeline/batching.py,sha256=DdOvOladOo2aEv3JZ8NQnCvsNGcWk4TFzENrZqTGyXk,1239
103
+ arekit/common/pipeline/base.py,sha256=RHpZs4OT2t9wGTMUxtpBM7q-zCrNQbf3-BFDy9Bcz4M,839
104
+ arekit/common/pipeline/batching.py,sha256=zm1SLSJz8T9gXrBdiztzS2f7VSWb4uFcYkzEu5TIfrE,1119
114
105
  arekit/common/pipeline/context.py,sha256=Fw25lBVakHNAXjtkdEqopR-Jh59cDKGWD2jCJxBrj7Y,1126
115
106
  arekit/common/pipeline/conts.py,sha256=NAQNsHt1kK3HnxWv3M6yXi0c7C6Mx6ZZ6KZc0yE0eas,70
116
107
  arekit/common/pipeline/utils.py,sha256=5VqH1LtRa4tYUbyiRvWdBmP4biFhTKq9vhr8QiRFFkY,882
117
108
  arekit/common/pipeline/items/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
118
- arekit/common/pipeline/items/base.py,sha256=dWIZVGJjYuURLCiZj8YQHWtsS725SOi9SPZaCPV7NvI,1694
109
+ arekit/common/pipeline/items/base.py,sha256=15-z8ERQ0QxaRszs7sHQduU0KIBJIm8B0V2nwCva6d0,1695
119
110
  arekit/common/pipeline/items/flatten.py,sha256=9T4jWqPGv4UDxajlM0Nm0-gvwUgqqYB8XH0efTum9a0,542
120
111
  arekit/common/pipeline/items/handle.py,sha256=QS5Byj7-o5jmFi0ag58NE3zm2-JzVIunIgc3Pn1ij6g,578
121
112
  arekit/common/pipeline/items/iter.py,sha256=Tk9WdUMPOq20s7jEWEpU4PmillnVtQ8nIa2ct7iw-3s,406
122
113
  arekit/common/pipeline/items/map.py,sha256=G5wBdjaaxePD0pijrxsfpJACeP7kzj7HerjCkNIhmII,381
123
114
  arekit/common/pipeline/items/map_nested.py,sha256=vs0GdJNr3qSF9p2yd1nWji5E1HGzECbvOfN2MqoHc2A,630
124
- arekit/common/service/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
125
- arekit/common/service/sqlite.py,sha256=1jLIszkcJGeT0hUos8Y0Chp3o9XRUfljG2P9q0T2_Ds,1440
126
115
  arekit/common/synonyms/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
127
116
  arekit/common/synonyms/base.py,sha256=YxD-CKCjlEtar1zTdumnfC3vKgbP2wLODR9mMEwbbnA,4237
128
117
  arekit/common/synonyms/grouping.py,sha256=fi7QQbBvsTvvP2CPTesSPEsPNmGfc6euqj-HPhVvtlg,698
@@ -137,123 +126,53 @@ arekit/contrib/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
137
126
  arekit/contrib/bert/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
138
127
  arekit/contrib/bert/input/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
139
128
  arekit/contrib/bert/input/providers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
140
- arekit/contrib/bert/input/providers/cropped_sample.py,sha256=46uHHhAe8cGxV2JlfO3thog5XV6T2niUIflFghfUSBM,866
129
+ arekit/contrib/bert/input/providers/cropped_sample.py,sha256=WJNAzILJDMYYhGpxg1r1F3f1X71kVV30gDhkgwH59H0,755
141
130
  arekit/contrib/bert/input/providers/text_pair.py,sha256=_1d-he0n42y3ksj8RjJlNHgHnaQUEq0aQhUdTPRMKgg,2817
142
131
  arekit/contrib/bert/terms/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
143
- arekit/contrib/bert/terms/mapper.py,sha256=oHX-lsaZYjBFLjngzSKT5z_JPJCHbclUsEe4i4fup_8,992
144
- arekit/contrib/networks/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
145
- arekit/contrib/networks/embedding.py,sha256=lrLdB6CdmnmzwavAL6MZuLHceNM3PsZZiWLZ4BjGeXc,3845
146
- arekit/contrib/networks/embedding_io.py,sha256=hV1MBr9wu9-10gQgnAzLuC-l897aB-8KNcw4h69B5VM,460
147
- arekit/contrib/networks/vectorizer.py,sha256=KKV_f0GZD10ZpeYgqZfvMapJtsKa3NBddR6W_GdYqrM,155
148
- arekit/contrib/networks/input/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
149
- arekit/contrib/networks/input/const.py,sha256=nPeuO-G6MILNlIkGc5HzSDj_RmTwLflReF7n5htFAUI,176
150
- arekit/contrib/networks/input/ctx_serialization.py,sha256=eCOw4xjp8A7Z2WFanshooS3MqSy7dbZ8ywf_DA2LZO8,982
151
- arekit/contrib/networks/input/rows_parser.py,sha256=6_43LbAelveY9yEWMU5BdvQlpWwm4RDOjUEmqHuPYdE,1807
152
- arekit/contrib/networks/input/term_types.py,sha256=P8E5LKegZE5ZEh4vNtC55Lu8USbQt8_Eo14op_anmvU,348
153
- arekit/contrib/networks/input/terms_mapping.py,sha256=NAnuTAbj7tBTe1Ga4js2IfnUdAWlTV9fcgSQEgYqQUQ,2129
154
- arekit/contrib/networks/input/embedding/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
155
- arekit/contrib/networks/input/embedding/matrix.py,sha256=BFn7eXhiqXY7c4tUfy1fzemIqRnZYx_GiEv873QnIEs,952
156
- arekit/contrib/networks/input/embedding/offsets.py,sha256=HrBfbFD03o_Y0ZvEGTd-FRxmPx55_5vqItTranMFy88,1313
157
- arekit/contrib/networks/input/formatters/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
158
- arekit/contrib/networks/input/formatters/pos_mapper.py,sha256=yftPKYU7noVb_q0KAflHf7bqjuUXt5siIgbnwMEoWrw,773
159
- arekit/contrib/networks/input/providers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
160
- arekit/contrib/networks/input/providers/sample.py,sha256=MHtXhhBD-kM0yzTACTbY14KMPIuhiLgUKEYXfhvumfo,5445
161
- arekit/contrib/networks/input/providers/term_connotation.py,sha256=Q90pVN4hQgYAk3oBSCPYc6_1xQUQE1b6ksiU_k8frcM,1157
162
- arekit/contrib/networks/input/providers/text.py,sha256=kucezKm6Ilmy5wuM2jUP5xk9zh1K1Pf8KcMd1prrp8k,917
132
+ arekit/contrib/bert/terms/mapper.py,sha256=YMY1JasNc___83ihiV1KqzwGyC3qs3ZNN90NmHqBEZ0,976
163
133
  arekit/contrib/prompt/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
164
- arekit/contrib/prompt/sample.py,sha256=MxpbDR0ww7WmdtuPu74B8R6QKVXeuzO0CKGOJIYwbRk,3164
134
+ arekit/contrib/prompt/sample.py,sha256=iDwe65pUBIrk0Hjh8v7o1XesRPxCVsJojw-dcASPmWc,2867
165
135
  arekit/contrib/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
166
- arekit/contrib/utils/serializer.py,sha256=D9LJ2ZXeVx3YntV-HqEnt32xW-s4GauwD97XRVlqr0g,1626
167
136
  arekit/contrib/utils/bert/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
168
- arekit/contrib/utils/bert/samplers.py,sha256=ZVe3rbUAH0Jw1xR_yHE1DoUJf3CI0pDgbBQQzlLWevc,989
137
+ arekit/contrib/utils/bert/samplers.py,sha256=vleluRLRFzDkGRZ_ReeHsY8IJAS-TxJgoTTro4mYrs4,1102
169
138
  arekit/contrib/utils/data/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
170
139
  arekit/contrib/utils/data/contents/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
171
140
  arekit/contrib/utils/data/contents/opinions.py,sha256=MSV7NytEe15adKhhHCq5KiCj6ZBq31nV-u2rcSfFCgE,1738
172
141
  arekit/contrib/utils/data/doc_provider/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
173
142
  arekit/contrib/utils/data/doc_provider/dict_based.py,sha256=zUOiiIbj5zby4xqMb0m9N-a6enavJJ7wFmPaGErykWU,371
174
143
  arekit/contrib/utils/data/doc_provider/dir_based.py,sha256=FTw3kLV_CYtPoUoHl39IrP6RjLvTecCno9May95jVXw,1916
175
- arekit/contrib/utils/data/readers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
176
- arekit/contrib/utils/data/readers/base.py,sha256=zAsZLX5ng0_gb_ysL6wQchptmBHlNgqgQilw295Y5Aw,153
177
- arekit/contrib/utils/data/readers/csv_pd.py,sha256=Ym49j04Z-_WQN-7xJMiiN1y2TIMnMDtPxy5h0mT3WBQ,1383
178
- arekit/contrib/utils/data/readers/jsonl.py,sha256=c2bHwnTfNEwb1c8B9fRwaQyeze5x3nOd2UXXAp4MbxQ,426
179
- arekit/contrib/utils/data/readers/sqlite.py,sha256=U1138XNCIwqycNivxwzwIUnowj3jDkP4M6J_Kvyedbc,416
180
- arekit/contrib/utils/data/service/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
181
- arekit/contrib/utils/data/service/balance.py,sha256=PgA5B6qSPmt8ITPLsQuCkniE8-u2NO_eQ2m-U9Akh98,1547
182
144
  arekit/contrib/utils/data/storages/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
183
- arekit/contrib/utils/data/storages/jsonl_based.py,sha256=Oj5u8aW_UtVDSDxMpIQsgMlZlV-KBD0qVHPVVT3m8nA,450
184
- arekit/contrib/utils/data/storages/pandas_based.py,sha256=m8z34tO_7NupYd_zQ4L1miTXJQkmMMB90zPFqEeYCNs,4301
185
- arekit/contrib/utils/data/storages/row_cache.py,sha256=V1InYIqRf5WMWV_JndHNH9JzAjFS3ZL38f4_pDPLo_8,1985
186
- arekit/contrib/utils/data/storages/sqlite_based.py,sha256=ARwVisVbPKBap_mVdpvTpp28iXgJbCJ3dAj41UYu03Q,609
145
+ arekit/contrib/utils/data/storages/jsonl_based.py,sha256=dz8uizu9t1C215o0HEL8y4LiDKR4aC_-OwDu_xF0xIM,522
146
+ arekit/contrib/utils/data/storages/row_cache.py,sha256=MRK0uJFvw6O99k2aFb3JLZhLUBo2JUO-WYQ4EeRRu6M,2051
147
+ arekit/contrib/utils/data/storages/sqlite_based.py,sha256=cIYAHyiB4CMftKgrgLqw-L4F1WnhbspjwWLSPqH5NHk,682
187
148
  arekit/contrib/utils/data/writers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
188
149
  arekit/contrib/utils/data/writers/base.py,sha256=JLwf5WVl_U319sdMev8YOn4OoCcrgNIUZtrOuG1JLjI,766
189
- arekit/contrib/utils/data/writers/csv_native.py,sha256=7fPxYeu9YDK8Cvjp1n-sbKT63ZuhDIEv3VwghHuKk5k,2252
190
- arekit/contrib/utils/data/writers/csv_pd.py,sha256=WhBjDJCHUBy_TabngMF42Qicx0ye8xIus0m6c7qotto,1330
191
- arekit/contrib/utils/data/writers/json_opennre.py,sha256=EkhXmONgtMe7A9VKrs9ElFHc8RoMumjFbkKfwuOVOoU,5067
192
- arekit/contrib/utils/data/writers/sqlite_native.py,sha256=MnbLU8iPvYvpYgEbOXhBKH_G8DJs0W9iSuhr_TPKBAQ,4601
193
- arekit/contrib/utils/embeddings/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
194
- arekit/contrib/utils/embeddings/rusvectores.py,sha256=WA0HejE2U5kgeBvh4_vty2QzoAkFXiMk94BK8FHxoxw,1931
195
- arekit/contrib/utils/embeddings/tokens.py,sha256=z3lJ30JTX9zvZtPgzRl3yANECmuA1qboMDTcJsr_4E4,872
196
150
  arekit/contrib/utils/entities/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
197
151
  arekit/contrib/utils/entities/filter.py,sha256=aHTExIMFaMdy4QL8iYE23eiby3qLImAakXR6gNqG6fs,145
198
152
  arekit/contrib/utils/entities/formatters/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
199
- arekit/contrib/utils/entities/formatters/str_display.py,sha256=N8igv7EVaTFayvLXkyBGtm67KwHaeP-M-L8d7oqBG9Q,401
200
153
  arekit/contrib/utils/entities/formatters/str_simple_sharp_prefixed_fmt.py,sha256=rEUIma9O3kOBWIguGtJ69JH-00Dhm0vUBOd5yNcKweY,653
201
154
  arekit/contrib/utils/io_utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
202
- arekit/contrib/utils/io_utils/embedding.py,sha256=cBDRv_1LROJ262QaL3QVfGt2W9EvBfbh83oL41PJn60,2543
203
155
  arekit/contrib/utils/io_utils/utils.py,sha256=310SIJTsNLn2OZrGPer9W4ZP52PHkjBK3zsyqxVs3h0,537
204
- arekit/contrib/utils/np_utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
205
- arekit/contrib/utils/np_utils/embedding.py,sha256=G7Ls_ClzbskLLy-opRcVzQlfUfhdwbqoXgk0zoGrmHM,798
206
- arekit/contrib/utils/np_utils/npz_utils.py,sha256=XoUHNmOlcr2X674R1xKGUJitEpFCIBJ8DOpNEPhtJFk,234
207
- arekit/contrib/utils/np_utils/vocab.py,sha256=FsS18chMLU4WfMeGwBbvmfB5Qmoj5tZTOo-4zqWPm3Q,580
208
156
  arekit/contrib/utils/pipelines/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
209
157
  arekit/contrib/utils/pipelines/opinion_collections.py,sha256=y9-klVJGCN9mPd7t1ECllAiCnAb3MKVXC1PnYddp5sQ,3195
210
158
  arekit/contrib/utils/pipelines/items/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
211
- arekit/contrib/utils/pipelines/items/sampling/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
212
- arekit/contrib/utils/pipelines/items/sampling/base.py,sha256=-H-r5GIi9ee7CxxpJs8KnHC91l7Y1dYaWPR_OK17E8g,4245
213
- arekit/contrib/utils/pipelines/items/sampling/networks.py,sha256=E0EjQ4KRd3oYLFVbie05XJa00JqR26eLRoMrDnuQySQ,2653
214
159
  arekit/contrib/utils/pipelines/items/text/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
215
- arekit/contrib/utils/pipelines/items/text/entities_default.py,sha256=vNx5ir2mf7a1gg_OeqUsf_p1Fu2k7QIFxVpe-CuwZ84,727
216
160
  arekit/contrib/utils/pipelines/items/text/frames.py,sha256=pZQybYfgEQB1DM3PtmsgrtB2Xl0HejmP4rhT0nR_YKE,2586
217
- arekit/contrib/utils/pipelines/items/text/frames_lemmatized.py,sha256=4rIAAB-_GeWNbu5KyaDm5qttH4o2Bzpdvy-D9YR5bRk,1776
218
- arekit/contrib/utils/pipelines/items/text/frames_negation.py,sha256=AdoY7lqSAT0RApp0DbqeI7xxyRVF6NPJLAfR59lsIec,1303
219
- arekit/contrib/utils/pipelines/items/text/tokenizer.py,sha256=FmV5flziDLCNttxrUzRr-FGCcKK6venZEcZ-KwcqwNE,3147
220
- arekit/contrib/utils/pipelines/items/text/translator.py,sha256=TkXVyZYRbS8P4S2Pnn2GzQMRa-9ba-nS4_zXvsf16vU,5365
221
161
  arekit/contrib/utils/pipelines/text_opinion/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
222
- arekit/contrib/utils/pipelines/text_opinion/extraction.py,sha256=QoK0-dfMl27uOOfUhvnbvzYX23jCpZbm97Qs27Na7VA,4133
162
+ arekit/contrib/utils/pipelines/text_opinion/extraction.py,sha256=kKBQTvZxYYf9tBYmUv3Ipj9OOYKmHnYG0y5Gyjt27yA,4587
223
163
  arekit/contrib/utils/pipelines/text_opinion/annot/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
224
- arekit/contrib/utils/pipelines/text_opinion/annot/algo_based.py,sha256=bwS-UR2x3rgp_xqnf6z-73T-eIZE_kltRSGYxgd_WpU,1751
164
+ arekit/contrib/utils/pipelines/text_opinion/annot/algo_based.py,sha256=69xmuxqVmsYxBYpV2gYF7j3Z5iPk0ndjnOZe2Yy5WDA,1911
225
165
  arekit/contrib/utils/pipelines/text_opinion/filters/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
226
166
  arekit/contrib/utils/pipelines/text_opinion/filters/base.py,sha256=GnKnJB4MKqiMSJny3a9Na7l7Csm7abbt6GADBCY18Mw,143
227
167
  arekit/contrib/utils/pipelines/text_opinion/filters/distance_based.py,sha256=3Pjq4IJJMT7dYpK266lN66WQJUnQO3P0rG6wcAvJOOA,649
228
168
  arekit/contrib/utils/pipelines/text_opinion/filters/entity_based.py,sha256=pdWFJaKh4kKIsUuBNp3WNy5Rj80CjWEy2wp-0axFnrI,1254
229
169
  arekit/contrib/utils/pipelines/text_opinion/filters/limitation.py,sha256=4AFS5zhocJuYphGO2ZMWmYTtIhGItKDTkB0--AmjgnA,1151
230
- arekit/contrib/utils/processing/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
231
- arekit/contrib/utils/processing/languages/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
232
- arekit/contrib/utils/processing/languages/mods.py,sha256=OERKcglI4pJEIQxlWMYuYg_uHnNWVpP-mqhnFsQbY7A,263
233
- arekit/contrib/utils/processing/languages/pos.py,sha256=etC3ueLGgZorgKEc3TWpeIuv46vs392xPi1lM31Cg0s,278
234
- arekit/contrib/utils/processing/languages/ru/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
235
- arekit/contrib/utils/processing/languages/ru/cases.py,sha256=27sIQsU5_0aT4EVuPtKCK-tfi1Q0TH11phV1x5hIzLs,1492
236
- arekit/contrib/utils/processing/languages/ru/constants.py,sha256=f4z7ivILKqYju9rkagi9_FIvPm1FnWHbXgxigyb3zm4,147
237
- arekit/contrib/utils/processing/languages/ru/mods.py,sha256=j4xKgRbCC834i9n-RyU607v9Qph9sP_B31WLrKFByRk,343
238
- arekit/contrib/utils/processing/languages/ru/number.py,sha256=kHyP0Lp_iHVDwkbN7tkZUJpGFQ40QRm-j_1g0dFU-sM,401
239
- arekit/contrib/utils/processing/languages/ru/pos_service.py,sha256=BWHLPybjmTVNXjJM2QmrZlEDcl7nZY7keLmXZcG_PFM,1125
240
- arekit/contrib/utils/processing/lemmatization/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
241
- arekit/contrib/utils/processing/lemmatization/mystem.py,sha256=_FRqEGWUlgAbhSJ-dsyoFg_qbbUxePDSAOWWuveRqCo,1340
242
- arekit/contrib/utils/processing/pos/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
243
- arekit/contrib/utils/processing/pos/base.py,sha256=CrMr3u6lRs2NoV7uch5HZgV71A-0M-pwJfwXjfudHBY,259
244
- arekit/contrib/utils/processing/pos/mystem_wrap.py,sha256=C9AnRIAZL4e8DMNte9LDuvxS-cbEQpo2AYdQtP9uIJ4,4336
245
- arekit/contrib/utils/processing/pos/russian.py,sha256=POCo6xKmK7vAEq-kWlODg611kLOtOj37OVc3L_GWL-8,229
246
- arekit/contrib/utils/processing/text/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
247
- arekit/contrib/utils/processing/text/tokens.py,sha256=_3u5Oy1MG_QfHH8wi0x0nA588qSaCp3Wmnp2SzMWjXY,3573
248
170
  arekit/contrib/utils/synonyms/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
249
171
  arekit/contrib/utils/synonyms/simple.py,sha256=ST9EwuWP88FzbyV8Gi0-biTPgGOsZ7OWyaBWHL_U_eo,557
250
172
  arekit/contrib/utils/synonyms/stemmer_based.py,sha256=q19P_XOCWN2_JrBtybAt7ToMIr1ambw4ahr0fSEEHmQ,1400
251
- arekit/contrib/utils/vectorizers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
252
- arekit/contrib/utils/vectorizers/bpe.py,sha256=bFS5MZytvU1L21YS5aAeb3FZl7RMjyog4lWwysvKD-8,3047
253
- arekit/contrib/utils/vectorizers/random_norm.py,sha256=TL86Kz6p59lJqoLg8RwQRTvfhr0e-tiULGHhO4vhBbo,1339
254
- arekit-0.25.0.data/data/logo.png,sha256=S8OZ4MGGD72Pf5co7ngYbXKkJH1EUhbErUXv1ZjUWiU,45718
255
- arekit-0.25.0.dist-info/LICENSE,sha256=JO9tIbxAvhwDv73cX-gUStr9yA-TY7wusUeLHRx7JuY,1076
256
- arekit-0.25.0.dist-info/METADATA,sha256=4DSUy6aTidHG9jFR7jMwQe3uJGER-e8E9vU0q2G20Uo,3145
257
- arekit-0.25.0.dist-info/WHEEL,sha256=eOLhNAGa2EW3wWl_TU484h7q1UNgy0JXjjoqKoxAAQc,92
258
- arekit-0.25.0.dist-info/top_level.txt,sha256=4pXuFE8IE0lBsqi6ZsR7figx0H939VIX4_-76YIbkOQ,7
259
- arekit-0.25.0.dist-info/RECORD,,
173
+ arekit-0.25.2.data/data/logo.png,sha256=S8OZ4MGGD72Pf5co7ngYbXKkJH1EUhbErUXv1ZjUWiU,45718
174
+ arekit-0.25.2.dist-info/LICENSE,sha256=JO9tIbxAvhwDv73cX-gUStr9yA-TY7wusUeLHRx7JuY,1076
175
+ arekit-0.25.2.dist-info/METADATA,sha256=CsXviPZIM44LGhiyBRH-MK0DGOP7UAc4GHbvSaLcwxw,3252
176
+ arekit-0.25.2.dist-info/WHEEL,sha256=g4nMs7d-Xl9-xC9XovUrsDHGXt-FT0E17Yqo92DEfvY,92
177
+ arekit-0.25.2.dist-info/top_level.txt,sha256=4pXuFE8IE0lBsqi6ZsR7figx0H939VIX4_-76YIbkOQ,7
178
+ arekit-0.25.2.dist-info/RECORD,,
@@ -1,5 +1,5 @@
1
1
  Wheel-Version: 1.0
2
- Generator: bdist_wheel (0.44.0)
2
+ Generator: bdist_wheel (0.34.2)
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
5
5
 
File without changes
@@ -1,68 +0,0 @@
1
- from arekit.common.data.input.providers.columns.base import BaseColumnsProvider
2
- from arekit.common.data.input.providers.contents import ContentsProvider
3
- from arekit.common.data.input.providers.rows.base import BaseRowProvider
4
- from arekit.common.data.storages.base import BaseRowsStorage
5
- from arekit.contrib.utils.data.storages.row_cache import RowCacheStorage
6
- from arekit.contrib.utils.data.writers.base import BaseWriter
7
-
8
-
9
- class BaseInputRepository(object):
10
-
11
- def __init__(self, columns_provider, rows_provider, storage):
12
- assert(isinstance(columns_provider, BaseColumnsProvider))
13
- assert(isinstance(rows_provider, BaseRowProvider))
14
- assert(isinstance(storage, BaseRowsStorage))
15
-
16
- self._columns_provider = columns_provider
17
- self._rows_provider = rows_provider
18
- self._storage = storage
19
-
20
- # Do setup operations.
21
- self._setup_columns_provider()
22
- self._setup_rows_provider()
23
-
24
- # region protected methods
25
-
26
- def _setup_columns_provider(self):
27
- pass
28
-
29
- def _setup_rows_provider(self):
30
- pass
31
-
32
- # endregion
33
-
34
- def populate(self, contents_provider, doc_ids, desc="", writer=None, target=None):
35
- assert(isinstance(contents_provider, ContentsProvider))
36
- assert(isinstance(self._storage, BaseRowsStorage))
37
- assert(isinstance(doc_ids, list))
38
- assert(isinstance(writer, BaseWriter) or writer is None)
39
- assert(isinstance(target, str) or target is None)
40
-
41
- def iter_rows(idle_mode):
42
- return self._rows_provider.iter_by_rows(
43
- contents_provider=contents_provider,
44
- doc_ids_iter=doc_ids,
45
- idle_mode=idle_mode)
46
-
47
- self._storage.init_empty(columns_provider=self._columns_provider)
48
-
49
- is_async_write_mode_on = writer is not None and target is not None
50
-
51
- if is_async_write_mode_on:
52
- writer.open_target(target)
53
-
54
- self._storage.fill(lambda idle_mode: iter_rows(idle_mode),
55
- columns_provider=self._columns_provider,
56
- row_handler=lambda: writer.commit_line(self._storage) if is_async_write_mode_on else None,
57
- desc=desc)
58
-
59
- if is_async_write_mode_on:
60
- writer.close_target()
61
-
62
- def push(self, writer, target, free_storage=True):
63
- if not isinstance(self._storage, RowCacheStorage):
64
- writer.write_all(self._storage, target)
65
-
66
- # After writing we free the contents of the storage.
67
- if free_storage:
68
- self._storage.free()