arekit 0.25.0__py3-none-any.whl → 0.25.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (88) hide show
  1. arekit/common/data/storages/base.py +4 -15
  2. arekit/common/docs/parser.py +3 -30
  3. arekit/common/pipeline/items/base.py +1 -1
  4. arekit/common/utils.py +11 -8
  5. arekit/contrib/utils/data/storages/jsonl_based.py +2 -1
  6. arekit/contrib/utils/data/storages/pandas_based.py +2 -17
  7. arekit/contrib/utils/data/storages/row_cache.py +2 -1
  8. arekit/contrib/utils/data/storages/sqlite_based.py +2 -1
  9. arekit/contrib/utils/pipelines/text_opinion/extraction.py +5 -4
  10. {arekit-0.25.0.dist-info → arekit-0.25.1.dist-info}/METADATA +4 -5
  11. {arekit-0.25.0.dist-info → arekit-0.25.1.dist-info}/RECORD +15 -88
  12. arekit/common/data/input/repositories/__init__.py +0 -0
  13. arekit/common/data/input/repositories/base.py +0 -68
  14. arekit/common/data/input/repositories/sample.py +0 -22
  15. arekit/common/data/views/__init__.py +0 -0
  16. arekit/common/data/views/samples.py +0 -26
  17. arekit/common/service/__init__.py +0 -0
  18. arekit/common/service/sqlite.py +0 -36
  19. arekit/contrib/networks/__init__.py +0 -0
  20. arekit/contrib/networks/embedding.py +0 -149
  21. arekit/contrib/networks/embedding_io.py +0 -18
  22. arekit/contrib/networks/input/__init__.py +0 -0
  23. arekit/contrib/networks/input/const.py +0 -6
  24. arekit/contrib/networks/input/ctx_serialization.py +0 -28
  25. arekit/contrib/networks/input/embedding/__init__.py +0 -0
  26. arekit/contrib/networks/input/embedding/matrix.py +0 -29
  27. arekit/contrib/networks/input/embedding/offsets.py +0 -55
  28. arekit/contrib/networks/input/formatters/__init__.py +0 -0
  29. arekit/contrib/networks/input/formatters/pos_mapper.py +0 -22
  30. arekit/contrib/networks/input/providers/__init__.py +0 -0
  31. arekit/contrib/networks/input/providers/sample.py +0 -129
  32. arekit/contrib/networks/input/providers/term_connotation.py +0 -23
  33. arekit/contrib/networks/input/providers/text.py +0 -24
  34. arekit/contrib/networks/input/rows_parser.py +0 -47
  35. arekit/contrib/networks/input/term_types.py +0 -13
  36. arekit/contrib/networks/input/terms_mapping.py +0 -60
  37. arekit/contrib/networks/vectorizer.py +0 -6
  38. arekit/contrib/utils/data/readers/__init__.py +0 -0
  39. arekit/contrib/utils/data/readers/base.py +0 -7
  40. arekit/contrib/utils/data/readers/csv_pd.py +0 -38
  41. arekit/contrib/utils/data/readers/jsonl.py +0 -15
  42. arekit/contrib/utils/data/readers/sqlite.py +0 -14
  43. arekit/contrib/utils/data/service/__init__.py +0 -0
  44. arekit/contrib/utils/data/service/balance.py +0 -50
  45. arekit/contrib/utils/data/writers/csv_native.py +0 -63
  46. arekit/contrib/utils/data/writers/csv_pd.py +0 -40
  47. arekit/contrib/utils/data/writers/json_opennre.py +0 -132
  48. arekit/contrib/utils/data/writers/sqlite_native.py +0 -114
  49. arekit/contrib/utils/embeddings/__init__.py +0 -0
  50. arekit/contrib/utils/embeddings/rusvectores.py +0 -58
  51. arekit/contrib/utils/embeddings/tokens.py +0 -30
  52. arekit/contrib/utils/io_utils/embedding.py +0 -72
  53. arekit/contrib/utils/np_utils/__init__.py +0 -0
  54. arekit/contrib/utils/np_utils/embedding.py +0 -22
  55. arekit/contrib/utils/np_utils/npz_utils.py +0 -13
  56. arekit/contrib/utils/np_utils/vocab.py +0 -20
  57. arekit/contrib/utils/pipelines/items/sampling/__init__.py +0 -0
  58. arekit/contrib/utils/pipelines/items/sampling/base.py +0 -94
  59. arekit/contrib/utils/pipelines/items/sampling/networks.py +0 -55
  60. arekit/contrib/utils/pipelines/items/text/frames_lemmatized.py +0 -36
  61. arekit/contrib/utils/pipelines/items/text/frames_negation.py +0 -33
  62. arekit/contrib/utils/pipelines/items/text/tokenizer.py +0 -105
  63. arekit/contrib/utils/pipelines/items/text/translator.py +0 -136
  64. arekit/contrib/utils/processing/languages/__init__.py +0 -0
  65. arekit/contrib/utils/processing/languages/mods.py +0 -12
  66. arekit/contrib/utils/processing/languages/pos.py +0 -23
  67. arekit/contrib/utils/processing/languages/ru/__init__.py +0 -0
  68. arekit/contrib/utils/processing/languages/ru/cases.py +0 -78
  69. arekit/contrib/utils/processing/languages/ru/constants.py +0 -6
  70. arekit/contrib/utils/processing/languages/ru/mods.py +0 -13
  71. arekit/contrib/utils/processing/languages/ru/number.py +0 -23
  72. arekit/contrib/utils/processing/languages/ru/pos_service.py +0 -36
  73. arekit/contrib/utils/processing/lemmatization/__init__.py +0 -0
  74. arekit/contrib/utils/processing/lemmatization/mystem.py +0 -51
  75. arekit/contrib/utils/processing/pos/__init__.py +0 -0
  76. arekit/contrib/utils/processing/pos/base.py +0 -12
  77. arekit/contrib/utils/processing/pos/mystem_wrap.py +0 -134
  78. arekit/contrib/utils/processing/pos/russian.py +0 -10
  79. arekit/contrib/utils/processing/text/__init__.py +0 -0
  80. arekit/contrib/utils/processing/text/tokens.py +0 -127
  81. arekit/contrib/utils/serializer.py +0 -42
  82. arekit/contrib/utils/vectorizers/__init__.py +0 -0
  83. arekit/contrib/utils/vectorizers/bpe.py +0 -93
  84. arekit/contrib/utils/vectorizers/random_norm.py +0 -39
  85. {arekit-0.25.0.data → arekit-0.25.1.data}/data/logo.png +0 -0
  86. {arekit-0.25.0.dist-info → arekit-0.25.1.dist-info}/LICENSE +0 -0
  87. {arekit-0.25.0.dist-info → arekit-0.25.1.dist-info}/WHEEL +0 -0
  88. {arekit-0.25.0.dist-info → arekit-0.25.1.dist-info}/top_level.txt +0 -0
@@ -10,6 +10,9 @@ logger = logging.getLogger(__name__)
10
10
 
11
11
  class BaseRowsStorage(object):
12
12
 
13
+ def __init__(self, log_out=None):
14
+ self.__log_out = log_out
15
+
13
16
  # region protected methods
14
17
 
15
18
  def _begin_filling_row(self, row_ind):
@@ -31,27 +34,12 @@ class BaseRowsStorage(object):
31
34
  def _get_rows_count(self):
32
35
  raise NotImplemented()
33
36
 
34
- def find_by_value(self, column_name, value):
35
- raise NotImplemented()
36
-
37
- def find_first_by_value(self, column_name, value):
38
- raise NotImplemented()
39
-
40
- def iter_column_values(self, column_name, dtype=None):
41
- raise NotImplemented()
42
-
43
37
  def get_row(self, row_index):
44
38
  raise NotImplemented()
45
39
 
46
- def get_cell(self, row_index, column_name):
47
- raise NotImplemented()
48
-
49
40
  def init_empty(self, columns_provider):
50
41
  raise NotImplemented()
51
42
 
52
- def iter_shuffled(self):
53
- raise NotImplemented()
54
-
55
43
  def iter_column_names(self):
56
44
  raise NotImplemented()
57
45
 
@@ -81,6 +69,7 @@ class BaseRowsStorage(object):
81
69
  condition_func=lambda item: not isinstance(item[1], MetaEmptyLinkedDataWrapper),
82
70
  postfix_func=postfix_func,
83
71
  desc="{fmt}".format(fmt=desc),
72
+ file=self.__log_out,
84
73
  total=rows_count)
85
74
 
86
75
  for row_index, item in enumerate(pbar_it):
@@ -1,42 +1,14 @@
1
- from tqdm import tqdm
2
1
  from arekit.common.docs.base import Document
3
2
  from arekit.common.docs.parsed.base import ParsedDocument
4
- from arekit.common.pipeline.base import BasePipelineLauncher
5
3
  from arekit.common.pipeline.batching import BatchingPipelineLauncher
6
4
  from arekit.common.pipeline.context import PipelineContext
7
5
  from arekit.common.pipeline.utils import BatchIterator
8
6
  from arekit.common.text.parsed import BaseParsedText
7
+ from arekit.common.utils import progress_bar_defined
9
8
 
10
9
 
11
10
  class DocumentParsers(object):
12
11
 
13
- @staticmethod
14
- def parse(doc, pipeline_items, parent_ppl_ctx=None, src_key="input", show_progress=False):
15
- """ This document parser is based on single text parts (sentences)
16
- that passes sequentially through the pipeline of transformations.
17
- """
18
- assert(isinstance(doc, Document))
19
- assert(isinstance(pipeline_items, list))
20
- assert(isinstance(parent_ppl_ctx, PipelineContext) or parent_ppl_ctx is None)
21
-
22
- parsed_sentences = []
23
-
24
- data_it = range(doc.SentencesCount)
25
- progress_it = tqdm(data_it, disable=not show_progress)
26
-
27
- for sent_ind in progress_it:
28
-
29
- # Composing the context from a single sentence.
30
- ctx = PipelineContext({src_key: doc.get_sentence(sent_ind)}, parent_ctx=parent_ppl_ctx)
31
-
32
- # Apply all the operations.
33
- BasePipelineLauncher.run(pipeline=pipeline_items, pipeline_ctx=ctx, src_key=src_key)
34
-
35
- # Collecting the result.
36
- parsed_sentences.append(BaseParsedText(terms=ctx.provide("result")))
37
-
38
- return ParsedDocument(doc_id=doc.ID, parsed_sentences=parsed_sentences)
39
-
40
12
  @staticmethod
41
13
  def parse_batch(doc, pipeline_items, batch_size, parent_ppl_ctx=None, src_key="input", show_progress=False):
42
14
  """ This document parser is based on batch of sentences.
@@ -49,7 +21,8 @@ class DocumentParsers(object):
49
21
  parsed_sentences = []
50
22
 
51
23
  data_it = BatchIterator(data_iter=iter(range(doc.SentencesCount)), batch_size=batch_size)
52
- progress_it = tqdm(data_it, total=round(doc.SentencesCount / batch_size), disable=not show_progress)
24
+ progress_it = progress_bar_defined(data_it, total=round(doc.SentencesCount / batch_size),
25
+ disable=not show_progress)
53
26
 
54
27
  for batch in progress_it:
55
28
 
@@ -2,7 +2,7 @@ from arekit.common.pipeline.context import PipelineContext
2
2
 
3
3
 
4
4
  class BasePipelineItem(object):
5
- """ Single pipeline item that might be instatiated and embedded into pipeline.
5
+ """ Single pipeline item that might be instantiated and embedded into pipeline.
6
6
  """
7
7
 
8
8
  def __init__(self, src_key="result", result_key="result", src_func=None):
arekit/common/utils.py CHANGED
@@ -1,4 +1,3 @@
1
- import sys
2
1
  import os
3
2
  from tqdm import tqdm
4
3
 
@@ -27,14 +26,14 @@ def split_by_whitespaces(text):
27
26
  return text.split()
28
27
 
29
28
 
30
- def progress_bar(iterable, total, desc="", unit="it"):
29
+ def progress_bar(iterable, total, desc="", unit="it", file=None, disable=False):
31
30
  if total is not None:
32
- return progress_bar_defined(iterable=iterable, total=total, desc=desc, unit=unit)
31
+ return progress_bar_defined(iterable=iterable, total=total, desc=desc, unit=unit, file=file, disable=disable)
33
32
  else:
34
- return progress_bar_iter(iterable=iterable, desc=desc, unit=unit)
33
+ return progress_bar_iter(iterable=iterable, desc=desc, unit=unit, file=file, disable=disable)
35
34
 
36
35
 
37
- def progress_bar_conditional(iterable, condition_func, total, postfix_func=None, desc="", unit="it"):
36
+ def progress_bar_conditional(iterable, condition_func, total, postfix_func=None, desc="", unit="it", file=None):
38
37
  """ This progress-bar updates only on the
39
38
  specific conditions during the iteration process.
40
39
  """
@@ -47,7 +46,7 @@ def progress_bar_conditional(iterable, condition_func, total, postfix_func=None,
47
46
  yield 0
48
47
 
49
48
  pbar_it = progress_bar(iterable=__iter_infinite_placeholder(),
50
- desc=desc, unit=unit, total=total)
49
+ desc=desc, unit=unit, total=total, file=file)
51
50
  element = iter(pbar_it)
52
51
 
53
52
  # Initialize with 0.
@@ -65,7 +64,7 @@ def progress_bar_conditional(iterable, condition_func, total, postfix_func=None,
65
64
  pbar_it.set_postfix(postfix_func(item))
66
65
 
67
66
 
68
- def progress_bar_defined(iterable, total, miniters=200, desc="", unit="it"):
67
+ def progress_bar_defined(iterable, total, miniters=200, desc="", unit="it", file=None, disable=False):
69
68
  return tqdm(iterable=iterable,
70
69
  total=total,
71
70
  desc=desc,
@@ -73,13 +72,17 @@ def progress_bar_defined(iterable, total, miniters=200, desc="", unit="it"):
73
72
  position=0,
74
73
  leave=True,
75
74
  unit=unit,
75
+ file=file,
76
+ disable=disable,
76
77
  miniters=total / miniters if total is not None else total)
77
78
 
78
79
 
79
- def progress_bar_iter(iterable, desc="", unit='it'):
80
+ def progress_bar_iter(iterable, desc="", unit='it', file=None, disable=False):
80
81
  return tqdm(iterable=iterable,
81
82
  desc=desc,
82
83
  position=0,
83
84
  leave=True,
84
85
  ncols=120,
86
+ file=file,
87
+ disable=disable,
85
88
  unit=unit)
@@ -5,8 +5,9 @@ from arekit.common.data.storages.base import BaseRowsStorage
5
5
 
6
6
  class JsonlBasedRowsStorage(BaseRowsStorage):
7
7
 
8
- def __init__(self, rows):
8
+ def __init__(self, rows, **kwargs):
9
9
  assert(isinstance(rows, list))
10
+ super(JsonlBasedRowsStorage, self).__init__(**kwargs)
10
11
  self.__rows = rows
11
12
 
12
13
  def _iter_rows(self):
@@ -12,7 +12,8 @@ class PandasBasedRowsStorage(BaseRowsStorage):
12
12
  based on the pandas DataFrames.
13
13
  """
14
14
 
15
- def __init__(self, df=None):
15
+ def __init__(self, df=None, **kwargs):
16
+ super(PandasBasedRowsStorage, self).__init__(**kwargs)
16
17
  self._df = df
17
18
 
18
19
  @property
@@ -96,26 +97,10 @@ class PandasBasedRowsStorage(BaseRowsStorage):
96
97
  def get_row(self, row_index):
97
98
  return self._df.iloc[row_index]
98
99
 
99
- def get_cell(self, row_index, column_name):
100
- return self._df.iloc[row_index][column_name]
101
-
102
- def iter_column_values(self, column_name, dtype=None):
103
- values = self._df[column_name]
104
- if dtype is None:
105
- return values
106
- return values.astype(dtype)
107
-
108
- def find_by_value(self, column_name, value):
109
- return self.__filter(column_name=column_name, value=value)
110
-
111
100
  def init_empty(self, columns_provider):
112
101
  cols_with_types = columns_provider.get_columns_list_with_types()
113
102
  self._df = self.__create_empty(cols_with_types)
114
103
 
115
- def iter_shuffled(self):
116
- shuffled_df = self._df.sample(frac=1)
117
- return self.__iter_rows_core(shuffled_df)
118
-
119
104
  def free(self):
120
105
  del self._df
121
106
  super(PandasBasedRowsStorage, self).free()
@@ -6,13 +6,14 @@ class RowCacheStorage(BaseRowsStorage):
6
6
  """ Row Caching storage kernel, based on python dictionary.
7
7
  """
8
8
 
9
- def __init__(self, force_collect_columns=None):
9
+ def __init__(self, force_collect_columns=None, **kwargs):
10
10
  """ This is a particular/related solution for the following issue:
11
11
  https://github.com/nicolay-r/AREkit/issues/464
12
12
  force_collect_columns: list
13
13
  columns that supposed to be additionally considered in output.
14
14
  """
15
15
  assert(isinstance(force_collect_columns, list) or force_collect_columns is None)
16
+ super(RowCacheStorage, self).__init__(**kwargs)
16
17
  self.__f = None
17
18
  self.__row_cache = {}
18
19
  self.__column_names = []
@@ -4,7 +4,8 @@ from arekit.common.data.storages.base import BaseRowsStorage
4
4
 
5
5
  class SQliteBasedRowsStorage(BaseRowsStorage):
6
6
 
7
- def __init__(self, path, table_name):
7
+ def __init__(self, path, table_name, **kwargs):
8
+ super(SQliteBasedRowsStorage, self).__init__(**kwargs)
8
9
  self.__path = path
9
10
  self.__table_name = table_name
10
11
  self.__conn = None
@@ -15,7 +15,7 @@ from arekit.contrib.utils.pipelines.text_opinion.filters.limitation import Frame
15
15
  def __iter_text_opinion_linkages(parsed_doc, annotators, entity_index_func,
16
16
  text_opinion_filters, use_meta):
17
17
  """ use_meta: bool
18
- this is mainly for tqdm and other console parameters to stay up-to-date
18
+ this is mainly for the progress-bar and other console parameters to stay up-to-date
19
19
  with the state in the case we do not have that much output results
20
20
  across multiple amount of documents.
21
21
  """
@@ -62,12 +62,13 @@ def __iter_text_opinion_linkages(parsed_doc, annotators, entity_index_func,
62
62
  yield MetaEmptyLinkedDataWrapper(doc_id=parsed_doc.RelatedDocID)
63
63
 
64
64
 
65
- def text_opinion_extraction_pipeline(pipeline_items, get_doc_by_id_func, annotators, entity_index_func,
65
+ def text_opinion_extraction_pipeline(pipeline_items, get_doc_by_id_func, annotators, entity_index_func, batch_size,
66
66
  text_opinion_filters=None, use_meta_between_docs=True):
67
67
  assert(callable(get_doc_by_id_func))
68
68
  assert(isinstance(annotators, list))
69
69
  assert(isinstance(text_opinion_filters, list) or text_opinion_filters is None)
70
70
  assert(isinstance(use_meta_between_docs, bool))
71
+ assert(isinstance(batch_size, int) and batch_size > 0)
71
72
 
72
73
  extra_filters = [] if text_opinion_filters is None else text_opinion_filters
73
74
  actual_text_opinion_filters = [FrameworkLimitationsTextOpinionFilter()] + extra_filters
@@ -77,8 +78,8 @@ def text_opinion_extraction_pipeline(pipeline_items, get_doc_by_id_func, annotat
77
78
  MapPipelineItem(map_func=lambda doc_id: get_doc_by_id_func(doc_id)),
78
79
 
79
80
  # (doc, ppl_ctx) -> (parsed_doc)
80
- MapNestedPipelineItem(map_func=lambda doc, ppl_ctx: DocumentParsers.parse(
81
- doc=doc, pipeline_items=pipeline_items, parent_ppl_ctx=ppl_ctx)),
81
+ MapNestedPipelineItem(map_func=lambda doc, ppl_ctx: DocumentParsers.parse_batch(
82
+ doc=doc, pipeline_items=pipeline_items, parent_ppl_ctx=ppl_ctx, batch_size=batch_size)),
82
83
 
83
84
  # (parsed_doc) -> (text_opinions)
84
85
  MapPipelineItem(map_func=lambda parsed_doc: __iter_text_opinion_linkages(
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: arekit
3
- Version: 0.25.0
3
+ Version: 0.25.1
4
4
  Summary: Document level Attitude and Relation Extraction toolkit (AREkit) for sampling and prompting mass-media news into datasets for ML-model training
5
5
  Home-page: https://github.com/nicolay-r/AREkit
6
6
  Author: Nicolay Rusnachenko
@@ -18,9 +18,8 @@ License-File: LICENSE
18
18
  Requires-Dist: tqdm
19
19
  Requires-Dist: enum34==1.1.10
20
20
  Requires-Dist: numpy>=1.14.5
21
- Requires-Dist: pymystem3==0.2.0
22
21
 
23
- # AREkit 0.25.0
22
+ # AREkit 0.25.1
24
23
 
25
24
  ![](https://img.shields.io/badge/Python-3.9+-brightgreen.svg)
26
25
 
@@ -34,7 +33,7 @@ is a python toolkit, devoted to document level Attitude and Relation Extraction
34
33
  ## Description
35
34
 
36
35
 
37
- This toolkit aims at memory-effective data processing in Relation Extraction (RE) related tasks.
36
+ This toolkit aims at memory-effective data processing in [Relation Extraction (RE)](https://nlpprogress.com/english/relationship_extraction.html) related tasks.
38
37
 
39
38
  <p align="center">
40
39
  <img src="docs/arekit-pipeline-concept.png"/>
@@ -60,7 +59,7 @@ for sentence level relations preparation (dubbed as contexts);
60
59
  ## Installation
61
60
 
62
61
  ```bash
63
- pip install git+https://github.com/nicolay-r/AREkit.git@0.25.0-rc
62
+ pip install git+https://github.com/nicolay-r/AREkit.git@0.25.1-rc
64
63
  ```
65
64
 
66
65
  ## Usage
@@ -2,7 +2,7 @@ arekit/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
2
2
  arekit/common/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
3
3
  arekit/common/bound.py,sha256=lPpHY6ct_CU9e4qXeYjhJfWbTj6Sb_NVtZ1CJheQPNE,1402
4
4
  arekit/common/log_utils.py,sha256=OfEQxbExkuRAl9dxlgFEqcFhI4HHoMYT7WE8ud0IPOM,924
5
- arekit/common/utils.py,sha256=eVRGhRy882ow-63Glncc3pJ-_43KSI0ukBePjC8ogAY,2394
5
+ arekit/common/utils.py,sha256=N061ENJJgvsB338Q9cixc6RWyuikSPQq4Tc8mmgwy9s,2659
6
6
  arekit/common/context/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
7
7
  arekit/common/context/terms_mapper.py,sha256=QA02Cv7D2JKTlXkez_0w0J8HuvNziNF2vrqLgy4Bwc8,1447
8
8
  arekit/common/context/token.py,sha256=CpWAlvprUnJfCtYvO8lwdfU_ofSKAOGOudXTwppyzSk,459
@@ -35,18 +35,13 @@ arekit/common/data/input/providers/sample/__init__.py,sha256=47DEQpj8HBSa-_TImW-
35
35
  arekit/common/data/input/providers/sample/cropped.py,sha256=jJSos4Si-qy-wb-QmomXxxgURR1UhJnvY0tZoowlfVc,1885
36
36
  arekit/common/data/input/providers/text/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
37
37
  arekit/common/data/input/providers/text/single.py,sha256=vm3sShIYZcmses-hmZX9cOfveWXCYGwvKLgQ0qs3VXQ,1604
38
- arekit/common/data/input/repositories/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
39
- arekit/common/data/input/repositories/base.py,sha256=4DmLVORc85gu6bxtXVZgxi176NxnIaqHz2tVebMyGZ8,2557
40
- arekit/common/data/input/repositories/sample.py,sha256=LAdpaA1N_nq1iInLwkWQVvL6HGH64JYWSJ9tywU0llY,784
41
38
  arekit/common/data/storages/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
42
- arekit/common/data/storages/base.py,sha256=L9OLpVOZwlAXZION0YP1T6ZN1t_dfQpnAPAU4ztSs48,2956
43
- arekit/common/data/views/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
44
- arekit/common/data/views/samples.py,sha256=LDqUDqArGt90ujRB4kDFgDHLmR2_AQoUnzhxpXYWYaM,882
39
+ arekit/common/data/storages/base.py,sha256=psxo5uIc3hUDi5Cgf4j3Cm-935Fy1VQBYzcBzCcCFZE,2661
45
40
  arekit/common/docs/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
46
41
  arekit/common/docs/base.py,sha256=uXUOtpR9BEsDBfDHg4eLqOjfSVOV_o9VPii3nSxLZuY,734
47
42
  arekit/common/docs/entities_grouping.py,sha256=_r254fNr0j6BjHuLZBLjj21yWm4_k__5aOcBXcAaQUQ,704
48
43
  arekit/common/docs/entity.py,sha256=TxrZMdIEgjk-PgCyskCkVis2KAw_M7vTBp3ppP6G05M,662
49
- arekit/common/docs/parser.py,sha256=514lQNrZiwU_mxgyuWBkDhqjS5SVAvcIHx9GQUTuVG8,2883
44
+ arekit/common/docs/parser.py,sha256=dzWjpbbYt-C9UU9sSy_Holnm0kQxJqtz1_6va6kS_L4,1780
50
45
  arekit/common/docs/sentence.py,sha256=nZCCFj2yk71POoXCBfEMN3pteM2qQdj60eEzxMVY_3k,302
51
46
  arekit/common/docs/parsed/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
52
47
  arekit/common/docs/parsed/base.py,sha256=WPstqOpBuLKjtz6UO_bI0DpOPF3Sm0wYEVwjtldbPXE,3175
@@ -115,14 +110,12 @@ arekit/common/pipeline/context.py,sha256=Fw25lBVakHNAXjtkdEqopR-Jh59cDKGWD2jCJxB
115
110
  arekit/common/pipeline/conts.py,sha256=NAQNsHt1kK3HnxWv3M6yXi0c7C6Mx6ZZ6KZc0yE0eas,70
116
111
  arekit/common/pipeline/utils.py,sha256=5VqH1LtRa4tYUbyiRvWdBmP4biFhTKq9vhr8QiRFFkY,882
117
112
  arekit/common/pipeline/items/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
118
- arekit/common/pipeline/items/base.py,sha256=dWIZVGJjYuURLCiZj8YQHWtsS725SOi9SPZaCPV7NvI,1694
113
+ arekit/common/pipeline/items/base.py,sha256=15-z8ERQ0QxaRszs7sHQduU0KIBJIm8B0V2nwCva6d0,1695
119
114
  arekit/common/pipeline/items/flatten.py,sha256=9T4jWqPGv4UDxajlM0Nm0-gvwUgqqYB8XH0efTum9a0,542
120
115
  arekit/common/pipeline/items/handle.py,sha256=QS5Byj7-o5jmFi0ag58NE3zm2-JzVIunIgc3Pn1ij6g,578
121
116
  arekit/common/pipeline/items/iter.py,sha256=Tk9WdUMPOq20s7jEWEpU4PmillnVtQ8nIa2ct7iw-3s,406
122
117
  arekit/common/pipeline/items/map.py,sha256=G5wBdjaaxePD0pijrxsfpJACeP7kzj7HerjCkNIhmII,381
123
118
  arekit/common/pipeline/items/map_nested.py,sha256=vs0GdJNr3qSF9p2yd1nWji5E1HGzECbvOfN2MqoHc2A,630
124
- arekit/common/service/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
125
- arekit/common/service/sqlite.py,sha256=1jLIszkcJGeT0hUos8Y0Chp3o9XRUfljG2P9q0T2_Ds,1440
126
119
  arekit/common/synonyms/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
127
120
  arekit/common/synonyms/base.py,sha256=YxD-CKCjlEtar1zTdumnfC3vKgbP2wLODR9mMEwbbnA,4237
128
121
  arekit/common/synonyms/grouping.py,sha256=fi7QQbBvsTvvP2CPTesSPEsPNmGfc6euqj-HPhVvtlg,698
@@ -141,29 +134,9 @@ arekit/contrib/bert/input/providers/cropped_sample.py,sha256=46uHHhAe8cGxV2JlfO3
141
134
  arekit/contrib/bert/input/providers/text_pair.py,sha256=_1d-he0n42y3ksj8RjJlNHgHnaQUEq0aQhUdTPRMKgg,2817
142
135
  arekit/contrib/bert/terms/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
143
136
  arekit/contrib/bert/terms/mapper.py,sha256=oHX-lsaZYjBFLjngzSKT5z_JPJCHbclUsEe4i4fup_8,992
144
- arekit/contrib/networks/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
145
- arekit/contrib/networks/embedding.py,sha256=lrLdB6CdmnmzwavAL6MZuLHceNM3PsZZiWLZ4BjGeXc,3845
146
- arekit/contrib/networks/embedding_io.py,sha256=hV1MBr9wu9-10gQgnAzLuC-l897aB-8KNcw4h69B5VM,460
147
- arekit/contrib/networks/vectorizer.py,sha256=KKV_f0GZD10ZpeYgqZfvMapJtsKa3NBddR6W_GdYqrM,155
148
- arekit/contrib/networks/input/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
149
- arekit/contrib/networks/input/const.py,sha256=nPeuO-G6MILNlIkGc5HzSDj_RmTwLflReF7n5htFAUI,176
150
- arekit/contrib/networks/input/ctx_serialization.py,sha256=eCOw4xjp8A7Z2WFanshooS3MqSy7dbZ8ywf_DA2LZO8,982
151
- arekit/contrib/networks/input/rows_parser.py,sha256=6_43LbAelveY9yEWMU5BdvQlpWwm4RDOjUEmqHuPYdE,1807
152
- arekit/contrib/networks/input/term_types.py,sha256=P8E5LKegZE5ZEh4vNtC55Lu8USbQt8_Eo14op_anmvU,348
153
- arekit/contrib/networks/input/terms_mapping.py,sha256=NAnuTAbj7tBTe1Ga4js2IfnUdAWlTV9fcgSQEgYqQUQ,2129
154
- arekit/contrib/networks/input/embedding/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
155
- arekit/contrib/networks/input/embedding/matrix.py,sha256=BFn7eXhiqXY7c4tUfy1fzemIqRnZYx_GiEv873QnIEs,952
156
- arekit/contrib/networks/input/embedding/offsets.py,sha256=HrBfbFD03o_Y0ZvEGTd-FRxmPx55_5vqItTranMFy88,1313
157
- arekit/contrib/networks/input/formatters/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
158
- arekit/contrib/networks/input/formatters/pos_mapper.py,sha256=yftPKYU7noVb_q0KAflHf7bqjuUXt5siIgbnwMEoWrw,773
159
- arekit/contrib/networks/input/providers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
160
- arekit/contrib/networks/input/providers/sample.py,sha256=MHtXhhBD-kM0yzTACTbY14KMPIuhiLgUKEYXfhvumfo,5445
161
- arekit/contrib/networks/input/providers/term_connotation.py,sha256=Q90pVN4hQgYAk3oBSCPYc6_1xQUQE1b6ksiU_k8frcM,1157
162
- arekit/contrib/networks/input/providers/text.py,sha256=kucezKm6Ilmy5wuM2jUP5xk9zh1K1Pf8KcMd1prrp8k,917
163
137
  arekit/contrib/prompt/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
164
138
  arekit/contrib/prompt/sample.py,sha256=MxpbDR0ww7WmdtuPu74B8R6QKVXeuzO0CKGOJIYwbRk,3164
165
139
  arekit/contrib/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
166
- arekit/contrib/utils/serializer.py,sha256=D9LJ2ZXeVx3YntV-HqEnt32xW-s4GauwD97XRVlqr0g,1626
167
140
  arekit/contrib/utils/bert/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
168
141
  arekit/contrib/utils/bert/samplers.py,sha256=ZVe3rbUAH0Jw1xR_yHE1DoUJf3CI0pDgbBQQzlLWevc,989
169
142
  arekit/contrib/utils/data/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -172,54 +145,28 @@ arekit/contrib/utils/data/contents/opinions.py,sha256=MSV7NytEe15adKhhHCq5KiCj6Z
172
145
  arekit/contrib/utils/data/doc_provider/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
173
146
  arekit/contrib/utils/data/doc_provider/dict_based.py,sha256=zUOiiIbj5zby4xqMb0m9N-a6enavJJ7wFmPaGErykWU,371
174
147
  arekit/contrib/utils/data/doc_provider/dir_based.py,sha256=FTw3kLV_CYtPoUoHl39IrP6RjLvTecCno9May95jVXw,1916
175
- arekit/contrib/utils/data/readers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
176
- arekit/contrib/utils/data/readers/base.py,sha256=zAsZLX5ng0_gb_ysL6wQchptmBHlNgqgQilw295Y5Aw,153
177
- arekit/contrib/utils/data/readers/csv_pd.py,sha256=Ym49j04Z-_WQN-7xJMiiN1y2TIMnMDtPxy5h0mT3WBQ,1383
178
- arekit/contrib/utils/data/readers/jsonl.py,sha256=c2bHwnTfNEwb1c8B9fRwaQyeze5x3nOd2UXXAp4MbxQ,426
179
- arekit/contrib/utils/data/readers/sqlite.py,sha256=U1138XNCIwqycNivxwzwIUnowj3jDkP4M6J_Kvyedbc,416
180
- arekit/contrib/utils/data/service/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
181
- arekit/contrib/utils/data/service/balance.py,sha256=PgA5B6qSPmt8ITPLsQuCkniE8-u2NO_eQ2m-U9Akh98,1547
182
148
  arekit/contrib/utils/data/storages/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
183
- arekit/contrib/utils/data/storages/jsonl_based.py,sha256=Oj5u8aW_UtVDSDxMpIQsgMlZlV-KBD0qVHPVVT3m8nA,450
184
- arekit/contrib/utils/data/storages/pandas_based.py,sha256=m8z34tO_7NupYd_zQ4L1miTXJQkmMMB90zPFqEeYCNs,4301
185
- arekit/contrib/utils/data/storages/row_cache.py,sha256=V1InYIqRf5WMWV_JndHNH9JzAjFS3ZL38f4_pDPLo_8,1985
186
- arekit/contrib/utils/data/storages/sqlite_based.py,sha256=ARwVisVbPKBap_mVdpvTpp28iXgJbCJ3dAj41UYu03Q,609
149
+ arekit/contrib/utils/data/storages/jsonl_based.py,sha256=dz8uizu9t1C215o0HEL8y4LiDKR4aC_-OwDu_xF0xIM,522
150
+ arekit/contrib/utils/data/storages/pandas_based.py,sha256=gMkWUFHZE9Oe1Uy04vEBcUfTIAdh46r5zpjlPAwwG2g,3842
151
+ arekit/contrib/utils/data/storages/row_cache.py,sha256=MRK0uJFvw6O99k2aFb3JLZhLUBo2JUO-WYQ4EeRRu6M,2051
152
+ arekit/contrib/utils/data/storages/sqlite_based.py,sha256=cIYAHyiB4CMftKgrgLqw-L4F1WnhbspjwWLSPqH5NHk,682
187
153
  arekit/contrib/utils/data/writers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
188
154
  arekit/contrib/utils/data/writers/base.py,sha256=JLwf5WVl_U319sdMev8YOn4OoCcrgNIUZtrOuG1JLjI,766
189
- arekit/contrib/utils/data/writers/csv_native.py,sha256=7fPxYeu9YDK8Cvjp1n-sbKT63ZuhDIEv3VwghHuKk5k,2252
190
- arekit/contrib/utils/data/writers/csv_pd.py,sha256=WhBjDJCHUBy_TabngMF42Qicx0ye8xIus0m6c7qotto,1330
191
- arekit/contrib/utils/data/writers/json_opennre.py,sha256=EkhXmONgtMe7A9VKrs9ElFHc8RoMumjFbkKfwuOVOoU,5067
192
- arekit/contrib/utils/data/writers/sqlite_native.py,sha256=MnbLU8iPvYvpYgEbOXhBKH_G8DJs0W9iSuhr_TPKBAQ,4601
193
- arekit/contrib/utils/embeddings/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
194
- arekit/contrib/utils/embeddings/rusvectores.py,sha256=WA0HejE2U5kgeBvh4_vty2QzoAkFXiMk94BK8FHxoxw,1931
195
- arekit/contrib/utils/embeddings/tokens.py,sha256=z3lJ30JTX9zvZtPgzRl3yANECmuA1qboMDTcJsr_4E4,872
196
155
  arekit/contrib/utils/entities/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
197
156
  arekit/contrib/utils/entities/filter.py,sha256=aHTExIMFaMdy4QL8iYE23eiby3qLImAakXR6gNqG6fs,145
198
157
  arekit/contrib/utils/entities/formatters/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
199
158
  arekit/contrib/utils/entities/formatters/str_display.py,sha256=N8igv7EVaTFayvLXkyBGtm67KwHaeP-M-L8d7oqBG9Q,401
200
159
  arekit/contrib/utils/entities/formatters/str_simple_sharp_prefixed_fmt.py,sha256=rEUIma9O3kOBWIguGtJ69JH-00Dhm0vUBOd5yNcKweY,653
201
160
  arekit/contrib/utils/io_utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
202
- arekit/contrib/utils/io_utils/embedding.py,sha256=cBDRv_1LROJ262QaL3QVfGt2W9EvBfbh83oL41PJn60,2543
203
161
  arekit/contrib/utils/io_utils/utils.py,sha256=310SIJTsNLn2OZrGPer9W4ZP52PHkjBK3zsyqxVs3h0,537
204
- arekit/contrib/utils/np_utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
205
- arekit/contrib/utils/np_utils/embedding.py,sha256=G7Ls_ClzbskLLy-opRcVzQlfUfhdwbqoXgk0zoGrmHM,798
206
- arekit/contrib/utils/np_utils/npz_utils.py,sha256=XoUHNmOlcr2X674R1xKGUJitEpFCIBJ8DOpNEPhtJFk,234
207
- arekit/contrib/utils/np_utils/vocab.py,sha256=FsS18chMLU4WfMeGwBbvmfB5Qmoj5tZTOo-4zqWPm3Q,580
208
162
  arekit/contrib/utils/pipelines/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
209
163
  arekit/contrib/utils/pipelines/opinion_collections.py,sha256=y9-klVJGCN9mPd7t1ECllAiCnAb3MKVXC1PnYddp5sQ,3195
210
164
  arekit/contrib/utils/pipelines/items/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
211
- arekit/contrib/utils/pipelines/items/sampling/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
212
- arekit/contrib/utils/pipelines/items/sampling/base.py,sha256=-H-r5GIi9ee7CxxpJs8KnHC91l7Y1dYaWPR_OK17E8g,4245
213
- arekit/contrib/utils/pipelines/items/sampling/networks.py,sha256=E0EjQ4KRd3oYLFVbie05XJa00JqR26eLRoMrDnuQySQ,2653
214
165
  arekit/contrib/utils/pipelines/items/text/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
215
166
  arekit/contrib/utils/pipelines/items/text/entities_default.py,sha256=vNx5ir2mf7a1gg_OeqUsf_p1Fu2k7QIFxVpe-CuwZ84,727
216
167
  arekit/contrib/utils/pipelines/items/text/frames.py,sha256=pZQybYfgEQB1DM3PtmsgrtB2Xl0HejmP4rhT0nR_YKE,2586
217
- arekit/contrib/utils/pipelines/items/text/frames_lemmatized.py,sha256=4rIAAB-_GeWNbu5KyaDm5qttH4o2Bzpdvy-D9YR5bRk,1776
218
- arekit/contrib/utils/pipelines/items/text/frames_negation.py,sha256=AdoY7lqSAT0RApp0DbqeI7xxyRVF6NPJLAfR59lsIec,1303
219
- arekit/contrib/utils/pipelines/items/text/tokenizer.py,sha256=FmV5flziDLCNttxrUzRr-FGCcKK6venZEcZ-KwcqwNE,3147
220
- arekit/contrib/utils/pipelines/items/text/translator.py,sha256=TkXVyZYRbS8P4S2Pnn2GzQMRa-9ba-nS4_zXvsf16vU,5365
221
168
  arekit/contrib/utils/pipelines/text_opinion/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
222
- arekit/contrib/utils/pipelines/text_opinion/extraction.py,sha256=QoK0-dfMl27uOOfUhvnbvzYX23jCpZbm97Qs27Na7VA,4133
169
+ arekit/contrib/utils/pipelines/text_opinion/extraction.py,sha256=MT1WMlvVI25JRL0g7W83bV8BGUr7_MNOQBj7ZAHgrnU,4245
223
170
  arekit/contrib/utils/pipelines/text_opinion/annot/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
224
171
  arekit/contrib/utils/pipelines/text_opinion/annot/algo_based.py,sha256=bwS-UR2x3rgp_xqnf6z-73T-eIZE_kltRSGYxgd_WpU,1751
225
172
  arekit/contrib/utils/pipelines/text_opinion/filters/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -228,32 +175,12 @@ arekit/contrib/utils/pipelines/text_opinion/filters/distance_based.py,sha256=3Pj
228
175
  arekit/contrib/utils/pipelines/text_opinion/filters/entity_based.py,sha256=pdWFJaKh4kKIsUuBNp3WNy5Rj80CjWEy2wp-0axFnrI,1254
229
176
  arekit/contrib/utils/pipelines/text_opinion/filters/limitation.py,sha256=4AFS5zhocJuYphGO2ZMWmYTtIhGItKDTkB0--AmjgnA,1151
230
177
  arekit/contrib/utils/processing/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
231
- arekit/contrib/utils/processing/languages/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
232
- arekit/contrib/utils/processing/languages/mods.py,sha256=OERKcglI4pJEIQxlWMYuYg_uHnNWVpP-mqhnFsQbY7A,263
233
- arekit/contrib/utils/processing/languages/pos.py,sha256=etC3ueLGgZorgKEc3TWpeIuv46vs392xPi1lM31Cg0s,278
234
- arekit/contrib/utils/processing/languages/ru/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
235
- arekit/contrib/utils/processing/languages/ru/cases.py,sha256=27sIQsU5_0aT4EVuPtKCK-tfi1Q0TH11phV1x5hIzLs,1492
236
- arekit/contrib/utils/processing/languages/ru/constants.py,sha256=f4z7ivILKqYju9rkagi9_FIvPm1FnWHbXgxigyb3zm4,147
237
- arekit/contrib/utils/processing/languages/ru/mods.py,sha256=j4xKgRbCC834i9n-RyU607v9Qph9sP_B31WLrKFByRk,343
238
- arekit/contrib/utils/processing/languages/ru/number.py,sha256=kHyP0Lp_iHVDwkbN7tkZUJpGFQ40QRm-j_1g0dFU-sM,401
239
- arekit/contrib/utils/processing/languages/ru/pos_service.py,sha256=BWHLPybjmTVNXjJM2QmrZlEDcl7nZY7keLmXZcG_PFM,1125
240
- arekit/contrib/utils/processing/lemmatization/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
241
- arekit/contrib/utils/processing/lemmatization/mystem.py,sha256=_FRqEGWUlgAbhSJ-dsyoFg_qbbUxePDSAOWWuveRqCo,1340
242
- arekit/contrib/utils/processing/pos/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
243
- arekit/contrib/utils/processing/pos/base.py,sha256=CrMr3u6lRs2NoV7uch5HZgV71A-0M-pwJfwXjfudHBY,259
244
- arekit/contrib/utils/processing/pos/mystem_wrap.py,sha256=C9AnRIAZL4e8DMNte9LDuvxS-cbEQpo2AYdQtP9uIJ4,4336
245
- arekit/contrib/utils/processing/pos/russian.py,sha256=POCo6xKmK7vAEq-kWlODg611kLOtOj37OVc3L_GWL-8,229
246
- arekit/contrib/utils/processing/text/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
247
- arekit/contrib/utils/processing/text/tokens.py,sha256=_3u5Oy1MG_QfHH8wi0x0nA588qSaCp3Wmnp2SzMWjXY,3573
248
178
  arekit/contrib/utils/synonyms/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
249
179
  arekit/contrib/utils/synonyms/simple.py,sha256=ST9EwuWP88FzbyV8Gi0-biTPgGOsZ7OWyaBWHL_U_eo,557
250
180
  arekit/contrib/utils/synonyms/stemmer_based.py,sha256=q19P_XOCWN2_JrBtybAt7ToMIr1ambw4ahr0fSEEHmQ,1400
251
- arekit/contrib/utils/vectorizers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
252
- arekit/contrib/utils/vectorizers/bpe.py,sha256=bFS5MZytvU1L21YS5aAeb3FZl7RMjyog4lWwysvKD-8,3047
253
- arekit/contrib/utils/vectorizers/random_norm.py,sha256=TL86Kz6p59lJqoLg8RwQRTvfhr0e-tiULGHhO4vhBbo,1339
254
- arekit-0.25.0.data/data/logo.png,sha256=S8OZ4MGGD72Pf5co7ngYbXKkJH1EUhbErUXv1ZjUWiU,45718
255
- arekit-0.25.0.dist-info/LICENSE,sha256=JO9tIbxAvhwDv73cX-gUStr9yA-TY7wusUeLHRx7JuY,1076
256
- arekit-0.25.0.dist-info/METADATA,sha256=4DSUy6aTidHG9jFR7jMwQe3uJGER-e8E9vU0q2G20Uo,3145
257
- arekit-0.25.0.dist-info/WHEEL,sha256=eOLhNAGa2EW3wWl_TU484h7q1UNgy0JXjjoqKoxAAQc,92
258
- arekit-0.25.0.dist-info/top_level.txt,sha256=4pXuFE8IE0lBsqi6ZsR7figx0H939VIX4_-76YIbkOQ,7
259
- arekit-0.25.0.dist-info/RECORD,,
181
+ arekit-0.25.1.data/data/logo.png,sha256=S8OZ4MGGD72Pf5co7ngYbXKkJH1EUhbErUXv1ZjUWiU,45718
182
+ arekit-0.25.1.dist-info/LICENSE,sha256=JO9tIbxAvhwDv73cX-gUStr9yA-TY7wusUeLHRx7JuY,1076
183
+ arekit-0.25.1.dist-info/METADATA,sha256=ryWGTL4fYqR36z2qh1UuYBg6UIU6n7_U9Y09KPRS6xk,3177
184
+ arekit-0.25.1.dist-info/WHEEL,sha256=eOLhNAGa2EW3wWl_TU484h7q1UNgy0JXjjoqKoxAAQc,92
185
+ arekit-0.25.1.dist-info/top_level.txt,sha256=4pXuFE8IE0lBsqi6ZsR7figx0H939VIX4_-76YIbkOQ,7
186
+ arekit-0.25.1.dist-info/RECORD,,
File without changes
@@ -1,68 +0,0 @@
1
- from arekit.common.data.input.providers.columns.base import BaseColumnsProvider
2
- from arekit.common.data.input.providers.contents import ContentsProvider
3
- from arekit.common.data.input.providers.rows.base import BaseRowProvider
4
- from arekit.common.data.storages.base import BaseRowsStorage
5
- from arekit.contrib.utils.data.storages.row_cache import RowCacheStorage
6
- from arekit.contrib.utils.data.writers.base import BaseWriter
7
-
8
-
9
- class BaseInputRepository(object):
10
-
11
- def __init__(self, columns_provider, rows_provider, storage):
12
- assert(isinstance(columns_provider, BaseColumnsProvider))
13
- assert(isinstance(rows_provider, BaseRowProvider))
14
- assert(isinstance(storage, BaseRowsStorage))
15
-
16
- self._columns_provider = columns_provider
17
- self._rows_provider = rows_provider
18
- self._storage = storage
19
-
20
- # Do setup operations.
21
- self._setup_columns_provider()
22
- self._setup_rows_provider()
23
-
24
- # region protected methods
25
-
26
- def _setup_columns_provider(self):
27
- pass
28
-
29
- def _setup_rows_provider(self):
30
- pass
31
-
32
- # endregion
33
-
34
- def populate(self, contents_provider, doc_ids, desc="", writer=None, target=None):
35
- assert(isinstance(contents_provider, ContentsProvider))
36
- assert(isinstance(self._storage, BaseRowsStorage))
37
- assert(isinstance(doc_ids, list))
38
- assert(isinstance(writer, BaseWriter) or writer is None)
39
- assert(isinstance(target, str) or target is None)
40
-
41
- def iter_rows(idle_mode):
42
- return self._rows_provider.iter_by_rows(
43
- contents_provider=contents_provider,
44
- doc_ids_iter=doc_ids,
45
- idle_mode=idle_mode)
46
-
47
- self._storage.init_empty(columns_provider=self._columns_provider)
48
-
49
- is_async_write_mode_on = writer is not None and target is not None
50
-
51
- if is_async_write_mode_on:
52
- writer.open_target(target)
53
-
54
- self._storage.fill(lambda idle_mode: iter_rows(idle_mode),
55
- columns_provider=self._columns_provider,
56
- row_handler=lambda: writer.commit_line(self._storage) if is_async_write_mode_on else None,
57
- desc=desc)
58
-
59
- if is_async_write_mode_on:
60
- writer.close_target()
61
-
62
- def push(self, writer, target, free_storage=True):
63
- if not isinstance(self._storage, RowCacheStorage):
64
- writer.write_all(self._storage, target)
65
-
66
- # After writing we free the contents of the storage.
67
- if free_storage:
68
- self._storage.free()
@@ -1,22 +0,0 @@
1
- import logging
2
-
3
- from arekit.common.data.input.providers.rows.samples import BaseSampleRowProvider
4
- from arekit.common.data.input.repositories.base import BaseInputRepository
5
-
6
- logger = logging.getLogger(__name__)
7
- logging.basicConfig(level=logging.INFO)
8
-
9
-
10
- class BaseInputSamplesRepository(BaseInputRepository):
11
-
12
- def _setup_rows_provider(self):
13
- """ Setup store labels.
14
- """
15
- assert(isinstance(self._rows_provider, BaseSampleRowProvider))
16
- self._rows_provider.set_store_labels(self._columns_provider.StoreLabels)
17
-
18
- def _setup_columns_provider(self):
19
- """ Setup text column names.
20
- """
21
- text_column_names = list(self._rows_provider.TextProvider.iter_columns())
22
- self._columns_provider.set_text_column_names(text_column_names)
File without changes
@@ -1,26 +0,0 @@
1
- from arekit.common.data import const
2
- from arekit.common.data.storages.base import BaseRowsStorage
3
-
4
-
5
- # TODO. This is a particular type of view, and expected to be off the core.
6
- class LinkedSamplesStorageView(object):
7
-
8
- def iter_from_storage(self, storage):
9
- assert(isinstance(storage, BaseRowsStorage))
10
- undefined = -1
11
-
12
- linked = []
13
- current_opinion_id = undefined
14
- for row_index, opinion_id in enumerate(storage.iter_column_values(const.OPINION_ID)):
15
- if current_opinion_id != undefined:
16
- if opinion_id != current_opinion_id:
17
- yield linked
18
- linked = []
19
- current_opinion_id = opinion_id
20
- else:
21
- current_opinion_id = opinion_id
22
-
23
- linked.append(storage.get_row(row_index))
24
-
25
- if len(linked) > 0:
26
- yield linked
File without changes