arekit 0.23.1__py3-none-any.whl → 0.25.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (267) hide show
  1. arekit/common/context/terms_mapper.py +2 -2
  2. arekit/common/data/const.py +5 -4
  3. arekit/common/{experiment/api/ops_doc.py → data/doc_provider.py} +1 -1
  4. arekit/common/data/input/providers/columns/sample.py +6 -1
  5. arekit/common/data/input/providers/instances/base.py +1 -1
  6. arekit/common/data/input/providers/rows/base.py +36 -13
  7. arekit/common/data/input/providers/rows/samples.py +57 -55
  8. arekit/common/data/input/providers/sample/cropped.py +2 -2
  9. arekit/common/data/input/sample.py +1 -1
  10. arekit/common/data/rows_fmt.py +82 -0
  11. arekit/common/data/rows_parser.py +43 -0
  12. arekit/common/data/storages/base.py +23 -18
  13. arekit/common/data/views/samples.py +2 -8
  14. arekit/common/{news → docs}/base.py +2 -2
  15. arekit/common/{news → docs}/entities_grouping.py +2 -1
  16. arekit/common/{news → docs}/entity.py +2 -1
  17. arekit/common/{news → docs}/parsed/base.py +5 -5
  18. arekit/common/docs/parsed/providers/base.py +68 -0
  19. arekit/common/{news → docs}/parsed/providers/base_pairs.py +2 -2
  20. arekit/common/{news → docs}/parsed/providers/entity_service.py +27 -22
  21. arekit/common/{news → docs}/parsed/providers/opinion_pairs.py +2 -2
  22. arekit/common/{news → docs}/parsed/providers/text_opinion_pairs.py +6 -6
  23. arekit/common/docs/parsed/service.py +31 -0
  24. arekit/common/docs/parser.py +66 -0
  25. arekit/common/{news → docs}/sentence.py +1 -1
  26. arekit/common/entities/base.py +11 -2
  27. arekit/common/experiment/api/base_samples_io.py +1 -1
  28. arekit/common/frames/variants/collection.py +2 -2
  29. arekit/common/linkage/base.py +2 -2
  30. arekit/common/linkage/meta.py +23 -0
  31. arekit/common/linkage/opinions.py +1 -1
  32. arekit/common/linkage/text_opinions.py +2 -2
  33. arekit/common/opinions/annot/algo/base.py +1 -1
  34. arekit/common/opinions/annot/algo/pair_based.py +15 -13
  35. arekit/common/opinions/annot/algo/predefined.py +4 -4
  36. arekit/common/opinions/annot/algo_based.py +5 -5
  37. arekit/common/opinions/annot/base.py +3 -3
  38. arekit/common/opinions/base.py +7 -7
  39. arekit/common/opinions/collection.py +3 -3
  40. arekit/common/pipeline/base.py +12 -16
  41. arekit/common/pipeline/batching.py +28 -0
  42. arekit/common/pipeline/context.py +5 -1
  43. arekit/common/pipeline/items/base.py +38 -1
  44. arekit/common/pipeline/items/flatten.py +5 -1
  45. arekit/common/pipeline/items/handle.py +2 -1
  46. arekit/common/pipeline/items/iter.py +2 -1
  47. arekit/common/pipeline/items/map.py +2 -1
  48. arekit/common/pipeline/items/map_nested.py +4 -0
  49. arekit/common/pipeline/utils.py +32 -0
  50. arekit/common/service/sqlite.py +36 -0
  51. arekit/common/synonyms/base.py +2 -2
  52. arekit/common/text/{partitioning/str.py → partitioning.py} +16 -11
  53. arekit/common/text_opinions/base.py +11 -11
  54. arekit/common/utils.py +33 -46
  55. arekit/contrib/networks/embedding.py +3 -3
  56. arekit/contrib/networks/embedding_io.py +5 -5
  57. arekit/contrib/networks/input/const.py +0 -2
  58. arekit/contrib/networks/input/providers/sample.py +15 -29
  59. arekit/contrib/networks/input/rows_parser.py +47 -134
  60. arekit/contrib/prompt/sample.py +18 -16
  61. arekit/contrib/utils/data/contents/opinions.py +17 -5
  62. arekit/contrib/utils/data/doc_provider/dict_based.py +13 -0
  63. arekit/contrib/utils/data/{doc_ops → doc_provider}/dir_based.py +7 -7
  64. arekit/contrib/utils/data/readers/base.py +3 -0
  65. arekit/contrib/utils/data/readers/csv_pd.py +10 -4
  66. arekit/contrib/utils/data/readers/jsonl.py +3 -0
  67. arekit/contrib/utils/data/readers/sqlite.py +14 -0
  68. arekit/contrib/utils/data/service/balance.py +0 -1
  69. arekit/contrib/utils/data/storages/pandas_based.py +3 -5
  70. arekit/contrib/utils/data/storages/row_cache.py +18 -6
  71. arekit/contrib/utils/data/storages/sqlite_based.py +17 -0
  72. arekit/contrib/utils/data/writers/base.py +5 -0
  73. arekit/contrib/utils/data/writers/csv_native.py +3 -0
  74. arekit/contrib/utils/data/writers/csv_pd.py +3 -0
  75. arekit/contrib/utils/data/writers/json_opennre.py +31 -13
  76. arekit/contrib/utils/data/writers/sqlite_native.py +114 -0
  77. arekit/contrib/utils/io_utils/embedding.py +25 -33
  78. arekit/contrib/utils/io_utils/utils.py +3 -24
  79. arekit/contrib/utils/pipelines/items/sampling/base.py +31 -26
  80. arekit/contrib/utils/pipelines/items/sampling/networks.py +7 -10
  81. arekit/contrib/utils/pipelines/items/text/entities_default.py +2 -2
  82. arekit/contrib/utils/pipelines/items/text/frames.py +2 -3
  83. arekit/contrib/utils/pipelines/items/text/frames_lemmatized.py +3 -3
  84. arekit/contrib/utils/pipelines/items/text/frames_negation.py +2 -1
  85. arekit/contrib/utils/pipelines/items/text/tokenizer.py +3 -5
  86. arekit/contrib/utils/pipelines/items/text/translator.py +136 -0
  87. arekit/contrib/utils/pipelines/opinion_collections.py +5 -5
  88. arekit/contrib/utils/pipelines/text_opinion/annot/algo_based.py +7 -7
  89. arekit/contrib/utils/pipelines/text_opinion/extraction.py +34 -22
  90. arekit/contrib/utils/pipelines/text_opinion/filters/base.py +1 -1
  91. arekit/contrib/utils/pipelines/text_opinion/filters/distance_based.py +1 -1
  92. arekit/contrib/utils/pipelines/text_opinion/filters/entity_based.py +3 -3
  93. arekit/contrib/utils/pipelines/text_opinion/filters/limitation.py +4 -4
  94. arekit/contrib/utils/serializer.py +4 -23
  95. arekit-0.25.0.data/data/logo.png +0 -0
  96. arekit-0.25.0.dist-info/METADATA +82 -0
  97. arekit-0.25.0.dist-info/RECORD +259 -0
  98. {arekit-0.23.1.dist-info → arekit-0.25.0.dist-info}/WHEEL +1 -1
  99. arekit/common/data/row_ids/base.py +0 -79
  100. arekit/common/data/row_ids/binary.py +0 -38
  101. arekit/common/data/row_ids/multiple.py +0 -14
  102. arekit/common/folding/base.py +0 -36
  103. arekit/common/folding/fixed.py +0 -42
  104. arekit/common/folding/nofold.py +0 -15
  105. arekit/common/folding/united.py +0 -46
  106. arekit/common/news/objects_parser.py +0 -37
  107. arekit/common/news/parsed/providers/base.py +0 -48
  108. arekit/common/news/parsed/service.py +0 -31
  109. arekit/common/news/parser.py +0 -34
  110. arekit/common/text/parser.py +0 -12
  111. arekit/common/text/partitioning/__init__.py +0 -0
  112. arekit/common/text/partitioning/base.py +0 -4
  113. arekit/common/text/partitioning/terms.py +0 -35
  114. arekit/contrib/source/__init__.py +0 -0
  115. arekit/contrib/source/brat/__init__.py +0 -0
  116. arekit/contrib/source/brat/annot.py +0 -83
  117. arekit/contrib/source/brat/entities/__init__.py +0 -0
  118. arekit/contrib/source/brat/entities/compound.py +0 -33
  119. arekit/contrib/source/brat/entities/entity.py +0 -42
  120. arekit/contrib/source/brat/entities/parser.py +0 -53
  121. arekit/contrib/source/brat/news.py +0 -28
  122. arekit/contrib/source/brat/opinions/__init__.py +0 -0
  123. arekit/contrib/source/brat/opinions/converter.py +0 -19
  124. arekit/contrib/source/brat/relation.py +0 -32
  125. arekit/contrib/source/brat/sentence.py +0 -69
  126. arekit/contrib/source/brat/sentences_reader.py +0 -128
  127. arekit/contrib/source/download.py +0 -41
  128. arekit/contrib/source/nerel/__init__.py +0 -0
  129. arekit/contrib/source/nerel/entities.py +0 -55
  130. arekit/contrib/source/nerel/folding/__init__.py +0 -0
  131. arekit/contrib/source/nerel/folding/fixed.py +0 -75
  132. arekit/contrib/source/nerel/io_utils.py +0 -62
  133. arekit/contrib/source/nerel/labels.py +0 -241
  134. arekit/contrib/source/nerel/reader.py +0 -46
  135. arekit/contrib/source/nerel/utils.py +0 -24
  136. arekit/contrib/source/nerel/versions.py +0 -12
  137. arekit/contrib/source/nerelbio/__init__.py +0 -0
  138. arekit/contrib/source/nerelbio/io_utils.py +0 -62
  139. arekit/contrib/source/nerelbio/labels.py +0 -265
  140. arekit/contrib/source/nerelbio/reader.py +0 -8
  141. arekit/contrib/source/nerelbio/versions.py +0 -8
  142. arekit/contrib/source/ruattitudes/__init__.py +0 -0
  143. arekit/contrib/source/ruattitudes/collection.py +0 -36
  144. arekit/contrib/source/ruattitudes/entity/__init__.py +0 -0
  145. arekit/contrib/source/ruattitudes/entity/parser.py +0 -7
  146. arekit/contrib/source/ruattitudes/io_utils.py +0 -56
  147. arekit/contrib/source/ruattitudes/labels_fmt.py +0 -12
  148. arekit/contrib/source/ruattitudes/news.py +0 -51
  149. arekit/contrib/source/ruattitudes/news_brat.py +0 -44
  150. arekit/contrib/source/ruattitudes/opinions/__init__.py +0 -0
  151. arekit/contrib/source/ruattitudes/opinions/base.py +0 -28
  152. arekit/contrib/source/ruattitudes/opinions/converter.py +0 -37
  153. arekit/contrib/source/ruattitudes/reader.py +0 -268
  154. arekit/contrib/source/ruattitudes/sentence.py +0 -73
  155. arekit/contrib/source/ruattitudes/synonyms.py +0 -17
  156. arekit/contrib/source/ruattitudes/text_object.py +0 -57
  157. arekit/contrib/source/rusentiframes/__init__.py +0 -0
  158. arekit/contrib/source/rusentiframes/collection.py +0 -157
  159. arekit/contrib/source/rusentiframes/effect.py +0 -24
  160. arekit/contrib/source/rusentiframes/io_utils.py +0 -19
  161. arekit/contrib/source/rusentiframes/labels_fmt.py +0 -22
  162. arekit/contrib/source/rusentiframes/polarity.py +0 -35
  163. arekit/contrib/source/rusentiframes/role.py +0 -15
  164. arekit/contrib/source/rusentiframes/state.py +0 -24
  165. arekit/contrib/source/rusentiframes/types.py +0 -42
  166. arekit/contrib/source/rusentiframes/value.py +0 -2
  167. arekit/contrib/source/rusentrel/__init__.py +0 -0
  168. arekit/contrib/source/rusentrel/const.py +0 -3
  169. arekit/contrib/source/rusentrel/entities.py +0 -26
  170. arekit/contrib/source/rusentrel/io_utils.py +0 -125
  171. arekit/contrib/source/rusentrel/labels_fmt.py +0 -12
  172. arekit/contrib/source/rusentrel/news_reader.py +0 -51
  173. arekit/contrib/source/rusentrel/opinions/__init__.py +0 -0
  174. arekit/contrib/source/rusentrel/opinions/collection.py +0 -30
  175. arekit/contrib/source/rusentrel/opinions/converter.py +0 -40
  176. arekit/contrib/source/rusentrel/opinions/provider.py +0 -54
  177. arekit/contrib/source/rusentrel/opinions/writer.py +0 -42
  178. arekit/contrib/source/rusentrel/synonyms.py +0 -17
  179. arekit/contrib/source/sentinerel/__init__.py +0 -0
  180. arekit/contrib/source/sentinerel/entities.py +0 -52
  181. arekit/contrib/source/sentinerel/folding/__init__.py +0 -0
  182. arekit/contrib/source/sentinerel/folding/factory.py +0 -32
  183. arekit/contrib/source/sentinerel/folding/fixed.py +0 -73
  184. arekit/contrib/source/sentinerel/io_utils.py +0 -87
  185. arekit/contrib/source/sentinerel/labels.py +0 -53
  186. arekit/contrib/source/sentinerel/labels_scaler.py +0 -30
  187. arekit/contrib/source/sentinerel/reader.py +0 -42
  188. arekit/contrib/source/synonyms/__init__.py +0 -0
  189. arekit/contrib/source/synonyms/utils.py +0 -19
  190. arekit/contrib/source/zip_utils.py +0 -47
  191. arekit/contrib/utils/bert/rows.py +0 -0
  192. arekit/contrib/utils/bert/text_b_rus.py +0 -18
  193. arekit/contrib/utils/connotations/__init__.py +0 -0
  194. arekit/contrib/utils/connotations/rusentiframes_sentiment.py +0 -23
  195. arekit/contrib/utils/cv/__init__.py +0 -0
  196. arekit/contrib/utils/cv/doc_stat/__init__.py +0 -0
  197. arekit/contrib/utils/cv/doc_stat/base.py +0 -37
  198. arekit/contrib/utils/cv/doc_stat/sentence.py +0 -12
  199. arekit/contrib/utils/cv/splitters/__init__.py +0 -0
  200. arekit/contrib/utils/cv/splitters/base.py +0 -4
  201. arekit/contrib/utils/cv/splitters/default.py +0 -53
  202. arekit/contrib/utils/cv/splitters/statistical.py +0 -57
  203. arekit/contrib/utils/cv/two_class.py +0 -77
  204. arekit/contrib/utils/data/doc_ops/__init__.py +0 -0
  205. arekit/contrib/utils/data/doc_ops/dict_based.py +0 -13
  206. arekit/contrib/utils/data/ext.py +0 -31
  207. arekit/contrib/utils/data/views/__init__.py +0 -0
  208. arekit/contrib/utils/data/views/linkages/__init__.py +0 -0
  209. arekit/contrib/utils/data/views/linkages/base.py +0 -58
  210. arekit/contrib/utils/data/views/linkages/multilabel.py +0 -48
  211. arekit/contrib/utils/data/views/linkages/utils.py +0 -24
  212. arekit/contrib/utils/data/views/opinions.py +0 -14
  213. arekit/contrib/utils/download.py +0 -78
  214. arekit/contrib/utils/entities/formatters/str_rus_cased_fmt.py +0 -78
  215. arekit/contrib/utils/entities/formatters/str_rus_nocased_fmt.py +0 -15
  216. arekit/contrib/utils/entities/formatters/str_simple_fmt.py +0 -24
  217. arekit/contrib/utils/entities/formatters/str_simple_uppercase_fmt.py +0 -21
  218. arekit/contrib/utils/io_utils/opinions.py +0 -39
  219. arekit/contrib/utils/io_utils/samples.py +0 -78
  220. arekit/contrib/utils/lexicons/__init__.py +0 -0
  221. arekit/contrib/utils/lexicons/lexicon.py +0 -43
  222. arekit/contrib/utils/lexicons/relation.py +0 -45
  223. arekit/contrib/utils/lexicons/rusentilex.py +0 -34
  224. arekit/contrib/utils/nn/__init__.py +0 -0
  225. arekit/contrib/utils/nn/rows.py +0 -83
  226. arekit/contrib/utils/pipelines/items/sampling/bert.py +0 -5
  227. arekit/contrib/utils/pipelines/items/text/terms_splitter.py +0 -10
  228. arekit/contrib/utils/pipelines/items/to_output.py +0 -101
  229. arekit/contrib/utils/pipelines/sources/__init__.py +0 -0
  230. arekit/contrib/utils/pipelines/sources/nerel/__init__.py +0 -0
  231. arekit/contrib/utils/pipelines/sources/nerel/doc_ops.py +0 -27
  232. arekit/contrib/utils/pipelines/sources/nerel/extract_text_relations.py +0 -59
  233. arekit/contrib/utils/pipelines/sources/nerel/labels_fmt.py +0 -60
  234. arekit/contrib/utils/pipelines/sources/nerel_bio/__init__.py +0 -0
  235. arekit/contrib/utils/pipelines/sources/nerel_bio/doc_ops.py +0 -29
  236. arekit/contrib/utils/pipelines/sources/nerel_bio/extrat_text_relations.py +0 -59
  237. arekit/contrib/utils/pipelines/sources/nerel_bio/labels_fmt.py +0 -79
  238. arekit/contrib/utils/pipelines/sources/ruattitudes/__init__.py +0 -0
  239. arekit/contrib/utils/pipelines/sources/ruattitudes/doc_ops.py +0 -56
  240. arekit/contrib/utils/pipelines/sources/ruattitudes/entity_filter.py +0 -19
  241. arekit/contrib/utils/pipelines/sources/ruattitudes/extract_text_opinions.py +0 -58
  242. arekit/contrib/utils/pipelines/sources/rusentrel/__init__.py +0 -0
  243. arekit/contrib/utils/pipelines/sources/rusentrel/doc_ops.py +0 -21
  244. arekit/contrib/utils/pipelines/sources/rusentrel/extract_text_opinions.py +0 -100
  245. arekit/contrib/utils/pipelines/sources/sentinerel/__init__.py +0 -0
  246. arekit/contrib/utils/pipelines/sources/sentinerel/doc_ops.py +0 -29
  247. arekit/contrib/utils/pipelines/sources/sentinerel/entity_filter.py +0 -62
  248. arekit/contrib/utils/pipelines/sources/sentinerel/extract_text_opinions.py +0 -175
  249. arekit/contrib/utils/pipelines/sources/sentinerel/labels_fmt.py +0 -50
  250. arekit/contrib/utils/pipelines/text_opinion/annot/predefined.py +0 -88
  251. arekit/contrib/utils/resources.py +0 -26
  252. arekit/contrib/utils/sources/__init__.py +0 -0
  253. arekit/contrib/utils/sources/sentinerel/__init__.py +0 -0
  254. arekit/contrib/utils/sources/sentinerel/text_opinion/__init__.py +0 -0
  255. arekit/contrib/utils/sources/sentinerel/text_opinion/prof_per_org_filter.py +0 -63
  256. arekit/contrib/utils/utils_folding.py +0 -19
  257. arekit/download_data.py +0 -11
  258. arekit-0.23.1.dist-info/METADATA +0 -23
  259. arekit-0.23.1.dist-info/RECORD +0 -403
  260. /arekit/common/{data/row_ids → docs}/__init__.py +0 -0
  261. /arekit/common/{folding → docs/parsed}/__init__.py +0 -0
  262. /arekit/common/{news → docs/parsed/providers}/__init__.py +0 -0
  263. /arekit/common/{news → docs}/parsed/term_position.py +0 -0
  264. /arekit/common/{news/parsed → service}/__init__.py +0 -0
  265. /arekit/{common/news/parsed/providers → contrib/utils/data/doc_provider}/__init__.py +0 -0
  266. {arekit-0.23.1.dist-info → arekit-0.25.0.dist-info}/LICENSE +0 -0
  267. {arekit-0.23.1.dist-info → arekit-0.25.0.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,68 @@
1
+ from arekit.common.entities.base import Entity
2
+ from arekit.common.docs.entity import DocumentEntity
3
+ from arekit.common.docs.parsed.base import ParsedDocument
4
+
5
+
6
+ class BaseParsedDocumentServiceProvider(object):
7
+
8
+ def __init__(self, entity_index_func=None):
9
+ """ Outside enity indexing function
10
+ entity_index_func: provides id for a given entity, i.e.
11
+ func(entity) -> int (id)
12
+ """
13
+ assert(callable(entity_index_func) or entity_index_func is None)
14
+ self._doc_entities = None
15
+ self.__entity_map = {}
16
+ self.__entity_index_func = entity_index_func
17
+
18
+ @property
19
+ def Name(self):
20
+ raise NotImplementedError()
21
+
22
+ def init_parsed_doc(self, parsed_doc):
23
+ assert(isinstance(parsed_doc, ParsedDocument))
24
+
25
+ def __iter_childs_and_root_node(entity):
26
+ """ Note: Entity has childs and we would like to iterate over childs
27
+ to conider them as well as keep the root Node.
28
+ """
29
+ # We first add childs.
30
+ for child_entity in entity.iter_childs():
31
+ yield child_entity, True
32
+
33
+ # Return Root node.
34
+ yield entity, False
35
+
36
+ self._doc_entities = []
37
+ self.__entity_map.clear()
38
+
39
+ current_id = 0
40
+ for _, entity in enumerate(parsed_doc.iter_entities()):
41
+
42
+ child_doc_entities = []
43
+ for tree_entity, is_child in __iter_childs_and_root_node(entity):
44
+
45
+ doc_entity = DocumentEntity(id_in_doc=current_id,
46
+ value=tree_entity.Value,
47
+ e_type=tree_entity.Type,
48
+ display_value=tree_entity.DisplayValue,
49
+ childs=None if is_child else child_doc_entities,
50
+ group_index=tree_entity.GroupIndex)
51
+ current_id += 1
52
+
53
+ if is_child:
54
+ child_doc_entities.append(doc_entity)
55
+
56
+ self._doc_entities.append(doc_entity)
57
+
58
+ if self.__entity_index_func is not None:
59
+ self.__entity_map[self.__entity_index_func(tree_entity)] = doc_entity
60
+
61
+ def get_document_entity(self, entity):
62
+ """ Maps entity to the related one with DocumentEntity type
63
+ """
64
+ assert(isinstance(entity, Entity))
65
+ return self.__entity_map[self.__entity_index_func(entity)]
66
+
67
+ def contains_entity(self, entity):
68
+ return self.__entity_index_func(entity) in self.__entity_map
@@ -1,8 +1,8 @@
1
1
  from arekit.common.labels.provider.base import BasePairLabelProvider
2
- from arekit.common.news.parsed.providers.base import BaseParsedNewsServiceProvider
2
+ from arekit.common.docs.parsed.providers.base import BaseParsedDocumentServiceProvider
3
3
 
4
4
 
5
- class BasePairProvider(BaseParsedNewsServiceProvider):
5
+ class BasePairProvider(BaseParsedDocumentServiceProvider):
6
6
 
7
7
  @property
8
8
  def Name(self):
@@ -1,10 +1,10 @@
1
1
  from enum import Enum
2
2
 
3
3
  from arekit.common.entities.base import Entity
4
- from arekit.common.news.entity import DocumentEntity
5
- from arekit.common.news.parsed.base import ParsedNews
6
- from arekit.common.news.parsed.providers.base import BaseParsedNewsServiceProvider
7
- from arekit.common.news.parsed.term_position import TermPositionTypes, TermPosition
4
+ from arekit.common.docs.entity import DocumentEntity
5
+ from arekit.common.docs.parsed.base import ParsedDocument
6
+ from arekit.common.docs.parsed.providers.base import BaseParsedDocumentServiceProvider
7
+ from arekit.common.docs.parsed.term_position import TermPositionTypes, TermPosition
8
8
  from arekit.common.text_opinions.base import TextOpinion
9
9
 
10
10
 
@@ -30,18 +30,19 @@ class DistanceType(Enum):
30
30
  return TermPositionTypes.SentenceIndex
31
31
 
32
32
 
33
- class EntityServiceProvider(BaseParsedNewsServiceProvider):
33
+ class EntityServiceProvider(BaseParsedDocumentServiceProvider):
34
34
  """ This class provides a helper functions for TextOpinions, which become a part of TextOpinionCollection.
35
35
  The latter is important because of the dependency from Owner.
36
36
  We utilize 'extract' prefix in methods to emphasize that these are methods of helper.
37
37
 
38
38
  Wrapper over:
39
- parsed news, positions, text_opinions
39
+ parsed doc, positions, text_opinions
40
40
  """
41
41
 
42
42
  NAME = "entity-service-provider"
43
43
 
44
44
  def __init__(self, entity_index_func):
45
+ assert(callable(entity_index_func))
45
46
  super(EntityServiceProvider, self).__init__(entity_index_func=entity_index_func)
46
47
  # Initialize API.
47
48
  self.__iter_raw_terms_func = None
@@ -52,11 +53,11 @@ class EntityServiceProvider(BaseParsedNewsServiceProvider):
52
53
  def Name(self):
53
54
  return self.NAME
54
55
 
55
- def init_parsed_news(self, parsed_news):
56
- super(EntityServiceProvider, self).init_parsed_news(parsed_news)
57
- assert(isinstance(parsed_news, ParsedNews))
58
- self.__iter_raw_terms_func = lambda: parsed_news.iter_terms(filter_func=None, term_only=False)
59
- self.__init_entity_positions()
56
+ def init_parsed_doc(self, parsed_doc):
57
+ super(EntityServiceProvider, self).init_parsed_doc(parsed_doc)
58
+ assert(isinstance(parsed_doc, ParsedDocument))
59
+ self.__iter_raw_terms_func = lambda: parsed_doc.iter_terms(filter_func=None, term_only=False)
60
+ self.__entity_positions = self.__calculate_entity_positions()
60
61
 
61
62
  # region public 'extract' methods
62
63
 
@@ -146,25 +147,29 @@ class EntityServiceProvider(BaseParsedNewsServiceProvider):
146
147
  assert(end_type == EntityEndType.Source or end_type == EntityEndType.Target)
147
148
  return text_opinion.SourceId if end_type == EntityEndType.Source else text_opinion.TargetId
148
149
 
149
- def __init_entity_positions(self):
150
- self.__entity_positions = self.__calculate_entity_positions()
151
-
152
150
  def __calculate_entity_positions(self):
153
151
  """ Note: here we consider the same order as in self._entities.
154
152
  """
155
- positions = []
156
- t_ind_in_doc = 0
153
+ t_ind_in_doc = -1
157
154
 
155
+ positions = {}
158
156
  for s_ind, t_ind_in_sent, term in self.__iter_raw_terms_func():
159
157
 
160
- if isinstance(term, Entity):
161
- position = TermPosition(term_ind_in_doc=t_ind_in_doc,
162
- term_ind_in_sent=t_ind_in_sent,
163
- s_ind=s_ind)
164
- positions.append(position)
165
-
166
158
  t_ind_in_doc += 1
167
159
 
160
+ if not isinstance(term, Entity):
161
+ continue
162
+
163
+ # We consider that entities within a single tree has the same positions.
164
+ for tree_entity in list(term.iter_childs()) + [term]:
165
+
166
+ key = self.get_document_entity(tree_entity).IdInDocument
167
+ assert(key not in positions)
168
+
169
+ positions[key] = TermPosition(term_ind_in_doc=t_ind_in_doc,
170
+ term_ind_in_sent=t_ind_in_sent,
171
+ s_ind=s_ind)
172
+
168
173
  return positions
169
174
 
170
175
  # endregion
@@ -1,5 +1,5 @@
1
1
  from arekit.common.entities.base import Entity
2
- from arekit.common.news.parsed.providers.base_pairs import BasePairProvider
2
+ from arekit.common.docs.parsed.providers.base_pairs import BasePairProvider
3
3
  from arekit.common.opinions.base import Opinion
4
4
 
5
5
 
@@ -17,4 +17,4 @@ class OpinionPairsProvider(BasePairProvider):
17
17
 
18
18
  return Opinion(source_value=source_entity.Value,
19
19
  target_value=target_entity.Value,
20
- sentiment=label)
20
+ label=label)
@@ -1,8 +1,8 @@
1
1
  import logging
2
2
 
3
3
  from arekit.common.entities.collection import EntityCollection
4
- from arekit.common.news.entity import DocumentEntity
5
- from arekit.common.news.parsed.providers.base_pairs import BasePairProvider
4
+ from arekit.common.docs.entity import DocumentEntity
5
+ from arekit.common.docs.parsed.providers.base_pairs import BasePairProvider
6
6
  from arekit.common.opinions.base import Opinion
7
7
  from arekit.common.text_opinions.base import TextOpinion
8
8
  from arekit.common.labels.provider.constant import ConstantLabelProvider
@@ -36,9 +36,9 @@ class TextOpinionPairsProvider(BasePairProvider):
36
36
  label=label,
37
37
  text_opinion_id=None)
38
38
 
39
- def init_parsed_news(self, parsed_news):
40
- super(TextOpinionPairsProvider, self).init_parsed_news(parsed_news)
41
- self.__doc_id = parsed_news.RelatedDocID
39
+ def init_parsed_doc(self, parsed_doc):
40
+ super(TextOpinionPairsProvider, self).init_parsed_doc(parsed_doc)
41
+ self.__doc_id = parsed_doc.RelatedDocID
42
42
  self.__entities_collection = EntityCollection(
43
43
  entities=list(self._doc_entities),
44
44
  value_to_group_id_func=self.__value_to_group_id_func)
@@ -68,7 +68,7 @@ class TextOpinionPairsProvider(BasePairProvider):
68
68
  return
69
69
  yield
70
70
 
71
- label_provider = ConstantLabelProvider(label_instance=opinion.Sentiment)
71
+ label_provider = ConstantLabelProvider(label_instance=opinion.Label)
72
72
 
73
73
  pairs_it = self._iter_from_entities(src_entity_doc_ids=list(map(lambda e: e.IdInDocument, source_entities)),
74
74
  tgt_entity_doc_ids=list(map(lambda e: e.IdInDocument, target_entities)),
@@ -0,0 +1,31 @@
1
+ from arekit.common.docs.parsed.base import ParsedDocument
2
+ from arekit.common.docs.parsed.providers.base import BaseParsedDocumentServiceProvider
3
+
4
+
5
+ class ParsedDocumentService(object):
6
+ """ Represents a collection of providers, combined with the parsed doc.
7
+ """
8
+
9
+ def __init__(self, parsed_doc, providers):
10
+ assert(isinstance(parsed_doc, ParsedDocument))
11
+ assert(isinstance(providers, list))
12
+ self.__parsed_doc = parsed_doc
13
+ self.__providers = {}
14
+
15
+ for provider in providers:
16
+ assert(isinstance(provider, BaseParsedDocumentServiceProvider))
17
+ assert(provider.Name not in self.__providers)
18
+
19
+ # Link provider with the related name.
20
+ self.__providers[provider.Name] = provider
21
+
22
+ # Post initialize with the related parsed doc.
23
+ provider.init_parsed_doc(self.__parsed_doc)
24
+
25
+
26
+ @property
27
+ def ParsedDocument(self):
28
+ return self.__parsed_doc
29
+
30
+ def get_provider(self, name):
31
+ return self.__providers[name]
@@ -0,0 +1,66 @@
1
+ from tqdm import tqdm
2
+ from arekit.common.docs.base import Document
3
+ from arekit.common.docs.parsed.base import ParsedDocument
4
+ from arekit.common.pipeline.base import BasePipelineLauncher
5
+ from arekit.common.pipeline.batching import BatchingPipelineLauncher
6
+ from arekit.common.pipeline.context import PipelineContext
7
+ from arekit.common.pipeline.utils import BatchIterator
8
+ from arekit.common.text.parsed import BaseParsedText
9
+
10
+
11
+ class DocumentParsers(object):
12
+
13
+ @staticmethod
14
+ def parse(doc, pipeline_items, parent_ppl_ctx=None, src_key="input", show_progress=False):
15
+ """ This document parser is based on single text parts (sentences)
16
+ that passes sequentially through the pipeline of transformations.
17
+ """
18
+ assert(isinstance(doc, Document))
19
+ assert(isinstance(pipeline_items, list))
20
+ assert(isinstance(parent_ppl_ctx, PipelineContext) or parent_ppl_ctx is None)
21
+
22
+ parsed_sentences = []
23
+
24
+ data_it = range(doc.SentencesCount)
25
+ progress_it = tqdm(data_it, disable=not show_progress)
26
+
27
+ for sent_ind in progress_it:
28
+
29
+ # Composing the context from a single sentence.
30
+ ctx = PipelineContext({src_key: doc.get_sentence(sent_ind)}, parent_ctx=parent_ppl_ctx)
31
+
32
+ # Apply all the operations.
33
+ BasePipelineLauncher.run(pipeline=pipeline_items, pipeline_ctx=ctx, src_key=src_key)
34
+
35
+ # Collecting the result.
36
+ parsed_sentences.append(BaseParsedText(terms=ctx.provide("result")))
37
+
38
+ return ParsedDocument(doc_id=doc.ID, parsed_sentences=parsed_sentences)
39
+
40
+ @staticmethod
41
+ def parse_batch(doc, pipeline_items, batch_size, parent_ppl_ctx=None, src_key="input", show_progress=False):
42
+ """ This document parser is based on batch of sentences.
43
+ """
44
+ assert(isinstance(batch_size, int) and batch_size > 0)
45
+ assert(isinstance(doc, Document))
46
+ assert(isinstance(pipeline_items, list))
47
+ assert(isinstance(parent_ppl_ctx, PipelineContext) or parent_ppl_ctx is None)
48
+
49
+ parsed_sentences = []
50
+
51
+ data_it = BatchIterator(data_iter=iter(range(doc.SentencesCount)), batch_size=batch_size)
52
+ progress_it = tqdm(data_it, total=round(doc.SentencesCount / batch_size), disable=not show_progress)
53
+
54
+ for batch in progress_it:
55
+
56
+ # Composing the context from a single sentence.
57
+ ctx = PipelineContext({src_key: [doc.get_sentence(s_ind) for s_ind in batch]},
58
+ parent_ctx=parent_ppl_ctx)
59
+
60
+ # Apply all the operations.
61
+ BatchingPipelineLauncher.run(pipeline=pipeline_items, pipeline_ctx=ctx, src_key=src_key)
62
+
63
+ # Collecting the result.
64
+ parsed_sentences += [BaseParsedText(terms=result) for result in ctx.provide("result")]
65
+
66
+ return ParsedDocument(doc_id=doc.ID, parsed_sentences=parsed_sentences)
@@ -1,5 +1,5 @@
1
1
 
2
- class BaseNewsSentence(object):
2
+ class BaseDocumentSentence(object):
3
3
 
4
4
  def __init__(self, text):
5
5
  self.__text = text
@@ -1,14 +1,16 @@
1
1
  class Entity(object):
2
2
 
3
- def __init__(self, value, e_type, display_value=None, group_index=None):
3
+ def __init__(self, value, e_type, childs=None, display_value=None, group_index=None):
4
4
  assert(isinstance(value, str) and len(value) > 0)
5
5
  assert(isinstance(e_type, str) or e_type is None)
6
6
  assert(isinstance(display_value, str) or display_value is None)
7
7
  assert(isinstance(group_index, int) or group_index is None)
8
- self.__value = value.lower()
8
+ assert(isinstance(childs, list) or childs is None)
9
+ self.__value = value
9
10
  self.__type = e_type
10
11
  self.__display_value = display_value
11
12
  self.__group_index = group_index
13
+ self.__childs = childs
12
14
 
13
15
  @property
14
16
  def GroupIndex(self):
@@ -40,3 +42,10 @@ class Entity(object):
40
42
  assert(isinstance(value, int) and value >= -1)
41
43
  assert(self.__group_index is None)
42
44
  self.__group_index = value
45
+
46
+ def iter_childs(self):
47
+ if self.__childs is None:
48
+ return
49
+ yield
50
+ for child in self.__childs:
51
+ yield child
@@ -14,7 +14,7 @@ class BaseSamplesIO(object):
14
14
  """
15
15
  raise NotImplementedError()
16
16
 
17
- def create_target(self, data_type, data_folding):
17
+ def create_target(self, data_type):
18
18
  """ Path for reaiding/viewing
19
19
  """
20
20
  raise NotImplementedError()
@@ -1,4 +1,4 @@
1
- import collections
1
+ from collections.abc import Iterable
2
2
  from arekit.common.frames.variants.base import FrameVariant
3
3
 
4
4
 
@@ -23,7 +23,7 @@ class FrameVariantsCollection(object):
23
23
  # region public methods
24
24
 
25
25
  def fill_from_iterable(self, variants_with_id, overwrite_existed_variant, raise_error_on_existed_variant):
26
- assert(isinstance(variants_with_id, collections.Iterable))
26
+ assert(isinstance(variants_with_id, Iterable))
27
27
  assert(isinstance(overwrite_existed_variant, bool))
28
28
  assert(isinstance(raise_error_on_existed_variant, bool))
29
29
  assert(len(self.__variants) == 0)
@@ -1,10 +1,10 @@
1
- import collections
1
+ from collections.abc import Iterable
2
2
 
3
3
 
4
4
  class LinkedDataWrapper(object):
5
5
 
6
6
  def __init__(self, linked_data):
7
- assert(isinstance(linked_data, collections.Iterable))
7
+ assert(isinstance(linked_data, Iterable))
8
8
  self.__linked_data = list(linked_data)
9
9
  self.__tag = None
10
10
 
@@ -0,0 +1,23 @@
1
+ from arekit.common.linkage.base import LinkedDataWrapper
2
+
3
+
4
+ class MetaEmptyLinkedDataWrapper(LinkedDataWrapper):
5
+ """ This is a placeholder data-wrapper utilized for passing system information
6
+ while iterating through the data pipelines.
7
+ """
8
+
9
+ def __init__(self, doc_id, meta_data=None):
10
+ """ meta_data:
11
+ optional parameter that holds any information needed later on.
12
+ """
13
+ super(MetaEmptyLinkedDataWrapper, self).__init__([])
14
+ self.__doc_id = doc_id
15
+ self.__meta_data = meta_data
16
+
17
+ @property
18
+ def RelatedDocID(self):
19
+ return self.__doc_id
20
+
21
+ @property
22
+ def MetaData(self):
23
+ return self.__meta_data
@@ -6,4 +6,4 @@ class OpinionsLinkage(LinkedDataWrapper):
6
6
 
7
7
  def _get_data_label(self, item):
8
8
  assert(isinstance(item, Opinion))
9
- return item.Sentiment
9
+ return item.Label
@@ -15,8 +15,8 @@ class TextOpinionsLinkage(LinkedDataWrapper):
15
15
  return self.First.DocID
16
16
 
17
17
  def get_linked_label(self):
18
- return self.First.Sentiment
18
+ return self.First.Label
19
19
 
20
20
  def _get_data_label(self, item):
21
21
  assert(isinstance(item, TextOpinion))
22
- return item.Sentiment
22
+ return item.Label
@@ -1,4 +1,4 @@
1
1
  class BaseOpinionAnnotationAlgorithm(object):
2
2
 
3
- def iter_opinions(self, parsed_news, existed_opinions=None):
3
+ def iter_opinions(self, parsed_doc, existed_opinions=None):
4
4
  pass
@@ -1,9 +1,9 @@
1
1
  from arekit.common.entities.types import OpinionEntityType
2
2
  from arekit.common.labels.provider.base import BasePairLabelProvider
3
- from arekit.common.news.entity import DocumentEntity
4
- from arekit.common.news.parsed.base import ParsedNews
5
- from arekit.common.news.parsed.providers.entity_service import EntityServiceProvider, DistanceType
6
- from arekit.common.news.parsed.providers.opinion_pairs import OpinionPairsProvider
3
+ from arekit.common.docs.entity import DocumentEntity
4
+ from arekit.common.docs.parsed.base import ParsedDocument
5
+ from arekit.common.docs.parsed.providers.entity_service import EntityServiceProvider, DistanceType
6
+ from arekit.common.docs.parsed.providers.opinion_pairs import OpinionPairsProvider
7
7
  from arekit.common.opinions.annot.algo.base import BaseOpinionAnnotationAlgorithm
8
8
  from arekit.common.opinions.base import Opinion
9
9
 
@@ -16,7 +16,8 @@ class PairBasedOpinionAnnotationAlgorithm(BaseOpinionAnnotationAlgorithm):
16
16
  [1] Extracting Sentiment Attitudes from Analytical Texts https://arxiv.org/pdf/1808.08932.pdf
17
17
  """
18
18
 
19
- def __init__(self, dist_in_terms_bound, label_provider, dist_in_sents=0, is_entity_ignored_func=None):
19
+ def __init__(self, dist_in_terms_bound, label_provider, entity_index_func, dist_in_sents=0,
20
+ is_entity_ignored_func=None):
20
21
  """
21
22
  dist_in_terms_bound: int
22
23
  max allowed distance in term (less than passed value)
@@ -25,6 +26,7 @@ class PairBasedOpinionAnnotationAlgorithm(BaseOpinionAnnotationAlgorithm):
25
26
  """
26
27
  assert(isinstance(dist_in_terms_bound, int) or dist_in_terms_bound is None)
27
28
  assert(isinstance(label_provider, BasePairLabelProvider))
29
+ assert(callable(entity_index_func))
28
30
  assert(isinstance(dist_in_sents, int))
29
31
  assert(callable(is_entity_ignored_func) or is_entity_ignored_func is None)
30
32
 
@@ -32,6 +34,7 @@ class PairBasedOpinionAnnotationAlgorithm(BaseOpinionAnnotationAlgorithm):
32
34
  self.__dist_in_terms_bound = dist_in_terms_bound
33
35
  self.__dist_in_sents = dist_in_sents
34
36
  self.__is_entity_ignored_func = is_entity_ignored_func
37
+ self.__entity_index_func = entity_index_func
35
38
 
36
39
  # region private methods
37
40
 
@@ -68,7 +71,7 @@ class PairBasedOpinionAnnotationAlgorithm(BaseOpinionAnnotationAlgorithm):
68
71
  if existed_opinions is not None:
69
72
  o = Opinion(source_value=e1.Value,
70
73
  target_value=e2.Value,
71
- sentiment=self.__label_provider.provide(source=e1, target=e2))
74
+ label=self.__label_provider.provide(source=e1, target=e2))
72
75
  if existed_opinions.has_synonymous_opinion(opinion=o):
73
76
  return
74
77
 
@@ -76,8 +79,8 @@ class PairBasedOpinionAnnotationAlgorithm(BaseOpinionAnnotationAlgorithm):
76
79
 
77
80
  # endregion
78
81
 
79
- def iter_opinions(self, parsed_news, existed_opinions=None):
80
- assert(isinstance(parsed_news, ParsedNews))
82
+ def iter_opinions(self, parsed_doc, existed_opinions=None):
83
+ assert(isinstance(parsed_doc, ParsedDocument))
81
84
 
82
85
  def __filter_pair_func(e1, e2):
83
86
  key = self.__try_create_pair_key(entity_service=entity_service_provider,
@@ -87,11 +90,10 @@ class PairBasedOpinionAnnotationAlgorithm(BaseOpinionAnnotationAlgorithm):
87
90
  return key is not None
88
91
 
89
92
  # Initialize providers.
90
- # TODO. Provide here service #245 issue.
91
- opinions_provider = OpinionPairsProvider(entity_index_func=None)
92
- entity_service_provider = EntityServiceProvider(entity_index_func=None)
93
- opinions_provider.init_parsed_news(parsed_news)
94
- entity_service_provider.init_parsed_news(parsed_news)
93
+ opinions_provider = OpinionPairsProvider(entity_index_func=self.__entity_index_func)
94
+ entity_service_provider = EntityServiceProvider(entity_index_func=self.__entity_index_func)
95
+ opinions_provider.init_parsed_doc(parsed_doc)
96
+ entity_service_provider.init_parsed_doc(parsed_doc)
95
97
 
96
98
  return opinions_provider.iter_from_all(label_provider=self.__label_provider,
97
99
  filter_func=__filter_pair_func)
@@ -1,4 +1,4 @@
1
- from arekit.common.news.parsed.base import ParsedNews
1
+ from arekit.common.docs.parsed.base import ParsedDocument
2
2
  from arekit.common.opinions.annot.algo.base import BaseOpinionAnnotationAlgorithm
3
3
 
4
4
 
@@ -11,6 +11,6 @@ class PredefinedOpinionAnnotationAlgorithm(BaseOpinionAnnotationAlgorithm):
11
11
  assert(callable(get_opinions_by_doc_id_func))
12
12
  self.__get_opinions_by_doc_id_func = get_opinions_by_doc_id_func
13
13
 
14
- def iter_opinions(self, parsed_news, existed_opinions=None):
15
- assert(isinstance(parsed_news, ParsedNews))
16
- return self.__get_opinions_by_doc_id_func(parsed_news.RelatedDocID)
14
+ def iter_opinions(self, parsed_doc, existed_opinions=None):
15
+ assert(isinstance(parsed_doc, ParsedDocument))
16
+ return self.__get_opinions_by_doc_id_func(parsed_doc.RelatedDocID)
@@ -1,6 +1,6 @@
1
1
  import logging
2
2
 
3
- from arekit.common.news.parsed.base import ParsedNews
3
+ from arekit.common.docs.parsed.base import ParsedDocument
4
4
  from arekit.common.opinions.annot.algo.base import BaseOpinionAnnotationAlgorithm
5
5
  from arekit.common.opinions.annot.base import BaseOpinionAnnotator
6
6
  from arekit.common.opinions.collection import OpinionCollection
@@ -31,14 +31,14 @@ class AlgorithmBasedOpinionAnnotator(BaseOpinionAnnotator):
31
31
 
32
32
  # region private methods
33
33
 
34
- def _annot_collection_core(self, parsed_news):
35
- assert(isinstance(parsed_news, ParsedNews))
34
+ def _annot_collection_core(self, parsed_doc):
35
+ assert(isinstance(parsed_doc, ParsedDocument))
36
36
 
37
- opinions = self.__get_existed_opinions_func(parsed_news.RelatedDocID)
37
+ opinions = self.__get_existed_opinions_func(parsed_doc.RelatedDocID)
38
38
  assert(isinstance(opinions, OpinionCollection) or opinions is None)
39
39
 
40
40
  annotated_opinions_it = self.__annot_algo.iter_opinions(
41
- parsed_news=parsed_news, existed_opinions=opinions)
41
+ parsed_doc=parsed_doc, existed_opinions=opinions)
42
42
 
43
43
  collection = self.__create_empty_collection_func()
44
44
  assert(isinstance(collection, OpinionCollection))
@@ -4,12 +4,12 @@ class BaseOpinionAnnotator(object):
4
4
  using OpinOps and DocOps API.
5
5
  """
6
6
 
7
- def _annot_collection_core(self, parsed_news):
7
+ def _annot_collection_core(self, parsed_doc):
8
8
  raise NotImplementedError
9
9
 
10
10
  # region public methods
11
11
 
12
- def annotate_collection(self, parsed_news):
13
- return self._annot_collection_core(parsed_news=parsed_news)
12
+ def annotate_collection(self, parsed_doc):
13
+ return self._annot_collection_core(parsed_doc=parsed_doc)
14
14
 
15
15
  # endregion
@@ -7,13 +7,13 @@ class Opinion(object):
7
7
  """ Source opinion description
8
8
  """
9
9
 
10
- def __init__(self, source_value, target_value, sentiment):
10
+ def __init__(self, source_value, target_value, label):
11
11
  assert(isinstance(source_value, str))
12
12
  assert(isinstance(target_value, str))
13
- assert(isinstance(sentiment, Label))
14
- self.__source_value = source_value.lower()
15
- self.__target_value = target_value.lower()
16
- self.__sentiment = sentiment
13
+ assert(isinstance(label, Label))
14
+ self.__source_value = source_value
15
+ self.__target_value = target_value
16
+ self.__label = label
17
17
  self.__tag = None
18
18
 
19
19
  # region properties
@@ -27,8 +27,8 @@ class Opinion(object):
27
27
  return self.__target_value
28
28
 
29
29
  @property
30
- def Sentiment(self):
31
- return self.__sentiment
30
+ def Label(self):
31
+ return self.__label
32
32
 
33
33
  @property
34
34
  def Tag(self):
@@ -1,4 +1,4 @@
1
- import collections
1
+ from collections.abc import Iterable
2
2
 
3
3
  from arekit.common import log_utils
4
4
  from arekit.common.labels.base import Label
@@ -23,7 +23,7 @@ class OpinionCollection(object):
23
23
  raise_exception_on_duplicates: bool
24
24
  denotes whether an exception should be raised for duplicates in the opinions list.
25
25
  """
26
- assert(isinstance(opinions, collections.Iterable) or isinstance(opinions, type(None)))
26
+ assert(isinstance(opinions, Iterable) or isinstance(opinions, type(None)))
27
27
  assert(isinstance(synonyms, SynonymsCollection))
28
28
  assert(isinstance(error_on_duplicates, bool))
29
29
  assert(isinstance(error_on_synonym_end_missed, bool))
@@ -76,7 +76,7 @@ class OpinionCollection(object):
76
76
  f_o = self.__by_synonyms[s_id]
77
77
  if label is None:
78
78
  return f_o
79
- elif f_o.sentiment == label:
79
+ elif f_o.Label == label:
80
80
  return f_o
81
81
  else:
82
82
  return None