arekit 0.24.0__py3-none-any.whl → 0.25.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (224) hide show
  1. arekit/common/data/storages/base.py +4 -15
  2. arekit/common/docs/entities_grouping.py +2 -1
  3. arekit/common/docs/parser.py +27 -22
  4. arekit/common/pipeline/base.py +12 -16
  5. arekit/common/pipeline/batching.py +28 -0
  6. arekit/common/pipeline/context.py +5 -1
  7. arekit/common/pipeline/items/base.py +39 -2
  8. arekit/common/pipeline/items/flatten.py +5 -1
  9. arekit/common/pipeline/items/handle.py +2 -1
  10. arekit/common/pipeline/items/iter.py +2 -1
  11. arekit/common/pipeline/items/map.py +2 -1
  12. arekit/common/pipeline/items/map_nested.py +4 -0
  13. arekit/common/pipeline/utils.py +32 -0
  14. arekit/common/text/{partitioning/str.py → partitioning.py} +14 -9
  15. arekit/common/utils.py +11 -52
  16. arekit/contrib/utils/data/contents/opinions.py +13 -3
  17. arekit/contrib/utils/data/storages/jsonl_based.py +2 -1
  18. arekit/contrib/utils/data/storages/pandas_based.py +2 -17
  19. arekit/contrib/utils/data/storages/row_cache.py +8 -2
  20. arekit/contrib/utils/data/storages/sqlite_based.py +18 -0
  21. arekit/contrib/utils/io_utils/utils.py +1 -18
  22. arekit/contrib/utils/pipelines/items/text/entities_default.py +2 -2
  23. arekit/contrib/utils/pipelines/items/text/frames.py +2 -3
  24. arekit/contrib/utils/pipelines/text_opinion/extraction.py +8 -10
  25. arekit-0.25.1.data/data/logo.png +0 -0
  26. arekit-0.25.1.dist-info/METADATA +81 -0
  27. arekit-0.25.1.dist-info/RECORD +186 -0
  28. {arekit-0.24.0.dist-info → arekit-0.25.1.dist-info}/WHEEL +1 -1
  29. arekit/common/data/input/repositories/__init__.py +0 -0
  30. arekit/common/data/input/repositories/base.py +0 -68
  31. arekit/common/data/input/repositories/sample.py +0 -22
  32. arekit/common/data/views/__init__.py +0 -0
  33. arekit/common/data/views/samples.py +0 -26
  34. arekit/common/docs/objects_parser.py +0 -37
  35. arekit/common/text/parser.py +0 -12
  36. arekit/common/text/partitioning/__init__.py +0 -0
  37. arekit/common/text/partitioning/base.py +0 -4
  38. arekit/common/text/partitioning/terms.py +0 -35
  39. arekit/contrib/networks/__init__.py +0 -0
  40. arekit/contrib/networks/embedding.py +0 -149
  41. arekit/contrib/networks/embedding_io.py +0 -18
  42. arekit/contrib/networks/input/__init__.py +0 -0
  43. arekit/contrib/networks/input/const.py +0 -6
  44. arekit/contrib/networks/input/ctx_serialization.py +0 -28
  45. arekit/contrib/networks/input/embedding/__init__.py +0 -0
  46. arekit/contrib/networks/input/embedding/matrix.py +0 -29
  47. arekit/contrib/networks/input/embedding/offsets.py +0 -55
  48. arekit/contrib/networks/input/formatters/__init__.py +0 -0
  49. arekit/contrib/networks/input/formatters/pos_mapper.py +0 -22
  50. arekit/contrib/networks/input/providers/__init__.py +0 -0
  51. arekit/contrib/networks/input/providers/sample.py +0 -129
  52. arekit/contrib/networks/input/providers/term_connotation.py +0 -23
  53. arekit/contrib/networks/input/providers/text.py +0 -24
  54. arekit/contrib/networks/input/rows_parser.py +0 -47
  55. arekit/contrib/networks/input/term_types.py +0 -13
  56. arekit/contrib/networks/input/terms_mapping.py +0 -60
  57. arekit/contrib/networks/vectorizer.py +0 -6
  58. arekit/contrib/source/__init__.py +0 -0
  59. arekit/contrib/source/brat/__init__.py +0 -0
  60. arekit/contrib/source/brat/annot.py +0 -84
  61. arekit/contrib/source/brat/doc.py +0 -28
  62. arekit/contrib/source/brat/entities/__init__.py +0 -0
  63. arekit/contrib/source/brat/entities/compound.py +0 -13
  64. arekit/contrib/source/brat/entities/entity.py +0 -42
  65. arekit/contrib/source/brat/entities/parser.py +0 -53
  66. arekit/contrib/source/brat/opinions/__init__.py +0 -0
  67. arekit/contrib/source/brat/opinions/converter.py +0 -19
  68. arekit/contrib/source/brat/relation.py +0 -32
  69. arekit/contrib/source/brat/sentence.py +0 -69
  70. arekit/contrib/source/brat/sentences_reader.py +0 -128
  71. arekit/contrib/source/download.py +0 -41
  72. arekit/contrib/source/nerel/__init__.py +0 -0
  73. arekit/contrib/source/nerel/entities.py +0 -55
  74. arekit/contrib/source/nerel/folding/__init__.py +0 -0
  75. arekit/contrib/source/nerel/folding/fixed.py +0 -74
  76. arekit/contrib/source/nerel/io_utils.py +0 -62
  77. arekit/contrib/source/nerel/labels.py +0 -241
  78. arekit/contrib/source/nerel/reader.py +0 -46
  79. arekit/contrib/source/nerel/utils.py +0 -24
  80. arekit/contrib/source/nerel/versions.py +0 -12
  81. arekit/contrib/source/nerelbio/__init__.py +0 -0
  82. arekit/contrib/source/nerelbio/io_utils.py +0 -62
  83. arekit/contrib/source/nerelbio/labels.py +0 -265
  84. arekit/contrib/source/nerelbio/reader.py +0 -8
  85. arekit/contrib/source/nerelbio/versions.py +0 -8
  86. arekit/contrib/source/ruattitudes/__init__.py +0 -0
  87. arekit/contrib/source/ruattitudes/collection.py +0 -36
  88. arekit/contrib/source/ruattitudes/doc.py +0 -51
  89. arekit/contrib/source/ruattitudes/doc_brat.py +0 -44
  90. arekit/contrib/source/ruattitudes/entity/__init__.py +0 -0
  91. arekit/contrib/source/ruattitudes/entity/parser.py +0 -7
  92. arekit/contrib/source/ruattitudes/io_utils.py +0 -56
  93. arekit/contrib/source/ruattitudes/labels_fmt.py +0 -12
  94. arekit/contrib/source/ruattitudes/opinions/__init__.py +0 -0
  95. arekit/contrib/source/ruattitudes/opinions/base.py +0 -28
  96. arekit/contrib/source/ruattitudes/opinions/converter.py +0 -37
  97. arekit/contrib/source/ruattitudes/reader.py +0 -268
  98. arekit/contrib/source/ruattitudes/sentence.py +0 -73
  99. arekit/contrib/source/ruattitudes/synonyms.py +0 -17
  100. arekit/contrib/source/ruattitudes/text_object.py +0 -59
  101. arekit/contrib/source/rusentiframes/__init__.py +0 -0
  102. arekit/contrib/source/rusentiframes/collection.py +0 -157
  103. arekit/contrib/source/rusentiframes/effect.py +0 -24
  104. arekit/contrib/source/rusentiframes/io_utils.py +0 -19
  105. arekit/contrib/source/rusentiframes/labels_fmt.py +0 -22
  106. arekit/contrib/source/rusentiframes/polarity.py +0 -35
  107. arekit/contrib/source/rusentiframes/role.py +0 -15
  108. arekit/contrib/source/rusentiframes/state.py +0 -24
  109. arekit/contrib/source/rusentiframes/types.py +0 -42
  110. arekit/contrib/source/rusentiframes/value.py +0 -2
  111. arekit/contrib/source/rusentrel/__init__.py +0 -0
  112. arekit/contrib/source/rusentrel/const.py +0 -3
  113. arekit/contrib/source/rusentrel/docs_reader.py +0 -51
  114. arekit/contrib/source/rusentrel/entities.py +0 -26
  115. arekit/contrib/source/rusentrel/io_utils.py +0 -125
  116. arekit/contrib/source/rusentrel/labels_fmt.py +0 -12
  117. arekit/contrib/source/rusentrel/opinions/__init__.py +0 -0
  118. arekit/contrib/source/rusentrel/opinions/collection.py +0 -30
  119. arekit/contrib/source/rusentrel/opinions/converter.py +0 -40
  120. arekit/contrib/source/rusentrel/opinions/provider.py +0 -54
  121. arekit/contrib/source/rusentrel/opinions/writer.py +0 -42
  122. arekit/contrib/source/rusentrel/synonyms.py +0 -17
  123. arekit/contrib/source/sentinerel/__init__.py +0 -0
  124. arekit/contrib/source/sentinerel/entities.py +0 -52
  125. arekit/contrib/source/sentinerel/folding/__init__.py +0 -0
  126. arekit/contrib/source/sentinerel/folding/factory.py +0 -31
  127. arekit/contrib/source/sentinerel/folding/fixed.py +0 -70
  128. arekit/contrib/source/sentinerel/io_utils.py +0 -87
  129. arekit/contrib/source/sentinerel/labels.py +0 -53
  130. arekit/contrib/source/sentinerel/labels_scaler.py +0 -30
  131. arekit/contrib/source/sentinerel/reader.py +0 -42
  132. arekit/contrib/source/synonyms/__init__.py +0 -0
  133. arekit/contrib/source/synonyms/utils.py +0 -19
  134. arekit/contrib/source/zip_utils.py +0 -47
  135. arekit/contrib/utils/connotations/__init__.py +0 -0
  136. arekit/contrib/utils/connotations/rusentiframes_sentiment.py +0 -23
  137. arekit/contrib/utils/data/readers/__init__.py +0 -0
  138. arekit/contrib/utils/data/readers/base.py +0 -7
  139. arekit/contrib/utils/data/readers/csv_pd.py +0 -38
  140. arekit/contrib/utils/data/readers/jsonl.py +0 -15
  141. arekit/contrib/utils/data/service/__init__.py +0 -0
  142. arekit/contrib/utils/data/service/balance.py +0 -50
  143. arekit/contrib/utils/data/writers/csv_native.py +0 -63
  144. arekit/contrib/utils/data/writers/csv_pd.py +0 -40
  145. arekit/contrib/utils/data/writers/json_opennre.py +0 -132
  146. arekit/contrib/utils/data/writers/sqlite_native.py +0 -110
  147. arekit/contrib/utils/download.py +0 -77
  148. arekit/contrib/utils/embeddings/__init__.py +0 -0
  149. arekit/contrib/utils/embeddings/rusvectores.py +0 -58
  150. arekit/contrib/utils/embeddings/tokens.py +0 -30
  151. arekit/contrib/utils/io_utils/embedding.py +0 -72
  152. arekit/contrib/utils/io_utils/opinions.py +0 -37
  153. arekit/contrib/utils/io_utils/samples.py +0 -79
  154. arekit/contrib/utils/lexicons/__init__.py +0 -0
  155. arekit/contrib/utils/lexicons/lexicon.py +0 -41
  156. arekit/contrib/utils/lexicons/relation.py +0 -42
  157. arekit/contrib/utils/lexicons/rusentilex.py +0 -37
  158. arekit/contrib/utils/nn/__init__.py +0 -0
  159. arekit/contrib/utils/nn/rows.py +0 -83
  160. arekit/contrib/utils/np_utils/__init__.py +0 -0
  161. arekit/contrib/utils/np_utils/embedding.py +0 -22
  162. arekit/contrib/utils/np_utils/npz_utils.py +0 -13
  163. arekit/contrib/utils/np_utils/vocab.py +0 -20
  164. arekit/contrib/utils/pipelines/items/sampling/__init__.py +0 -0
  165. arekit/contrib/utils/pipelines/items/sampling/base.py +0 -99
  166. arekit/contrib/utils/pipelines/items/sampling/networks.py +0 -54
  167. arekit/contrib/utils/pipelines/items/text/frames_lemmatized.py +0 -36
  168. arekit/contrib/utils/pipelines/items/text/frames_negation.py +0 -32
  169. arekit/contrib/utils/pipelines/items/text/terms_splitter.py +0 -10
  170. arekit/contrib/utils/pipelines/items/text/tokenizer.py +0 -107
  171. arekit/contrib/utils/pipelines/items/text/translator.py +0 -135
  172. arekit/contrib/utils/pipelines/sources/__init__.py +0 -0
  173. arekit/contrib/utils/pipelines/sources/nerel/__init__.py +0 -0
  174. arekit/contrib/utils/pipelines/sources/nerel/doc_provider.py +0 -27
  175. arekit/contrib/utils/pipelines/sources/nerel/extract_text_relations.py +0 -65
  176. arekit/contrib/utils/pipelines/sources/nerel/labels_fmt.py +0 -60
  177. arekit/contrib/utils/pipelines/sources/nerel_bio/__init__.py +0 -0
  178. arekit/contrib/utils/pipelines/sources/nerel_bio/doc_provider.py +0 -29
  179. arekit/contrib/utils/pipelines/sources/nerel_bio/extrat_text_relations.py +0 -64
  180. arekit/contrib/utils/pipelines/sources/nerel_bio/labels_fmt.py +0 -79
  181. arekit/contrib/utils/pipelines/sources/ruattitudes/__init__.py +0 -0
  182. arekit/contrib/utils/pipelines/sources/ruattitudes/doc_provider.py +0 -56
  183. arekit/contrib/utils/pipelines/sources/ruattitudes/entity_filter.py +0 -20
  184. arekit/contrib/utils/pipelines/sources/ruattitudes/extract_text_opinions.py +0 -65
  185. arekit/contrib/utils/pipelines/sources/rusentrel/__init__.py +0 -0
  186. arekit/contrib/utils/pipelines/sources/rusentrel/doc_provider.py +0 -21
  187. arekit/contrib/utils/pipelines/sources/rusentrel/extract_text_opinions.py +0 -107
  188. arekit/contrib/utils/pipelines/sources/sentinerel/__init__.py +0 -0
  189. arekit/contrib/utils/pipelines/sources/sentinerel/doc_provider.py +0 -29
  190. arekit/contrib/utils/pipelines/sources/sentinerel/entity_filter.py +0 -62
  191. arekit/contrib/utils/pipelines/sources/sentinerel/extract_text_opinions.py +0 -180
  192. arekit/contrib/utils/pipelines/sources/sentinerel/labels_fmt.py +0 -50
  193. arekit/contrib/utils/pipelines/text_opinion/annot/predefined.py +0 -88
  194. arekit/contrib/utils/processing/languages/__init__.py +0 -0
  195. arekit/contrib/utils/processing/languages/mods.py +0 -12
  196. arekit/contrib/utils/processing/languages/pos.py +0 -23
  197. arekit/contrib/utils/processing/languages/ru/__init__.py +0 -0
  198. arekit/contrib/utils/processing/languages/ru/cases.py +0 -78
  199. arekit/contrib/utils/processing/languages/ru/constants.py +0 -6
  200. arekit/contrib/utils/processing/languages/ru/mods.py +0 -13
  201. arekit/contrib/utils/processing/languages/ru/number.py +0 -23
  202. arekit/contrib/utils/processing/languages/ru/pos_service.py +0 -36
  203. arekit/contrib/utils/processing/lemmatization/__init__.py +0 -0
  204. arekit/contrib/utils/processing/lemmatization/mystem.py +0 -51
  205. arekit/contrib/utils/processing/pos/__init__.py +0 -0
  206. arekit/contrib/utils/processing/pos/base.py +0 -12
  207. arekit/contrib/utils/processing/pos/mystem_wrap.py +0 -134
  208. arekit/contrib/utils/processing/pos/russian.py +0 -10
  209. arekit/contrib/utils/processing/text/__init__.py +0 -0
  210. arekit/contrib/utils/processing/text/tokens.py +0 -127
  211. arekit/contrib/utils/resources.py +0 -25
  212. arekit/contrib/utils/serializer.py +0 -43
  213. arekit/contrib/utils/sources/__init__.py +0 -0
  214. arekit/contrib/utils/sources/sentinerel/__init__.py +0 -0
  215. arekit/contrib/utils/sources/sentinerel/text_opinion/__init__.py +0 -0
  216. arekit/contrib/utils/sources/sentinerel/text_opinion/prof_per_org_filter.py +0 -63
  217. arekit/contrib/utils/vectorizers/__init__.py +0 -0
  218. arekit/contrib/utils/vectorizers/bpe.py +0 -93
  219. arekit/contrib/utils/vectorizers/random_norm.py +0 -39
  220. arekit/download_data.py +0 -11
  221. arekit-0.24.0.dist-info/METADATA +0 -23
  222. arekit-0.24.0.dist-info/RECORD +0 -374
  223. {arekit-0.24.0.dist-info → arekit-0.25.1.dist-info}/LICENSE +0 -0
  224. {arekit-0.24.0.dist-info → arekit-0.25.1.dist-info}/top_level.txt +0 -0
@@ -1,62 +0,0 @@
1
- from os import path
2
-
3
- from arekit.common.experiment.data_type import DataType
4
- from arekit.contrib.source.nerel.folding.fixed import create_fixed_folding
5
- from arekit.contrib.source.nerel.utils import iter_filename_and_splittype
6
- from arekit.contrib.source.zip_utils import ZipArchiveUtils
7
-
8
-
9
- class NerelIOUtils(ZipArchiveUtils):
10
-
11
- splits = {
12
- DataType.Train: "train",
13
- DataType.Dev: "dev",
14
- DataType.Test: "test"
15
- }
16
-
17
- @staticmethod
18
- def get_archive_filepath(version):
19
- return path.join(NerelIOUtils.get_data_root(), "nerel-{}.zip".format(version))
20
-
21
- @staticmethod
22
- def get_annotation_innerpath(folding_data_type, filename):
23
- assert(isinstance(filename, str))
24
- return path.join(NerelIOUtils.splits[folding_data_type], "{}.ann".format(filename))
25
-
26
- @staticmethod
27
- def get_news_innerpath(folding_data_type, filename):
28
- assert(isinstance(filename, str))
29
- return path.join(NerelIOUtils.splits[folding_data_type], "{}.txt".format(filename))
30
-
31
- @staticmethod
32
- def map_doc_to_fold_type(version):
33
-
34
- it = iter_filename_and_splittype(
35
- filenames_it=NerelIOUtils.iter_filenames_from_zip(version),
36
- splits=NerelIOUtils.splits.items())
37
-
38
- d2f = {}
39
- for filename, split_type in it:
40
- d2f[filename] = split_type
41
-
42
- return d2f
43
-
44
- @staticmethod
45
- def read_dataset_split(version, docs_limit=None):
46
-
47
- it = iter_filename_and_splittype(
48
- filenames_it=NerelIOUtils.iter_filenames_from_zip(version),
49
- splits=NerelIOUtils.splits.items())
50
-
51
- f2d = {}
52
- for filename, split_type in it:
53
- if split_type not in f2d:
54
- f2d[split_type] = []
55
- f2d[split_type].append(filename)
56
-
57
- filenames_by_ids, data_folding = create_fixed_folding(train_filenames=f2d[DataType.Train],
58
- test_filenames=f2d[DataType.Test],
59
- dev_filenames=f2d[DataType.Dev],
60
- limit=docs_limit)
61
-
62
- return filenames_by_ids, data_folding
@@ -1,241 +0,0 @@
1
- from arekit.common.labels.base import Label
2
-
3
-
4
- class OpinionBelongsTo(Label):
5
- pass
6
-
7
-
8
- class OpinionRelatesTo(Label):
9
- pass
10
-
11
-
12
- class NegEffectFrom(Label):
13
- pass
14
-
15
-
16
- class NegStateFrom(Label):
17
- pass
18
-
19
-
20
- class PosEffectFrom(Label):
21
- pass
22
-
23
-
24
- class PosAuthorFrom(Label):
25
- pass
26
-
27
-
28
- class NegAuthorFrom(Label):
29
- pass
30
-
31
-
32
- class PosStateFrom(Label):
33
- pass
34
-
35
-
36
- class NegativeTo(Label):
37
- pass
38
-
39
-
40
- class PositiveTo(Label):
41
- pass
42
-
43
-
44
- class STATE_BELONGS_TO(Label):
45
- pass
46
-
47
-
48
- class ABBREVIATION(Label):
49
- pass
50
-
51
-
52
- class HEADQUARTERED_IN(Label):
53
- pass
54
-
55
-
56
- class AGE_DIED_AT(Label):
57
- pass
58
-
59
-
60
- class AGE_IS(Label):
61
- pass
62
-
63
-
64
- class AGENT(Label):
65
- pass
66
-
67
-
68
- class IDEOLOGY_OF(Label):
69
- pass
70
-
71
-
72
- class PLACE_RESIDES_IN(Label):
73
- pass
74
-
75
-
76
- class POINT_IN_TIME(Label):
77
- pass
78
-
79
-
80
- class INANIMATE_INVOLVED(Label):
81
- pass
82
-
83
-
84
- class PRICE_OF(Label):
85
- pass
86
-
87
-
88
- class INCOME(Label):
89
- pass
90
-
91
-
92
- class PRODUCES(Label):
93
- pass
94
-
95
-
96
- class ALTERNATIVE_NAME(Label):
97
- pass
98
-
99
-
100
- class AWARDED_WITH(Label):
101
- pass
102
-
103
-
104
- class CAUSE_OF_DEATH(Label):
105
- pass
106
-
107
-
108
- class CONVICTED_OF(Label):
109
- pass
110
-
111
-
112
- class DATE_DEFUNCT_IN(Label):
113
- pass
114
-
115
-
116
- class DATE_FOUNDED_IN(Label):
117
- pass
118
-
119
-
120
- class DATE_OF_BIRTH(Label):
121
- pass
122
-
123
-
124
- class DATE_OF_CREATION(Label):
125
- pass
126
-
127
-
128
- class DATE_OF_DEATH(Label):
129
- pass
130
-
131
-
132
- class END_TIME(Label):
133
- pass
134
-
135
-
136
- class EXPENDITURE(Label):
137
- pass
138
-
139
-
140
- class FOUNDED_BY(Label):
141
- pass
142
-
143
-
144
- class KNOWS(Label):
145
- pass
146
-
147
-
148
- class RELATIVE(Label):
149
- pass
150
-
151
-
152
- class LOCATED_IN(Label):
153
- pass
154
-
155
-
156
- class RELIGION_OF(Label):
157
- pass
158
-
159
-
160
- class MEDICAL_CONDITION(Label):
161
- pass
162
-
163
-
164
- class SCHOOLS_ATTENDED(Label):
165
- pass
166
-
167
-
168
- class MEMBER_OF(Label):
169
- pass
170
-
171
-
172
- class SIBLING(Label):
173
- pass
174
-
175
-
176
- class ORGANIZES(Label):
177
- pass
178
-
179
-
180
- class SPOUSE(Label):
181
- pass
182
-
183
-
184
- class ORIGINS_FROM(Label):
185
- pass
186
-
187
-
188
- class START_TIME(Label):
189
- pass
190
-
191
-
192
- class OWNER_OF(Label):
193
- pass
194
-
195
-
196
- class SUBEVENT_OF(Label):
197
- pass
198
-
199
-
200
- class PARENT_OF(Label):
201
- pass
202
-
203
-
204
- class SUBORDINATE_OF(Label):
205
- pass
206
-
207
-
208
- class PART_OF(Label):
209
- pass
210
-
211
-
212
- class TAKES_PLACE_IN(Label):
213
- pass
214
-
215
-
216
- class PARTICIPANT_IN(Label):
217
- pass
218
-
219
-
220
- class WORKPLACE(Label):
221
- pass
222
-
223
-
224
- class PENALIZED_AS(Label):
225
- pass
226
-
227
-
228
- class WORKS_AS(Label):
229
- pass
230
-
231
-
232
- class PLACE_OF_DEATH(Label):
233
- pass
234
-
235
-
236
- class PLACE_OF_BIRTH(Label):
237
- pass
238
-
239
-
240
- class HAS_CAUSE (Label):
241
- pass
@@ -1,46 +0,0 @@
1
- from arekit.contrib.source.brat.annot import BratAnnotationParser
2
- from arekit.contrib.source.brat.doc import BratDocument
3
- from arekit.contrib.source.brat.sentences_reader import BratDocumentSentencesReader
4
- from arekit.contrib.source.nerel.entities import NerelEntityCollection
5
- from arekit.contrib.source.nerel.io_utils import NerelIOUtils
6
-
7
-
8
- class NerelDocReader(object):
9
-
10
- def __init__(self, version, io_utils=NerelIOUtils()):
11
- assert(isinstance(io_utils, NerelIOUtils))
12
- self.__version = version
13
- self.__io_utils = io_utils
14
- self.__doc_fold = io_utils.map_doc_to_fold_type(version)
15
-
16
- def read_text_relations(self, filename):
17
- assert(isinstance(filename, str))
18
-
19
- return self.__io_utils.read_from_zip(
20
- inner_path=self.__io_utils.get_annotation_innerpath(
21
- folding_data_type=self.__doc_fold[filename],
22
- filename=filename),
23
- process_func=lambda input_file: [
24
- relation for relation in BratAnnotationParser.parse_annotations(
25
- input_file=input_file, encoding='utf-8-sig')["relations"]],
26
- version=self.__version)
27
-
28
- def read_document(self, filename, doc_id, entities_to_ignore=None):
29
- assert(isinstance(filename, str))
30
- assert(isinstance(doc_id, int))
31
-
32
- def file_to_doc(input_file):
33
- sentences = BratDocumentSentencesReader.from_file(input_file=input_file, entities=entities)
34
- return BratDocument(doc_id=doc_id, sentences=sentences, text_relations=text_relations)
35
-
36
- entities = NerelEntityCollection.read_collection(
37
- filename=filename, version=self.__version,
38
- entities_to_ignore=entities_to_ignore, io_utils=self.__io_utils)
39
-
40
- text_relations = self.read_text_relations(filename=filename)
41
-
42
- return self.__io_utils.read_from_zip(
43
- inner_path=self.__io_utils.get_news_innerpath(
44
- folding_data_type=self.__doc_fold[filename], filename=filename),
45
- process_func=file_to_doc,
46
- version=self.__version)
@@ -1,24 +0,0 @@
1
- from os.path import basename
2
-
3
-
4
- def __iter_filtered_filenames(filenames_iter):
5
- for filename in filenames_iter:
6
- extension = filename[-4:]
7
- # Crop extension.
8
- filename = filename[:-4]
9
- if extension != ".txt":
10
- continue
11
- yield filename, basename(filename)
12
-
13
-
14
- def iter_filename_and_splittype(filenames_it, splits):
15
- for doc_id, data in enumerate(__iter_filtered_filenames(filenames_it)):
16
- filepath, filename = data
17
- for split_type, split_name in splits:
18
- if split_name in filepath:
19
- yield filename, split_type
20
-
21
-
22
- def iter_collection_filenames(filenames_it):
23
- for doc_id, filename in enumerate(__iter_filtered_filenames(filenames_it)):
24
- yield doc_id, filename
@@ -1,12 +0,0 @@
1
- import enum
2
-
3
-
4
- class NerelVersions(enum.Enum):
5
- """ List of the supported version of this collection
6
- """
7
-
8
- V1 = "v1_0"
9
- V11 = "v1_1"
10
-
11
-
12
- DEFAULT_VERSION = NerelVersions.V1
File without changes
@@ -1,62 +0,0 @@
1
- from os import path
2
-
3
- from arekit.common.experiment.data_type import DataType
4
- from arekit.contrib.source.nerel.folding.fixed import create_fixed_folding
5
- from arekit.contrib.source.nerel.io_utils import NerelIOUtils
6
- from arekit.contrib.source.nerel.utils import iter_filename_and_splittype
7
-
8
-
9
- class NerelBioIOUtils(NerelIOUtils):
10
-
11
- splits = {
12
- DataType.Train: "train",
13
- DataType.Dev: "dev",
14
- DataType.Test: "test"
15
- }
16
-
17
- @staticmethod
18
- def get_archive_filepath(version):
19
- return path.join(NerelBioIOUtils.get_data_root(), "nerel-bio-{}.zip".format(version))
20
-
21
- @staticmethod
22
- def get_annotation_innerpath(folding_data_type, filename):
23
- assert(isinstance(filename, str))
24
- return path.join(NerelBioIOUtils.splits[folding_data_type], "{}.ann".format(filename))
25
-
26
- @staticmethod
27
- def get_news_innerpath(folding_data_type, filename):
28
- assert(isinstance(filename, str))
29
- return path.join(NerelBioIOUtils.splits[folding_data_type], "{}.txt".format(filename))
30
-
31
- @staticmethod
32
- def map_doc_to_fold_type(version):
33
-
34
- it = iter_filename_and_splittype(
35
- filenames_it=NerelBioIOUtils.iter_filenames_from_zip(version),
36
- splits=NerelBioIOUtils.splits.items())
37
-
38
- d2f = {}
39
- for filename, split_type in it:
40
- d2f[filename] = split_type
41
-
42
- return d2f
43
-
44
- @staticmethod
45
- def read_dataset_split(version, docs_limit=None):
46
-
47
- it = iter_filename_and_splittype(
48
- filenames_it=NerelBioIOUtils.iter_filenames_from_zip(version),
49
- splits=NerelBioIOUtils.splits.items())
50
-
51
- f2d = {}
52
- for filename, split_type in it:
53
- if split_type not in f2d:
54
- f2d[split_type] = []
55
- f2d[split_type].append(filename)
56
-
57
- filenames_by_ids, data_folding = create_fixed_folding(train_filenames=f2d[DataType.Train],
58
- test_filenames=f2d[DataType.Test],
59
- dev_filenames=f2d[DataType.Dev],
60
- limit=docs_limit)
61
-
62
- return filenames_by_ids, data_folding
@@ -1,265 +0,0 @@
1
- from arekit.common.labels.base import Label
2
-
3
-
4
- class ABBREVIATION(Label):
5
- pass
6
-
7
-
8
- class ALTERNATIVE_NAME(Label):
9
- pass
10
-
11
-
12
- class KNOWS(Label):
13
- pass
14
-
15
-
16
- class AGE_IS(Label):
17
- pass
18
-
19
-
20
- class AGE_DIED_AT(Label):
21
- pass
22
-
23
-
24
- class AWARDED_WITH(Label):
25
- pass
26
-
27
-
28
- class PLACE_OF_BIRTH(Label):
29
- pass
30
-
31
-
32
- class DATE_DEFUNCT_IN(Label):
33
- pass
34
-
35
-
36
- class DATE_FOUNDED_IN(Label):
37
- pass
38
-
39
-
40
- class DATE_OF_BIRTH(Label):
41
- pass
42
-
43
-
44
- class DATE_OF_CREATION(Label):
45
- pass
46
-
47
-
48
- class DATE_OF_DEATH(Label):
49
- pass
50
-
51
-
52
- class POINT_IN_TIME(Label):
53
- pass
54
-
55
-
56
- class PLACE_OF_DEATH(Label):
57
- pass
58
-
59
-
60
- class FOUNDED_BY(Label):
61
- pass
62
-
63
-
64
- class HEADQUARTERED_IN(Label):
65
- pass
66
-
67
-
68
- class IDEOLOGY_OF(Label):
69
- pass
70
-
71
-
72
- class SPOUSE(Label):
73
- pass
74
-
75
-
76
- class MEMBER_OF(Label):
77
- pass
78
-
79
-
80
- class ORGANIZES(Label):
81
- pass
82
-
83
-
84
- class OWNER_OF(Label):
85
- pass
86
-
87
-
88
- class PARENT_OF(Label):
89
- pass
90
-
91
-
92
- class PARTICIPANT_IN(Label):
93
- pass
94
-
95
-
96
- class PLACE_RESIDES_IN(Label):
97
- pass
98
-
99
-
100
- class PRICE_OF(Label):
101
- pass
102
-
103
-
104
- class PRODUCES(Label):
105
- pass
106
-
107
-
108
- class RELATIVE(Label):
109
- pass
110
-
111
-
112
- class RELIGION_OF(Label):
113
- pass
114
-
115
-
116
- class SCHOOLS_ATTENDED(Label):
117
- pass
118
-
119
-
120
- class SIBLING(Label):
121
- pass
122
-
123
-
124
- class SUBEVENT_OF(Label):
125
- pass
126
-
127
-
128
- class SUBORDINATE_OF(Label):
129
- pass
130
-
131
-
132
- class TAKES_PLACE_IN(Label):
133
- pass
134
-
135
-
136
- class WORKPLACE(Label):
137
- pass
138
-
139
-
140
- class WORKS_AS(Label):
141
- pass
142
-
143
-
144
- class CONVICTED_OF(Label):
145
- pass
146
-
147
-
148
- class PENALIZED_AS(Label):
149
- pass
150
-
151
-
152
- class START_TIME(Label):
153
- pass
154
-
155
-
156
- class END_TIME(Label):
157
- pass
158
-
159
-
160
- class EXPENDITURE(Label):
161
- pass
162
-
163
-
164
- class AGENT(Label):
165
- pass
166
-
167
-
168
- class INANIMATE_INVOLVED(Label):
169
- pass
170
-
171
-
172
- class INCOME(Label):
173
- pass
174
-
175
-
176
- class SUBCLASS_OF(Label):
177
- pass
178
-
179
-
180
- class PART_OF(Label):
181
- pass
182
-
183
-
184
- class LOCATED_IN(Label):
185
- pass
186
-
187
-
188
- class TREATED_USING(Label):
189
- pass
190
-
191
-
192
- class ORIGINS_FROM(Label):
193
- pass
194
-
195
-
196
- class TO_DETECT_OR_STUDY(Label):
197
- pass
198
-
199
-
200
- class AFFECTS(Label):
201
- pass
202
-
203
-
204
- class HAS_CAUSE(Label):
205
- pass
206
-
207
-
208
- class APPLIED_TO(Label):
209
- pass
210
-
211
-
212
- class USED_IN(Label):
213
- pass
214
-
215
-
216
- class ASSOCIATED_WITH(Label):
217
- pass
218
-
219
-
220
- class HAS_ADMINISTRATION_ROUTE(Label):
221
- pass
222
-
223
-
224
- class HAS_STRENGTH(Label):
225
- pass
226
-
227
-
228
- class DURATION_OF(Label):
229
- pass
230
-
231
-
232
- class VALUE_IS(Label):
233
- pass
234
-
235
-
236
- class PHYSIOLOGY_OF(Label):
237
- pass
238
-
239
-
240
- class PROCEDURE_PERFORMED(Label):
241
- pass
242
-
243
-
244
- class MENTAL_PROCESS_OF(Label):
245
- pass
246
-
247
-
248
- class MEDICAL_CONDITION(Label):
249
- pass
250
-
251
-
252
- class DOSE_IS(Label):
253
- pass
254
-
255
-
256
- class FINDING_OF(Label):
257
- pass
258
-
259
-
260
- class CAUSE_OF_DEATH(Label):
261
- pass
262
-
263
-
264
- class CONSUME(Label):
265
- pass
@@ -1,8 +0,0 @@
1
- from arekit.contrib.source.nerel.reader import NerelDocReader
2
- from arekit.contrib.source.nerelbio.io_utils import NerelBioIOUtils
3
-
4
-
5
- class NerelBioDocReader(NerelDocReader):
6
-
7
- def __init__(self, version):
8
- super(NerelBioDocReader, self).__init__(version=version, io_utils=NerelBioIOUtils())