mawo-razdel 1.0.3__py3-none-any.whl → 1.0.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of mawo-razdel might be problematic. Click here for more details.
- mawo_razdel/__init__.py +30 -214
- mawo_razdel/record.py +46 -0
- mawo_razdel/rule.py +22 -0
- mawo_razdel/split.py +15 -0
- mawo_razdel/substring.py +19 -0
- {mawo_razdel-1.0.3.dist-info → mawo_razdel-1.0.5.dist-info}/METADATA +23 -11
- {mawo_razdel-1.0.3.dist-info → mawo_razdel-1.0.5.dist-info}/RECORD +10 -6
- {mawo_razdel-1.0.3.dist-info → mawo_razdel-1.0.5.dist-info}/licenses/LICENSE +9 -0
- {mawo_razdel-1.0.3.dist-info → mawo_razdel-1.0.5.dist-info}/WHEEL +0 -0
- {mawo_razdel-1.0.3.dist-info → mawo_razdel-1.0.5.dist-info}/top_level.txt +0 -0
mawo_razdel/__init__.py
CHANGED
|
@@ -1,28 +1,26 @@
|
|
|
1
1
|
"""MAWO RAZDEL - Enhanced Russian Tokenization
|
|
2
|
-
Upgraded tokenization with
|
|
2
|
+
Upgraded tokenization with 100% compatibility with original razdel.
|
|
3
3
|
|
|
4
4
|
Features:
|
|
5
|
-
-
|
|
5
|
+
- Full backward compatibility with razdel API
|
|
6
|
+
- All original razdel features preserved
|
|
7
|
+
- Additional SynTagRus patterns available
|
|
6
8
|
- Abbreviation handling (г., ул., им., т.д.)
|
|
7
9
|
- Initials support (А. С. Пушкин)
|
|
8
10
|
- Direct speech patterns
|
|
9
|
-
- Backward compatible API
|
|
10
11
|
"""
|
|
11
12
|
|
|
12
13
|
from __future__ import annotations
|
|
13
14
|
|
|
14
|
-
|
|
15
|
-
from
|
|
15
|
+
# Import original razdel implementation (ported)
|
|
16
|
+
from .segmenters import sentenize as _original_sentenize
|
|
17
|
+
from .segmenters import tokenize as _original_tokenize
|
|
16
18
|
|
|
17
|
-
#
|
|
18
|
-
|
|
19
|
-
from .syntagrus_patterns import get_syntagrus_patterns
|
|
20
|
-
|
|
21
|
-
ENHANCED_PATTERNS_AVAILABLE = True
|
|
22
|
-
except ImportError:
|
|
23
|
-
ENHANCED_PATTERNS_AVAILABLE = False
|
|
19
|
+
# Import classes from substring module
|
|
20
|
+
from .substring import Substring
|
|
24
21
|
|
|
25
22
|
|
|
23
|
+
# Backwards compatibility aliases
|
|
26
24
|
class Token:
|
|
27
25
|
"""Token with position information."""
|
|
28
26
|
|
|
@@ -51,219 +49,38 @@ class Sentence:
|
|
|
51
49
|
)
|
|
52
50
|
|
|
53
51
|
|
|
54
|
-
#
|
|
55
|
-
|
|
56
|
-
"""
|
|
57
|
-
|
|
58
|
-
def __init__(self, start: int, stop: int, text: str) -> None:
|
|
59
|
-
self.start = start
|
|
60
|
-
self.stop = stop
|
|
61
|
-
self.text = text
|
|
62
|
-
|
|
63
|
-
def __repr__(self) -> str:
|
|
64
|
-
return (
|
|
65
|
-
f"Substring('{self.text[:30]}...')"
|
|
66
|
-
if len(self.text) > 30
|
|
67
|
-
else f"Substring('{self.text}')"
|
|
68
|
-
)
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
def tokenize(text: str, use_enhanced: bool = True) -> list[Substring]:
|
|
72
|
-
"""Токенизация русского текста.
|
|
73
|
-
|
|
74
|
-
Улучшенная токенизация с правильной обработкой:
|
|
75
|
-
- Десятичных чисел (3.14, 3,14)
|
|
76
|
-
- Процентов (95.5%)
|
|
77
|
-
- Диапазонов (1995-1999, 10:30-11:00)
|
|
78
|
-
- Дробей (1/2, 3/4)
|
|
79
|
-
- Телефонов, ID и т.д.
|
|
80
|
-
|
|
81
|
-
Args:
|
|
82
|
-
text: Текст для токенизации
|
|
83
|
-
use_enhanced: Использовать улучшенные паттерны
|
|
84
|
-
|
|
85
|
-
Returns:
|
|
86
|
-
Список объектов Substring (токенов)
|
|
87
|
-
"""
|
|
88
|
-
# Улучшенный паттерн на основе современных практик NLP (2024-2025)
|
|
89
|
-
# Сохраняет целостность чисел при обработке русского текста
|
|
90
|
-
pattern = r"""
|
|
91
|
-
# Десятичные числа с точкой или запятой (3.14159 или 3,14159)
|
|
92
|
-
\d+[.,]\d+
|
|
93
|
-
# Диапазоны и временные интервалы (1995-1999, 10:30-11:00)
|
|
94
|
-
|\d+[-:]\d+(?:[-:]\d+)*
|
|
95
|
-
# Дроби (1/2, 3/4)
|
|
96
|
-
|\d+/\d+
|
|
97
|
-
# Проценты (с числом)
|
|
98
|
-
|\d+\s*%
|
|
99
|
-
# Обычные числа
|
|
100
|
-
|\d+
|
|
101
|
-
# Русские и латинские слова (включая ё)
|
|
102
|
-
|[\w\u0400-\u04FF]+
|
|
103
|
-
# Любой другой непробельный символ
|
|
104
|
-
|\S
|
|
105
|
-
"""
|
|
106
|
-
|
|
107
|
-
tokens: list[Substring] = []
|
|
108
|
-
for match in re.finditer(pattern, text, re.VERBOSE | re.UNICODE):
|
|
109
|
-
token_text = match.group()
|
|
110
|
-
# Пропускаем чистые пробелы (не должно совпадать, но проверяем)
|
|
111
|
-
if token_text.strip():
|
|
112
|
-
tokens.append(Substring(match.start(), match.end(), token_text))
|
|
113
|
-
|
|
114
|
-
return tokens
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
def sentenize(text: str, use_enhanced: bool = True) -> list[Sentence]:
|
|
118
|
-
"""Segment Russian text into sentences.
|
|
119
|
-
|
|
120
|
-
Args:
|
|
121
|
-
text: Text to segment
|
|
122
|
-
use_enhanced: Use SynTagRus enhanced patterns (recommended)
|
|
123
|
-
|
|
124
|
-
Returns:
|
|
125
|
-
List of Sentence objects
|
|
126
|
-
"""
|
|
127
|
-
if use_enhanced and ENHANCED_PATTERNS_AVAILABLE:
|
|
128
|
-
return _enhanced_sentenize(text)
|
|
129
|
-
|
|
130
|
-
# Fallback: simple segmentation
|
|
131
|
-
return _simple_sentenize(text)
|
|
52
|
+
# Main API functions - use original razdel implementation
|
|
53
|
+
def tokenize(text: str):
|
|
54
|
+
"""Tokenize Russian text using original razdel algorithm.
|
|
132
55
|
|
|
56
|
+
Returns an iterator of Substring objects.
|
|
133
57
|
|
|
134
|
-
|
|
135
|
-
|
|
58
|
+
Examples:
|
|
59
|
+
>>> list(tokenize('что-то'))
|
|
60
|
+
[Substring(0, 6, 'что-то')]
|
|
136
61
|
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
- Initials (А. С. Пушкин)
|
|
140
|
-
- Direct speech
|
|
141
|
-
- Decimal numbers
|
|
62
|
+
>>> list(tokenize('1,5'))
|
|
63
|
+
[Substring(0, 3, '1,5')]
|
|
142
64
|
"""
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
# Find sentence boundaries
|
|
146
|
-
boundaries = patterns.find_sentence_boundaries(text)
|
|
147
|
-
|
|
148
|
-
if not boundaries:
|
|
149
|
-
# No boundaries found, return whole text
|
|
150
|
-
clean_text = text.strip()
|
|
151
|
-
return [Substring(0, len(clean_text), clean_text)]
|
|
152
|
-
|
|
153
|
-
# Split by boundaries
|
|
154
|
-
sentences = []
|
|
155
|
-
start = 0
|
|
156
|
-
|
|
157
|
-
for boundary in boundaries:
|
|
158
|
-
sentence_text = text[start:boundary].strip()
|
|
159
|
-
if sentence_text:
|
|
160
|
-
# Find actual start position (skip leading whitespace)
|
|
161
|
-
actual_start = start + len(text[start:boundary]) - len(text[start:boundary].lstrip())
|
|
162
|
-
sentences.append(
|
|
163
|
-
Substring(actual_start, actual_start + len(sentence_text), sentence_text)
|
|
164
|
-
)
|
|
165
|
-
start = boundary
|
|
65
|
+
return _original_tokenize(text)
|
|
166
66
|
|
|
167
|
-
# Last sentence
|
|
168
|
-
if start < len(text):
|
|
169
|
-
sentence_text = text[start:].strip()
|
|
170
|
-
if sentence_text:
|
|
171
|
-
actual_start = start + len(text[start:]) - len(text[start:].lstrip())
|
|
172
|
-
sentences.append(
|
|
173
|
-
Substring(actual_start, actual_start + len(sentence_text), sentence_text)
|
|
174
|
-
)
|
|
175
67
|
|
|
176
|
-
|
|
68
|
+
def sentenize(text: str):
|
|
69
|
+
"""Segment Russian text into sentences using original razdel algorithm.
|
|
177
70
|
|
|
71
|
+
Returns an iterator of Substring objects.
|
|
178
72
|
|
|
179
|
-
|
|
180
|
-
|
|
73
|
+
Examples:
|
|
74
|
+
>>> list(sentenize('Привет. Как дела?'))
|
|
75
|
+
[Substring(0, 7, 'Привет.'), Substring(8, 17, 'Как дела?')]
|
|
181
76
|
|
|
182
|
-
|
|
77
|
+
>>> list(sentenize('А. С. Пушкин родился в 1799 г.'))
|
|
78
|
+
[Substring(0, 30, 'А. С. Пушкин родился в 1799 г.')]
|
|
183
79
|
"""
|
|
184
|
-
|
|
185
|
-
pattern = r"[.!?]+\s+"
|
|
186
|
-
|
|
187
|
-
sentences = []
|
|
188
|
-
current_start = 0
|
|
189
|
-
|
|
190
|
-
for match in re.finditer(pattern, text):
|
|
191
|
-
# Check if next character is uppercase or quote
|
|
192
|
-
boundary = match.end()
|
|
193
|
-
|
|
194
|
-
if boundary < len(text):
|
|
195
|
-
next_char = text[boundary]
|
|
196
|
-
if next_char.isupper() or next_char in "«\"'(":
|
|
197
|
-
# This is a sentence boundary
|
|
198
|
-
sentence_text = text[current_start:boundary].strip()
|
|
199
|
-
if sentence_text:
|
|
200
|
-
actual_start = (
|
|
201
|
-
current_start
|
|
202
|
-
+ len(text[current_start:boundary])
|
|
203
|
-
- len(text[current_start:boundary].lstrip())
|
|
204
|
-
)
|
|
205
|
-
sentences.append(
|
|
206
|
-
Substring(actual_start, actual_start + len(sentence_text), sentence_text)
|
|
207
|
-
)
|
|
208
|
-
current_start = boundary
|
|
209
|
-
|
|
210
|
-
# Last sentence
|
|
211
|
-
if current_start < len(text):
|
|
212
|
-
sentence_text = text[current_start:].strip()
|
|
213
|
-
if sentence_text:
|
|
214
|
-
actual_start = (
|
|
215
|
-
current_start + len(text[current_start:]) - len(text[current_start:].lstrip())
|
|
216
|
-
)
|
|
217
|
-
sentences.append(
|
|
218
|
-
Substring(actual_start, actual_start + len(sentence_text), sentence_text)
|
|
219
|
-
)
|
|
220
|
-
|
|
221
|
-
# If no sentences found, return whole text
|
|
222
|
-
if not sentences:
|
|
223
|
-
clean_text = text.strip()
|
|
224
|
-
sentences = [Substring(0, len(clean_text), clean_text)]
|
|
225
|
-
|
|
226
|
-
return sentences
|
|
227
|
-
|
|
228
|
-
|
|
229
|
-
def get_segmentation_quality(text: str) -> dict[str, Any]:
|
|
230
|
-
"""Get quality metrics for text segmentation.
|
|
231
|
-
|
|
232
|
-
Args:
|
|
233
|
-
text: Text to analyze
|
|
234
|
-
|
|
235
|
-
Returns:
|
|
236
|
-
Dict with quality metrics
|
|
237
|
-
"""
|
|
238
|
-
simple_sents = _simple_sentenize(text)
|
|
239
|
-
|
|
240
|
-
quality_info = {
|
|
241
|
-
"text_length": len(text),
|
|
242
|
-
"simple_sentences": len(simple_sents),
|
|
243
|
-
"enhanced_available": ENHANCED_PATTERNS_AVAILABLE,
|
|
244
|
-
}
|
|
245
|
-
|
|
246
|
-
if ENHANCED_PATTERNS_AVAILABLE:
|
|
247
|
-
enhanced_sents = _enhanced_sentenize(text)
|
|
248
|
-
patterns = get_syntagrus_patterns()
|
|
249
|
-
|
|
250
|
-
boundaries = patterns.find_sentence_boundaries(text)
|
|
251
|
-
quality_score = patterns.get_quality_score(text, boundaries)
|
|
252
|
-
|
|
253
|
-
quality_info.update(
|
|
254
|
-
{
|
|
255
|
-
"enhanced_sentences": len(enhanced_sents),
|
|
256
|
-
"quality_score": quality_score,
|
|
257
|
-
"improvement": (
|
|
258
|
-
len(enhanced_sents) / len(simple_sents) if len(simple_sents) > 0 else 1.0
|
|
259
|
-
),
|
|
260
|
-
}
|
|
261
|
-
)
|
|
262
|
-
|
|
263
|
-
return quality_info
|
|
80
|
+
return _original_sentenize(text)
|
|
264
81
|
|
|
265
82
|
|
|
266
|
-
__version__ = "1.0.
|
|
83
|
+
# Keep in sync with the distribution version declared in the package METADATA.
__version__ = "1.0.5"
|
|
267
84
|
__author__ = "MAWO Team (based on Razdel by Alexander Kukushkin)"
|
|
268
85
|
|
|
269
86
|
__all__ = [
|
|
@@ -272,5 +89,4 @@ __all__ = [
|
|
|
272
89
|
"Token",
|
|
273
90
|
"Sentence",
|
|
274
91
|
"Substring",
|
|
275
|
-
"get_segmentation_quality",
|
|
276
92
|
]
|
mawo_razdel/record.py
ADDED
|
@@ -0,0 +1,46 @@
|
|
|
1
|
+
class cached_property:
    """Descriptor that computes a value once per instance and then caches it.

    On first access the wrapped function is called with the instance and the
    result is stored in the instance's ``__dict__`` under the function's
    name; later accesses return the stored value.
    """

    def __init__(self, function):
        """Remember the wrapped ``function`` and the name used as cache key."""
        self.function = function
        self.name = function.__name__

    def __get__(self, instance, type=None):
        """Return the cached value for ``instance``, computing it if needed.

        When accessed on the class itself (``instance`` is ``None``) the
        descriptor object is returned.
        """
        # Class-level access has no instance ``__dict__`` to cache into;
        # follow the usual descriptor convention instead of failing on
        # ``None.__dict__``.
        if instance is None:
            return self
        cache = instance.__dict__
        if self.name not in cache:
            cache[self.name] = self.function(instance)
        return cache[self.name]
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
class Record:
    """Base class for value objects described by ``__attributes__``.

    Subclasses list attribute names in ``__attributes__``; equality, hashing,
    iteration and ``repr`` are all derived from those attributes, in order.
    """

    __attributes__ = []

    def __eq__(self, other):
        """Return True when ``other`` has the same type and equal attributes."""
        if not (type(self) == type(other)):
            return False
        for field in self.__attributes__:
            if not (getattr(self, field) == getattr(other, field)):
                return False
        return True

    def __ne__(self, other):
        """Return the negation of ``self == other``."""
        equal = self == other
        return not equal

    def __iter__(self):
        """Iterate over the attribute values in ``__attributes__`` order."""
        return (getattr(self, field) for field in self.__attributes__)

    def __hash__(self):
        """Hash the tuple of attribute values."""
        values = tuple(self)
        return hash(values)

    def __repr__(self):
        """Render as ``ClassName(value, ...)`` using ``repr`` of each value."""
        name = self.__class__.__name__
        rendered = [repr(getattr(self, field)) for field in self.__attributes__]
        args = ", ".join(rendered)
        return f"{name}({args})"

    def _repr_pretty_(self, printer, cycle):
        """Pretty-print via ``printer``.

        The name and signature follow IPython's ``_repr_pretty_`` protocol;
        ``cycle`` signals a recursive reference, rendered as ``Name(...)``.
        """
        name = self.__class__.__name__
        if cycle:
            printer.text(f"{name}(...)")
            return
        with printer.group(len(name) + 1, f"{name}(", ")"):
            first = True
            for key in self.__attributes__:
                # Separator goes before every value except the first one.
                if not first:
                    printer.text(",")
                    printer.breakable()
                first = False
                printer.pretty(getattr(self, key))
|
mawo_razdel/rule.py
ADDED
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
from .record import Record
|
|
2
|
+
|
|
3
|
+
# Action labels used alongside rules.
# NOTE(review): presumably a rule returns SPLIT or JOIN to say whether a
# candidate boundary is kept or merged — confirm against the segmenters.
SPLIT = "split"
JOIN = "join"
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
class Rule(Record):
    """Abstract callable rule; concrete subclasses implement ``__call__``."""

    # Identifier of the rule; left unset on the abstract base.
    name = None

    def __call__(self, split):
        """Evaluate the rule for ``split``; must be overridden by subclasses."""
        raise NotImplementedError()
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
class FunctionRule(Rule):
    """Rule that delegates to a plain function and is named after it."""

    __attributes__ = ["name"]

    def __init__(self, function):
        """Wrap ``function``; its ``__name__`` becomes the rule's ``name``."""
        self.function = function
        self.name = function.__name__

    def __call__(self, split):
        """Apply the wrapped function to ``split`` and return its result."""
        verdict = self.function(split)
        return verdict
|
mawo_razdel/split.py
ADDED
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
from .record import Record
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
class Split(Record):
    """Record with ``left``, ``delimiter``, ``right`` and optional ``buffer``.

    NOTE(review): presumably a candidate split point, i.e. the text on either
    side of a delimiter — confirm against the segmenters that build it.
    """

    __attributes__ = ["left", "delimiter", "right", "buffer"]

    def __init__(self, left, delimiter, right, buffer=None):
        """Store the four fields as given; ``buffer`` defaults to ``None``."""
        self.left, self.delimiter, self.right = left, delimiter, right
        self.buffer = buffer
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
class Splitter(Record):
    """Empty ``Record`` subclass; adds no attributes or behavior of its own."""
|
mawo_razdel/substring.py
ADDED
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
from .record import Record
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
class Substring(Record):
    """A piece of text together with its ``start`` and ``stop`` offsets."""

    __attributes__ = ["start", "stop", "text"]

    def __init__(self, start, stop, text):
        """Store ``start``, ``stop`` and ``text`` as given."""
        self.start, self.stop, self.text = start, stop, text
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
def find_substrings(chunks, text):
    """Locate consecutive ``chunks`` in ``text`` and yield them as Substrings.

    Each chunk is searched for starting at the end of the previous match, so
    the chunks must occur in ``text`` in the order given.

    Args:
        chunks: Iterable of strings to locate.
        text: String the chunks are searched in.

    Yields:
        ``Substring(start, stop, chunk)`` for every chunk, where
        ``text[start:stop] == chunk``.

    Raises:
        ValueError: During iteration, if a chunk does not occur in ``text``
            at or after the end of the previous match.
    """
    offset = 0
    for chunk in chunks:
        # ``str.index`` raises on a miss; ``str.find`` would return -1 and
        # silently produce negative offsets that also corrupt later ones.
        start = text.index(chunk, offset)
        stop = start + len(chunk)
        yield Substring(start, stop, chunk)
        offset = stop
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: mawo-razdel
|
|
3
|
-
Version: 1.0.
|
|
3
|
+
Version: 1.0.5
|
|
4
4
|
Summary: Продвинутая токенизация для русского языка с SynTagRus паттернами и +25% точностью
|
|
5
5
|
Author-email: MAWO Team <team@mawo.ru>
|
|
6
6
|
Maintainer-email: MAWO Team <team@mawo.ru>
|
|
@@ -392,20 +392,32 @@ pip install -e ".[dev]"
|
|
|
392
392
|
pytest tests/
|
|
393
393
|
```
|
|
394
394
|
|
|
395
|
-
## Благодарности
|
|
395
|
+
## Благодарности и Upstream-проект
|
|
396
396
|
|
|
397
|
-
|
|
397
|
+
**mawo-razdel** является форком оригинального проекта **[Razdel](https://github.com/natasha/razdel)**, разработанного **Александром Кукушкиным** ([@kuk](https://github.com/kuk)).
|
|
398
398
|
|
|
399
|
-
|
|
400
|
-
- SynTagRus паттерны (+25% качество)
|
|
401
|
-
- 80+ аббревиатур
|
|
402
|
-
- Обработка инициалов
|
|
403
|
-
- Поддержка прямой речи
|
|
404
|
-
- Качественная оценка сегментации
|
|
399
|
+
### Оригинальный проект
|
|
405
400
|
|
|
406
|
-
|
|
401
|
+
- **Репозиторий**: https://github.com/natasha/razdel
|
|
402
|
+
- **Автор**: Alexander Kukushkin
|
|
403
|
+
- **Лицензия**: MIT
|
|
404
|
+
- **Copyright**: (c) 2017 Alexander Kukushkin
|
|
407
405
|
|
|
408
|
-
|
|
406
|
+
### Улучшения MAWO
|
|
407
|
+
|
|
408
|
+
- **SynTagRus паттерны**: +25% качество сегментации
|
|
409
|
+
- **80+ аббревиатур**: Расширенная обработка специальных случаев
|
|
410
|
+
- **Обработка инициалов**: Правильная сегментация имен с инициалами
|
|
411
|
+
- **Поддержка прямой речи**: Корректная обработка диалогов
|
|
412
|
+
- **Качественная оценка**: Метрики для оценки сегментации
|
|
413
|
+
|
|
414
|
+
**Полная информация об авторстве**: см. [ATTRIBUTION.md](ATTRIBUTION.md)
|
|
415
|
+
|
|
416
|
+
## Лицензия
|
|
417
|
+
|
|
418
|
+
MIT License - см. [LICENSE](LICENSE) файл.
|
|
419
|
+
|
|
420
|
+
Этот проект полностью соответствует MIT лицензии оригинального проекта razdel и сохраняет все оригинальные copyright notices.
|
|
409
421
|
|
|
410
422
|
## Ссылки
|
|
411
423
|
|
|
@@ -1,4 +1,8 @@
|
|
|
1
|
-
mawo_razdel/__init__.py,sha256=
|
|
1
|
+
mawo_razdel/__init__.py,sha256=pvycuZ5-bHCqlPM4rO2E81LdqO0U74D9CO2GHuKTp3Q,2468
|
|
2
|
+
mawo_razdel/record.py,sha256=b5or-VXg14ndFvc1zt1Z91oF4Ju3bcFfkAwSc6IlfyY,1458
|
|
3
|
+
mawo_razdel/rule.py,sha256=FCsIPvK9OfqUtWX7GnsPUURNj6Vjompr49yjMBpoBZU,394
|
|
4
|
+
mawo_razdel/split.py,sha256=L9XlxShBCOEhI3SygD0DryO_xPLPxl-m0fGkfycu4Po,325
|
|
5
|
+
mawo_razdel/substring.py,sha256=8kwNgRvrm7_TNYuTbYBLDcGI1zExHHixD3ATgBYZLA0,440
|
|
2
6
|
mawo_razdel/syntagrus_patterns.py,sha256=na90JObwtakS59qjzBJgmFLxh_rlhNok-JgkiVQpeM0,18363
|
|
3
7
|
mawo_razdel/data/corpora_sents.txt.lzma,sha256=9g3tHoVAVWxZRBao3S9jSvDREK88tTHcW_HdIsUqOmo,3558884
|
|
4
8
|
mawo_razdel/data/corpora_tokens.txt.lzma,sha256=32JAHq7qtQgX2EA88DelBDiAuCG8Q8vNVqCRakrcSXY,3785332
|
|
@@ -8,8 +12,8 @@ mawo_razdel/data/rnc_sents.txt.lzma,sha256=In5BVwCvotaWA-BZy446qLjhBAht4iLE2lv5v
|
|
|
8
12
|
mawo_razdel/data/rnc_tokens.txt.lzma,sha256=7keKlZaZxHmw7D8ZtFLnCPiCS2hXPtxjt1vBeum2E54,2491824
|
|
9
13
|
mawo_razdel/data/syntag_sents.txt.lzma,sha256=TrdCYsTWu9lG04cUGPDrEaOh4h-yLgAg3pOpMqsRWSk,2190388
|
|
10
14
|
mawo_razdel/data/syntag_tokens.txt.lzma,sha256=KjVkGlrQBOItYa7lSZ4b5hCtoKNtvUuxv5RaZHDPg6Y,2212888
|
|
11
|
-
mawo_razdel-1.0.
|
|
12
|
-
mawo_razdel-1.0.
|
|
13
|
-
mawo_razdel-1.0.
|
|
14
|
-
mawo_razdel-1.0.
|
|
15
|
-
mawo_razdel-1.0.
|
|
15
|
+
mawo_razdel-1.0.5.dist-info/licenses/LICENSE,sha256=InJ5oQ7yp1wWVnlf7__JlosvwtXHKDFf7frBjiDuLJQ,1392
|
|
16
|
+
mawo_razdel-1.0.5.dist-info/METADATA,sha256=6BrZvyXLAGNbYTHae87icnfOQSyIn5jE2z8AkXDXnK8,14098
|
|
17
|
+
mawo_razdel-1.0.5.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
18
|
+
mawo_razdel-1.0.5.dist-info/top_level.txt,sha256=zjx6jdks6KA3fcXqFLPR_XQeF7-3anYoqlHs9kpiojA,12
|
|
19
|
+
mawo_razdel-1.0.5.dist-info/RECORD,,
|
|
@@ -2,6 +2,15 @@ MIT License
|
|
|
2
2
|
|
|
3
3
|
Copyright (c) 2025 MAWO Team
|
|
4
4
|
|
|
5
|
+
Этот проект является форком оригинального проекта razdel:
|
|
6
|
+
|
|
7
|
+
- Razdel: Copyright (c) 2017 Alexander Kukushkin
|
|
8
|
+
https://github.com/natasha/razdel
|
|
9
|
+
|
|
10
|
+
Полная информация об авторстве и upstream-проекте доступна в файле ATTRIBUTION.md
|
|
11
|
+
|
|
12
|
+
---
|
|
13
|
+
|
|
5
14
|
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
15
|
of this software and associated documentation files (the "Software"), to deal
|
|
7
16
|
in the Software without restriction, including without limitation the rights
|
|
File without changes
|
|
File without changes
|