mawo-slovnet 1.0.0__tar.gz → 1.0.2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of mawo-slovnet might be problematic. Click here for more details.
- {mawo_slovnet-1.0.0 → mawo_slovnet-1.0.2}/PKG-INFO +3 -2
- {mawo_slovnet-1.0.0 → mawo_slovnet-1.0.2}/mawo_slovnet/__init__.py +16 -5
- {mawo_slovnet-1.0.0 → mawo_slovnet-1.0.2}/mawo_slovnet/model_downloader.py +6 -5
- {mawo_slovnet-1.0.0 → mawo_slovnet-1.0.2}/mawo_slovnet.egg-info/PKG-INFO +3 -2
- {mawo_slovnet-1.0.0 → mawo_slovnet-1.0.2}/pyproject.toml +4 -5
- {mawo_slovnet-1.0.0 → mawo_slovnet-1.0.2}/tests/test_integration.py +14 -20
- {mawo_slovnet-1.0.0 → mawo_slovnet-1.0.2}/LICENSE +0 -0
- {mawo_slovnet-1.0.0 → mawo_slovnet-1.0.2}/README.md +0 -0
- {mawo_slovnet-1.0.0 → mawo_slovnet-1.0.2}/mawo_slovnet/slovnet_morph_news_v1.tar.neural.gz +0 -0
- {mawo_slovnet-1.0.0 → mawo_slovnet-1.0.2}/mawo_slovnet/slovnet_ner_news_v1.tar.neural.gz +0 -0
- {mawo_slovnet-1.0.0 → mawo_slovnet-1.0.2}/mawo_slovnet/slovnet_syntax_news_v1.tar.neural.gz +0 -0
- {mawo_slovnet-1.0.0 → mawo_slovnet-1.0.2}/mawo_slovnet.egg-info/SOURCES.txt +0 -0
- {mawo_slovnet-1.0.0 → mawo_slovnet-1.0.2}/mawo_slovnet.egg-info/dependency_links.txt +0 -0
- {mawo_slovnet-1.0.0 → mawo_slovnet-1.0.2}/mawo_slovnet.egg-info/requires.txt +0 -0
- {mawo_slovnet-1.0.0 → mawo_slovnet-1.0.2}/mawo_slovnet.egg-info/top_level.txt +0 -0
- {mawo_slovnet-1.0.0 → mawo_slovnet-1.0.2}/setup.cfg +0 -0
|
@@ -1,10 +1,10 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: mawo-slovnet
|
|
3
|
-
Version: 1.0.0
|
|
3
|
+
Version: 1.0.2
|
|
4
4
|
Summary: Нейросетевые модели для русского языка: NER, морфология и синтаксис с автоматической загрузкой
|
|
5
5
|
Author-email: MAWO Team <team@mawo.ru>
|
|
6
6
|
Maintainer-email: MAWO Team <team@mawo.ru>
|
|
7
|
-
License-Expression: MIT
|
|
7
|
+
License: MIT
|
|
8
8
|
Project-URL: Homepage, https://github.com/mawo-ru/mawo-slovnet
|
|
9
9
|
Project-URL: Documentation, https://github.com/mawo-ru/mawo-slovnet#readme
|
|
10
10
|
Project-URL: Repository, https://github.com/mawo-ru/mawo-slovnet
|
|
@@ -14,6 +14,7 @@ Keywords: nlp,russian,ner,morphology,syntax,slovnet,mawo
|
|
|
14
14
|
Classifier: Development Status :: 5 - Production/Stable
|
|
15
15
|
Classifier: Intended Audience :: Developers
|
|
16
16
|
Classifier: Intended Audience :: Science/Research
|
|
17
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
17
18
|
Classifier: Natural Language :: Russian
|
|
18
19
|
Classifier: Operating System :: OS Independent
|
|
19
20
|
Classifier: Programming Language :: Python :: 3
|
|
@@ -7,12 +7,14 @@ Features:
|
|
|
7
7
|
- Hybrid mode: DL models + rule-based fallback
|
|
8
8
|
- 100% качество оригинального SlovNet (если модели доступны)
|
|
9
9
|
"""
|
|
10
|
+
|
|
10
11
|
from __future__ import annotations
|
|
11
12
|
|
|
12
13
|
import logging
|
|
13
14
|
import os
|
|
14
15
|
import sys
|
|
15
|
-
from
|
|
16
|
+
from pathlib import Path
|
|
17
|
+
from typing import Any, Optional
|
|
16
18
|
|
|
17
19
|
logger = logging.getLogger(__name__)
|
|
18
20
|
|
|
@@ -104,7 +106,9 @@ class EnhancedSlovNetLoader:
|
|
|
104
106
|
self.slovnet_available = True
|
|
105
107
|
logger.info("✅ Original slovnet package available")
|
|
106
108
|
except ImportError:
|
|
107
|
-
logger.info(
|
|
109
|
+
logger.info(
|
|
110
|
+
"ℹ️ Original slovnet package not installed (will try to use numpy-only mode)"
|
|
111
|
+
)
|
|
108
112
|
|
|
109
113
|
def ensure_models_downloaded(self) -> bool:
|
|
110
114
|
"""Проверяет и загружает модели если нужно.
|
|
@@ -123,9 +127,7 @@ class EnhancedSlovNetLoader:
|
|
|
123
127
|
cache_info = downloader.get_cache_info()
|
|
124
128
|
|
|
125
129
|
# Check if any models are cached
|
|
126
|
-
cached_models = [
|
|
127
|
-
name for name, info in cache_info["models"].items() if info["cached"]
|
|
128
|
-
]
|
|
130
|
+
cached_models = [name for name, info in cache_info["models"].items() if info["cached"]]
|
|
129
131
|
|
|
130
132
|
if cached_models:
|
|
131
133
|
logger.info(f"✅ Found cached models: {', '.join(cached_models)}")
|
|
@@ -184,6 +186,7 @@ class EnhancedSlovNetLoader:
|
|
|
184
186
|
sys.path.insert(0, str(model_dir))
|
|
185
187
|
|
|
186
188
|
# Import slovnet components
|
|
189
|
+
import slovnet
|
|
187
190
|
from slovnet import NewsEmbedding as _NewsEmbedding
|
|
188
191
|
from slovnet import NewsMorphTagger as _NewsMorphTagger
|
|
189
192
|
from slovnet import NewsNERTagger as _NewsNERTagger
|
|
@@ -357,11 +360,19 @@ def get_model_info() -> dict[str, Any]:
|
|
|
357
360
|
__version__ = "2.0.0-mawo-enhanced"
|
|
358
361
|
__author__ = "MAWO Team (based on SlovNet by Alexander Kukushkin)"
|
|
359
362
|
|
|
363
|
+
# Алиасы для удобства использования
|
|
364
|
+
NER = NewsNERTagger
|
|
365
|
+
Morph = NewsMorphTagger
|
|
366
|
+
Syntax = NewsSyntaxParser
|
|
367
|
+
|
|
360
368
|
__all__ = [
|
|
361
369
|
"NewsEmbedding",
|
|
362
370
|
"NewsMorphTagger",
|
|
363
371
|
"NewsNERTagger",
|
|
364
372
|
"NewsSyntaxParser",
|
|
373
|
+
"NER",
|
|
374
|
+
"Morph",
|
|
375
|
+
"Syntax",
|
|
365
376
|
"create_morphology_tagger",
|
|
366
377
|
"download_models",
|
|
367
378
|
"get_model_info",
|
|
@@ -5,13 +5,16 @@ Based on:
|
|
|
5
5
|
- SlovNet v0.6.0 (github.com/natasha/slovnet)
|
|
6
6
|
- MAWO offline-first architecture
|
|
7
7
|
"""
|
|
8
|
+
|
|
8
9
|
from __future__ import annotations
|
|
9
10
|
|
|
11
|
+
import hashlib
|
|
10
12
|
import logging
|
|
13
|
+
import os
|
|
11
14
|
import shutil
|
|
12
15
|
import tarfile
|
|
13
16
|
from pathlib import Path
|
|
14
|
-
from typing import Any
|
|
17
|
+
from typing import Any, Optional
|
|
15
18
|
from urllib.request import urlopen
|
|
16
19
|
|
|
17
20
|
logger = logging.getLogger(__name__)
|
|
@@ -129,7 +132,7 @@ class SlovNetModelDownloader:
|
|
|
129
132
|
)
|
|
130
133
|
|
|
131
134
|
# Extract tar archive
|
|
132
|
-
logger.info("📦 Extracting model archive...")
|
|
135
|
+
logger.info(f"📦 Extracting model archive...")
|
|
133
136
|
with tarfile.open(tar_path, "r") as tar:
|
|
134
137
|
tar.extractall(temp_dir)
|
|
135
138
|
|
|
@@ -287,9 +290,7 @@ class SlovNetModelDownloader:
|
|
|
287
290
|
size_mb = 0
|
|
288
291
|
if cached:
|
|
289
292
|
# Calculate directory size
|
|
290
|
-
size_bytes = sum(
|
|
291
|
-
f.stat().st_size for f in model_dir.rglob("*") if f.is_file()
|
|
292
|
-
)
|
|
293
|
+
size_bytes = sum(f.stat().st_size for f in model_dir.rglob("*") if f.is_file())
|
|
293
294
|
size_mb = size_bytes / (1024 * 1024)
|
|
294
295
|
info["total_size_mb"] += size_mb
|
|
295
296
|
|
|
@@ -1,10 +1,10 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: mawo-slovnet
|
|
3
|
-
Version: 1.0.0
|
|
3
|
+
Version: 1.0.2
|
|
4
4
|
Summary: Нейросетевые модели для русского языка: NER, морфология и синтаксис с автоматической загрузкой
|
|
5
5
|
Author-email: MAWO Team <team@mawo.ru>
|
|
6
6
|
Maintainer-email: MAWO Team <team@mawo.ru>
|
|
7
|
-
License-Expression: MIT
|
|
7
|
+
License: MIT
|
|
8
8
|
Project-URL: Homepage, https://github.com/mawo-ru/mawo-slovnet
|
|
9
9
|
Project-URL: Documentation, https://github.com/mawo-ru/mawo-slovnet#readme
|
|
10
10
|
Project-URL: Repository, https://github.com/mawo-ru/mawo-slovnet
|
|
@@ -14,6 +14,7 @@ Keywords: nlp,russian,ner,morphology,syntax,slovnet,mawo
|
|
|
14
14
|
Classifier: Development Status :: 5 - Production/Stable
|
|
15
15
|
Classifier: Intended Audience :: Developers
|
|
16
16
|
Classifier: Intended Audience :: Science/Research
|
|
17
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
17
18
|
Classifier: Natural Language :: Russian
|
|
18
19
|
Classifier: Operating System :: OS Independent
|
|
19
20
|
Classifier: Programming Language :: Python :: 3
|
|
@@ -4,11 +4,11 @@ build-backend = "setuptools.build_meta"
|
|
|
4
4
|
|
|
5
5
|
[project]
|
|
6
6
|
name = "mawo-slovnet"
|
|
7
|
-
version = "1.0.0"
|
|
7
|
+
version = "1.0.2"
|
|
8
8
|
description = "Нейросетевые модели для русского языка: NER, морфология и синтаксис с автоматической загрузкой"
|
|
9
9
|
readme = "README.md"
|
|
10
10
|
requires-python = ">=3.10"
|
|
11
|
-
license = "MIT"
|
|
11
|
+
license = {text = "MIT"}
|
|
12
12
|
authors = [
|
|
13
13
|
{name = "MAWO Team", email = "team@mawo.ru"},
|
|
14
14
|
]
|
|
@@ -28,6 +28,7 @@ classifiers = [
|
|
|
28
28
|
"Development Status :: 5 - Production/Stable",
|
|
29
29
|
"Intended Audience :: Developers",
|
|
30
30
|
"Intended Audience :: Science/Research",
|
|
31
|
+
"License :: OSI Approved :: MIT License",
|
|
31
32
|
"Natural Language :: Russian",
|
|
32
33
|
"Operating System :: OS Independent",
|
|
33
34
|
"Programming Language :: Python :: 3",
|
|
@@ -77,10 +78,8 @@ target-version = ["py310", "py311", "py312"]
|
|
|
77
78
|
[tool.ruff]
|
|
78
79
|
line-length = 100
|
|
79
80
|
target-version = "py310"
|
|
80
|
-
|
|
81
|
-
[tool.ruff.lint]
|
|
82
81
|
select = ["E", "F", "I", "N", "W", "B", "UP"]
|
|
83
|
-
ignore = ["E501"
|
|
82
|
+
ignore = ["E501"]
|
|
84
83
|
|
|
85
84
|
[tool.mypy]
|
|
86
85
|
python_version = "3.10"
|
|
@@ -3,9 +3,8 @@
|
|
|
3
3
|
Тестируют библиотеку как самодостаточный проект
|
|
4
4
|
"""
|
|
5
5
|
|
|
6
|
-
from pathlib import Path
|
|
7
|
-
|
|
8
6
|
import pytest
|
|
7
|
+
from pathlib import Path
|
|
9
8
|
|
|
10
9
|
|
|
11
10
|
class TestImports:
|
|
@@ -15,8 +14,6 @@ class TestImports:
|
|
|
15
14
|
"""Тест: главный модуль импортируется"""
|
|
16
15
|
try:
|
|
17
16
|
import mawo_slovnet
|
|
18
|
-
|
|
19
|
-
assert mawo_slovnet is not None
|
|
20
17
|
except ImportError as e:
|
|
21
18
|
pytest.fail(f"Failed to import mawo_slovnet: {e}")
|
|
22
19
|
|
|
@@ -24,8 +21,6 @@ class TestImports:
|
|
|
24
21
|
"""Тест: класс NER импортируется"""
|
|
25
22
|
try:
|
|
26
23
|
from mawo_slovnet import NER
|
|
27
|
-
|
|
28
|
-
assert NER is not None
|
|
29
24
|
except ImportError as e:
|
|
30
25
|
pytest.fail(f"Failed to import NER: {e}")
|
|
31
26
|
|
|
@@ -33,8 +28,6 @@ class TestImports:
|
|
|
33
28
|
"""Тест: класс Morph импортируется"""
|
|
34
29
|
try:
|
|
35
30
|
from mawo_slovnet import Morph
|
|
36
|
-
|
|
37
|
-
assert Morph is not None
|
|
38
31
|
except ImportError as e:
|
|
39
32
|
pytest.fail(f"Failed to import Morph: {e}")
|
|
40
33
|
|
|
@@ -42,8 +35,6 @@ class TestImports:
|
|
|
42
35
|
"""Тест: класс Syntax импортируется"""
|
|
43
36
|
try:
|
|
44
37
|
from mawo_slovnet import Syntax
|
|
45
|
-
|
|
46
|
-
assert Syntax is not None
|
|
47
38
|
except ImportError as e:
|
|
48
39
|
pytest.fail(f"Failed to import Syntax: {e}")
|
|
49
40
|
|
|
@@ -67,7 +58,7 @@ class TestNERInitialization:
|
|
|
67
58
|
|
|
68
59
|
ner = NER()
|
|
69
60
|
# Проверяем, что модель загружена
|
|
70
|
-
assert hasattr(ner,
|
|
61
|
+
assert hasattr(ner, "model") or hasattr(ner, "_model")
|
|
71
62
|
|
|
72
63
|
|
|
73
64
|
class TestMorphInitialization:
|
|
@@ -127,7 +118,7 @@ class TestNERFunctionality:
|
|
|
127
118
|
assert len(result) > 0, "Expected to find at least one entity"
|
|
128
119
|
|
|
129
120
|
# Проверяем, что есть сущность типа PER
|
|
130
|
-
has_person = any(span.type ==
|
|
121
|
+
has_person = any(span.type == "PER" for span in result if hasattr(span, "type"))
|
|
131
122
|
assert has_person, "Expected to find PER entity for 'Александр Пушкин'"
|
|
132
123
|
|
|
133
124
|
def test_ner_finds_location(self):
|
|
@@ -139,7 +130,7 @@ class TestNERFunctionality:
|
|
|
139
130
|
result = ner(text)
|
|
140
131
|
|
|
141
132
|
assert len(result) > 0
|
|
142
|
-
has_location = any(span.type ==
|
|
133
|
+
has_location = any(span.type == "LOC" for span in result if hasattr(span, "type"))
|
|
143
134
|
assert has_location, "Expected to find LOC entity for 'Санкт-Петербурге'"
|
|
144
135
|
|
|
145
136
|
def test_ner_empty_text(self):
|
|
@@ -190,7 +181,7 @@ class TestMorphFunctionality:
|
|
|
190
181
|
assert len(result) > 0
|
|
191
182
|
# Проверяем, что есть разметка
|
|
192
183
|
first_token = result[0]
|
|
193
|
-
assert hasattr(first_token,
|
|
184
|
+
assert hasattr(first_token, "pos") or hasattr(first_token, "tag")
|
|
194
185
|
|
|
195
186
|
def test_morph_empty_text(self):
|
|
196
187
|
"""Тест: Morph обрабатывает пустой текст"""
|
|
@@ -230,7 +221,7 @@ class TestSyntaxFunctionality:
|
|
|
230
221
|
assert len(result) > 0
|
|
231
222
|
# Проверяем, что есть синтаксические связи
|
|
232
223
|
first_token = result[0]
|
|
233
|
-
assert hasattr(first_token,
|
|
224
|
+
assert hasattr(first_token, "head") or hasattr(first_token, "rel")
|
|
234
225
|
|
|
235
226
|
|
|
236
227
|
class TestDataFiles:
|
|
@@ -238,32 +229,35 @@ class TestDataFiles:
|
|
|
238
229
|
|
|
239
230
|
def test_ner_model_file_exists(self):
|
|
240
231
|
"""Тест: файл модели NER существует"""
|
|
232
|
+
from pathlib import Path
|
|
241
233
|
import mawo_slovnet
|
|
242
234
|
|
|
243
235
|
# Находим директорию модуля
|
|
244
236
|
module_path = Path(mawo_slovnet.__file__).parent
|
|
245
237
|
|
|
246
238
|
# Проверяем наличие модели NER
|
|
247
|
-
ner_model = module_path /
|
|
239
|
+
ner_model = module_path / "slovnet_ner_news_v1.tar.neural.gz"
|
|
248
240
|
assert ner_model.exists(), f"NER model not found at {ner_model}"
|
|
249
241
|
assert ner_model.stat().st_size > 1_000_000, "NER model file is too small"
|
|
250
242
|
|
|
251
243
|
def test_morph_model_file_exists(self):
|
|
252
244
|
"""Тест: файл модели Morph существует"""
|
|
245
|
+
from pathlib import Path
|
|
253
246
|
import mawo_slovnet
|
|
254
247
|
|
|
255
248
|
module_path = Path(mawo_slovnet.__file__).parent
|
|
256
|
-
morph_model = module_path /
|
|
249
|
+
morph_model = module_path / "slovnet_morph_news_v1.tar.neural.gz"
|
|
257
250
|
|
|
258
251
|
assert morph_model.exists(), f"Morph model not found at {morph_model}"
|
|
259
252
|
assert morph_model.stat().st_size > 1_000_000, "Morph model file is too small"
|
|
260
253
|
|
|
261
254
|
def test_syntax_model_file_exists(self):
|
|
262
255
|
"""Тест: файл модели Syntax существует"""
|
|
256
|
+
from pathlib import Path
|
|
263
257
|
import mawo_slovnet
|
|
264
258
|
|
|
265
259
|
module_path = Path(mawo_slovnet.__file__).parent
|
|
266
|
-
syntax_model = module_path /
|
|
260
|
+
syntax_model = module_path / "slovnet_syntax_news_v1.tar.neural.gz"
|
|
267
261
|
|
|
268
262
|
assert syntax_model.exists(), f"Syntax model not found at {syntax_model}"
|
|
269
263
|
assert syntax_model.stat().st_size > 1_000_000, "Syntax model file is too small"
|
|
@@ -349,5 +343,5 @@ class TestMultipleInstances:
|
|
|
349
343
|
assert syntax_result is not None
|
|
350
344
|
|
|
351
345
|
|
|
352
|
-
if __name__ ==
|
|
353
|
-
pytest.main([__file__,
|
|
346
|
+
if __name__ == "__main__":
|
|
347
|
+
pytest.main([__file__, "-v", "--tb=short"])
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|