mawo-slovnet 1.0.1__tar.gz → 1.0.2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of mawo-slovnet might be problematic. Click here for more details.
- {mawo_slovnet-1.0.1 → mawo_slovnet-1.0.2}/PKG-INFO +1 -1
- {mawo_slovnet-1.0.1 → mawo_slovnet-1.0.2}/mawo_slovnet/__init__.py +13 -4
- {mawo_slovnet-1.0.1 → mawo_slovnet-1.0.2}/mawo_slovnet/model_downloader.py +2 -3
- {mawo_slovnet-1.0.1 → mawo_slovnet-1.0.2}/mawo_slovnet.egg-info/PKG-INFO +1 -1
- {mawo_slovnet-1.0.1 → mawo_slovnet-1.0.2}/pyproject.toml +1 -1
- {mawo_slovnet-1.0.1 → mawo_slovnet-1.0.2}/tests/test_integration.py +10 -10
- {mawo_slovnet-1.0.1 → mawo_slovnet-1.0.2}/LICENSE +0 -0
- {mawo_slovnet-1.0.1 → mawo_slovnet-1.0.2}/README.md +0 -0
- {mawo_slovnet-1.0.1 → mawo_slovnet-1.0.2}/mawo_slovnet/slovnet_morph_news_v1.tar.neural.gz +0 -0
- {mawo_slovnet-1.0.1 → mawo_slovnet-1.0.2}/mawo_slovnet/slovnet_ner_news_v1.tar.neural.gz +0 -0
- {mawo_slovnet-1.0.1 → mawo_slovnet-1.0.2}/mawo_slovnet/slovnet_syntax_news_v1.tar.neural.gz +0 -0
- {mawo_slovnet-1.0.1 → mawo_slovnet-1.0.2}/mawo_slovnet.egg-info/SOURCES.txt +0 -0
- {mawo_slovnet-1.0.1 → mawo_slovnet-1.0.2}/mawo_slovnet.egg-info/dependency_links.txt +0 -0
- {mawo_slovnet-1.0.1 → mawo_slovnet-1.0.2}/mawo_slovnet.egg-info/requires.txt +0 -0
- {mawo_slovnet-1.0.1 → mawo_slovnet-1.0.2}/mawo_slovnet.egg-info/top_level.txt +0 -0
- {mawo_slovnet-1.0.1 → mawo_slovnet-1.0.2}/setup.cfg +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: mawo-slovnet
|
|
3
|
-
Version: 1.0.1
|
|
3
|
+
Version: 1.0.2
|
|
4
4
|
Summary: Нейросетевые модели для русского языка: NER, морфология и синтаксис с автоматической загрузкой
|
|
5
5
|
Author-email: MAWO Team <team@mawo.ru>
|
|
6
6
|
Maintainer-email: MAWO Team <team@mawo.ru>
|
|
@@ -7,6 +7,7 @@ Features:
|
|
|
7
7
|
- Hybrid mode: DL models + rule-based fallback
|
|
8
8
|
- 100% качество оригинального SlovNet (если модели доступны)
|
|
9
9
|
"""
|
|
10
|
+
|
|
10
11
|
from __future__ import annotations
|
|
11
12
|
|
|
12
13
|
import logging
|
|
@@ -105,7 +106,9 @@ class EnhancedSlovNetLoader:
|
|
|
105
106
|
self.slovnet_available = True
|
|
106
107
|
logger.info("✅ Original slovnet package available")
|
|
107
108
|
except ImportError:
|
|
108
|
-
logger.info(
|
|
109
|
+
logger.info(
|
|
110
|
+
"ℹ️ Original slovnet package not installed (will try to use numpy-only mode)"
|
|
111
|
+
)
|
|
109
112
|
|
|
110
113
|
def ensure_models_downloaded(self) -> bool:
|
|
111
114
|
"""Проверяет и загружает модели если нужно.
|
|
@@ -124,9 +127,7 @@ class EnhancedSlovNetLoader:
|
|
|
124
127
|
cache_info = downloader.get_cache_info()
|
|
125
128
|
|
|
126
129
|
# Check if any models are cached
|
|
127
|
-
cached_models = [
|
|
128
|
-
name for name, info in cache_info["models"].items() if info["cached"]
|
|
129
|
-
]
|
|
130
|
+
cached_models = [name for name, info in cache_info["models"].items() if info["cached"]]
|
|
130
131
|
|
|
131
132
|
if cached_models:
|
|
132
133
|
logger.info(f"✅ Found cached models: {', '.join(cached_models)}")
|
|
@@ -359,11 +360,19 @@ def get_model_info() -> dict[str, Any]:
|
|
|
359
360
|
__version__ = "2.0.0-mawo-enhanced"
|
|
360
361
|
__author__ = "MAWO Team (based on SlovNet by Alexander Kukushkin)"
|
|
361
362
|
|
|
363
|
+
# Алиасы для удобства использования
|
|
364
|
+
NER = NewsNERTagger
|
|
365
|
+
Morph = NewsMorphTagger
|
|
366
|
+
Syntax = NewsSyntaxParser
|
|
367
|
+
|
|
362
368
|
__all__ = [
|
|
363
369
|
"NewsEmbedding",
|
|
364
370
|
"NewsMorphTagger",
|
|
365
371
|
"NewsNERTagger",
|
|
366
372
|
"NewsSyntaxParser",
|
|
373
|
+
"NER",
|
|
374
|
+
"Morph",
|
|
375
|
+
"Syntax",
|
|
367
376
|
"create_morphology_tagger",
|
|
368
377
|
"download_models",
|
|
369
378
|
"get_model_info",
|
|
@@ -5,6 +5,7 @@ Based on:
|
|
|
5
5
|
- SlovNet v0.6.0 (github.com/natasha/slovnet)
|
|
6
6
|
- MAWO offline-first architecture
|
|
7
7
|
"""
|
|
8
|
+
|
|
8
9
|
from __future__ import annotations
|
|
9
10
|
|
|
10
11
|
import hashlib
|
|
@@ -289,9 +290,7 @@ class SlovNetModelDownloader:
|
|
|
289
290
|
size_mb = 0
|
|
290
291
|
if cached:
|
|
291
292
|
# Calculate directory size
|
|
292
|
-
size_bytes = sum(
|
|
293
|
-
f.stat().st_size for f in model_dir.rglob("*") if f.is_file()
|
|
294
|
-
)
|
|
293
|
+
size_bytes = sum(f.stat().st_size for f in model_dir.rglob("*") if f.is_file())
|
|
295
294
|
size_mb = size_bytes / (1024 * 1024)
|
|
296
295
|
info["total_size_mb"] += size_mb
|
|
297
296
|
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: mawo-slovnet
|
|
3
|
-
Version: 1.0.1
|
|
3
|
+
Version: 1.0.2
|
|
4
4
|
Summary: Нейросетевые модели для русского языка: NER, морфология и синтаксис с автоматической загрузкой
|
|
5
5
|
Author-email: MAWO Team <team@mawo.ru>
|
|
6
6
|
Maintainer-email: MAWO Team <team@mawo.ru>
|
|
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
|
|
|
4
4
|
|
|
5
5
|
[project]
|
|
6
6
|
name = "mawo-slovnet"
|
|
7
|
-
version = "1.0.1"
|
|
7
|
+
version = "1.0.2"
|
|
8
8
|
description = "Нейросетевые модели для русского языка: NER, морфология и синтаксис с автоматической загрузкой"
|
|
9
9
|
readme = "README.md"
|
|
10
10
|
requires-python = ">=3.10"
|
|
@@ -58,7 +58,7 @@ class TestNERInitialization:
|
|
|
58
58
|
|
|
59
59
|
ner = NER()
|
|
60
60
|
# Проверяем, что модель загружена
|
|
61
|
-
assert hasattr(ner, 'model') or hasattr(ner, '_model')
|
|
61
|
+
assert hasattr(ner, "model") or hasattr(ner, "_model")
|
|
62
62
|
|
|
63
63
|
|
|
64
64
|
class TestMorphInitialization:
|
|
@@ -118,7 +118,7 @@ class TestNERFunctionality:
|
|
|
118
118
|
assert len(result) > 0, "Expected to find at least one entity"
|
|
119
119
|
|
|
120
120
|
# Проверяем, что есть сущность типа PER
|
|
121
|
-
has_person = any(span.type == 'PER' for span in result if hasattr(span, 'type'))
|
|
121
|
+
has_person = any(span.type == "PER" for span in result if hasattr(span, "type"))
|
|
122
122
|
assert has_person, "Expected to find PER entity for 'Александр Пушкин'"
|
|
123
123
|
|
|
124
124
|
def test_ner_finds_location(self):
|
|
@@ -130,7 +130,7 @@ class TestNERFunctionality:
|
|
|
130
130
|
result = ner(text)
|
|
131
131
|
|
|
132
132
|
assert len(result) > 0
|
|
133
|
-
has_location = any(span.type == 'LOC' for span in result if hasattr(span, 'type'))
|
|
133
|
+
has_location = any(span.type == "LOC" for span in result if hasattr(span, "type"))
|
|
134
134
|
assert has_location, "Expected to find LOC entity for 'Санкт-Петербурге'"
|
|
135
135
|
|
|
136
136
|
def test_ner_empty_text(self):
|
|
@@ -181,7 +181,7 @@ class TestMorphFunctionality:
|
|
|
181
181
|
assert len(result) > 0
|
|
182
182
|
# Проверяем, что есть разметка
|
|
183
183
|
first_token = result[0]
|
|
184
|
-
assert hasattr(first_token, 'pos') or hasattr(first_token, 'tag')
|
|
184
|
+
assert hasattr(first_token, "pos") or hasattr(first_token, "tag")
|
|
185
185
|
|
|
186
186
|
def test_morph_empty_text(self):
|
|
187
187
|
"""Тест: Morph обрабатывает пустой текст"""
|
|
@@ -221,7 +221,7 @@ class TestSyntaxFunctionality:
|
|
|
221
221
|
assert len(result) > 0
|
|
222
222
|
# Проверяем, что есть синтаксические связи
|
|
223
223
|
first_token = result[0]
|
|
224
|
-
assert hasattr(first_token, 'head') or hasattr(first_token, 'rel')
|
|
224
|
+
assert hasattr(first_token, "head") or hasattr(first_token, "rel")
|
|
225
225
|
|
|
226
226
|
|
|
227
227
|
class TestDataFiles:
|
|
@@ -236,7 +236,7 @@ class TestDataFiles:
|
|
|
236
236
|
module_path = Path(mawo_slovnet.__file__).parent
|
|
237
237
|
|
|
238
238
|
# Проверяем наличие модели NER
|
|
239
|
-
ner_model = module_path / 'slovnet_ner_news_v1.tar.neural.gz'
|
|
239
|
+
ner_model = module_path / "slovnet_ner_news_v1.tar.neural.gz"
|
|
240
240
|
assert ner_model.exists(), f"NER model not found at {ner_model}"
|
|
241
241
|
assert ner_model.stat().st_size > 1_000_000, "NER model file is too small"
|
|
242
242
|
|
|
@@ -246,7 +246,7 @@ class TestDataFiles:
|
|
|
246
246
|
import mawo_slovnet
|
|
247
247
|
|
|
248
248
|
module_path = Path(mawo_slovnet.__file__).parent
|
|
249
|
-
morph_model = module_path / 'slovnet_morph_news_v1.tar.neural.gz'
|
|
249
|
+
morph_model = module_path / "slovnet_morph_news_v1.tar.neural.gz"
|
|
250
250
|
|
|
251
251
|
assert morph_model.exists(), f"Morph model not found at {morph_model}"
|
|
252
252
|
assert morph_model.stat().st_size > 1_000_000, "Morph model file is too small"
|
|
@@ -257,7 +257,7 @@ class TestDataFiles:
|
|
|
257
257
|
import mawo_slovnet
|
|
258
258
|
|
|
259
259
|
module_path = Path(mawo_slovnet.__file__).parent
|
|
260
|
-
syntax_model = module_path / 'slovnet_syntax_news_v1.tar.neural.gz'
|
|
260
|
+
syntax_model = module_path / "slovnet_syntax_news_v1.tar.neural.gz"
|
|
261
261
|
|
|
262
262
|
assert syntax_model.exists(), f"Syntax model not found at {syntax_model}"
|
|
263
263
|
assert syntax_model.stat().st_size > 1_000_000, "Syntax model file is too small"
|
|
@@ -343,5 +343,5 @@ class TestMultipleInstances:
|
|
|
343
343
|
assert syntax_result is not None
|
|
344
344
|
|
|
345
345
|
|
|
346
|
-
if __name__ == '__main__':
|
|
347
|
-
pytest.main([__file__, '-v', '--tb=short'])
|
|
346
|
+
if __name__ == "__main__":
|
|
347
|
+
pytest.main([__file__, "-v", "--tb=short"])
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|