mawo-slovnet 1.0.0__py3-none-any.whl → 1.0.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mawo_slovnet/__init__.py +16 -5
- mawo_slovnet/model_downloader.py +6 -5
- {mawo_slovnet-1.0.0.dist-info → mawo_slovnet-1.0.2.dist-info}/METADATA +3 -2
- mawo_slovnet-1.0.2.dist-info/RECORD +10 -0
- mawo_slovnet-1.0.0.dist-info/RECORD +0 -10
- {mawo_slovnet-1.0.0.dist-info → mawo_slovnet-1.0.2.dist-info}/WHEEL +0 -0
- {mawo_slovnet-1.0.0.dist-info → mawo_slovnet-1.0.2.dist-info}/licenses/LICENSE +0 -0
- {mawo_slovnet-1.0.0.dist-info → mawo_slovnet-1.0.2.dist-info}/top_level.txt +0 -0
mawo_slovnet/__init__.py
CHANGED
|
@@ -7,12 +7,14 @@ Features:
|
|
|
7
7
|
- Hybrid mode: DL models + rule-based fallback
|
|
8
8
|
- 100% качество оригинального SlovNet (если модели доступны)
|
|
9
9
|
"""
|
|
10
|
+
|
|
10
11
|
from __future__ import annotations
|
|
11
12
|
|
|
12
13
|
import logging
|
|
13
14
|
import os
|
|
14
15
|
import sys
|
|
15
|
-
from
|
|
16
|
+
from pathlib import Path
|
|
17
|
+
from typing import Any, Optional
|
|
16
18
|
|
|
17
19
|
logger = logging.getLogger(__name__)
|
|
18
20
|
|
|
@@ -104,7 +106,9 @@ class EnhancedSlovNetLoader:
|
|
|
104
106
|
self.slovnet_available = True
|
|
105
107
|
logger.info("✅ Original slovnet package available")
|
|
106
108
|
except ImportError:
|
|
107
|
-
logger.info(
|
|
109
|
+
logger.info(
|
|
110
|
+
"ℹ️ Original slovnet package not installed (will try to use numpy-only mode)"
|
|
111
|
+
)
|
|
108
112
|
|
|
109
113
|
def ensure_models_downloaded(self) -> bool:
|
|
110
114
|
"""Проверяет и загружает модели если нужно.
|
|
@@ -123,9 +127,7 @@ class EnhancedSlovNetLoader:
|
|
|
123
127
|
cache_info = downloader.get_cache_info()
|
|
124
128
|
|
|
125
129
|
# Check if any models are cached
|
|
126
|
-
cached_models = [
|
|
127
|
-
name for name, info in cache_info["models"].items() if info["cached"]
|
|
128
|
-
]
|
|
130
|
+
cached_models = [name for name, info in cache_info["models"].items() if info["cached"]]
|
|
129
131
|
|
|
130
132
|
if cached_models:
|
|
131
133
|
logger.info(f"✅ Found cached models: {', '.join(cached_models)}")
|
|
@@ -184,6 +186,7 @@ class EnhancedSlovNetLoader:
|
|
|
184
186
|
sys.path.insert(0, str(model_dir))
|
|
185
187
|
|
|
186
188
|
# Import slovnet components
|
|
189
|
+
import slovnet
|
|
187
190
|
from slovnet import NewsEmbedding as _NewsEmbedding
|
|
188
191
|
from slovnet import NewsMorphTagger as _NewsMorphTagger
|
|
189
192
|
from slovnet import NewsNERTagger as _NewsNERTagger
|
|
@@ -357,11 +360,19 @@ def get_model_info() -> dict[str, Any]:
|
|
|
357
360
|
__version__ = "2.0.0-mawo-enhanced"
|
|
358
361
|
__author__ = "MAWO Team (based on SlovNet by Alexander Kukushkin)"
|
|
359
362
|
|
|
363
|
+
# Алиасы для удобства использования
|
|
364
|
+
NER = NewsNERTagger
|
|
365
|
+
Morph = NewsMorphTagger
|
|
366
|
+
Syntax = NewsSyntaxParser
|
|
367
|
+
|
|
360
368
|
__all__ = [
|
|
361
369
|
"NewsEmbedding",
|
|
362
370
|
"NewsMorphTagger",
|
|
363
371
|
"NewsNERTagger",
|
|
364
372
|
"NewsSyntaxParser",
|
|
373
|
+
"NER",
|
|
374
|
+
"Morph",
|
|
375
|
+
"Syntax",
|
|
365
376
|
"create_morphology_tagger",
|
|
366
377
|
"download_models",
|
|
367
378
|
"get_model_info",
|
mawo_slovnet/model_downloader.py
CHANGED
|
@@ -5,13 +5,16 @@ Based on:
|
|
|
5
5
|
- SlovNet v0.6.0 (github.com/natasha/slovnet)
|
|
6
6
|
- MAWO offline-first architecture
|
|
7
7
|
"""
|
|
8
|
+
|
|
8
9
|
from __future__ import annotations
|
|
9
10
|
|
|
11
|
+
import hashlib
|
|
10
12
|
import logging
|
|
13
|
+
import os
|
|
11
14
|
import shutil
|
|
12
15
|
import tarfile
|
|
13
16
|
from pathlib import Path
|
|
14
|
-
from typing import Any
|
|
17
|
+
from typing import Any, Optional
|
|
15
18
|
from urllib.request import urlopen
|
|
16
19
|
|
|
17
20
|
logger = logging.getLogger(__name__)
|
|
@@ -129,7 +132,7 @@ class SlovNetModelDownloader:
|
|
|
129
132
|
)
|
|
130
133
|
|
|
131
134
|
# Extract tar archive
|
|
132
|
-
logger.info("📦 Extracting model archive...")
|
|
135
|
+
logger.info(f"📦 Extracting model archive...")
|
|
133
136
|
with tarfile.open(tar_path, "r") as tar:
|
|
134
137
|
tar.extractall(temp_dir)
|
|
135
138
|
|
|
@@ -287,9 +290,7 @@ class SlovNetModelDownloader:
|
|
|
287
290
|
size_mb = 0
|
|
288
291
|
if cached:
|
|
289
292
|
# Calculate directory size
|
|
290
|
-
size_bytes = sum(
|
|
291
|
-
f.stat().st_size for f in model_dir.rglob("*") if f.is_file()
|
|
292
|
-
)
|
|
293
|
+
size_bytes = sum(f.stat().st_size for f in model_dir.rglob("*") if f.is_file())
|
|
293
294
|
size_mb = size_bytes / (1024 * 1024)
|
|
294
295
|
info["total_size_mb"] += size_mb
|
|
295
296
|
|
|
@@ -1,10 +1,10 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: mawo-slovnet
|
|
3
|
-
Version: 1.0.0
|
|
3
|
+
Version: 1.0.2
|
|
4
4
|
Summary: Нейросетевые модели для русского языка: NER, морфология и синтаксис с автоматической загрузкой
|
|
5
5
|
Author-email: MAWO Team <team@mawo.ru>
|
|
6
6
|
Maintainer-email: MAWO Team <team@mawo.ru>
|
|
7
|
-
License
|
|
7
|
+
License: MIT
|
|
8
8
|
Project-URL: Homepage, https://github.com/mawo-ru/mawo-slovnet
|
|
9
9
|
Project-URL: Documentation, https://github.com/mawo-ru/mawo-slovnet#readme
|
|
10
10
|
Project-URL: Repository, https://github.com/mawo-ru/mawo-slovnet
|
|
@@ -14,6 +14,7 @@ Keywords: nlp,russian,ner,morphology,syntax,slovnet,mawo
|
|
|
14
14
|
Classifier: Development Status :: 5 - Production/Stable
|
|
15
15
|
Classifier: Intended Audience :: Developers
|
|
16
16
|
Classifier: Intended Audience :: Science/Research
|
|
17
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
17
18
|
Classifier: Natural Language :: Russian
|
|
18
19
|
Classifier: Operating System :: OS Independent
|
|
19
20
|
Classifier: Programming Language :: Python :: 3
|
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
mawo_slovnet/__init__.py,sha256=5sCtFUgKeoNzMmCLmS8a5rTVu6jBoB4zY9ZqfblaWDo,13104
|
|
2
|
+
mawo_slovnet/model_downloader.py,sha256=v2ianf2nHiotxA20nzcbZgm6YLVWnTnMm9pN7m1v2nM,12106
|
|
3
|
+
mawo_slovnet/slovnet_morph_news_v1.tar.neural.gz,sha256=J2yKPmU0oULiizuATPJp9KjLhcDBNCwFnRfh6Eu57Rg,2442002
|
|
4
|
+
mawo_slovnet/slovnet_ner_news_v1.tar.neural.gz,sha256=tIgP1tVTYJdIXJhde4oRvVk-qD4oZVSrs9Wh3xsrHwo,2249877
|
|
5
|
+
mawo_slovnet/slovnet_syntax_news_v1.tar.neural.gz,sha256=_SFLVCTcpw1KZjSrt6WrJ8Fom7DUljjBlkfbGMA3XZk,2459132
|
|
6
|
+
mawo_slovnet-1.0.2.dist-info/licenses/LICENSE,sha256=HxcBccBgl94zsrO98Iv1FqnG5cp8fSsnxfq3YDSi7Mg,1066
|
|
7
|
+
mawo_slovnet-1.0.2.dist-info/METADATA,sha256=c1-OZHwkMwgv-XsZuOpB0PY5syv8eALGu5ESRRmPojg,11686
|
|
8
|
+
mawo_slovnet-1.0.2.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
9
|
+
mawo_slovnet-1.0.2.dist-info/top_level.txt,sha256=i0LmrJ3do13Qjwsve1dfFRZN2Fg2Ymf7jN5x9Im2eFE,13
|
|
10
|
+
mawo_slovnet-1.0.2.dist-info/RECORD,,
|
|
@@ -1,10 +0,0 @@
|
|
|
1
|
-
mawo_slovnet/__init__.py,sha256=5NuqOhQlTxs8tJKAxOESMi6CbHjKIPgDBSHs3fjw1dI,12858
|
|
2
|
-
mawo_slovnet/model_downloader.py,sha256=l6hI149WQ-QHPsJQ7KRdL3f1BK9xz3tao0KrV_84RA4,12107
|
|
3
|
-
mawo_slovnet/slovnet_morph_news_v1.tar.neural.gz,sha256=J2yKPmU0oULiizuATPJp9KjLhcDBNCwFnRfh6Eu57Rg,2442002
|
|
4
|
-
mawo_slovnet/slovnet_ner_news_v1.tar.neural.gz,sha256=tIgP1tVTYJdIXJhde4oRvVk-qD4oZVSrs9Wh3xsrHwo,2249877
|
|
5
|
-
mawo_slovnet/slovnet_syntax_news_v1.tar.neural.gz,sha256=_SFLVCTcpw1KZjSrt6WrJ8Fom7DUljjBlkfbGMA3XZk,2459132
|
|
6
|
-
mawo_slovnet-1.0.0.dist-info/licenses/LICENSE,sha256=HxcBccBgl94zsrO98Iv1FqnG5cp8fSsnxfq3YDSi7Mg,1066
|
|
7
|
-
mawo_slovnet-1.0.0.dist-info/METADATA,sha256=7tYZruUhcN-R1zTfzZQCEoEDgwCrlh7I2hqfHsFwsEc,11646
|
|
8
|
-
mawo_slovnet-1.0.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
9
|
-
mawo_slovnet-1.0.0.dist-info/top_level.txt,sha256=i0LmrJ3do13Qjwsve1dfFRZN2Fg2Ymf7jN5x9Im2eFE,13
|
|
10
|
-
mawo_slovnet-1.0.0.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|