pelican-nlp 0.2.6__py3-none-any.whl → 0.2.7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pelican_nlp/_version.py +1 -1
- pelican_nlp/extraction/language_model.py +26 -2
- {pelican_nlp-0.2.6.dist-info → pelican_nlp-0.2.7.dist-info}/METADATA +3 -3
- {pelican_nlp-0.2.6.dist-info → pelican_nlp-0.2.7.dist-info}/RECORD +8 -8
- {pelican_nlp-0.2.6.dist-info → pelican_nlp-0.2.7.dist-info}/WHEEL +0 -0
- {pelican_nlp-0.2.6.dist-info → pelican_nlp-0.2.7.dist-info}/entry_points.txt +0 -0
- {pelican_nlp-0.2.6.dist-info → pelican_nlp-0.2.7.dist-info}/licenses/LICENSE +0 -0
- {pelican_nlp-0.2.6.dist-info → pelican_nlp-0.2.7.dist-info}/top_level.txt +0 -0
pelican_nlp/_version.py
CHANGED
@@ -1 +1 @@
|
|
1
|
-
__version__ = "0.2.6"
|
1
|
+
__version__ = "0.2.7"
|
@@ -1,5 +1,6 @@
|
|
1
1
|
import torch
|
2
2
|
import psutil
|
3
|
+
import os
|
3
4
|
|
4
5
|
from accelerate import init_empty_weights, infer_auto_device_map, dispatch_model
|
5
6
|
from transformers import AutoModelForCausalLM
|
@@ -17,8 +18,31 @@ class Model:
|
|
17
18
|
if self.model_name == 'fastText':
|
18
19
|
import fasttext
|
19
20
|
import fasttext.util
|
20
|
-
|
21
|
-
|
21
|
+
|
22
|
+
# Create a model directory if it doesn't exist
|
23
|
+
model_dir = os.path.join(os.path.expanduser('~'), '.fasttext')
|
24
|
+
os.makedirs(model_dir, exist_ok=True)
|
25
|
+
|
26
|
+
# Set the model path using proper OS path joining
|
27
|
+
model_path = os.path.join(model_dir, 'cc.de.300.bin')
|
28
|
+
|
29
|
+
# Download only if model doesn't exist
|
30
|
+
if not os.path.exists(model_path):
|
31
|
+
try:
|
32
|
+
fasttext.util.download_model('de', if_exists='ignore')
|
33
|
+
except OSError:
|
34
|
+
# Direct download fallback for Windows
|
35
|
+
import urllib.request
|
36
|
+
url = 'https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.de.300.bin.gz'
|
37
|
+
urllib.request.urlretrieve(url, model_path + '.gz')
|
38
|
+
# Decompress the file
|
39
|
+
import gzip
|
40
|
+
with gzip.open(model_path + '.gz', 'rb') as f_in:
|
41
|
+
with open(model_path, 'wb') as f_out:
|
42
|
+
f_out.write(f_in.read())
|
43
|
+
os.remove(model_path + '.gz')
|
44
|
+
|
45
|
+
self.model_instance = fasttext.load_model(model_path)
|
22
46
|
print('FastText model loaded.')
|
23
47
|
elif self.model_name == 'xlm-roberta-base':
|
24
48
|
from transformers import AutoModel
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: pelican_nlp
|
3
|
-
Version: 0.2.6
|
3
|
+
Version: 0.2.7
|
4
4
|
Summary: Preprocessing and Extraction of Linguistic Information for Computational Analysis
|
5
5
|
Author-email: Yves Pauli <yves.pauli@gmail.com>
|
6
6
|
License-Expression: CC-BY-NC-4.0
|
@@ -69,7 +69,7 @@ Create conda environment
|
|
69
69
|
|
70
70
|
.. code-block:: bash
|
71
71
|
|
72
|
-
conda create -n pelican-nlp python=3.10
|
72
|
+
conda create -n pelican-nlp -c defaults python=3.10
|
73
73
|
|
74
74
|
Activate environment
|
75
75
|
|
@@ -157,7 +157,7 @@ Features
|
|
157
157
|
Examples
|
158
158
|
========
|
159
159
|
|
160
|
-
You can find example setups on the github repository in the
|
160
|
+
You can find example setups on the github repository in the `examples <https://github.com/ypauli/pelican_nlp/tree/main/examples>`_ folder:
|
161
161
|
|
162
162
|
Contributing
|
163
163
|
============
|
@@ -1,5 +1,5 @@
|
|
1
1
|
pelican_nlp/__init__.py,sha256=TD5xjKeXXAH6nUWG-6igbClgovi5r8RIEqI_ix1QeYo,204
|
2
|
-
pelican_nlp/_version.py,sha256=
|
2
|
+
pelican_nlp/_version.py,sha256=LIho7asb0pp1iNbJvXEhRMluyGN4gB4RHIIbAKpROsc,21
|
3
3
|
pelican_nlp/cli.py,sha256=mPz-ASIMUme69G6YGVpTnHr5VfM3XA4h29WFd7DXpa4,588
|
4
4
|
pelican_nlp/main.py,sha256=HX2Rbl4j7RXaMXlGCtggBBqcg3gRh-ey1PdLsQcDX30,7660
|
5
5
|
pelican_nlp/Nils_backup/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
@@ -46,7 +46,7 @@ pelican_nlp/extraction/acoustic_feature_extraction.py,sha256=6Csrr6uotarhuAzxYlG
|
|
46
46
|
pelican_nlp/extraction/distance_from_randomness.py,sha256=yikZ3GK2dqpzuNFPVsjuUK0lo6kHOIoIhKPaVrGXRMQ,3365
|
47
47
|
pelican_nlp/extraction/extract_embeddings.py,sha256=e5bcNlskd7f-JkWtfd7YutGV5bqcURKrAkETRyTx93Q,2457
|
48
48
|
pelican_nlp/extraction/extract_logits.py,sha256=Lc7Es86T8mlSvLMhiDHpFdCc0kCZ9fNr3-VFnOyeybs,3869
|
49
|
-
pelican_nlp/extraction/language_model.py,sha256=
|
49
|
+
pelican_nlp/extraction/language_model.py,sha256=npew_4ziTCNE87pjN8LL0eTPujlewVr8pMT7BsmzEjo,4038
|
50
50
|
pelican_nlp/extraction/semantic_similarity.py,sha256=QhY5CAOAorxEo3UBWPlMegFvbySF0KH6j4j3m2I3_NY,2552
|
51
51
|
pelican_nlp/extraction/test_documents/test_features.csv,sha256=LR_3m4vIm-YWKw5gI5ziswhS-NF9VhKv14c2udLxtJU,488482
|
52
52
|
pelican_nlp/extraction/test_documents/wallace_1.15_3.txt,sha256=ShXxOHUZzGPNUqIcOn6-OYkarzNtTC22V05a_Xpvtlw,3731
|
@@ -69,9 +69,9 @@ pelican_nlp/utils/__init__.py,sha256=q1tGdOOj5UPRC2mGhoMUh8p4cbFCkkbD21bQaOVvFao
|
|
69
69
|
pelican_nlp/utils/csv_functions.py,sha256=hsG73gm3Up9sAerp6gIxuNHaeP1vJj6HSh7ggVm1SSo,7272
|
70
70
|
pelican_nlp/utils/sample_usage.py,sha256=W__OVMjWND-ZtxxRhfGJDHwbVpGlB-anXDxyA5P4cME,353
|
71
71
|
pelican_nlp/utils/setup_functions.py,sha256=t4WG5qd5iYpNNBGklje_8ukwmJp_C9RMLLi7veDgNeA,3574
|
72
|
-
pelican_nlp-0.2.
|
73
|
-
pelican_nlp-0.2.
|
74
|
-
pelican_nlp-0.2.
|
75
|
-
pelican_nlp-0.2.
|
76
|
-
pelican_nlp-0.2.
|
77
|
-
pelican_nlp-0.2.
|
72
|
+
pelican_nlp-0.2.7.dist-info/licenses/LICENSE,sha256=m3jshBZIXKiBX6qhmhtJcLTVJ1N6BEkQGIflneXvpYg,19336
|
73
|
+
pelican_nlp-0.2.7.dist-info/METADATA,sha256=YyZBYza89dtKbvLLHXkxOEZ1BODloXBjh-zZSODLfVI,6155
|
74
|
+
pelican_nlp-0.2.7.dist-info/WHEEL,sha256=CmyFI0kx5cdEMTLiONQRbGQwjIoR1aIYB7eCAQ4KPJ0,91
|
75
|
+
pelican_nlp-0.2.7.dist-info/entry_points.txt,sha256=znlG0paAfju9P10UM3rm5HcCHoj4tarTllNpeaqH_gc,53
|
76
|
+
pelican_nlp-0.2.7.dist-info/top_level.txt,sha256=F0qlyqy5FCd3sTS_npUYPeLKN9_BZq6wD4qo9pI0xbg,12
|
77
|
+
pelican_nlp-0.2.7.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|