pelican-nlp 0.2.6__py3-none-any.whl → 0.2.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
pelican_nlp/_version.py CHANGED
@@ -1 +1 @@
1
- __version__ = "0.2.6"
1
+ __version__ = "0.2.7"
@@ -1,5 +1,6 @@
1
1
  import torch
2
2
  import psutil
3
+ import os
3
4
 
4
5
  from accelerate import init_empty_weights, infer_auto_device_map, dispatch_model
5
6
  from transformers import AutoModelForCausalLM
@@ -17,8 +18,31 @@ class Model:
17
18
  if self.model_name == 'fastText':
18
19
  import fasttext
19
20
  import fasttext.util
20
- fasttext.util.download_model('de', if_exists='ignore')
21
- self.model_instance = fasttext.load_model('cc.de.300.bin')
21
+
22
+ # Create a model directory if it doesn't exist
23
+ model_dir = os.path.join(os.path.expanduser('~'), '.fasttext')
24
+ os.makedirs(model_dir, exist_ok=True)
25
+
26
+ # Set the model path using proper OS path joining
27
+ model_path = os.path.join(model_dir, 'cc.de.300.bin')
28
+
29
+ # Download only if model doesn't exist
30
+ if not os.path.exists(model_path):
31
+ try:
32
+ fasttext.util.download_model('de', if_exists='ignore')
33
+ except OSError:
34
+ # Direct download fallback for Windows
35
+ import urllib.request
36
+ url = 'https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.de.300.bin.gz'
37
+ urllib.request.urlretrieve(url, model_path + '.gz')
38
+ # Decompress the file
39
+ import gzip
40
+ with gzip.open(model_path + '.gz', 'rb') as f_in:
41
+ with open(model_path, 'wb') as f_out:
42
+ f_out.write(f_in.read())
43
+ os.remove(model_path + '.gz')
44
+
45
+ self.model_instance = fasttext.load_model(model_path)
22
46
  print('FastText model loaded.')
23
47
  elif self.model_name == 'xlm-roberta-base':
24
48
  from transformers import AutoModel
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: pelican_nlp
3
- Version: 0.2.6
3
+ Version: 0.2.7
4
4
  Summary: Preprocessing and Extraction of Linguistic Information for Computational Analysis
5
5
  Author-email: Yves Pauli <yves.pauli@gmail.com>
6
6
  License-Expression: CC-BY-NC-4.0
@@ -69,7 +69,7 @@ Create conda environment
69
69
 
70
70
  .. code-block:: bash
71
71
 
72
- conda create -n pelican-nlp python=3.10
72
+ conda create -n pelican-nlp -c defaults python=3.10
73
73
 
74
74
  Activate environment
75
75
 
@@ -157,7 +157,7 @@ Features
157
157
  Examples
158
158
  ========
159
159
 
160
- You can find example setups on the github repository in the 'examples` folder: https://github.com/ypauli/pelican_nlp/tree/main/examples
160
+ You can find example setups on the github repository in the `examples <https://github.com/ypauli/pelican_nlp/tree/main/examples>`_ folder:
161
161
 
162
162
  Contributing
163
163
  ============
@@ -1,5 +1,5 @@
1
1
  pelican_nlp/__init__.py,sha256=TD5xjKeXXAH6nUWG-6igbClgovi5r8RIEqI_ix1QeYo,204
2
- pelican_nlp/_version.py,sha256=2-ouoBkTVsCtduvfXac3lGkBeSELS7N-9R2Sdu-KxL4,21
2
+ pelican_nlp/_version.py,sha256=LIho7asb0pp1iNbJvXEhRMluyGN4gB4RHIIbAKpROsc,21
3
3
  pelican_nlp/cli.py,sha256=mPz-ASIMUme69G6YGVpTnHr5VfM3XA4h29WFd7DXpa4,588
4
4
  pelican_nlp/main.py,sha256=HX2Rbl4j7RXaMXlGCtggBBqcg3gRh-ey1PdLsQcDX30,7660
5
5
  pelican_nlp/Nils_backup/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -46,7 +46,7 @@ pelican_nlp/extraction/acoustic_feature_extraction.py,sha256=6Csrr6uotarhuAzxYlG
46
46
  pelican_nlp/extraction/distance_from_randomness.py,sha256=yikZ3GK2dqpzuNFPVsjuUK0lo6kHOIoIhKPaVrGXRMQ,3365
47
47
  pelican_nlp/extraction/extract_embeddings.py,sha256=e5bcNlskd7f-JkWtfd7YutGV5bqcURKrAkETRyTx93Q,2457
48
48
  pelican_nlp/extraction/extract_logits.py,sha256=Lc7Es86T8mlSvLMhiDHpFdCc0kCZ9fNr3-VFnOyeybs,3869
49
- pelican_nlp/extraction/language_model.py,sha256=4tHJZIRCEeHVTwEf2jmOtu-zDGkdXiDjKmlpuxDuLiw,2929
49
+ pelican_nlp/extraction/language_model.py,sha256=npew_4ziTCNE87pjN8LL0eTPujlewVr8pMT7BsmzEjo,4038
50
50
  pelican_nlp/extraction/semantic_similarity.py,sha256=QhY5CAOAorxEo3UBWPlMegFvbySF0KH6j4j3m2I3_NY,2552
51
51
  pelican_nlp/extraction/test_documents/test_features.csv,sha256=LR_3m4vIm-YWKw5gI5ziswhS-NF9VhKv14c2udLxtJU,488482
52
52
  pelican_nlp/extraction/test_documents/wallace_1.15_3.txt,sha256=ShXxOHUZzGPNUqIcOn6-OYkarzNtTC22V05a_Xpvtlw,3731
@@ -69,9 +69,9 @@ pelican_nlp/utils/__init__.py,sha256=q1tGdOOj5UPRC2mGhoMUh8p4cbFCkkbD21bQaOVvFao
69
69
  pelican_nlp/utils/csv_functions.py,sha256=hsG73gm3Up9sAerp6gIxuNHaeP1vJj6HSh7ggVm1SSo,7272
70
70
  pelican_nlp/utils/sample_usage.py,sha256=W__OVMjWND-ZtxxRhfGJDHwbVpGlB-anXDxyA5P4cME,353
71
71
  pelican_nlp/utils/setup_functions.py,sha256=t4WG5qd5iYpNNBGklje_8ukwmJp_C9RMLLi7veDgNeA,3574
72
- pelican_nlp-0.2.6.dist-info/licenses/LICENSE,sha256=m3jshBZIXKiBX6qhmhtJcLTVJ1N6BEkQGIflneXvpYg,19336
73
- pelican_nlp-0.2.6.dist-info/METADATA,sha256=DYwHgLjafHAMgzDy0ADYPm_FBvLdRxW98jBAxTH70vU,6140
74
- pelican_nlp-0.2.6.dist-info/WHEEL,sha256=CmyFI0kx5cdEMTLiONQRbGQwjIoR1aIYB7eCAQ4KPJ0,91
75
- pelican_nlp-0.2.6.dist-info/entry_points.txt,sha256=znlG0paAfju9P10UM3rm5HcCHoj4tarTllNpeaqH_gc,53
76
- pelican_nlp-0.2.6.dist-info/top_level.txt,sha256=F0qlyqy5FCd3sTS_npUYPeLKN9_BZq6wD4qo9pI0xbg,12
77
- pelican_nlp-0.2.6.dist-info/RECORD,,
72
+ pelican_nlp-0.2.7.dist-info/licenses/LICENSE,sha256=m3jshBZIXKiBX6qhmhtJcLTVJ1N6BEkQGIflneXvpYg,19336
73
+ pelican_nlp-0.2.7.dist-info/METADATA,sha256=YyZBYza89dtKbvLLHXkxOEZ1BODloXBjh-zZSODLfVI,6155
74
+ pelican_nlp-0.2.7.dist-info/WHEEL,sha256=CmyFI0kx5cdEMTLiONQRbGQwjIoR1aIYB7eCAQ4KPJ0,91
75
+ pelican_nlp-0.2.7.dist-info/entry_points.txt,sha256=znlG0paAfju9P10UM3rm5HcCHoj4tarTllNpeaqH_gc,53
76
+ pelican_nlp-0.2.7.dist-info/top_level.txt,sha256=F0qlyqy5FCd3sTS_npUYPeLKN9_BZq6wD4qo9pI0xbg,12
77
+ pelican_nlp-0.2.7.dist-info/RECORD,,