pelican-nlp 0.2.1__py3-none-any.whl → 0.2.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
pelican_nlp/_version.py CHANGED
@@ -1 +1 @@
1
- __version__ = "0.2.1"
1
+ __version__ = "0.2.3"
@@ -16,11 +16,12 @@ import pandas as pd
16
16
  import re
17
17
 
18
18
  class Corpus:
19
- def __init__(self, corpus_name, documents, configuration_settings):
19
+ def __init__(self, corpus_name, documents, configuration_settings, project_folder):
20
20
  self.name = corpus_name
21
21
  self.documents = documents
22
22
  self.config = configuration_settings
23
- self.derivative_dir = self.config['PATH_TO_PROJECT_FOLDER']+'/derivatives'
23
+ self.project_folder = project_folder
24
+ self.derivative_dir = project_folder + '/derivatives'
24
25
  self.pipeline = TextPreprocessingPipeline(self.config)
25
26
  self.task = configuration_settings['task_name']
26
27
  self.results_path = None
@@ -112,14 +113,13 @@ class Corpus:
112
113
  from pelican_nlp.extraction.extract_logits import LogitsExtractor
113
114
  from pelican_nlp.preprocessing.text_tokenizer import TextTokenizer
114
115
  logits_options = self.config['options_logits']
115
- project_path = self.config['PATH_TO_PROJECT_FOLDER']
116
116
 
117
117
  print('logits extraction in progress')
118
118
  model_name = logits_options['model_name']
119
119
  logitsExtractor = LogitsExtractor(logits_options,
120
120
  self.pipeline,
121
- project_path)
122
- model = Model(model_name, project_path)
121
+ self.project_folder)
122
+ model = Model(model_name, self.project_folder)
123
123
  model.load_model()
124
124
  model_instance = model.model_instance
125
125
  tokenizer = TextTokenizer(logits_options['tokenization_method'], model_name=logits_options['model_name'])
@@ -153,7 +153,7 @@ class Corpus:
153
153
 
154
154
  embedding_options = self.config['options_embeddings']
155
155
  print('Embeddings extraction in progress...')
156
- embeddingsExtractor = EmbeddingsExtractor(embedding_options, self.config['PATH_TO_PROJECT_FOLDER'])
156
+ embeddingsExtractor = EmbeddingsExtractor(embedding_options, self.project_folder)
157
157
  for i in range(len(self.documents)):
158
158
  for key, section in self.documents[i].cleaned_sections.items():
159
159
  print(f'Processing section {key}')
pelican_nlp/main.py CHANGED
@@ -78,7 +78,7 @@ class Pelican:
78
78
  self._LPDS()
79
79
 
80
80
  # Instantiate all subjects
81
- subjects = subject_instantiator(self.config)
81
+ subjects = subject_instantiator(self.config, self.project_path)
82
82
 
83
83
  # Process each corpus
84
84
  for corpus_name in self.config['corpus_names']:
@@ -4,8 +4,7 @@ import shutil
4
4
  import yaml
5
5
  import sys
6
6
 
7
- def subject_instantiator(config):
8
- project_folder = config['PATH_TO_PROJECT_FOLDER']
7
+ def subject_instantiator(config, project_folder):
9
8
  path_to_subjects = os.path.join(project_folder, 'subjects')
10
9
  print('Instantiating Subjects...')
11
10
  subjects = [Subject(subject) for subject in os.listdir(path_to_subjects)]
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: pelican_nlp
3
- Version: 0.2.1
3
+ Version: 0.2.3
4
4
  Summary: Preprocessing and Extraction of Linguistic Information for Computational Analysis
5
5
  Author-email: Yves Pauli <yves.pauli@gmail.com>
6
6
  License-Expression: CC-BY-NC-4.0
@@ -1,7 +1,7 @@
1
1
  pelican_nlp/__init__.py,sha256=TD5xjKeXXAH6nUWG-6igbClgovi5r8RIEqI_ix1QeYo,204
2
- pelican_nlp/_version.py,sha256=tC9CwL4Nm8brVXJnZNGk_eoZaJj6eOtLKtOrdJMrpoI,21
2
+ pelican_nlp/_version.py,sha256=X0PliCRFAeVnSTceUeHX1eM0j1HFhGFDWCRxLdde2Bs,21
3
3
  pelican_nlp/cli.py,sha256=uXouL67mTjBynFMWpBTaGvGtEBWDnbS1BI7aRdxV-0M,439
4
- pelican_nlp/main.py,sha256=WKiDHXVFKtkD6AkTZSGlT824hKbpp5PAu2-HlcKsi-Q,7501
4
+ pelican_nlp/main.py,sha256=4Lmy87B3x0bDZbKa38Y9ig3DFYYvWhtK4WJpZwBsQB0,7520
5
5
  pelican_nlp/Nils_backup/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
6
6
  pelican_nlp/Nils_backup/extract_acoustic_features.py,sha256=eSP8lXxbZ15YE1HqxGtma9uWOcSN-fI-ig-NwQ9eOA8,10771
7
7
  pelican_nlp/Nils_backup/speaker_diarization_Nils.py,sha256=3RIhjKihu4Z1rruMt9KESFE2lqesfzIpRr7rLummUEo,10219
@@ -38,7 +38,7 @@ pelican_nlp/configuration_files/config_general.yml,sha256=dOBiqOhw0VgV0LZ1boYJhh
38
38
  pelican_nlp/configuration_files/config_morteza.yml,sha256=T378fxvBY9hERVGsnXroDFCy8Zh5PIq4dyer2b5AiDY,3376
39
39
  pelican_nlp/core/__init__.py,sha256=whJc5dWsGsKn2IAw-D4BvCvUKW1sVtWYE1WJIuUr5uI,165
40
40
  pelican_nlp/core/audio_document.py,sha256=hhSJNgeqSYa6_uws2ho66agHhAdHuKN3EIEdIsIcXKg,586
41
- pelican_nlp/core/corpus.py,sha256=6pDRmeO0XoHylhjLE4Fi5Tc3HCMQJ-Xk0YRzEfz5Z1Y,15168
41
+ pelican_nlp/core/corpus.py,sha256=j_p4hWwLnHuGu4jQakG9oVeizDD9mvWVWVJKTu_lMB4,15143
42
42
  pelican_nlp/core/document.py,sha256=j2HP5FX6cfmXHo7OWVFCX6cMsDyqsOmNlnGNNNfCm2c,8467
43
43
  pelican_nlp/core/subject.py,sha256=-pi3jDzb2zLiG8JNAi9i-9Jd-VtsPxDO4ShQci2QSMg,1059
44
44
  pelican_nlp/extraction/__init__.py,sha256=hfqFiaKpQBS6cwRm9Yd7MpOcV60_xJmwuQ2Kegary5k,84
@@ -68,10 +68,10 @@ pelican_nlp/sample_configuration_files/config_general.yml,sha256=UuGnZUa-SVmioE9
68
68
  pelican_nlp/utils/__init__.py,sha256=q1tGdOOj5UPRC2mGhoMUh8p4cbFCkkbD21bQaOVvFao,189
69
69
  pelican_nlp/utils/csv_functions.py,sha256=hsG73gm3Up9sAerp6gIxuNHaeP1vJj6HSh7ggVm1SSo,7272
70
70
  pelican_nlp/utils/sample_usage.py,sha256=W__OVMjWND-ZtxxRhfGJDHwbVpGlB-anXDxyA5P4cME,353
71
- pelican_nlp/utils/setup_functions.py,sha256=s0QcarswU8qeFBcEQNIYC1ooaD-xwRiTJn--yPEId8E,3612
72
- pelican_nlp-0.2.1.dist-info/licenses/LICENSE,sha256=m3jshBZIXKiBX6qhmhtJcLTVJ1N6BEkQGIflneXvpYg,19336
73
- pelican_nlp-0.2.1.dist-info/METADATA,sha256=VvSKygkPyQ_RzTM9-HvKRdP-m0TtQThAC8pLoJJPikE,5562
74
- pelican_nlp-0.2.1.dist-info/WHEEL,sha256=CmyFI0kx5cdEMTLiONQRbGQwjIoR1aIYB7eCAQ4KPJ0,91
75
- pelican_nlp-0.2.1.dist-info/entry_points.txt,sha256=znlG0paAfju9P10UM3rm5HcCHoj4tarTllNpeaqH_gc,53
76
- pelican_nlp-0.2.1.dist-info/top_level.txt,sha256=F0qlyqy5FCd3sTS_npUYPeLKN9_BZq6wD4qo9pI0xbg,12
77
- pelican_nlp-0.2.1.dist-info/RECORD,,
71
+ pelican_nlp/utils/setup_functions.py,sha256=t4WG5qd5iYpNNBGklje_8ukwmJp_C9RMLLi7veDgNeA,3574
72
+ pelican_nlp-0.2.3.dist-info/licenses/LICENSE,sha256=m3jshBZIXKiBX6qhmhtJcLTVJ1N6BEkQGIflneXvpYg,19336
73
+ pelican_nlp-0.2.3.dist-info/METADATA,sha256=3GFutxyfVQ-AfzbVYiew-AFmj_6NyoLIuF6y0bqRLqw,5562
74
+ pelican_nlp-0.2.3.dist-info/WHEEL,sha256=CmyFI0kx5cdEMTLiONQRbGQwjIoR1aIYB7eCAQ4KPJ0,91
75
+ pelican_nlp-0.2.3.dist-info/entry_points.txt,sha256=znlG0paAfju9P10UM3rm5HcCHoj4tarTllNpeaqH_gc,53
76
+ pelican_nlp-0.2.3.dist-info/top_level.txt,sha256=F0qlyqy5FCd3sTS_npUYPeLKN9_BZq6wD4qo9pI0xbg,12
77
+ pelican_nlp-0.2.3.dist-info/RECORD,,