reflexive-0.1.9-py3-none-any.whl → reflexive-1.0.13-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,128 +0,0 @@
-
-import logging,coloredlogs
-import pandas as pd
-import json
-
-from reflexive.common.parameters import Parameters
-from reflexive.common.util import Util
-
-coloredlogs.install(level='INFO')
-
-class General:
-
-    logger = logging.getLogger(__name__)
-
-    def __init__(self,parameters:Parameters):
-        #print(parameters)
-        self.__parameters = parameters.all_parameters()
-        self.logger.debug(f"Parameters: {self.__parameters}")
-
-
-    def general_analytics(self,df):
-        util = Util()
-        custom_df = df.copy()
-        custom_df["text_length"] = df.text.apply(lambda x: len(x))
-        if (len(custom_df)>1):
-            custom_df["text_scaled"] = util.scale_min_max(custom_df[['text_length']])
-        else:
-            custom_df["text_scaled"] = 1
-        return custom_df
-
-
-    def remove_IQR_outliers(self,df):
-        tempdf = df.copy()
-        # Calculate text length
-        tempdf["text_length"] = tempdf.text.apply(lambda t: len(t))
-        fence = Util.outlier_fence(tempdf.text_length)
-        print(fence)
-        # Check change with removed outliers
-        checkdf = tempdf[tempdf.text_length<fence['UPPER']]
-        checkdf.reset_index(drop=True,inplace=True)
-        print("Original:",len(tempdf))
-        print(tempdf.describe())
-        print()
-        print("Outliers:",len(tempdf)-len(checkdf))
-        print()
-        print("No outliers:",len(checkdf))
-        print(checkdf.describe())
-        return checkdf
-
-    # Parse text for domain terms
-    def parse_domain_terms(self,text,domain_terms):
-        matched_terms = {}
-        for dtk,dtv in domain_terms.items():
-            matched_terms[dtk] = []
-            for term in dtv:
-                if term[0]=='_': #acronym - treat as whole word
-                    regex = r"\b{}\b".format(term[1:])
-                    matches = re.findall(regex,str.lower(text))
-                    if len(matches)>0:
-                        matched_terms[dtk].append((term[1:],len(matches)))
-                else:
-                    count = str.lower(text).count(term)
-                    if count > 0:
-                        matched_terms[dtk].append((term,count))
-        return matched_terms
-
-
-    def get_top_ngrams(self,text_series,min_val=3):
-        ngrams = {}
-        for text in text_series:
-            self.__ngrams345(text,ngrams)
-        #print("Generated 3,4,5 ngrams:", len(ngrams))
-        f_ngrams = self.filter_dict_by_value(ngrams,min_val)
-        return self.sort_dict_by_value(f_ngrams)
-
-    def get_top_ngrams_for_text(self,text,top_ngrams):
-        ngrams = self.__ngrams345(text,{})
-        return {key: ngrams[key] for key in top_ngrams.keys() if key in ngrams}
-
-    def ngram_counts(self,ref_top_ngrams):
-        return sum(ref_top_ngrams.values())
-
-    # Given text and number of terms, create ngrams from the text
-    def __make_ngrams(self,text, n=1):
-        # Replace all none alphanumeric characters with spaces
-        s = re.sub(r'[^a-zA-Z0-9\s]', ' ', text.lower())
-
-        tokens = [token for token in s.split(" ") if token != ""]
-        ngrams = zip(*[tokens[i:] for i in range(n)])
-        return [" ".join(ngram) for ngram in ngrams]
-
-    # Generate 3,4,5 -grams
-    def __ngrams345(self,text,ngrams):
-        ngrams3 = self.__make_ngrams(text,3)
-        for n in ngrams3:
-            ngrams[n] = ngrams.get(n,0)+1
-        ngrams4 = self.__make_ngrams(text,4)
-        for n in ngrams4:
-            ngrams[n] = ngrams.get(n,0)+1
-        ngrams5 = self.__make_ngrams(text,5)
-        for n in ngrams5:
-            ngrams[n] = ngrams.get(n,0)+1
-        return ngrams
-
-
-    # Count domain terms
-    def count_domain_terms(self,terms):
-        counts = {}
-        for k,v in terms.items():
-            for term in v:
-                counts[k] = counts.setdefault(k,0) + term[1]
-        return counts
-
-
-    # Ratio between action POS and object POS
-    def action_object_ratio(self,pos_ratios,action_pos = ['VERB'],object_pos = ['NOUN','PROPN']):
-        ap = [s[1] for s in pos_ratios if s[0] in action_pos]
-        if ap:
-            aps = sum(ap)
-        else:
-            aps = 0
-        op = [s[1] for s in pos_ratios if s[0] in object_pos]
-        if op:
-            ops = sum(op)
-        else:
-            ops = 1 #avoid divide zero error - only happens with aps of 1
-        #print("aps",aps,"ops",ops)
-        return aps/ops
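The removed module above defines a General analytics helper: text-length scaling, IQR outlier removal, domain-term matching, and 3/4/5-gram counting. Its term matcher treats an underscore-prefixed term as an acronym to be matched as a whole word and counts every other term as a plain substring; note the deleted file calls re.findall and re.sub without importing re. A standalone sketch of that matching logic, illustrative only and not part of either package version (the function name, sample text, and terms are made up, and re.escape is added for safety):

    import re

    def match_domain_terms(text, domain_terms):
        # domain_terms maps a category key to a list of terms;
        # a leading underscore marks an acronym matched as a whole word.
        matched = {}
        lowered = text.lower()
        for key, terms in domain_terms.items():
            matched[key] = []
            for term in terms:
                if term.startswith('_'):
                    hits = re.findall(r"\b{}\b".format(re.escape(term[1:])), lowered)
                    if hits:
                        matched[key].append((term[1:], len(hits)))
                else:
                    count = lowered.count(term)
                    if count > 0:
                        matched[key].append((term, count))
        return matched

    print(match_domain_terms("The ml model uses ml and nlp.", {"ai": ["_ml", "nlp"]}))
    # {'ai': [('ml', 2), ('nlp', 1)]}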
@@ -1,124 +0,0 @@
-#
-
-import logging,coloredlogs
-import pandas as pd
-import json
-
-from reflexive.common.parameters import Parameters
-from reflexive.common.local import Local
-from reflexive.common.util import Util
-from reflexive.aws_connect.s3 import S3
-from reflexive.aws_connect.comprehend import Comprehend
-
-
-coloredlogs.install(level='INFO')
-
-class ReflexiveExpressions:
-
-    logger = logging.getLogger(__name__)
-
-    def __init__(self,parameters:Parameters,aws_s3:S3,local:Local,comprehend:Comprehend):
-        #print(parameters)
-        self.__params = parameters
-        self.__parameters = parameters.all_parameters()
-        self.logger.debug(f"Parameters: {self.__parameters}")
-        self.prefix = self.__parameters['prefix']
-        self.postfix = self.__parameters['postfix']
-        self.local_path = self.__parameters['local_path']
-        self.__s3 = aws_s3
-        self.__local = local
-        self.__comprehend = comprehend
-
-
-    ######## REFLEXIVE EXPRESSION ANALYSIS FUNCTIONS
-
-    def analyse_reflexive_expressions(self,df): #,s3_bucket_name,access_role_arn,entity_recogniser_arn):
-        #self.__bucket_name = s3_bucket_name
-        text = df.text.replace('\r\n','\n') # Comprehend treats \r\n as one character
-        # Upload reflections to S3 for analysis
-        self.__s3.upload_docs(text)
-
-        # Save a copy of reflections locally for offline analysis
-        self.__local.save_docs(text)
-
-        # Submit the job
-        return self.__comprehend.submit_custom_entity_job("reflexive_expressions_analysis") #submitReflexiveExpressionsJob(access_role_arn, entity_recogniser_arn)
-
-    def check_job_status(self):
-        return self.__comprehend.check_job_status()
-
-    def get_job_details(self):
-        return self.__comprehend.get_job_details()
-
-    def download_and_extract(self):
-        local_output_dir = f"{self.local_path}{self.prefix}output{self.postfix}"
-        job_details = self.get_job_details()
-        s3Uri = job_details['OutputDataConfig']['S3Uri']
-        return self.__s3.results_download_save_extract(s3Uri,local_output_dir)
-
-    def extractAnalysisFromResults(self,results):
-        analysis_output = dict()
-        for result in results:
-            j = json.loads(result)
-            #print(j)
-            idx = j["File"].split('_')[-1].split('.')[0]
-            analysis_output[int(idx)] = j["Entities"]
-        return analysis_output
-
-    def add_to_dataframe(self,df,results):
-        # Extract analysis from raw results
-        analysis_output = self.extractAnalysisFromResults(results)
-        # Add results to dataframe
-        results_df = df.copy()
-        results_df['reflexiveResults'] = pd.Series(analysis_output)
-        return results_df
-
-    def reflexive_analytics(self,df):
-        util = Util()
-        custom_df = df.copy()
-        # custom_df["text_length"] = df.text.apply(lambda x: len(x))
-        # if (len(custom_df)>1):
-        #     custom_df["text_scaled"] = util.scale_min_max(custom_df[['text_length']])
-        # else:
-        #     custom_df["text_scaled"] = 1
-        custom_df["reflexive_results"] = df.reflexiveResults
-        # The expressions and their reflexive expression labels
-        custom_df["reflexive_expressions"] = df.reflexiveResults.apply(self.parse_reflexiveResults)
-        # The counts for each labels
-        custom_df["reflexive_counts"] = custom_df.reflexive_expressions.apply(util.count_labels)
-        # Ratios between reflexive expressions
-        custom_df["reflexive_ratio"] = custom_df.reflexive_counts.apply(util.ratios)
-        # Ratio vector
-        custom_df['ratio_vector'] = custom_df.reflexive_ratio.apply(self.make_ratio_vector)
-        # Get the diversity of reflexive types - out of 8 possible types
-        custom_df["reflexive_type_diversity"] = custom_df.reflexive_counts.apply(lambda x: len(x)/8)
-        # A total of all labels
-        custom_df["reflexive_total"] = custom_df.reflexive_counts.apply(util.tuple_values_total)
-        # MinMax scale the reflexive_total
-        if (len(custom_df)>1):
-            custom_df["reflexive_scaled"] = util.scale_min_max(custom_df[['reflexive_total']])
-        else:
-            custom_df["reflexive_scaled"] = 1
-        # Normalise based on text_scaled
-        custom_df['reflexive_norm'] = util.normalise_scaled(custom_df,'reflexive_scaled')
-        return custom_df
-
-
-    # Parse reflexive results - include all above threshold
-    def parse_reflexiveResults(self,reflexiveResults,threshold=0.5):
-        final_refs = list()
-        for ref in reflexiveResults:
-            if ref['Score'] > threshold:
-                final_refs.append((str.lower(ref['Text']),ref['Type']))
-        return final_refs
-
-    # Function for creating a vector out of reflexive ratio - could be used for others
-    def make_ratio_vector(self,ratio_list,ref_codes = ['RR','ER','VR','AR','EP','AF','CN','EV']):
-        ratio_dict = dict(ratio_list)
-        vec = []
-        for rc in ref_codes:
-            if rc in ratio_dict.keys():
-                vec.append(ratio_dict[rc])
-            else:
-                vec.append(0)
-        return vec
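ReflexiveExpressions, above, wires together S3 upload, a Comprehend custom-entity job, and post-processing of the results into a DataFrame. parse_reflexiveResults keeps detected expressions scoring above a threshold, and make_ratio_vector turns per-code ratios into a fixed-length vector over the eight codes RR, ER, VR, AR, EP, AF, CN and EV. The ratio calculation itself is delegated to Util.ratios, which this diff does not show, so the sketch below substitutes a simple count-over-total ratio as an assumption; the sample entity payload is invented but follows the shape Comprehend returns (Text, Type, Score, offsets). Illustrative only, not package code:

    REF_CODES = ['RR', 'ER', 'VR', 'AR', 'EP', 'AF', 'CN', 'EV']

    def parse_results(entities, threshold=0.5):
        # keep (text, type) for every detected expression above the confidence threshold
        return [(e['Text'].lower(), e['Type']) for e in entities if e['Score'] > threshold]

    def ratio_vector(refs):
        # assumed ratio: per-code count over total kept expressions (Util.ratios is not in this diff)
        counts = {}
        for _, code in refs:
            counts[code] = counts.get(code, 0) + 1
        total = sum(counts.values()) or 1
        return [counts.get(code, 0) / total for code in REF_CODES]

    sample = [  # made-up payload in the shape of one 'Entities' list from Comprehend
        {'Text': 'I realised', 'Type': 'EP', 'Score': 0.93, 'BeginOffset': 0, 'EndOffset': 10},
        {'Text': 'I think', 'Type': 'EP', 'Score': 0.88, 'BeginOffset': 30, 'EndOffset': 37},
        {'Text': 'we decided', 'Type': 'AR', 'Score': 0.41, 'BeginOffset': 50, 'EndOffset': 60},
    ]
    print(ratio_vector(parse_results(sample)))
    # [0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0]  (both kept expressions are type 'EP')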
File without changes
@@ -1,205 +0,0 @@
-#
-
-
-import boto3
-import time
-import json
-import pandas as pd
-
-from reflexive.common.parameters import Parameters
-from reflexive.common.util import Util
-from reflexive.aws_connect.s3 import S3
-
-import logging
-try:
-    import coloredlogs
-    coloredlogs.install(level='INFO')
-except:
-    print("Colored logs not available")
-
-class Comprehend:
-
-    logger = logging.getLogger(__name__)
-
-    def __init__(self,parameters:Parameters):
-        #print(parameters)
-        self.__parameters = parameters.all_parameters()
-        self.logger.debug(f"Parameters: {self.__parameters}")
-        self.region = self.__parameters['region']
-        self.access_role_arn = self.__parameters['comprehend_access_role_arn']
-        self.entity_recogniser_arn = self.__parameters['reflexive_entity_arn']
-        self.local_path = self.__parameters['local_path']
-        self.prefix = self.__parameters['prefix']
-        self.postfix = self.__parameters['postfix']
-        self.bucket_name = self.__parameters["bucket_name"]
-        self.files_folder = f"{self.prefix}files{self.postfix}"
-        self.results_folder = f"{self.prefix}results{self.postfix}"
-        self.input_uri = f"s3://{self.bucket_name}/{self.files_folder}/{self.prefix}"
-        self.output_uri = f"s3://{self.bucket_name}/{self.results_folder}/"
-        self.analysis_types = self.__parameters['analysis_types']
-        # create client
-        try:
-            self.logger.debug(f"Region:{self.region}")
-            self.__comp_client = boto3.client(service_name='comprehend',region_name=self.region)
-        except Exception as err:
-            self.logger.error("Unable to create Comprehend client: ",err)
-
-
-    def client(self):
-        return self.__comp_client
-
-
-    #### CUSTOM ENTITY
-
-    def submit_custom_entity_job(self,job_name): #access_role_arn,entity_recogniser_arn):
-        job_str = f"{self.prefix}{job_name}{self.postfix}"
-
-        response = self.__comp_client.start_entities_detection_job(
-            InputDataConfig={
-                'S3Uri': self.input_uri,
-                'InputFormat': 'ONE_DOC_PER_FILE'
-            },
-            OutputDataConfig={
-                'S3Uri': self.output_uri
-            },
-            DataAccessRoleArn=self.access_role_arn,
-            JobName=job_str,
-            EntityRecognizerArn=self.entity_recogniser_arn,
-            LanguageCode='en'
-        )
-        self.job_id = response['JobId']
-        return response
-
-    # Check job status
-    def check_job_status(self):
-        job_status = self.__comp_client.describe_entities_detection_job(
-            JobId=self.job_id
-        )
-        self.__job_properties = job_status['EntitiesDetectionJobProperties']
-        return self.__job_properties['JobStatus']
-
-    def get_job_details(self):
-        return self.__job_properties
-
-
-    # Use AWS comprehend to get bulk key phrases from single batch of chunked text
-    def get_single_batch_analysis(self,index,chunk):
-        comprehend = self.client()
-        results = {}
-        print("Analysing chunk",index)
-        print(" . key_phrase")
-        kpresult = comprehend.batch_detect_key_phrases(TextList=chunk,LanguageCode='en')
-        results['KeyPhraseResults'] = kpresult
-        #key_phrase_results.append(kpresult)
-        time.sleep(2)
-        print(" . sentiment")
-        senresult = comprehend.batch_detect_sentiment(TextList=chunk,LanguageCode='en')
-        results['SentimentResults'] = senresult
-        #sentiment_results.append(senresult)
-        time.sleep(2)
-        print(" . targeted_sentiment")
-        tsenresult = comprehend.batch_detect_targeted_sentiment(TextList=chunk,LanguageCode='en')
-        results['TargetedSentimentResults'] = tsenresult
-        #target_sent_results.append(tsenresult)
-        time.sleep(2)
-        print(" . syntax")
-        synresult = comprehend.batch_detect_syntax(TextList=chunk,LanguageCode='en')
-        results['SyntaxResults'] = synresult
-        #syntax_results.append(synresult)
-        time.sleep(2)
-        return results
-
-
-    # Use AWS comprehend to get bulk key phrases from chunked text
-    def get_multiple_batch_analysis(self,chunked_text):
-        chunk_results = {}
-        for key in self.analysis_types.keys():
-            chunk_results[key] = []
-
-        for idx,chunk in enumerate(chunked_text):
-            if len(chunked_text) > 4999:
-                print("WARNING: Text too long to analyse - index",idx,"skipped!")
-            else:
-                try:
-                    results = self.get_single_batch_analysis(index=idx,chunk=chunk)
-                except(Exception) as error:
-                    print("There was an error with index",idx,error)
-                finally:
-                    if results:
-                        for key in results.keys():
-                            chunk_results[key].append(results[key])
-
-        return chunk_results
-
-    # Take batched responses and concenate single lists of results, errors, and http responses
-    def unbatch_results(self,result_type,results,batch_size=25):
-        unbatched_results = {}
-        unbatched_errors = {}
-        batch_responses = {}
-        for idx,batch in enumerate(results):
-            #print("Response for batch:",idx)
-            batch_responses[idx] = batch['ResponseMetadata']
-            result_list = batch['ResultList']
-            error_list = batch['ErrorList']
-            for r in result_list:
-                ridx = idx*batch_size + r['Index']
-                rdata = r[result_type]
-                unbatched_results[ridx] = rdata
-            for e in error_list:
-                eidx = e['Index']
-                unbatched_errors[eidx] = 'ERROR' + e['ErrorCode'] + ': ' + e['ErrorMessage']
-        unbatched = {}
-        unbatched['results'] = unbatched_results
-        unbatched['errors'] = unbatched_errors
-        unbatched['responses'] = batch_responses
-        return unbatched
-
-
-
-    def check_long_text(self,df):
-        # Check for long reflections (too long for batch analysis)
-        long_df = df.copy()
-        long_df = long_df[long_df.text.str.len()>5000]
-        long_df['length'] = long_df.text.str.len()
-        return long_df
-
-
-    # def extract_result(self,result,batch,batch_params):
-    #     match batch:
-    #         case "KeyPhraseResults":
-    #             extracted = [r['Text'] for r in result if r['Score'] >= batch_params["min_score"]]
-    #         case "SentimentResults":
-    #             extracted = result
-    #         case "TargetedSentimentResults":
-    #             extracted = dict()
-    #             for r in result:
-    #                 for mention in r['Mentions']:
-    #                     if (mention['Score'] >= batch_params["min_score"]):
-    #                         text = mention['Text']
-    #                         key = f"{mention['Type']}_{mention['MentionSentiment']['Sentiment']}"
-    #                         if key in extracted.keys():
-    #                             extracted[key].add(text)
-    #                         else:
-    #                             extracted[key] = {text}
-    #         case "SyntaxResults":
-    #             tags = []
-    #             tokens = []
-    #             for r in result:
-    #                 pos = r['PartOfSpeech']
-    #                 tag = pos['Tag']
-    #                 if pos['Score'] < batch_params["max_score"]:
-    #                     tag = tag+"_?"
-    #                 tags.append(tag)
-    #                 tokens.append(r['Text'])
-
-    #             extracted = {'tokens':tokens,'tags':tags}
-    #         case other:
-    #             extracted = []
-    #     return extracted
-
-
-
-
-
-
-
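The Comprehend wrapper above does two things: it runs an asynchronous custom entity-detection job over documents staged in S3, and it runs synchronous batch calls (key phrases, sentiment, targeted sentiment, syntax) over text chunked into batches of at most 25 documents, the limit for the Comprehend batch APIs. unbatch_results then flattens the per-batch responses back to per-document indices with idx*batch_size + r['Index']. A self-contained sketch of that flattening, rewritten as a standalone function with invented sample responses shaped like batch_detect_key_phrases output (not package code):

    # Sketch: flatten batched Comprehend responses back to per-document indices.
    # Each batch holds up to 25 documents, so document i of batch b maps to b*25 + i.
    def unbatch(result_type, batches, batch_size=25):
        results, errors, responses = {}, {}, {}
        for b, batch in enumerate(batches):
            responses[b] = batch['ResponseMetadata']
            for r in batch['ResultList']:
                results[b * batch_size + r['Index']] = r[result_type]
            for e in batch['ErrorList']:
                # as in the removed method, error entries keep their batch-local index
                errors[e['Index']] = f"ERROR {e['ErrorCode']}: {e['ErrorMessage']}"
        return {'results': results, 'errors': errors, 'responses': responses}

    # Two invented batches shaped like batch_detect_key_phrases output.
    batches = [
        {'ResultList': [{'Index': 0, 'KeyPhrases': [{'Text': 'my placement', 'Score': 0.99}]}],
         'ErrorList': [], 'ResponseMetadata': {'HTTPStatusCode': 200}},
        {'ResultList': [{'Index': 1, 'KeyPhrases': [{'Text': 'the project plan', 'Score': 0.97}]}],
         'ErrorList': [], 'ResponseMetadata': {'HTTPStatusCode': 200}},
    ]
    flat = unbatch('KeyPhrases', batches)
    print(sorted(flat['results']))   # [0, 26]  (batch 1, index 1 becomes document 26)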
@@ -1,89 +0,0 @@
-#
-
-import logging,coloredlogs
-import boto3
-import pandas as pd
-import tarfile
-import json
-
-from reflexive.common.parameters import Parameters
-
-coloredlogs.install(level='INFO')
-
-class S3:
-
-    logger = logging.getLogger(__name__)
-
-    def __init__(self,parameters:Parameters):
-        #print(parameters)
-        # set local parameters
-        self.__parameters = parameters.all_parameters()
-        self.logger.debug(f"Parameters: {self.__parameters}")
-        self.region = self.__parameters['region']
-        self.prefix = self.__parameters['prefix']
-        self.postfix = self.__parameters['postfix']
-        self.s3_access_point_arn = self.__parameters["s3_accesspoint_arn"]
-        self.bucket_name = self.__parameters["bucket_name"]
-        # create client
-        try:
-            self.logger.debug(f"Region:{self.region}")
-            self.__s3_client = boto3.client(service_name='s3',region_name=self.region)
-        except Exception as err:
-            self.logger.error("Unable to create S3 client: ",err)
-
-
-    # Return the S3 client
-    def client(self):
-        return self.__s3_client
-
-    # Function to upload reflections to S3
-    def upload_docs(self,text_series):
-        #self.__prefix, self.__postfix
-        files_folder = f"{self.prefix}files{self.postfix}"
-
-        s3 = self.__s3_client
-        s3ap = self.s3_access_point_arn
-        self.logger.debug(f"ACCESS POINT: {s3ap}")
-
-        self.logger.info(f"Uploading {len(text_series)} reflections to S3 ({files_folder})...")
-        self.logger.debug(f"({s3ap}/{files_folder})")
-        for idx in text_series.index:
-            file_name = f"{self.prefix}{idx}.txt"
-            file_body = text_series.iloc[idx]
-            self.logger.info(f"Uploading {file_name}")
-            #print(file_body)
-            response = s3.put_object(Body=file_body,Bucket=s3ap,Key=f"{files_folder}/{file_name}")
-            if response['ResponseMetadata']['HTTPStatusCode'] != 200:
-                self.logger.error("------------------------------------------------------------")
-                self.logger.error(f"ERROR: There was a problem with {file_name}")
-                self.logger.error(response)
-                self.logger.error("------------------------------------------------------------")
-            else:
-                self.logger.info('Success')
-        self.logger.info("Finished uploading reflections to S3.")
-        return response
-
-    # download and save results
-    def results_download_save_extract(self,s3Uri,local_file_path):
-        s3 = self.__s3_client
-        output_key = s3Uri.split(self.bucket_name)[1]
-        # download from S3 to local path
-        with open(f"{local_file_path}.tar.gz",'wb') as output_data:
-            s3.download_fileobj(self.bucket_name,output_key[1:],output_data)
-
-        # extract the files from tar archive
-        files = list()
-        with tarfile.open(f"{local_file_path}.tar.gz", "r:gz") as tf:
-            for member in tf.getmembers():
-                f = tf.extractfile(member)
-                if f is not None:
-                    content = f.read()
-                    files.append(content)
-        #print("Number of files:",len(files))
-        # extract results and save and return
-        raw_results = files[0].decode("utf-8").split('\n')
-        raw_results.pop() # pop last item off as empty entry due to final \n
-        with open(f"{local_file_path}.json","w") as fp:
-            fp.write(json.dumps(raw_results))
-        return raw_results
-
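The S3 helper above writes one object per reflection into a dated files folder, passing the access point ARN as the Bucket argument to put_object, and later downloads and untars the Comprehend output archive. A small sketch of the object-key layout it produces (the prefix and date below are example values, not taken from the package):

    # Sketch of the S3 key layout produced by upload_docs; prefix/postfix follow Parameters' format.
    prefix, postfix = "refex_", "-20240115"
    files_folder = f"{prefix}files{postfix}"                      # refex_files-20240115
    keys = [f"{files_folder}/{prefix}{idx}.txt" for idx in range(3)]
    print(keys)
    # ['refex_files-20240115/refex_0.txt',
    #  'refex_files-20240115/refex_1.txt',
    #  'refex_files-20240115/refex_2.txt']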
File without changes
reflexive/common/local.py DELETED
@@ -1,48 +0,0 @@
-
-import os
-import logging,coloredlogs
-#import boto3
-import pandas as pd
-
-from reflexive.common.parameters import Parameters
-
-coloredlogs.install(level='INFO')
-
-class Local:
-
-    logger = logging.getLogger(__name__)
-
-    def __init__(self,parameters:Parameters):
-        self.__parameters = parameters.all_parameters()
-        self.logger.debug(f"Parameters: {self.__parameters}")
-        self.local_path = self.__parameters['local_path']
-        self.local_dir = self.local_path
-        self.logger.info(f"Path: {self.local_path}")
-        self.prefix = self.__parameters['prefix']
-        self.postfix = self.__parameters['postfix']
-
-    def get_data_path_name(self,name,ext):
-        return f"{self.local_path}{self.prefix}{name}{self.postfix}.{ext}"
-
-    def set_sub_dir(self,sub_dir=None):
-        # check dir sub_dir exists
-        if sub_dir:
-            self.local_dir = f"{self.local_path}{sub_dir}/"
-            self.logger.debug(f"local_dir: {self.local_dir}")
-            dirExists = os.path.exists(self.local_dir)
-            if not dirExists:
-                self.logger.info(f"Creating subdirectory: {self.local_dir}")
-                os.makedirs(self.local_dir)
-        else:
-            self.local_dir = self.local_path
-
-    def save_docs(self,text_series,):
-        self.logger.info(f"Saving {len(text_series)} docs to {self.local_dir}...")
-        for idx in text_series.index:
-            file_name = f"{self.prefix}{idx}.txt"
-            file_body = text_series.iloc[idx]
-            self.logger.info(f"Saving {file_name}")
-            with open(f"{self.local_dir}{file_name}",'w') as fp:
-                fp.write(file_body)
-        self.logger.info("Finished saving reflections locally.")
-
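Local mirrors the S3 upload on disk: one {prefix}{idx}.txt file per document, optionally inside a subdirectory that set_sub_dir creates on demand. A runnable sketch of the resulting layout (the paths, prefix, and texts below are example values, not package code):

    # Sketch of the on-disk layout produced by Local.save_docs.
    import os
    import pandas as pd

    local_path, prefix = "./data/", "refex_"
    local_dir = f"{local_path}docs/"          # equivalent of set_sub_dir("docs")
    os.makedirs(local_dir, exist_ok=True)

    texts = pd.Series(["First reflection.", "Second reflection."])
    for idx in texts.index:
        with open(f"{local_dir}{prefix}{idx}.txt", "w") as fp:
            fp.write(texts.iloc[idx])

    print(sorted(os.listdir(local_dir)))      # ['refex_0.txt', 'refex_1.txt']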
@@ -1,77 +0,0 @@
-# Store the parameters for connecting to AWS
-
-import os
-import logging,coloredlogs
-from datetime import datetime
-import boto3
-
-coloredlogs.install(level='INFO')
-
-class Parameters:
-
-    logger = logging.getLogger(__name__)
-
-    def __init__(self,profile="default",name_prefix="refex",local_path=None,date_string=None):
-        working_dir = os.getcwd()
-        try:
-            aws_session = boto3.Session(profile_name=profile)
-            self.region = aws_session.region_name
-            self.logger.info("AWS region:",self.region)
-            self.access_key = aws_session.get_credentials().access_key
-            self.logger.debug("AWS access key:",self.access_key)
-            self.account_number = aws_session.client('sts').get_caller_identity().get('Account')
-        except Exception as err:
-            self.logger.error("Unable to retrieve AWS credentials",err)
-            self.access_key = None
-            self.region = None
-            self.account_number = None
-
-        # AWS specific
-
-        self.analysis_types = {
-            "KeyPhraseResults":"KeyPhrases",
-            "SentimentResults":"Sentiment",
-            "TargetedSentimentResults":"Entities",
-            "SyntaxResults":"SyntaxTokens"
-        }
-        # General parameters
-
-        if not local_path:
-            self.logger.warning("No path supplied, creating a data directory...")
-            #print(f"WD: {working_dir}")
-            data_dir = working_dir+"/data/"
-            if not os.path.exists(data_dir):
-                os.makedirs(data_dir)
-                self.logger.info("Created:",data_dir)
-            self.local_path = data_dir
-        else:
-            data_dir = local_path
-            if not os.path.exists(data_dir):
-                self.logger.warning("Path does not exist, creating directory")
-                os.makedirs(data_dir)
-                self.logger.info(f"Created {data_dir}")
-            self.local_path = local_path
-        if not date_string:
-            date_string = datetime.today().strftime('%Y%m%d')
-            self.logger.warning(f"No date_string supplied, using today: {date_string}")
-        self.date_string = date_string
-        self.prefix = f"{name_prefix}_"
-        self.postfix = f"-{date_string}"
-        return None
-
-    def all_parameters(self):
-        return self.__dict__
-
-    def set_s3_parameters(self,s3_access_point,bucket_name):
-        self.s3_access_point = s3_access_point
-        self.bucket_name = bucket_name
-        self.s3_accesspoint_arn = f"arn:aws:s3:{self.region}:{self.account_number}:accesspoint/{s3_access_point}"
-
-    def set_comprehend_parameters(self,comprehend_service_role_name):
-        self.comprehend_service_role_name = comprehend_service_role_name
-        self.comprehend_access_role_arn = f"arn:aws:iam::{self.account_number}:role/service-role/{comprehend_service_role_name}"
-
-    def set_comprehend_custom_entity_parameters(self,reflexive_entity_name,reflexive_entity_version):
-        self.reflexive_entity_name = reflexive_entity_name
-        self.reflexive_entity_version = reflexive_entity_version
-        self.reflexive_entity_arn = f"arn:aws:comprehend:{self.region}:{self.account_number}:entity-recognizer/{self.reflexive_entity_name}/version/{self.reflexive_entity_version}"
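Parameters gathers AWS session details and assembles the ARNs the other classes depend on. A sketch of the strings the three setters produce (the region, account number, and resource names below are placeholders, not real resources):

    # Sketch of the ARN formats assembled by set_s3_parameters, set_comprehend_parameters,
    # and set_comprehend_custom_entity_parameters; all values are placeholders.
    region, account = "ap-southeast-2", "123456789012"
    access_point, role = "refex-ap", "ComprehendS3AccessRole"
    entity_name, entity_version = "reflexive-expressions", "v1"

    print(f"arn:aws:s3:{region}:{account}:accesspoint/{access_point}")
    print(f"arn:aws:iam::{account}:role/service-role/{role}")
    print(f"arn:aws:comprehend:{region}:{account}:entity-recognizer/{entity_name}/version/{entity_version}")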