reflexive 1.2.7__py3-none-any.whl → 2.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- reflexive/__init__.py +3 -18
- reflexive/analysis_functions.py +383 -0
- reflexive/display_functions.py +165 -0
- reflexive/res_analysis.py +217 -0
- reflexive/service.py +58 -0
- {reflexive-1.2.7.dist-info → reflexive-2.0.0.dist-info}/METADATA +1 -1
- reflexive-2.0.0.dist-info/RECORD +9 -0
- reflexive/analyse.py +0 -430
- reflexive/cfg.py +0 -116
- reflexive/res.py +0 -225
- reflexive/res_functions.py +0 -62
- reflexive/session.py +0 -264
- reflexive/util.py +0 -127
- reflexive/visualise.py +0 -355
- reflexive-1.2.7.dist-info/RECORD +0 -12
- {reflexive-1.2.7.dist-info → reflexive-2.0.0.dist-info}/WHEEL +0 -0
- {reflexive-1.2.7.dist-info → reflexive-2.0.0.dist-info}/licenses/LICENSE +0 -0
|
@@ -0,0 +1,217 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
import os
|
|
3
|
+
from functools import (partial)
|
|
4
|
+
import pandas as pd
|
|
5
|
+
from reflexive.service import AWS_service
|
|
6
|
+
from reflexive.analysis_functions import (
|
|
7
|
+
_clean_text,
|
|
8
|
+
_whitespace_cleaner,
|
|
9
|
+
_upload_text,
|
|
10
|
+
_s3_text_uploader,
|
|
11
|
+
_analyse_text,
|
|
12
|
+
_comprehend_cer_analyser,
|
|
13
|
+
_cer_job_progress,
|
|
14
|
+
_download_from_s3,
|
|
15
|
+
_extract_save_results,
|
|
16
|
+
_analysis_to_dataframe,
|
|
17
|
+
_add_offsets,
|
|
18
|
+
_offset_cleaner,
|
|
19
|
+
_orphan_joiner,
|
|
20
|
+
_add_res_sequence,
|
|
21
|
+
_add_res_interactions,
|
|
22
|
+
_add_res_weights,
|
|
23
|
+
_add_res_adj_matrix,
|
|
24
|
+
_jaccard_similarity,
|
|
25
|
+
_cosine_similarity,
|
|
26
|
+
_date_string,
|
|
27
|
+
_create_local_dir
|
|
28
|
+
)
|
|
29
|
+
from reflexive.display_functions import (
|
|
30
|
+
_create_displacy_ents,
|
|
31
|
+
_render_annotated_text,
|
|
32
|
+
_create_graph,
|
|
33
|
+
_draw_graph
|
|
34
|
+
)
|
|
35
|
+
|
|
36
|
+
class RES_analyser:
    """Entry point that wires the analysis helper functions to an AWS connection.

    Each public method is a thin wrapper around a helper imported from
    ``reflexive.analysis_functions`` or ``reflexive.display_functions``.
    The instance carries the two pieces of shared state those helpers need:
    ``config`` (local/S3 directory names) and ``aws_service`` (session and
    clients).
    """

    aws_service: AWS_service
    config: dict

    logger = logging.getLogger(__name__)

    def __init__(self, parameters, prefix="res", postfix=None, dir="/data/") -> None:
        """Build the config dict and try to set up the AWS session and clients.

        Args:
            parameters: Passed unchanged to ``AWS_service``.
            prefix: Leading part of the S3 source/target directory names.
            postfix: Trailing part of those names; when falsy, the value of
                ``_date_string()`` is used instead.
            dir: Passed to ``_create_local_dir`` to obtain the local data dir.

        Setup problems are logged rather than raised, so construction always
        completes; check the log (or the clients on ``aws_service``) before
        using the S3/Comprehend methods.
        """
        self.config = self._build_config(prefix, postfix, dir)

        try:
            self.aws_service = AWS_service(parameters)
            self.aws_service.connect()
            self.aws_service.get_s3_client()
            self.aws_service.get_comprehend_client()
        except Exception as e:
            self.logger.error("There was an error setting up the AWS service: %s", repr(e))
        else:
            # AWS_service logs and swallows its own failures, so reaching this
            # branch does not prove anything was created. Verify the pieces
            # actually exist before reporting success.
            missing = [
                name
                for name in ("aws_session", "s3_client", "comprehend_client")
                if getattr(self.aws_service, name, None) is None
            ]
            if missing:
                self.logger.error(
                    "There was an error setting up the AWS service: %s",
                    "missing " + ", ".join(missing),
                )
            else:
                self.logger.info("AWS service setup successfully")

    ############################################################
    # MAIN ANALYSIS METHODS

    # Import files for analysis
    def import_files_to_df(self, subdir="data_source/"):
        """Read every file in ``subdir`` into a DataFrame with ``doc_name`` and ``text`` columns."""
        return pd.DataFrame.from_dict(self._open_files_in_subdir(subdir))

    # Preprocess the text
    # Uses clean_text() and whitespace_cleaner()
    # Accepts a df, Returns a df
    #
    # Wrapped in staticmethod so that ``self`` is never injected as the first
    # argument: functools.partial objects stored on a class warn about this
    # from Python 3.13 and become bound methods in 3.14.
    preprocess_text = staticmethod(partial(_clean_text, text_cleaner=_whitespace_cleaner))

    # Upload text to s3
    # Uses _upload_text()
    # Accepts a df with 'text' and returns a df with 'uploaded' column
    def upload_text_to_s3(self, df):
        """Delegate to ``_upload_text`` using ``_s3_text_uploader`` and this instance."""
        return _upload_text(df, uploader=_s3_text_uploader, res_analyser=self)

    # Initiate comprehend custom entity analysis
    # Uses _analyse_text()
    # Accepts a df with 'uploaded' and returns a df with 'analysed' column
    # NOTE(review): the comment above mentions a df, but this method takes
    # none and passes none on - confirm against _analyse_text's signature.
    def analyse_text(self):
        """Delegate to ``_analyse_text`` using ``_comprehend_cer_analyser`` and this instance."""
        return _analyse_text(analyser=_comprehend_cer_analyser, res_analyser=self)

    # Monitor analysis process
    def monitor_job_progress(self, status, tz="UTC", useprint=False):
        """Delegate to ``_cer_job_progress``, reporting via ``print`` or the class logger.

        Args:
            status: Passed unchanged to ``_cer_job_progress``.
            tz: Passed unchanged to ``_cer_job_progress``.
            useprint: When true progress goes to ``print``; otherwise to
                ``self.logger.info``.
        """
        output = print if useprint else self.logger.info
        return _cer_job_progress(status, self.aws_service, tz, output)

    # Download and extract results
    def results_download_save_extract(self, status):
        """Run ``_download_from_s3`` and feed its result to ``_extract_save_results``."""
        local_file = _download_from_s3(self, status)
        return _extract_save_results(self, local_file)

    # Add results to dataframe
    def add_res_results(self, df, results):
        """Delegate to ``_analysis_to_dataframe``."""
        return _analysis_to_dataframe(df, results)

    # Get offsets from results and add to dataframe
    # (staticmethod for the same reason as preprocess_text)
    process_offsets = staticmethod(
        partial(_add_offsets, offset_cleaner=_offset_cleaner, orphan_joiner=_orphan_joiner)
    )

    # Add text_display_ents
    def add_text_display(self, df):
        """Add a ``text_display_ents`` column built row by row and return ``df``.

        Each row must expose ``doc_name``, ``text`` and ``offsets_clean``;
        the column is assigned on the passed-in DataFrame (mutated in place).
        """
        df['text_display_ents'] = [
            _create_displacy_ents(r.doc_name, r.text, r.offsets_clean)
            for _, r in df.iterrows()
        ]
        return df

    # Create adjacency matrix from offsets
    def add_interactions(self, df):
        """Apply the sequence, interactions, weights and adjacency-matrix helpers in order."""
        # Get RE sequence
        df = _add_res_sequence(df)
        df = _add_res_interactions(df)
        df = _add_res_weights(df)
        df = _add_res_adj_matrix(df)
        return df

    # Graph Jaccard Similarity
    def get_jaccard_similarity(self, g1, g2):
        """Delegate to ``_jaccard_similarity``."""
        return _jaccard_similarity(g1, g2)

    def get_cosine_similarity(self, m1, m2):
        """Delegate to ``_cosine_similarity``."""
        return _cosine_similarity(m1, m2)

    ############################################################
    # UTILITY METHODS

    # Create the config dict used by s3 and comprehend methods
    def _build_config(self, prefix: str, postfix: str, dir: str) -> dict[str, str]:
        """Return the config dict with the local data dir and S3 directory names."""
        if not postfix:
            postfix = _date_string()
        return {
            "local_data_dir": _create_local_dir(dir, logger=self.logger),
            "s3_source_dir": f"{prefix}_files_{postfix}",
            "s3_target_dir": f"{prefix}_results_{postfix}",
        }

    # Import files
    def _open_files_in_subdir(self, subdir):
        """Read the regular files directly inside ``<cwd>/<subdir>``.

        Returns:
            ``{"doc_name": [...], "text": [...]}`` with entries in sorted
            file-name order. ``doc_name`` is the part of the file name before
            the first ``.``.

        Sub-directories and other non-file entries are skipped; opening them
        would otherwise raise and abort the whole import.
        """
        file_path = os.path.join(os.getcwd(), subdir)
        file_names = []
        texts = []
        for file_name in sorted(os.listdir(file_path)):
            full_path = os.path.join(file_path, file_name)
            if not os.path.isfile(full_path):
                continue
            # NOTE(review): files are decoded with the platform default
            # encoding, and dot-files (e.g. ".DS_Store") yield an empty
            # doc_name - confirm whether either needs handling.
            with open(full_path, 'r') as fp:
                text = fp.read()
            file_names.append(file_name.split('.')[0])
            texts.append(text)
        return {"doc_name": file_names, "text": texts}
|
|
147
|
+
|
|
148
|
+
|
|
149
|
+
|
|
150
|
+
|
|
151
|
+
class RES_visualiser:
    """Thin wrapper around the rendering helpers in ``reflexive.display_functions``."""

    config: dict

    logger = logging.getLogger(__name__)

    def __init__(self) -> None:
        """Create a visualiser; no state is initialised at present."""
        return None
        # self.config = self._build_config()

        # try:

        # except Exception as e:
        #     self.logger.error("There was an error setting up: %s",repr(e))
        # else:
        #     self.logger.info("setup successfully")

    ############################################################
    # MAIN VISUALISATION METHODS

    def show_annotated_text(self, ents):
        """Delegate to ``_render_annotated_text`` with its default arguments."""
        return _render_annotated_text(ents)

    def save_annotated_text(self, name, ents, subdir="data/"):
        """Write the rendering of ``ents`` to ``<cwd>/<subdir>/<name>.html``.

        Args:
            name: File name without the ``.html`` extension.
            ents: Passed unchanged to ``_render_annotated_text``.
            subdir: Directory relative to the current working directory; a
                trailing slash is optional. The directory must already exist.
        """
        with open(self._html_path(name, subdir), "w") as fp:
            fp.write(_render_annotated_text(ents, inline=False))

    def create_res_graph(self, matrix=None, id=None):
        """Delegate to ``_create_graph``."""
        return _create_graph(matrix, id)

    def show_graph(self, graph):
        """Delegate to ``_draw_graph`` with its second argument set to ``True``."""
        return _draw_graph(graph, True)

    def save_graph(self, graph):
        """Delegate to ``_draw_graph`` with its second argument set to ``False``."""
        return _draw_graph(graph, False)

    ############################################################
    # UTILITY METHODS

    def _html_path(self, name, subdir):
        """Return ``<cwd>/<subdir>/<name>.html``.

        Joining path components, rather than concatenating strings, keeps the
        file inside ``subdir`` whether or not ``subdir`` ends with a separator.
        """
        return os.path.join(os.getcwd(), subdir, f"{name}.html")

    # def show_df_graphs(self,df,scale=10,inline=True) -> str:
    #     for am in df.res_adj_matrix:
    #         if scale > 1:
    #             sm = self._scale_adj_matrix(am,scale)
    #         else:
    #             sm = am
    #         g = self.create_res_graph(sm)
    #         _draw_graph(g,True)
    #     return ""

    # def _scale_adj_matrix(self,adj_matrix,scale):
    #     new_adj = []
    #     for row in adj_matrix:
    #         new_row = []
    #         for c in row:
    #             new_row.append(round(c*scale,1))
    #         new_adj.append(new_row)
    #     return new_adj

    # Create the config dict used by s3 and comprehend methods
    # def _build_config()->dict[str,str]:
    #     return {"":"",
    #             "":"",
    #             "":""}
|
|
216
|
+
|
|
217
|
+
|
reflexive/service.py
ADDED
|
@@ -0,0 +1,58 @@
|
|
|
1
|
+
import boto3
|
|
2
|
+
import logging
|
|
3
|
+
|
|
4
|
+
class AWS_service:
    """Holds a boto3 session plus the S3 and Comprehend clients created from it.

    Typical order of use: construct, ``connect()``, then ``get_s3_client()``
    and ``get_comprehend_client()``. None of these raise on failure; they log
    the problem and leave the corresponding attribute at its default
    (``None`` / ``""``), so callers should check the attributes before use.
    """

    logger = logging.getLogger(__name__)

    # Shape of the parameter dict expected by the constructor. The instance
    # attribute set in __init__ replaces this wholesale (no merging).
    aws_params = {"profile": "default",
                  "s3_access_point": "",
                  "s3_bucket_name": "",
                  "comprehend_service_role_name": "",
                  "reflexive_entity_name": "",
                  "reflexive_entity_version": ""
                  }

    # String annotation: the session is None until connect() succeeds.
    aws_session: "boto3.Session | None" = None
    aws_account_number: str = ""
    s3_client = None
    comprehend_client = None

    def __init__(self, params: dict[str, str]) -> None:
        """Store ``params`` as this instance's ``aws_params``; no connection is made."""
        self.aws_params = params
        return None

    def connect(self) -> None:
        """Create the boto3 session for ``aws_params['profile']`` and look up the account id.

        Any exception (including a missing ``'profile'`` key) is logged and
        swallowed.
        """
        try:
            self.aws_session = boto3.Session(profile_name=self.aws_params['profile'])
            self.aws_account_number = self.aws_session.client('sts').get_caller_identity().get('Account')
        except Exception as e:
            self.logger.error("Unable to create an AWS session: %s", repr(e))
        else:
            self.logger.info("AWS session created successfully")
        return None

    def get_s3_client(self) -> None:
        """Create ``s3_client`` from the current session; log and return on failure."""
        if self.aws_session is None:
            # Without this guard the failure surfaces as an AttributeError on
            # NoneType, which hides the real cause.
            self.logger.error("Unable to get S3 client: %s",
                              "no AWS session available; call connect() first")
            return None
        try:
            self.s3_client = self.aws_session.client(service_name='s3')
        except Exception as e:
            self.logger.error("Unable to get S3 client: %s", repr(e))
        else:
            self.logger.info("AWS s3 client obtained successfully")
        return None

    def get_comprehend_client(self) -> None:
        """Create ``comprehend_client`` from the current session; log and return on failure."""
        if self.aws_session is None:
            # Same guard as get_s3_client: report the missing session plainly.
            self.logger.error("Unable to get comprehend client: %s",
                              "no AWS session available; call connect() first")
            return None
        try:
            self.comprehend_client = self.aws_session.client(service_name='comprehend')
        except Exception as e:
            self.logger.error("Unable to get comprehend client: %s", repr(e))
        else:
            self.logger.info("AWS comprehend client obtained successfully")
        return None

    # def region(self):
    #     return self.aws_session.region_name

    # def access_key(self):
    #     return self.aws_session.get_credentials().access_key
|
|
57
|
+
|
|
58
|
+
|
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
reflexive/__init__.py,sha256=UkA6xvSu40CGYguUuiBqBMnPBkld0h0GR9UnZrptdZI,167
|
|
2
|
+
reflexive/analysis_functions.py,sha256=cVTQtQnQigMJSCoQOtXI0_tyX49Re96Uz-ubx4UToUw,13966
|
|
3
|
+
reflexive/display_functions.py,sha256=2LdkINOgmZfiV7nkW0x_IeimxW3J80YIOOnjX21-RJA,5699
|
|
4
|
+
reflexive/res_analysis.py,sha256=jpb2Fh_jLEZmi4t6I6Q9nVE9LYRc_YJMGx12_co_69Y,6841
|
|
5
|
+
reflexive/service.py,sha256=O0MX2BCHTSNG_eW6LHBN1FOjaNTCGgYgh7vsk58NNAk,1927
|
|
6
|
+
reflexive-2.0.0.dist-info/METADATA,sha256=60ZmW9iHzBxs3BZeUm6KrsRq3LFwDQB6xGS_zuRaYoo,574
|
|
7
|
+
reflexive-2.0.0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
|
8
|
+
reflexive-2.0.0.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
|
|
9
|
+
reflexive-2.0.0.dist-info/RECORD,,
|