reflexive 1.2.8__py3-none-any.whl → 2.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,228 @@
1
+ import logging
2
+ import os
3
+ from functools import (partial)
4
+ import pandas as pd
5
+ from reflexive.service import AWS_service
6
+ from reflexive.analysis_functions import (
7
+ _clean_text,
8
+ _whitespace_cleaner,
9
+ _upload_text,
10
+ _s3_text_uploader,
11
+ _analyse_text,
12
+ _comprehend_cer_analyser,
13
+ _cer_job_progress,
14
+ _download_from_s3,
15
+ _extract_save_results,
16
+ _analysis_to_dataframe,
17
+ _add_offsets,
18
+ _offset_cleaner,
19
+ _orphan_joiner,
20
+ _add_res_sequence,
21
+ _add_res_interactions,
22
+ _add_res_weights,
23
+ _add_semantic_weights,
24
+ _add_res_adj_matrix,
25
+ _jaccard_similarity,
26
+ _cosine_similarity,
27
+ _date_string,
28
+ _create_local_dir
29
+ )
30
+ from reflexive.display_functions import (
31
+ _create_displacy_ents,
32
+ _render_annotated_text,
33
+ _create_graph,
34
+ _draw_graph
35
+ )
36
+
37
class RES_analyser:
    """Drive a Reflexive Expressions (RES) analysis from local text files.

    The class is a thin facade: almost every method forwards to a helper
    imported from ``reflexive.analysis_functions`` or
    ``reflexive.display_functions``, passing along this instance (or its
    ``aws_service`` / ``ranking_factors``) where the helper needs it.
    """

    # AWS connection wrapper; assigned in __init__ (only if construction succeeds).
    aws_service: AWS_service
    # Built by _build_config(): keys "local_data_dir", "s3_source_dir", "s3_target_dir".
    config: dict

    logger = logging.getLogger(__name__)

    # Weight per pair of expression codes. Keys are alpha-sorted tuples, so a
    # lookup must sort the pair first. Passed to _add_semantic_weights().
    ranking_factors = {
        ('RR', 'RR'): 0.5, ('NR', 'RR'): 0.9, ('AR', 'RR'): 0.9, ('AF', 'RR'): 0.8, ('EP', 'RR'): 0.7,
        ('NR', 'NR'): 0.6, ('AR', 'NR'): 1.0, ('AF', 'NR'): 0.8, ('EP', 'NR'): 0.7,
        ('AR', 'AR'): 0.6, ('AF', 'AR'): 0.8, ('AR', 'EP'): 0.7,
        ('AF', 'AF'): 0.5, ('AF', 'EP'): 0.6,
        ('EP', 'EP'): 0.5,
    }

    def __init__(self, parameters, prefix="res", postfix=None, dir="/data/") -> None:
        """Build the config dict and try to set up the AWS service.

        Args:
            parameters: Passed unchanged to ``AWS_service``.
            prefix: Prefix for the S3 source/target directory names.
            postfix: Suffix for the S3 directory names; when falsy,
                ``_date_string()`` is used instead.
            dir: Forwarded to ``_create_local_dir`` to obtain the local data dir.

        AWS setup is best-effort: any exception is logged, not raised, so the
        instance may exist without a usable ``aws_service``.
        """
        self.config = self._build_config(prefix, postfix, dir)

        try:
            self.aws_service = AWS_service(parameters)
            self.aws_service.connect()
            self.aws_service.get_s3_client()
            self.aws_service.get_comprehend_client()
        except Exception as e:
            self.logger.error("There was an error setting up the AWS service: %s", repr(e))
        else:
            self.logger.info("AWS service setup successfully")

    ############################################################
    # MAIN ANALYSIS METHODS

    def import_files_to_df(self, subdir="data_source/"):
        """Read the files under ``<cwd>/<subdir>`` into a DataFrame.

        Returns:
            A DataFrame with the columns ``doc_name`` and ``text``.
        """
        return pd.DataFrame.from_dict(self._open_files_in_subdir(subdir))

    # Preprocess the text: _clean_text() with _whitespace_cleaner() plugged in.
    # Original note: accepts a df, returns a df.
    # functools.partial objects are not descriptors, so calling this through an
    # instance does NOT pass ``self``; the first argument goes straight to
    # _clean_text().
    preprocess_text = partial(_clean_text, text_cleaner=_whitespace_cleaner)

    def upload_text_to_s3(self, df):
        """Forward ``df`` to ``_upload_text`` using ``_s3_text_uploader``.

        Original note: accepts a df with 'text' and returns a df with an
        'uploaded' column.
        """
        return _upload_text(df, uploader=_s3_text_uploader, res_analyser=self)

    def analyse_text(self):
        """Start the analysis via ``_analyse_text`` with ``_comprehend_cer_analyser``.

        Takes no dataframe; only this instance is handed to the helper.
        """
        return _analyse_text(analyser=_comprehend_cer_analyser, res_analyser=self)

    def monitor_job_progress(self, status, tz="UTC", useprint=False):
        """Report job progress through ``_cer_job_progress``.

        Args:
            status: Forwarded to ``_cer_job_progress``.
            tz: Forwarded to ``_cer_job_progress``.
            useprint: When true, progress is emitted with ``print``;
                otherwise with ``self.logger.info``.
        """
        output = print if useprint else self.logger.info
        return _cer_job_progress(status, self.aws_service, tz, output)

    def results_download_save_extract(self, status):
        """Download results with ``_download_from_s3`` and hand the local file
        to ``_extract_save_results``."""
        local_file = _download_from_s3(self, status)
        return _extract_save_results(self, local_file)

    def add_res_results(self, df, results):
        """Forward ``df`` and ``results`` to ``_analysis_to_dataframe``."""
        return _analysis_to_dataframe(df, results)

    # Get offsets from results and add to dataframe: _add_offsets() with the
    # cleaner and orphan joiner plugged in. Like preprocess_text, this is a
    # plain partial and receives no ``self``.
    process_offsets = partial(_add_offsets, offset_cleaner=_offset_cleaner, orphan_joiner=_orphan_joiner)

    def add_text_display(self, df):
        """Add a ``text_display_ents`` column built row by row.

        Each value is ``_create_displacy_ents(doc_name, text, offsets_clean)``
        for that row. ``df`` is modified in place and also returned.
        """
        # Zip the three columns directly; no need to build a Series per row.
        df['text_display_ents'] = [
            _create_displacy_ents(doc_name, text, offsets)
            for doc_name, text, offsets in zip(df['doc_name'], df['text'], df['offsets_clean'])
        ]
        return df

    def add_interactions(self, df):
        """Run the interaction pipeline on ``df`` and return the result.

        Steps, in order: sequence, interactions, weights, semantic weights
        (using ``ranking_factors``), adjacency matrix.
        """
        df = _add_res_sequence(df)
        df = _add_res_interactions(df)
        df = _add_res_weights(df)
        df = _add_semantic_weights(df, self.ranking_factors)
        df = _add_res_adj_matrix(df)
        return df

    def get_jaccard_similarity(self, g1, g2):
        """Forward ``g1`` and ``g2`` to ``_jaccard_similarity``."""
        return _jaccard_similarity(g1, g2)

    def get_cosine_similarity(self, m1, m2):
        """Forward ``m1`` and ``m2`` to ``_cosine_similarity``."""
        return _cosine_similarity(m1, m2)

    ############################################################
    # UTILITY METHODS

    def _build_config(self, prefix: str, postfix: str, dir: str) -> dict[str, str]:
        """Create the config dict used by the S3 and Comprehend helpers.

        A falsy ``postfix`` is replaced by ``_date_string()``.
        """
        if not postfix:
            postfix = _date_string()
        return {
            "local_data_dir": _create_local_dir(dir, logger=self.logger),
            "s3_source_dir": f"{prefix}_files_{postfix}",
            "s3_target_dir": f"{prefix}_results_{postfix}",
        }

    def _open_files_in_subdir(self, subdir):
        """Read every regular file in ``<cwd>/<subdir>``, in sorted name order.

        Returns:
            ``{"doc_name": [...], "text": [...]}`` with one entry per file.

        Raises:
            OSError: If the directory or a file cannot be read.
            UnicodeDecodeError: If a file is not valid UTF-8.
        """
        file_path = os.path.join(os.getcwd(), subdir)
        file_names = []
        texts = []
        for file_name in sorted(os.listdir(file_path)):
            full_path = os.path.join(file_path, file_name)
            # Sub-directories (and other non-files) cannot be opened as text.
            if not os.path.isfile(full_path):
                continue
            # Explicit encoding so the result does not depend on the platform locale.
            with open(full_path, 'r', encoding="utf-8") as fp:
                texts.append(fp.read())
            # NOTE(review): keeps only the part before the FIRST dot, so
            # "a.v1.txt" and "a.v2.txt" both become "a" - confirm this is intended.
            file_names.append(file_name.split('.')[0])
        return {"doc_name": file_names, "text": texts}
158
+
159
+
160
+
161
+
162
class RES_visualiser:
    """Render and save RES annotations and graphs.

    Every public method forwards to a helper imported from
    ``reflexive.display_functions``.
    """

    # Declared but not populated by __init__.
    config: dict

    logger = logging.getLogger(__name__)

    def __init__(self) -> None:
        """Create a visualiser; there is no state to initialise."""
        return None

    ############################################################
    # MAIN VISUALISATION METHODS

    def show_annotated_text(self, ents):
        """Forward ``ents`` to ``_render_annotated_text`` and return its result."""
        return _render_annotated_text(ents)

    @staticmethod
    def _annotated_text_path(name, subdir):
        """Return the HTML output path ``<cwd>/<subdir>/<name>.html``.

        Joining with a trailing empty component guarantees a separator after
        ``subdir`` whether or not the caller supplied one, so ``"data"`` and
        ``"data/"`` resolve to the same file.
        """
        directory = os.path.join(os.getcwd(), subdir, "")
        return f"{directory}{name}.html"

    def save_annotated_text(self, name, ents, subdir="data/"):
        """Write ``_render_annotated_text(ents, inline=False)`` to an HTML file.

        Args:
            name: File name without the ``.html`` extension.
            ents: Forwarded to ``_render_annotated_text``.
            subdir: Directory, relative to the current working directory,
                that receives the file. It must already exist.

        Raises:
            OSError: If the file cannot be opened for writing.
        """
        # Explicit encoding so non-ASCII text is written the same on every platform.
        with open(self._annotated_text_path(name, subdir), "w", encoding="utf-8") as fp:
            fp.write(_render_annotated_text(ents, inline=False))

    def create_res_graph(self, matrix=None, id=None):
        """Forward ``matrix`` and ``id`` to ``_create_graph``."""
        return _create_graph(matrix, id)

    def show_graph(self, graph):
        """Call ``_draw_graph(graph, True)``."""
        return _draw_graph(graph, True)

    def save_graph(self, graph):
        """Call ``_draw_graph(graph, False)``."""
        return _draw_graph(graph, False)
199
+
200
+ ############################################################
201
+ # UTILITY METHODS
202
+
203
+ # def show_df_graphs(self,df,scale=10,inline=True) -> str:
204
+ # for am in df.res_adj_matrix:
205
+ # if scale > 1:
206
+ # sm = self._scale_adj_matrix(am,scale)
207
+ # else:
208
+ # sm = am
209
+ # g = self.create_res_graph(sm)
210
+ # _draw_graph(g,True)
211
+ # return ""
212
+
213
+ # def _scale_adj_matrix(self,adj_matrix,scale):
214
+ # new_adj = []
215
+ # for row in adj_matrix:
216
+ # new_row = []
217
+ # for c in row:
218
+ # new_row.append(round(c*scale,1))
219
+ # new_adj.append(new_row)
220
+ # return new_adj
221
+
222
+ # Create the config dict used by s3 and comprehend methods
223
+ # def _build_config()->dict[str,str]:
224
+ # return {"":"",
225
+ # "":"",
226
+ # "":""}
227
+
228
+
reflexive/service.py ADDED
@@ -0,0 +1,58 @@
1
+ import boto3
2
+ import logging
3
+
4
class AWS_service:
    """Hold an AWS session plus the S3 and Comprehend clients created from it.

    All setup methods are best-effort: failures are logged through
    ``logger`` and leave the corresponding attribute at its default.
    """

    logger = logging.getLogger(__name__)

    # Defaults for every recognised parameter; caller values override these.
    aws_params = {"profile": "default",
                  "s3_access_point": "",
                  "s3_bucket_name": "",
                  "comprehend_service_role_name": "",
                  "reflexive_entity_name": "",
                  "reflexive_entity_version": ""
                  }

    # String annotation: the session is genuinely optional until connect() succeeds.
    aws_session: "boto3.Session | None" = None
    aws_account_number: str = ""
    s3_client = None
    comprehend_client = None

    def __init__(self, params: dict[str, str]) -> None:
        """Store the connection parameters.

        Args:
            params: Values overriding the class-level ``aws_params`` defaults.
                Keys that are omitted keep their default (for example
                ``"profile"`` stays ``"default"``). ``None`` is treated as
                an empty dict.

        The merged result is a new dict, so neither the caller's dict nor the
        class-level defaults are shared with or mutated by this instance.
        """
        self.aws_params = {**type(self).aws_params, **(params or {})}
        return None

    def connect(self) -> None:
        """Create the boto3 session and look up the account number via STS.

        Errors are logged, not raised; on failure ``aws_session`` and/or
        ``aws_account_number`` keep their previous values.
        """
        try:
            self.aws_session = boto3.Session(profile_name=self.aws_params['profile'])
            self.aws_account_number = self.aws_session.client('sts').get_caller_identity().get('Account')
        except Exception as e:
            self.logger.error("Unable to create an AWS session: %s", repr(e))
        else:
            self.logger.info("AWS session created successfully")

    def get_s3_client(self) -> None:
        """Create the S3 client from the session and store it in ``s3_client``.

        Logs an error and leaves ``s3_client`` unchanged when there is no
        session or the client cannot be created.
        """
        if self.aws_session is None:
            # Say why, instead of surfacing an AttributeError on None.
            self.logger.error("Unable to get S3 client: %s", "no AWS session available; call connect() first")
            return None
        try:
            self.s3_client = self.aws_session.client(service_name='s3')
        except Exception as e:
            self.logger.error("Unable to get S3 client: %s", repr(e))
        else:
            self.logger.info("AWS s3 client obtained successfully")
        return None

    def get_comprehend_client(self) -> None:
        """Create the Comprehend client and store it in ``comprehend_client``.

        Logs an error and leaves ``comprehend_client`` unchanged when there
        is no session or the client cannot be created.
        """
        if self.aws_session is None:
            # Say why, instead of surfacing an AttributeError on None.
            self.logger.error("Unable to get comprehend client: %s", "no AWS session available; call connect() first")
            return None
        try:
            self.comprehend_client = self.aws_session.client(service_name='comprehend')
        except Exception as e:
            self.logger.error("Unable to get comprehend client: %s", repr(e))
        else:
            self.logger.info("AWS comprehend client obtained successfully")
        return None
51
+
52
+ # def region(self):
53
+ # return self.aws_session.region_name
54
+
55
+ # def access_key(self):
56
+ # return self.aws_session.get_credentials().access_key
57
+
58
+
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: reflexive
3
- Version: 1.2.8
3
+ Version: 2.1.0
4
4
  Summary: Supports AWS Reflexive Expressions Systems (RES) Analysis
5
5
  Project-URL: Repository, https://github.com/nlytx/reflexive.git
6
6
  Author-email: Andrew Gibson <andrew@nlytx.io>
@@ -0,0 +1,9 @@
1
+ reflexive/__init__.py,sha256=UkA6xvSu40CGYguUuiBqBMnPBkld0h0GR9UnZrptdZI,167
2
+ reflexive/analysis_functions.py,sha256=Qlod0svjBu9kRtGwPL-tPWnrYKLO-QTe07jSrxHZ6-k,14468
3
+ reflexive/display_functions.py,sha256=2LdkINOgmZfiV7nkW0x_IeimxW3J80YIOOnjX21-RJA,5699
4
+ reflexive/res_analysis.py,sha256=1W7Mph30s4qiV63CsN3IDxIydvYelOu28aVdsmF8gcs,7442
5
+ reflexive/service.py,sha256=O0MX2BCHTSNG_eW6LHBN1FOjaNTCGgYgh7vsk58NNAk,1927
6
+ reflexive-2.1.0.dist-info/METADATA,sha256=ulz3xZXF5W41XSq6ZEVF5Nfyi_18vBQ4ie6FA1F7bdo,574
7
+ reflexive-2.1.0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
8
+ reflexive-2.1.0.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
9
+ reflexive-2.1.0.dist-info/RECORD,,