reflexive 1.2.7__py3-none-any.whl → 2.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,217 @@
1
+ import logging
2
+ import os
3
+ from functools import (partial)
4
+ import pandas as pd
5
+ from reflexive.service import AWS_service
6
+ from reflexive.analysis_functions import (
7
+ _clean_text,
8
+ _whitespace_cleaner,
9
+ _upload_text,
10
+ _s3_text_uploader,
11
+ _analyse_text,
12
+ _comprehend_cer_analyser,
13
+ _cer_job_progress,
14
+ _download_from_s3,
15
+ _extract_save_results,
16
+ _analysis_to_dataframe,
17
+ _add_offsets,
18
+ _offset_cleaner,
19
+ _orphan_joiner,
20
+ _add_res_sequence,
21
+ _add_res_interactions,
22
+ _add_res_weights,
23
+ _add_res_adj_matrix,
24
+ _jaccard_similarity,
25
+ _cosine_similarity,
26
+ _date_string,
27
+ _create_local_dir
28
+ )
29
+ from reflexive.display_functions import (
30
+ _create_displacy_ents,
31
+ _render_annotated_text,
32
+ _create_graph,
33
+ _draw_graph
34
+ )
35
+
36
class RES_analyser:
    """Front end for a Reflexive Expressions (RES) analysis workflow.

    On construction the instance builds a small config dict (local data
    directory plus S3 source/target directory names) and sets up an
    ``AWS_service``.  The public methods are thin wrappers that delegate to
    the helper functions imported from ``reflexive.analysis_functions`` and
    ``reflexive.display_functions``.
    """

    aws_service:AWS_service
    config:dict

    logger = logging.getLogger(__name__)

    def __init__(self,parameters,prefix="res",postfix=None,dir="/data/")->None:
        """Build the config and try to set up the AWS service.

        Args:
            parameters: Passed unchanged to ``AWS_service(...)``.
            prefix: Leading part of the S3 source/target directory names.
            postfix: Trailing part of those names; when falsy,
                ``_date_string()`` supplies it.
            dir: Passed to ``_create_local_dir`` to obtain the local data
                directory stored in the config.

        Any exception raised while setting up the AWS service is logged and
        swallowed (best effort), so construction itself does not raise for
        AWS problems.
        """
        self.config = self._build_config(prefix,postfix,dir)

        try:
            self.aws_service = AWS_service(parameters)
            self.aws_service.connect()
            self.aws_service.get_s3_client()
            self.aws_service.get_comprehend_client()
        except Exception as e:
            self.logger.error("There was an error setting up the AWS service: %s",repr(e))
        else:
            self.logger.info("AWS service setup successfully")

    ############################################################
    # MAIN ANALYSIS METHODS

    def import_files_to_df(self,subdir="data_source/"):
        """Read the files in ``subdir`` (relative to the current working
        directory) into a DataFrame with ``doc_name`` and ``text`` columns."""
        return pd.DataFrame.from_dict(self._open_files_in_subdir(subdir))

    # Preprocess the text: _clean_text with _whitespace_cleaner pre-bound as
    # ``text_cleaner``.  Per the original notes it accepts a df and returns a
    # df.  Wrapped in staticmethod so that calling it through an instance
    # keeps passing only the caller's arguments (no ``self``): a bare
    # functools.partial used as a class attribute triggers a FutureWarning on
    # Python 3.13 and is slated to start binding ``self``.
    preprocess_text = staticmethod(partial(_clean_text,text_cleaner=_whitespace_cleaner))

    def upload_text_to_s3(self,df):
        """Delegate to ``_upload_text`` using ``_s3_text_uploader`` and this
        instance.  Per the original notes: accepts a df with 'text' and
        returns a df with an 'uploaded' column."""
        return _upload_text(df,uploader=_s3_text_uploader,res_analyser=self)

    def analyse_text(self):
        """Start the Comprehend custom entity analysis via ``_analyse_text``.

        No DataFrame is passed here; the helper receives only the analyser
        function and this instance.
        """
        return _analyse_text(analyser=_comprehend_cer_analyser,res_analyser=self)

    def monitor_job_progress(self,status,tz="UTC",useprint=False):
        """Report job progress via ``_cer_job_progress``.

        Progress output goes to ``print`` when ``useprint`` is true,
        otherwise to this class's logger at INFO level.
        """
        output = print if useprint else self.logger.info
        return _cer_job_progress(status,self.aws_service,tz,output)

    def results_download_save_extract(self,status):
        """Download the results with ``_download_from_s3`` and hand the
        returned local file to ``_extract_save_results``."""
        local_file = _download_from_s3(self,status)
        return _extract_save_results(self,local_file)

    def add_res_results(self,df,results):
        """Add analysis results to the dataframe via ``_analysis_to_dataframe``."""
        return _analysis_to_dataframe(df,results)

    # Get offsets from results and add to dataframe: _add_offsets with the
    # offset cleaner and orphan joiner pre-bound.  staticmethod for the same
    # reason as preprocess_text above.
    process_offsets = staticmethod(
        partial(_add_offsets,offset_cleaner=_offset_cleaner,orphan_joiner=_orphan_joiner)
    )

    def add_text_display(self,df):
        """Add a ``text_display_ents`` column built row by row from
        ``doc_name``, ``text`` and ``offsets_clean``.

        The dataframe is modified in place and also returned.
        """
        df['text_display_ents'] = [
            _create_displacy_ents(row.doc_name,row.text,row.offsets_clean)
            for _,row in df.iterrows()
        ]
        return df

    def add_interactions(self,df):
        """Run the sequence -> interactions -> weights -> adjacency-matrix
        helpers over ``df`` in that order and return the result."""
        for step in (_add_res_sequence,_add_res_interactions,_add_res_weights,_add_res_adj_matrix):
            df = step(df)
        return df

    def get_jaccard_similarity(self,g1,g2):
        """Return ``_jaccard_similarity(g1, g2)``."""
        return _jaccard_similarity(g1,g2)

    def get_cosine_similarity(self,m1,m2):
        """Return ``_cosine_similarity(m1, m2)``."""
        return _cosine_similarity(m1,m2)

    ############################################################
    # UTILITY METHODS

    def _build_config(self,prefix:str,postfix:str,dir:str)->dict[str,str]:
        """Create the config dict used by the s3 and comprehend methods.

        Keys: ``local_data_dir`` (whatever ``_create_local_dir`` returns),
        ``s3_source_dir`` (``{prefix}_files_{postfix}``) and
        ``s3_target_dir`` (``{prefix}_results_{postfix}``).
        """
        if not postfix:
            postfix = _date_string()
        return {"local_data_dir": _create_local_dir(dir,logger=self.logger),
                "s3_source_dir":f"{prefix}_files_{postfix}",
                "s3_target_dir":f"{prefix}_results_{postfix}"}

    def _open_files_in_subdir(self,subdir):
        """Read every regular file directly inside ``<cwd>/<subdir>``.

        Entries are processed in sorted name order.  Sub-directories and
        other non-regular entries are skipped, since ``open()`` on them
        would raise and abort the whole import.

        Returns:
            ``{"doc_name": [...], "text": [...]}`` with parallel lists.
        """
        file_path = os.path.join(os.getcwd(),subdir)
        file_names = []
        texts = []
        for file_name in sorted(os.listdir(file_path)):
            full_path = os.path.join(file_path,file_name)
            if not os.path.isfile(full_path):
                continue
            with open(full_path,'r') as fp:
                texts.append(fp.read())
            # NOTE(review): the doc name is everything before the FIRST dot,
            # so "a.draft.txt" and "a.final.txt" both become "a" - confirm
            # this is intended before relying on doc_name being unique.
            file_names.append(file_name.split('.')[0])
        return {"doc_name":file_names,"text":texts}
147
+
148
+
149
+
150
+
151
class RES_visualiser:
    """Thin wrapper around the display helpers for annotated text and graphs.

    The methods delegate to ``_render_annotated_text``, ``_create_graph`` and
    ``_draw_graph`` imported from ``reflexive.display_functions``.
    """

    # Declared for future use; nothing in this class assigns it yet.
    config:dict

    logger = logging.getLogger(__name__)

    def __init__(self)->None:
        """Create a visualiser; no instance state is initialised."""
        # Config/setup logic has not been implemented for this class yet.

    ############################################################
    # MAIN VISUALISATION METHODS

    def show_annotated_text(self,ents):
        """Return ``_render_annotated_text(ents)``."""
        return _render_annotated_text(ents)

    def save_annotated_text(self,name,ents,subdir="data/"):
        """Render ``ents`` and write the result to ``<cwd>/<subdir>/<name>.html``.

        Args:
            name: File name without the ``.html`` extension.
            ents: Passed to ``_render_annotated_text`` with ``inline=False``.
            subdir: Directory relative to the current working directory; it
                may be given with or without a trailing path separator.

        The directory is not created here, so ``open`` raises if it is
        missing.
        """
        # Joining with a trailing "" guarantees the directory part ends with
        # a separator, so subdir="data" no longer yields "<cwd>/dataNAME.html".
        target_dir = os.path.join(os.getcwd(),subdir,"")
        # Render before opening so a rendering failure does not leave an
        # empty/truncated file behind.
        html = _render_annotated_text(ents,inline=False)
        with open(f"{target_dir}{name}.html","w",encoding="utf-8") as fp:
            fp.write(html)

    def create_res_graph(self,matrix=None,id=None):
        """Return ``_create_graph(matrix, id)``."""
        return _create_graph(matrix,id)

    def show_graph(self,graph):
        """Return ``_draw_graph(graph, True)``."""
        return _draw_graph(graph,True)

    def save_graph(self,graph):
        """Return ``_draw_graph(graph, False)``."""
        return _draw_graph(graph,False)
188
+
189
+ ############################################################
190
+ # UTILITY METHODS
191
+
192
+ # def show_df_graphs(self,df,scale=10,inline=True) -> str:
193
+ # for am in df.res_adj_matrix:
194
+ # if scale > 1:
195
+ # sm = self._scale_adj_matrix(am,scale)
196
+ # else:
197
+ # sm = am
198
+ # g = self.create_res_graph(sm)
199
+ # _draw_graph(g,True)
200
+ # return ""
201
+
202
+ # def _scale_adj_matrix(self,adj_matrix,scale):
203
+ # new_adj = []
204
+ # for row in adj_matrix:
205
+ # new_row = []
206
+ # for c in row:
207
+ # new_row.append(round(c*scale,1))
208
+ # new_adj.append(new_row)
209
+ # return new_adj
210
+
211
+ # Create the config dict used by s3 and comprehend methods
212
+ # def _build_config()->dict[str,str]:
213
+ # return {"":"",
214
+ # "":"",
215
+ # "":""}
216
+
217
+
reflexive/service.py ADDED
@@ -0,0 +1,58 @@
1
+ import boto3
2
+ import logging
3
+
4
class AWS_service:
    """Holds a boto3 session plus the S3 and Comprehend clients built from it.

    Every setup step logs failures instead of raising, so callers should
    check the corresponding attribute (``aws_session``, ``s3_client``,
    ``comprehend_client``) if they need to know whether a step succeeded.
    """

    logger = logging.getLogger(__name__)

    # Default parameter values.  Parameters supplied to __init__ are merged
    # over these, so a caller may pass only the keys it wants to change.
    aws_params = {"profile":"default",
                  "s3_access_point":"",
                  "s3_bucket_name":"",
                  "comprehend_service_role_name":"",
                  "reflexive_entity_name":"",
                  "reflexive_entity_version":""
                  }

    # None until connect() succeeds in creating the session.
    aws_session:"boto3.Session | None" = None
    aws_account_number:str = ""
    s3_client = None
    comprehend_client = None

    def __init__(self,params:dict[str,str])-> None:
        """Store the AWS parameters for this instance.

        Args:
            params: Parameter overrides.  Keys missing from ``params`` fall
                back to the class-level defaults (notably
                ``"profile": "default"``), which previously were discarded
                and made ``connect()`` fail with ``KeyError('profile')``.
                A falsy value means "use the defaults only".
        """
        # Build a fresh dict so the shared class-level defaults are never
        # mutated through an instance.
        self.aws_params = {**type(self).aws_params, **(params or {})}

    def connect(self)->None:
        """Create the boto3 session and look up the account number via STS.

        Failures are logged at ERROR level and not re-raised.
        """
        try:
            self.aws_session = boto3.Session(profile_name=self.aws_params['profile'])
            self.aws_account_number = self.aws_session.client('sts').get_caller_identity().get('Account')
        except Exception as e:
            self.logger.error("Unable to create an AWS session: %s",repr(e))
        else:
            self.logger.info("AWS session created successfully")

    def get_s3_client(self)->None:
        """Create the S3 client from the current session.

        If no session exists (``connect()`` was not called or failed) the
        resulting error is logged and ``s3_client`` is left unchanged.
        """
        try:
            self.s3_client = self.aws_session.client(service_name='s3')
        except Exception as e:
            self.logger.error("Unable to get S3 client: %s",repr(e))
        else:
            self.logger.info("AWS s3 client obtained successfully")

    def get_comprehend_client(self)->None:
        """Create the Comprehend client from the current session.

        If no session exists the resulting error is logged and
        ``comprehend_client`` is left unchanged.
        """
        try:
            self.comprehend_client = self.aws_session.client(service_name='comprehend')
        except Exception as e:
            self.logger.error("Unable to get comprehend client: %s",repr(e))
        else:
            self.logger.info("AWS comprehend client obtained successfully")
51
+
52
+ # def region(self):
53
+ # return self.aws_session.region_name
54
+
55
+ # def access_key(self):
56
+ # return self.aws_session.get_credentials().access_key
57
+
58
+
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: reflexive
3
- Version: 1.2.7
3
+ Version: 2.0.0
4
4
  Summary: Supports AWS Reflexive Expressions Systems (RES) Analysis
5
5
  Project-URL: Repository, https://github.com/nlytx/reflexive.git
6
6
  Author-email: Andrew Gibson <andrew@nlytx.io>
@@ -0,0 +1,9 @@
1
+ reflexive/__init__.py,sha256=UkA6xvSu40CGYguUuiBqBMnPBkld0h0GR9UnZrptdZI,167
2
+ reflexive/analysis_functions.py,sha256=cVTQtQnQigMJSCoQOtXI0_tyX49Re96Uz-ubx4UToUw,13966
3
+ reflexive/display_functions.py,sha256=2LdkINOgmZfiV7nkW0x_IeimxW3J80YIOOnjX21-RJA,5699
4
+ reflexive/res_analysis.py,sha256=jpb2Fh_jLEZmi4t6I6Q9nVE9LYRc_YJMGx12_co_69Y,6841
5
+ reflexive/service.py,sha256=O0MX2BCHTSNG_eW6LHBN1FOjaNTCGgYgh7vsk58NNAk,1927
6
+ reflexive-2.0.0.dist-info/METADATA,sha256=60ZmW9iHzBxs3BZeUm6KrsRq3LFwDQB6xGS_zuRaYoo,574
7
+ reflexive-2.0.0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
8
+ reflexive-2.0.0.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
9
+ reflexive-2.0.0.dist-info/RECORD,,