reflexive-1.2.8-py3-none-any.whl → reflexive-2.0.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
reflexive/res.py DELETED
@@ -1,225 +0,0 @@
- import json
- from pandas import DataFrame
- from spacy import displacy
- from time import sleep
- from reflexive import Config
- from reflexive import AWS
- from reflexive import S3
- from reflexive import Comprehend
- from reflexive import Nlp
- from reflexive import Display
- from reflexive import RES_graph
-
- class Res_analyse:
-
-     config:Config
-     aws:AWS
-     s3:S3
-     comprehend:Comprehend
-     nlp:Nlp
-
-     def __init__(self,aws_profile="default") -> None:
-         return self._setup(aws_profile)
-
-     def _setup(self,aws_profile) -> None:
-         self.config = Config(aws_profile)
-         self.config.set_parameters(name_prefix="RES")
-         self.aws = AWS(self.config)
-         return None
-
-     def set_parameters(self,
-                        s3_access_point:str,
-                        s3_bucket_name:str,
-                        comprehend_service_role_name:str,
-                        reflexive_entity_name:str,
-                        reflexive_entity_version:str) -> str:
-         self.config.set_s3_parameters(s3_access_point,s3_bucket_name)
-         self.config.set_comprehend_parameters(comprehend_service_role_name)
-         self.config.set_comprehend_custom_entity_parameters(reflexive_entity_name,reflexive_entity_version)
-         params = self.config.get_parameters()
-         return json.dumps(params, indent=2)
-
-     def setup_aws(self) -> None:
-         # Create a new S3 client
-         self.s3 = S3(self.aws)
-         # Create a new Comprehend client
-         self.comprehend = Comprehend(self.aws)
-         # Create an Nlp object to perform analysis on the text
-         self.nlp = Nlp(self.aws)
-         return None
-
-     def get_basic_analytics(self,df:DataFrame) -> DataFrame:
-
-         # Text length - this is needed for comprehend analytics
-         df = self.nlp.text_length(df)
-         #df = nlp.remove_IQR_outliers(df)
-         # Comprehend analysis
-         results = self.nlp.comprehend_analysis(self.comprehend,df)
-         #print(results)
-         errors = self.nlp.check_results(results)
-         #print(errors)
-         if errors=={}:
-             print("No errors, so adding results to dataframe")
-             df = self.nlp.add_results_to_df(results,df)
-             df = self.nlp.comprehend_analytics(df)
-         return df
-
-     def get_reflexive_analytics(self,df:DataFrame) -> DataFrame:
-         # Reflexive expression analysis
-         response = self.nlp.analyse_reflexive_expressions(df,self.s3,self.comprehend)
-         #print(response)
-         job_id = self.comprehend.get_current_job_id()
-         print("Job ID:",job_id)
-         status = self.comprehend.check_job_status()
-         print("Status:",status)
-
-         # Get the details of the job
-         # details = comp.get_job_details()
-         # print("Job details:",details)
-
-         inc = 0
-         while status=="SUBMITTED" or status=="IN_PROGRESS":
-             print("Waiting 10 seconds...")
-             sleep(10)
-             status = self.comprehend.check_job_status()
-             print(f"Job status {inc}:",status)
-             inc += 1
-
-         # Download from S3 and extract results
-         print("Downloading and extracting results...")
-         results = self.comprehend.download_and_extract(self.s3)
-         print("RESULTS:")
-         print(results)
-
-         # Extract output of analysis and add to df
-         return self.nlp.add_to_dataframe(df,results)
-
- class Res_display:
-
-     res_analyse:Res_analyse
-     vis:Display
-
-     def __init__(self,res:Res_analyse) -> None:
-         return self._setup(res)
-
-     def _setup(self,res:Res_analyse) -> None:
-         self.res_analyse = res
-         self.vis = Display(res.aws)
-         return None
-
-     def show_text(self,df:DataFrame,inline=True) -> str:
-         df = self.vis.add_offsets(df)
-         disp_data = self.vis.create_displacy(df)
-         if inline:
-             displacy.render(disp_data,manual=True,style="ent", options=self.res_analyse.config.display_options)
-             html_out = "Set inline to false to produce HTML"
-         else:
-             html_out = displacy.render(disp_data,manual=True,style="ent", options=self.res_analyse.config.display_options,page=True,jupyter=False)
-         return html_out
-
-     def get_interactions(self,df:DataFrame) -> DataFrame:
-         # Get RE sequence
-         df = self._add_res_sequence(df)
-         df = self._add_res_interactions(df)
-         df = self._add_res_weights(df)
-         df = self._add_res_adj_matrix(df)
-         return df
-
-     def show_graph(self,df:DataFrame,scale=10,inline=True) -> str:
-         for am in df.res_adj_matrix:
-             if scale > 1:
-                 sm = self._scale_adj_matrix(am,scale)
-             else:
-                 sm = am
-             g = RES_graph(sm)
-             g.show()
-         return ""
-
-     def _scale_adj_matrix(self,adj_matrix,scale):
-         new_adj = []
-         for row in adj_matrix:
-             new_row = []
-             for c in row:
-                 new_row.append(round(c*scale,1))
-             new_adj.append(new_row)
-         return new_adj
-
-     def _add_res_sequence(self,df):
-         temp_df = df.copy()
-         temp_df['res_sequence'] = temp_df.reflexive_expressions.apply(self._get_res_sequence)
-         return temp_df
-
-     def _add_res_interactions(self,df):
-         temp_df = df.copy()
-         temp_df['res_interactions'] = temp_df.res_sequence.apply(self._count_res_interactions)
-         return temp_df
-
-     def _add_res_weights(self,df):
-         temp_df = df.copy()
-         temp_df['res_weights'] = temp_df.res_interactions.apply(self._calc_res_weights)
-         return temp_df
-
-     def _add_res_adj_matrix(self,df):
-         temp_df = df.copy()
-         temp_df['res_adj_matrix'] = temp_df.res_weights.apply(self._create_adj_matrix)
-         return temp_df
-
-     def _get_res_sequence(self,reflexive_expressions):
-         re_seq = [label for re,label in reflexive_expressions]
-         res_seq = []
-         # Need to substitute new RES labels for old RE labels
-         for re in re_seq:
-             if re=='ER' or re=='VR':
-                 res_seq.append('NR')
-             elif re=='EV':
-                 res_seq.append('EP')
-             elif re=='CN':
-                 res_seq.append('AF')
-             else:
-                 res_seq.append(re)
-         return res_seq
-
-     def _empty_res_interactions(self) -> dict[tuple,int]:
-         RE_types = ['RR','NR','AR','AF','EP']
-         RE_interactions:dict[tuple,int] = dict()
-         for t1 in RE_types:
-             for t2 in RE_types:
-                 entry = tuple(sorted((t1,t2)))
-                 if entry not in RE_interactions.keys():
-                     RE_interactions[entry] = 0
-         return RE_interactions
-
-     def _count_res_interactions(self,re_sequence:list[str]) -> dict[tuple,int]:
-         re_ints = self._empty_res_interactions()
-         limit = len(re_sequence)-1
-         for i,s in enumerate(re_sequence):
-             if i < limit:
-                 rei = tuple(sorted((s,re_sequence[i+1])))
-                 #print(i,rei)
-                 re_ints[rei] += 1
-         return re_ints
-
-     def _calc_res_weights(self,interactions:dict[tuple,int])->dict[tuple,float]:
-         max_count = max(interactions.values())
-         weights = dict()
-         for edge,count in interactions.items():
-             weights[edge] = round(count/max(max_count,1),2) # guard against ZeroDivisionError when all counts are 0
-         return weights
-
-
-     def _create_adj_matrix(self,weights:dict[tuple,float])->list[list[float]]:
-         re_types = ["RR","NR","AR","AF","EP"]
-         matrix = []
-         for r in re_types:
-             row = []
-             for c in re_types:
-                 key = tuple(sorted((r,c)))
-                 #print(key)
-                 weight = weights.get(key,0)
-                 row.append(weight)
-             matrix.append(row)
-         return matrix
-
-
-
-
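For orientation: these two classes were the 1.x entry point that 2.0.0 removes. A minimal sketch of how they appear to have been driven, inferred only from the signatures above against the 1.2.8 wheel; every AWS value below is a placeholder, not a real resource:

from pandas import DataFrame
from reflexive.res import Res_analyse, Res_display

res = Res_analyse(aws_profile="default")
print(res.set_parameters(
    s3_access_point="example-access-point",        # placeholder
    s3_bucket_name="example-bucket",               # placeholder
    comprehend_service_role_name="example-role",   # placeholder
    reflexive_entity_name="example-recogniser",    # placeholder
    reflexive_entity_version="1"))                 # placeholder
res.setup_aws()                          # builds the S3, Comprehend and Nlp clients

df = DataFrame({"text": ["I realised I should plan my experiments earlier."]})
df = res.get_basic_analytics(df)         # text length plus batched Comprehend analytics
df = res.get_reflexive_analytics(df)     # async custom-entity job, polled every 10 seconds

disp = Res_display(res)
html = disp.show_text(df, inline=False)  # displaCy entity markup as standalone HTML
df = disp.get_interactions(df)           # sequence -> interactions -> weights -> adjacency matrix
disp.show_graph(df, scale=10)            # renders one RES_graph per row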
@@ -1,62 +0,0 @@
- # import text into dataframe
- # accepts either an iterable of strings, or an iterable over text files
- # returns a pandas series iterable of type string
-
- # clean text and calculate length
- # accepts an iterable of strings in form of pandas series of type string
- # returns a pandas series iterable of type int
-
-
- # chunk text and keep original index ref
- # accepts an iterable of dataframe rows
- # returns an iterable of dataframe rows with added column 'text_chunks' - list of strings
-
- # upload docs to s3 and save local copy - side effects
- # accepts an iterable of iterable of chunks (with ids)
- # returns an iterable of s3 responses? URLs to S3 file?
-
- # initiate custom entity job on comprehend
- # no parameters
- # returns job id for checking status, and downloading
-
- # check status
- # accepts job id
- # returns status
-
- # download results
- # accepts job id
- # returns iterable of results
-
- # unpack results and load into dataframe
-
-
- # extract reflexive expressions into dataframe
-
-
- # get reflexive sequences
-
-
- # get interactions
-
-
- # create count adj matrix
-
-
- # create weighted adj matrix
-
-
- # save dataframe to file
-
-
- # visualise expressions in text
-
-
- # visualise reflexive sequence
-
-
- # visualise res graph
-
-
- #
- # Network analysis functions
- #
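None of the functions sketched in this outline were ever implemented in the file. For the chunking step it describes, a version might have looked like the following; the helper name and the 4,500-character limit are assumptions (Comprehend's batch APIs cap each document at 5,000 characters):

def chunk_text(row, max_len=4500):
    # Hypothetical helper: split one dataframe row's text into <= max_len
    # character chunks, keeping the row's original index for reassembly.
    text = row["text"]
    row["text_chunks"] = [text[i:i + max_len] for i in range(0, len(text), max_len)]
    return row

# Usage: df = df.apply(chunk_text, axis=1)  # adds the 'text_chunks' column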
reflexive/session.py DELETED
@@ -1,264 +0,0 @@
-
- import boto3
- import time
- import tarfile
- import json
-
- from reflexive import cfg
-
- import logging
- #logging.basicConfig(level=logging.DEBUG)
- logger = logging.getLogger(__name__)
-
- class AWS:
-
-     config = None
-     aws_session = None
-
-     def __init__(self,config:cfg.Config):
-         # on initialisation, create a new session with the provided profile (or the default profile)
-         #logger.error(config.get_parameters())
-         if config is None:
-             config = cfg.Config()
-         self.config = config
-         self.new_session()
-
-     def get_parameters(self):
-         return self.__dict__
-
-     def new_session(self):
-         logger.info("In new_session")
-         try:
-             self.aws_session = boto3.Session(profile_name=self.config.aws_profile)
-             self.config.aws_region = self.aws_session.region_name
-             self.config.aws_access_key = self.aws_session.get_credentials().access_key
-             logger.info("Created new AWS session in region %s for profile: %s",self.config.aws_region,self.config.aws_profile)
-
-         except Exception as e:
-             logger.error("Unable to create an AWS session: %s",repr(e))
-
-         try:
-             self.config.aws_account_number = self.aws_session.client('sts').get_caller_identity().get('Account')
-             logger.info("Retrieved account number from AWS")
-         except Exception as e:
-             logger.error("Unable to retrieve account number from AWS: %s",repr(e))
-
-         return self.aws_session
-
-
- class S3:
-
-     aws = None
-     config = None
-     __s3_client = None
-
-     def __init__(self,aws:AWS):
-         self.aws = aws
-         self.config = self.aws.config
-
-         # create client
-         try:
-             logger.debug(f"Region:{self.aws.aws_session.region_name}")
-             self.__s3_client = aws.aws_session.client(service_name='s3')
-         except Exception as err:
-             logger.error("Unable to create S3 client: %s",err)
-
-     # Return the S3 client
-     def client(self):
-         return self.__s3_client
-
-     # Function to upload reflections to S3
-     def upload_docs(self,text_series):
-
-         files_folder = f"{self.config.prefix}files{self.config.postfix}"
-
-         s3 = self.__s3_client
-         s3ap = self.config.s3_accesspoint_arn
-         logger.debug(f"ACCESS POINT: {s3ap}")
-
-         logger.info(f"Uploading {len(text_series)} reflections to S3 ({files_folder})...")
-         logger.debug(f"({s3ap}/{files_folder})")
-         for idx in text_series.index:
-             file_name = f"{self.config.prefix}{idx}.txt"
-             file_body = text_series.iloc[idx]
-             logger.info(f"Uploading {file_name}")
-             #print(file_body)
-             response = s3.put_object(Body=file_body,Bucket=s3ap,Key=f"{files_folder}/{file_name}")
-             if response['ResponseMetadata']['HTTPStatusCode'] != 200:
-                 logger.error("------------------------------------------------------------")
-                 logger.error(f"ERROR: There was a problem with {file_name}")
-                 logger.error(response)
-                 logger.error("------------------------------------------------------------")
-             else:
-                 logger.info('Success')
-         logger.info("Finished uploading reflections to S3.")
-         return response
-
-     # download and save results
-     def results_download_save_extract(self,s3Uri,local_file_path):
-         s3 = self.__s3_client
-         output_key = s3Uri.split(self.config.s3_bucket_name)[1]
-         # download from S3 to local path
-         with open(f"{local_file_path}.tar.gz",'wb') as output_data:
-             s3.download_fileobj(self.config.s3_bucket_name,output_key[1:],output_data)
-
-         # extract the files from tar archive
-         files = list()
-         with tarfile.open(f"{local_file_path}.tar.gz", "r:gz") as tf:
-             for member in tf.getmembers():
-                 f = tf.extractfile(member)
-                 if f is not None:
-                     content = f.read()
-                     files.append(content)
-         #print("Number of files:",len(files))
-         # extract results, save, and return
-         raw_results = files[0].decode("utf-8").split('\n')
-         raw_results.pop() # pop last item off as it is an empty entry due to the final \n
-         json_results = json.dumps(raw_results)
-         with open(f"{local_file_path}.json","w") as fp:
-             fp.write(json_results)
-         return json_results
-
-
- class Comprehend:
-
-     aws = None
-     config = None
-     __comp_client = None
-
-     def __init__(self,aws:AWS):
-         self.aws = aws
-         self.config = self.aws.config
-
-         # create client
-         try:
-             logger.debug(f"Region:{self.aws.aws_session.region_name}")
-             self.__comp_client = self.aws.aws_session.client(service_name='comprehend')
-         except Exception as err:
-             logger.error("Unable to create Comprehend client: %s",err)
-
-     def client(self):
-         return self.__comp_client
-
-     # Use AWS comprehend to get bulk key phrases from a single batch of chunked text
-     def get_single_batch_analysis(self,index,chunk):
-         comp_client = self.client()
-         results = {}
-         print("Analysing chunk",index)
-         print(" . key_phrase")
-         kpresult = comp_client.batch_detect_key_phrases(TextList=chunk,LanguageCode='en')
-         results['KeyPhraseResults'] = kpresult
-         #key_phrase_results.append(kpresult)
-         time.sleep(2)
-         print(" . sentiment")
-         senresult = comp_client.batch_detect_sentiment(TextList=chunk,LanguageCode='en')
-         results['SentimentResults'] = senresult
-         #sentiment_results.append(senresult)
-         time.sleep(2)
-         print(" . targeted_sentiment")
-         tsenresult = comp_client.batch_detect_targeted_sentiment(TextList=chunk,LanguageCode='en')
-         results['TargetedSentimentResults'] = tsenresult
-         #target_sent_results.append(tsenresult)
-         time.sleep(2)
-         print(" . syntax")
-         synresult = comp_client.batch_detect_syntax(TextList=chunk,LanguageCode='en')
-         results['SyntaxResults'] = synresult
-         #syntax_results.append(synresult)
-         time.sleep(2)
-         return results
-
-
-     # Use AWS comprehend to get bulk key phrases from chunked text
-     def get_multiple_batch_analysis(self,chunked_text):
-         chunk_results = {}
-         for key in self.config.analysis_types.keys():
-             chunk_results[key] = []
-
-         for idx,chunk in enumerate(chunked_text):
-             if any(len(text) > 4999 for text in chunk): # each doc must stay under Comprehend's 5,000-char batch limit
-                 print("WARNING: Text too long to analyse - index",idx,"skipped!")
-             else:
-                 results = None # stays None if the call below raises
-                 try:
-                     results = self.get_single_batch_analysis(index=idx,chunk=chunk)
-                 except(Exception) as error:
-                     print("There was an error with index",idx,error)
-                 finally:
-                     if results:
-                         for key in results.keys():
-                             chunk_results[key].append(results[key])
-
-         return chunk_results
-
-     # Take batched responses and concatenate into single lists of results, errors, and http responses
-     def unbatch_results(self,result_type,results,batch_size=25):
-         unbatched_results = {}
-         unbatched_errors = {}
-         batch_responses = {}
-         for idx,batch in enumerate(results):
-             #print("Response for batch:",idx)
-             batch_responses[idx] = batch['ResponseMetadata']
-             result_list = batch['ResultList']
-             error_list = batch['ErrorList']
-             for r in result_list:
-                 ridx = idx*batch_size + r['Index']
-                 rdata = r[result_type]
-                 unbatched_results[ridx] = rdata
-             for e in error_list:
-                 eidx = e['Index']
-                 unbatched_errors[eidx] = 'ERROR' + e['ErrorCode'] + ': ' + e['ErrorMessage']
-         unbatched = {}
-         unbatched['results'] = unbatched_results
-         unbatched['errors'] = unbatched_errors
-         unbatched['responses'] = batch_responses
-         return unbatched
-
-     def check_long_text(self,df):
-         # Check for long reflections (too long for batch analysis)
-         long_df = df.copy()
-         long_df = long_df[long_df.text.str.len()>5000]
-         long_df['length'] = long_df.text.str.len()
-         return long_df
-
-     # #### CUSTOM ENTITY
-
-     def submit_custom_entity_job(self,job_name): #access_role_arn,entity_recogniser_arn):
-         job_str = f"{self.config.prefix}{job_name}{self.config.postfix}"
-
-         response = self.__comp_client.start_entities_detection_job(
-             InputDataConfig={
-                 'S3Uri': self.config.s3_input_uri,
-                 'InputFormat': 'ONE_DOC_PER_FILE'
-             },
-             OutputDataConfig={
-                 'S3Uri': self.config.s3_output_uri
-             },
-             DataAccessRoleArn=self.config.comprehend_access_role_arn,
-             JobName=job_str,
-             EntityRecognizerArn=self.config.reflexive_entity_arn,
-             LanguageCode='en'
-         )
-         self.job_id = response['JobId']
-         self.check_job_status() # force the creation of __job_properties
-         return response
-
-     def get_current_job_id(self):
-         return self.job_id
-
-     # Check job status
-     def check_job_status(self):
-         job_status = self.__comp_client.describe_entities_detection_job(
-             JobId=self.job_id
-         )
-         self.__job_properties = job_status['EntitiesDetectionJobProperties']
-         return self.__job_properties['JobStatus']
-
-     def get_job_details(self):
-         return self.__job_properties
-
-     #checked
-     def download_and_extract(self,s3):
-         local_output_dir = f"{self.config.local_path}{self.config.prefix}output{self.config.postfix}"
-         job_details = self.get_job_details()
-         s3Uri = job_details['OutputDataConfig']['S3Uri']
-         return s3.results_download_save_extract(s3Uri,local_output_dir)
-
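The one non-obvious step in the deleted session.py is the index arithmetic in unbatch_results: with the default batch_size of 25, result Index r inside batch idx maps back to row idx*25 + r of the original series. A self-contained illustration with synthetic response dicts (the shape mirrors, but is not, real Comprehend output):

batch_size = 25
fake_batches = [
    {"ResultList": [{"Index": 0, "Sentiment": "POSITIVE"}], "ErrorList": []},
    {"ResultList": [{"Index": 3, "Sentiment": "NEGATIVE"}], "ErrorList": []},
]
for idx, batch in enumerate(fake_batches):
    for r in batch["ResultList"]:
        # batch 0, Index 0 -> row 0; batch 1, Index 3 -> row 28
        print("original row", idx * batch_size + r["Index"], "->", r["Sentiment"])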
reflexive/util.py DELETED
@@ -1,127 +0,0 @@
- import os
- import json
- import pandas as pd
- from sklearn.preprocessing import MinMaxScaler
- import logging
- #logging.basicConfig(level=logging.DEBUG)
- logger = logging.getLogger(__name__)
-
-
-
- # File functions
- def get_data_path_name(config,name,ext):
-     return f"{config.local_path}{config.prefix}{name}{config.postfix}.{ext}"
-
- def set_sub_dir(config,sub_dir=None):
-     # check dir sub_dir exists
-     if sub_dir:
-         local_dir = f"{config.local_path}{sub_dir}/"
-         logger.debug(f"local_dir: {local_dir}")
-         dirExists = os.path.exists(local_dir)
-         if not dirExists:
-             logger.info(f"Creating subdirectory: {local_dir}")
-             os.makedirs(local_dir)
-     else:
-         local_dir = config.local_path
-     return local_dir
-
-
-
- # Function to write dictionaries to both json and csv
- def writeDictJsonCSV(dictionary,path_file):
-     with open(f"{path_file}.json",'w') as fp:
-         fp.write(json.dumps(dictionary))
-
-     ngram_df = pd.DataFrame.from_dict(dictionary,orient='index')
-     ngram_df.to_csv(f"{path_file}.csv")
-
- # Data functions
- def sort_dict_by_value(d):
-     return dict(sorted(d.items(), key=lambda x:x[1], reverse=True))
-
- def filter_dict_by_value(ngrams,min_val=3):
-     filtered_ngrams = {}
-     for k,v in ngrams.items():
-         if v >=min_val:
-             filtered_ngrams[k] = v
-     return filtered_ngrams
-
- # Input a series and output a list of lists with each maxn elements
- def series_to_chunked_list(series,maxn=25):
-     lst = list(series)
-     return __chunk_list(lst,maxn)
-
- # Chunk a list into a list of lists with maxn elements
- def __chunk_list(lst,maxn=25):
-     return [lst[i:i + maxn] for i in range(0, len(lst), maxn)]
-
- # Count named entities
- def count_entities(entities):
-     counts = []
-     for k,v in entities.items():
-         counts.append((k,len(v)))
-     return sorted(counts, key=lambda x: x[1], reverse=True)
-
- # Function for calculating proportions of features
- def ratios(elements):
-     etotal = sum([v[1] for v in elements])
-     if etotal==0:
-         return elements
-     else:
-         proportioned = []
-         for element in elements:
-             prop_val = round((element[1]/etotal),4)
-             proportioned.append((element[0],prop_val))
-         return proportioned
-
-
-
- # Count labels associated with strings
- def count_labels(string_labels):
-     counts = dict()
-     for rt in string_labels:
-         counts[rt[1]] = counts.setdefault(rt[1],0) + 1
-     return sorted(counts.items(), key=lambda x: x[1], reverse=True)
-
- def count_keys(key_count_dict):
-     counts = dict()
-     for k,v in key_count_dict.items():
-         counts[k] = counts.setdefault(k,0) + v
-     return sorted(counts.items(), key=lambda x: x[1], reverse=True)
-
- # Total the values in list of tuples
- def tuple_values_total(tuples):
-     tvs = [t[1] for t in tuples]
-     return sum(tvs)
-
- #### SCALING AND NORMALISING
-
- # Outliers
-
- def outlier_fence(series):
-     bounds = {}
-     stats = series.describe()
-     iqr = stats['75%'] - stats['25%']
-     bounds["IQR"]=iqr
-     upper = stats['75%']+1.5*iqr
-     bounds["UPPER"]=upper
-     lower = stats['25%']-1.5*iqr
-     bounds["LOWER"]=lower
-     return bounds
-
- # MinMax Scaling
- def scale_min_max(df_cols):
-     scaler = MinMaxScaler()
-     return scaler.fit_transform(df_cols)
-
- # Normalise domain term counts
- def normalise_domain_counts(domain_counts,text_size):
-     norms = {}
-     for k,v in domain_counts.items():
-         norms[k] = round(v*text_size,3)
-     return norms
-
- def normalise_scaled(df,feature,norm_feature = 'text_scaled'):
-     tempdf = df[[feature,norm_feature]].copy()
-     tempdf['norm_scaled'] = tempdf.apply(lambda r: round(r[feature]/(r[norm_feature]+0.01),4),axis=1)
-     return tempdf['norm_scaled']
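As a closing note, the deleted util.py helpers back the batching and outlier logic used elsewhere in the package. A quick check of the two most important ones against the 1.2.8 wheel (the sample data is purely illustrative):

import pandas as pd
from reflexive.util import series_to_chunked_list, outlier_fence

# 60 texts split into Comprehend-sized batches of 25
texts = pd.Series([f"reflection {i}" for i in range(60)])
print([len(c) for c in series_to_chunked_list(texts, maxn=25)])  # [25, 25, 10]

# IQR fence over text lengths: 25% = 150, 75% = 170, so IQR = 20
lengths = pd.Series([120, 150, 160, 170, 900])
print(outlier_fence(lengths))  # {'IQR': 20.0, 'UPPER': 200.0, 'LOWER': 120.0}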