brynq-sdk-zermelo 2.0.0__tar.gz → 2.1.0__tar.gz

This diff shows the changes between two publicly released versions of this package, as published to a supported public registry. It is provided for informational purposes only.
@@ -1,6 +1,6 @@
  Metadata-Version: 1.0
  Name: brynq_sdk_zermelo
- Version: 2.0.0
+ Version: 2.1.0
  Summary: Zermelo wrapper from BrynQ
  Home-page: UNKNOWN
  Author: BrynQ
@@ -0,0 +1 @@
+ from .zermelo import Zermelo
@@ -0,0 +1,406 @@
+ from brynq_sdk_brynq import BrynQ
+ from pandas import json_normalize
+ from time import mktime
+ from typing import List, Union
+ import requests
+ import pandas as pd
+ import numpy as np
+ import datetime
+ import json
+ import time
+ import sys
+
+
+ class Zermelo(BrynQ):
+     def __init__(self, label: Union[str, List], storage_location, initial_zermelo_extract=False, extract_cancelled_appointments=True, debug=False):
+         """
+         Extracts data from the source system based on the given parameters.
+         For documentation see: https://wartburg.zportal.nl/static/swagger/ & https://zermelo.atlassian.net/wiki/display/DEV/API+Entities
+         :param label: the BrynQ credential label (or list of labels) for the zermelo system
+         :param storage_location: the location where the extracted data files are saved
+         :param initial_zermelo_extract: store the extract as a delta file (True) or not (False)
+         :param extract_cancelled_appointments: when True (the default), cancelled appointments are included; set to False to exclude them
+         """
+         super().__init__()
+         credentials = self.get_system_credential(system='zermelo', label=label)
+         self.access_token = credentials['token']
+         self.url = f"https://{credentials['customer']}.zportal.nl/api/v3/"
+         self.storage_location = storage_location
+         self.initial_zermelo_extract = initial_zermelo_extract
+         self.debug = debug
+         if extract_cancelled_appointments:
+             self.cancelled_appointments = ''
+         else:
+             self.cancelled_appointments = '&cancelled=false'
+
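A minimal usage sketch of the constructor (the label and storage path below are hypothetical; the credential must already exist in BrynQ under the 'zermelo' system, and run_all_extracts is defined below):

    zermelo = Zermelo(label='zermelo', storage_location='/output/zermelo/')
    zermelo.run_all_extracts()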
35
+     def run_all_extracts(self):
+         # The following endpoints deliver such huge amounts of data that they should be split into separate school years
+         start_of_data = datetime.date(year=datetime.datetime.today().year, month=8, day=1).timetuple()
+         end_of_data = datetime.date(year=datetime.datetime.today().year + 2, month=8, day=1).timetuple()
+         if self.initial_zermelo_extract:
+             for i in range(1, 7):
+                 start_of_data = datetime.date(year=datetime.datetime.today().year - i, month=8, day=1).timetuple()
+                 end_of_data = datetime.date(year=datetime.datetime.today().year - i + 1, month=7, day=31).timetuple()
+                 # self.get_zermelo_substituded_lessons(endpoint='substitutedlessons', fields=['contract', 'employee', 'appointment', 'start', 'end', 'changeDescription', 'appointmentInstance'],
+                 #                                      startdate=start_of_data, enddate=end_of_data)
+                 self.get_zermelo_appointments(endpoint='appointments',
+                                               fields=['id', 'start', 'end', 'type', 'remark', 'valid', 'cancelled', 'modified',
+                                                       'moved', 'changeDescription', 'branch', 'branchOfSchool', 'created', 'lastModified',
+                                                       'hidden', 'appointmentInstance', 'new', 'teachers', 'students', 'subjects', 'groups',
+                                                       'locations', 'locationsOfBranch', 'groupsInDepartments'],
+                                               startdate=start_of_data, enddate=end_of_data)
+         elif datetime.datetime.today().month <= 7:
+             start_of_data = datetime.date(year=datetime.datetime.today().year - 1, month=8, day=1).timetuple()
+             end_of_data = datetime.date(year=datetime.datetime.today().year + 1, month=8, day=1).timetuple()
+             self.get_zermelo_appointments(endpoint='appointments',
+                                           fields=['id', 'start', 'end', 'type', 'remark', 'valid', 'cancelled', 'modified',
+                                                   'moved', 'changeDescription', 'branch', 'branchOfSchool', 'created', 'lastModified',
+                                                   'hidden', 'appointmentInstance', 'new', 'teachers', 'students', 'subjects', 'groups',
+                                                   'locations', 'locationsOfBranch', 'groupsInDepartments'],
+                                           startdate=start_of_data, enddate=end_of_data)
+         else:
+             self.get_zermelo_appointments(endpoint='appointments',
+                                           fields=['id', 'start', 'end', 'type', 'remark', 'valid', 'cancelled', 'modified',
+                                                   'moved', 'changeDescription', 'branch', 'branchOfSchool', 'created', 'lastModified',
+                                                   'hidden', 'appointmentInstance', 'new', 'teachers', 'students', 'subjects', 'groups',
+                                                   'locations', 'locationsOfBranch', 'groupsInDepartments'],
+                                           startdate=start_of_data, enddate=end_of_data)
+
+         self.get_zermelo(endpoint='branches', fields=['code', 'name'])
+         self.get_zermelo(endpoint='branchesofschools', fields=['id', 'schoolInSchoolYear', 'branch', 'name'])
+         self.get_zermelo(endpoint='choosableindepartments', fields=['id', 'subject', 'departmentOfBranch', 'departmentOfBranchCode', 'sectionOfBranch', 'clockHoursPerLesson', 'teachingLevelManually',
+                                                                     'teachingLevel', 'subjectType', 'subjectCode', 'subjectName', 'scheduleCode', 'subjectScheduleCode', 'lessonDemand', 'lessonHoursInClassPeriods'],
+                          nested=True, nested_fields=['lessonHoursInClassPeriods'])
+         self.get_zermelo(endpoint='classperiods', fields=['id', 'name', 'schoolInSchoolYear', 'weeks'], nested=True, nested_fields=['weeks'])
+         self.get_zermelo(endpoint='contracts', fields=['id', 'start', 'end', 'employee', 'defaultFunctionCategory', 'teacherTeam', 'clockHoursGeneralTasks', 'clockHoursGeneralTasksManually',
+                                                        'clockHoursTasks', 'clockHoursProfessionalDevelopmentManually', 'clockHoursProfessionalDevelopment', 'clockHoursNet', 'lessonsMax', 'type',
+                                                        'yearFraction', 'fteYearLeave', 'ftePermanent', 'fteTemporary', 'fteNet', 'clockHoursGross', 'clockHoursBalance', 'clockHoursLessonsMax',
+                                                        'lessonReducingTasks', 'taskSpace', 'taskBalance', 'lessonSpace', 'mainBranchOfSchool', 'school', 'schoolName', 'schoolYear', 'firstName',
+                                                        'lastName', 'prefix', 'clockHoursLessons'])
+         self.get_zermelo(endpoint='departmentsofbranches', fields=['id', 'code', 'yearOfEducation', 'branchOfSchool', 'clockHoursPerLesson', 'schoolInSchoolYearId', 'schoolInSchoolYearName', 'studentCount', 'prognosticStudentCount'])
+         self.get_zermelo(endpoint='employees', fields=['userCode', 'commencementTeaching', 'commencementSchool', 'prefix', 'gender', 'dateOfBirth', 'firstName', 'lastName', 'street', 'houseNumber', 'postalCode', 'city'])
+         self.get_zermelo(endpoint='groupindepartments', fields=['id', 'departmentOfBranch', 'name', 'isMainGroup', 'isMentorGroup', 'extendedName'])
+         self.get_zermelo(endpoint='holidays', fields=['id', 'schoolInSchoolYear', 'name', 'start', 'end'])
+         self.get_zermelo(endpoint='jobs', fields=['id', 'contract', 'functionCategory', 'employmentType', 'start', 'end', 'fteReal', 'fteManually', 'fte', 'type', 'employee', 'clockHoursGross'])
+         self.get_zermelo(endpoint='jobextensions', fields=['id', 'contract', 'start', 'end', 'fteReal', 'lessonsAndTasks', 'total', 'employee', 'fte', 'generalTasks', 'professionalDevelopment', 'personalBudget'])
+         self.get_zermelo(endpoint='leaves', fields=['id', 'contract', 'leaveType', 'leaveTypeName', 'start', 'end', 'total', 'leaveApproved', 'employee', 'fteReal'])
+         self.get_zermelo(endpoint='leavetypes', fields=['id', 'name', 'fixed', 'affectsPersonalBudget'])
+         self.get_zermelo(endpoint='locationofbranches', fields=['id', 'name', 'parentteachernightCapacity', 'courseCapacity', 'branchOfSchool'])
+         self.get_zermelo(endpoint='plannedlessons', fields=['id', 'clockHoursPerLesson', 'clockHoursPerLessonManually', 'plannedGroups', 'lessonDemand', 'branchOfSchool', 'departmentOfBranches',
+                                                             'lessonHoursInClassPeriods', 'subjects', 'sectionOfBranches', 'maxTeachingLevel', 'regularTeachingAssignments',
+                                                             'prognosticStudentsPerTeacherCount', 'expectedTeacherCount', 'privateComment', 'publicComment'],
+                          nested=True, nested_fields=['plannedGroups', 'departmentOfBranches', 'subjects', 'sectionOfBranches', 'regularTeachingAssignments', 'lessonHoursInClassPeriods'])
+         self.get_zermelo(endpoint='plannedgroups', fields=['id', 'choosableInDepartment', 'groupInDepartment', 'teachingLevel', 'subjectCode', 'groupInDepartmentName',
+                                                            'groupInDepartmentIsMainGroup', 'groupInDepartmentIsMentorGroup', 'groupInDepartmentExtendedName', 'name', 'rank'])
+         self.get_zermelo(endpoint='schools', fields=['id', 'name', 'brin'])
+         self.get_zermelo(endpoint='schoolsinschoolyears', fields=['id', 'school', 'year', 'project', 'archived', 'projectName', 'schoolName', 'name'])
+         self.get_zermelo(endpoint='sectionassignments', fields=['contract', 'id', 'lessonHoursFirstDegree', 'lessonHoursSecondDegree', 'sectionOfBranch'])
+         selected_subject = self.get_zermelo_filtered(endpoint='selectedsubjects',
+                                                      fields=['id', 'subjectSelection', 'choosableInDepartment', 'alternativeChoosableInDepartment', 'manualLessonInvolvement',
+                                                              'exemption', 'studentInDepartment', 'subjectCode', 'subject', 'segmentCode', 'lessonInvolvement'],
+                                                      startdate=start_of_data,
+                                                      enddate=end_of_data)
+         selected_subject.drop_duplicates(inplace=True)
+         selected_subject.reset_index(drop=True, inplace=True)
+         selected_subject.index.name = '{0}_id'.format('selectedsubjects')
+         file = '{0}{1}.csv'.format(self.storage_location, 'selectedsubjects')
+         selected_subject.to_csv(file, sep='|', decimal=',')
+
+         self.get_zermelo(endpoint='sections', fields=['id', 'abbreviation', 'name', 'sectionOfBranches'], nested=True, nested_fields=['sectionOfBranches'])
+         self.get_zermelo(endpoint='students', fields=['dateOfBirth', 'email', 'street', 'houseNumber', 'postalCode', 'city', 'lastName', 'prefix',
+                                                       'firstName', 'lwoo', 'userCode', 'studentInDepartments'], nested=True, nested_fields=['studentInDepartments'])
+         self.get_zermelo(endpoint='studentsindepartments', fields=['id', 'student', 'departmentOfBranch', 'groupInDepartments', 'mainGroup'])
+         self.get_zermelo(endpoint='subjectselections', fields=['id', 'selectedSubjects', 'studentCode', 'departmentOfBranch'])
+         self.get_zermelo(endpoint='subjectselectionsubjects', fields=['id', 'code', 'name', 'scheduleCode'])
+         self.get_zermelo(endpoint='taskassignments', fields=['branchOfSchool', 'contract', 'employee', 'contract', 'hours', 'hoursReplacement', 'taskGroup', 'taskInBranchOfSchool',
+                                                              'type', 'start', 'end'])
+         self.get_zermelo(endpoint='tasks', fields=['abbreviation', 'id', 'name', 'taskGroup', 'taskGroupAbbreviation'])
+         self.get_zermelo(endpoint='taskgroups', fields=['abbreviation', 'description', 'id', 'name'])
+         self.get_zermelo(endpoint='tasksinbranchofschool', fields=['branchOfSchool', 'clockHoursAssigned', 'clockHoursBalance', 'id', 'maxHours', 'task', 'taskAbbreviation'])
+         self.get_zermelo(endpoint='teacherteams', fields=['id', 'name', 'branchOfSchool', 'departmentOfBranches'], nested=True, nested_fields=['departmentOfBranches'])
+         self.get_zermelo(endpoint='teachingassignments', fields=['id', 'contract', 'plannedLesson', 'type', 'regular', 'lessonHoursInClassPeriodsManually', 'startWeek', 'endWeek',
+                                                                  'employee', 'regularContract', 'teachingQualificationStatus', 'lessonHoursNet', 'clockHoursPerLesson', 'clockHoursTotal',
+                                                                  'sectionOfBranches', 'publicComment', 'privateComment', 'clockHoursAlgorithm', 'replacements',
+                                                                  'lessonHoursInClassPeriods', 'plannedGroups'],
+                          nested=True, nested_fields=['lessonHoursInClassPeriods', 'plannedGroups', 'sectionOfBranches', 'replacements'])
+         self.get_zermelo(endpoint='teachingqualifications', fields=['id', 'employee', 'choosable', 'startWeek', 'endWeek', 'diploma', 'teachingLevel', 'choosableAbbreviation', 'status', 'name'])
+         self.get_zermelo(endpoint='workforceparameters', fields=['defaultclockhoursperlesson', 'id', 'schoolInSchoolYear'])
+
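For reference, this is how a school-year window turns into the epoch bounds the extract methods above expect (a sketch using the same timetuple/mktime pattern; the 2023/2024 school year is just an example):

    import datetime
    import time
    start_of_data = datetime.date(year=2023, month=8, day=1).timetuple()
    end_of_data = datetime.date(year=2024, month=8, day=1).timetuple()
    start_epoch = int(time.mktime(start_of_data))  # seconds since the epoch, local time
    end_epoch = int(time.mktime(end_of_data))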
125
+     def get_zermelo(self, endpoint, fields, nested=False, nested_fields=[]):
+         """
+         The Zermelo database is divided into different endpoints, each consisting of fields. Some fields are nested,
+         which means that some records contain a subdivision.
+         :param endpoint: name of the endpoint. Not case-sensitive
+         :param fields: a selection of the desired fields. The field selection is case-sensitive
+         :param nested: whether the endpoint contains nested fields
+         :param nested_fields: the nested fields to select
+         :return: an error string when the extract didn't succeed
+         """
+         try:
+             print('{0} - Started with {1}'.format(time.strftime('%H:%M:%S'), endpoint))
+             url_fields = ','.join(fields)
+             url = '{0}{1}?access_token={2}&fields={3}'.format(self.url, endpoint, self.access_token, url_fields)
+
+             if nested:
+                 # Get the response without any transformation
+                 response = requests.get(url).json()['response']['data']
+
+                 # From all the fields, keep only the meta_fields (the fields that are not nested)
+                 meta_fields = fields.copy()
+                 for nested_field in nested_fields:
+                     meta_fields.remove(nested_field)
+
+                 # From the initial response, create a dataframe with only the meta_fields
+                 df = pd.DataFrame(response)
+                 df = df[meta_fields]
+
+                 # Give the columns in df the same type as in the original df. Sometimes an empty field changes the column
+                 # type in df_temp to object while the dtype in the original df is int or float, which gives an error when merging
+                 existing_field_types = dict(df.dtypes)
+                 for column in df:
+                     if column in existing_field_types:
+                         existing_dtype = existing_field_types[column]
+                         if existing_dtype == 'int64' or existing_dtype == 'float64':
+                             df[column] = df[column].fillna(0)
+                             df[column] = df[column].astype(existing_dtype)
+
+                 # Loop through the nested_fields, create a dataframe for each nested field and join the result to the initial dataframe
+                 for nested_field in nested_fields:
+                     # If the nested_field holds key/value pairs, record_prefix is usable. Plain values raise a TypeError; catch it and rename the column
+                     try:
+                         df_temp = pd.io.json.json_normalize(data=response, meta=meta_fields, record_path=[nested_field], record_prefix='{}_'.format(nested_field))
+                     except TypeError:
+                         df_temp = pd.io.json.json_normalize(data=response, meta=meta_fields, record_path=[nested_field])
+                         df_temp.rename(columns={0: nested_field}, inplace=True)
+                     # Give the columns in df_temp the same type as in the original df (see the note above on empty fields)
+                     existing_field_types = dict(df.dtypes)
+                     for column in df_temp:
+                         if column in existing_field_types:
+                             existing_dtype = existing_field_types[column]
+                             if existing_dtype == 'int64' or existing_dtype == 'float64':
+                                 df_temp[column] = df_temp[column].fillna(0)
+                                 df_temp[column] = df_temp[column].astype(existing_dtype)
+                     # Merge the initial dataframe and the new one
+                     df = pd.merge(df, df_temp, how='left', on=meta_fields)
+                 data = df
+             else:
+                 init_response = json.loads(requests.get(url).content)
+                 status = init_response['response']['status']
+                 if status == 200:
+                     data = pd.DataFrame(init_response['response']['data'])
+
+                     # Check whether a column only holds integers. If so, and the dtype is float, cast it to int64. Otherwise this gives problems in Qlik Sense (2 becomes 2.0)
+                     for column in data.columns:
+                         try:
+                             if data.loc[:, column].dtype == np.float64 or data.loc[:, column].dtype == np.int64:
+                                 data.loc[:, column].fillna(0, inplace=True)
+                             else:
+                                 data.loc[:, column].fillna('', inplace=True)
+                             column_name = 'check_{}'.format(column)
+                             data.loc[:, column_name] = data.apply(lambda x: 'int64' if x[column].is_integer() else 'float', axis=1)
+                             if 'float' in data.loc[:, column_name].values:
+                                 pass
+                             else:
+                                 data.loc[:, column] = data.loc[:, column].astype('int64')
+                             del data[column_name]
+                         except Exception as e:
+                             continue
+
+                 else:
+                     data = init_response['response']['message']
+                     print(data)
+
+             data.index.name = '{0}_id'.format(endpoint)
+             file = '{0}{1}.csv'.format(self.storage_location, endpoint)
+             data.to_csv(file, sep='|', decimal=',')
+             print('{0} - {1} saved'.format(time.strftime('%H:%M:%S'), endpoint))
+         except Exception as e:
+             exc_type, exc_obj, exc_tb = sys.exc_info()
+             error = str(e)[:400].replace('\'', '').replace('\"', '') + ' | Line: {}'.format(exc_tb.tb_lineno)
+             return error
+
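To illustrate the nested-field flattening above, a small self-contained sketch with toy data (using pandas' json_normalize, of which pd.io.json.json_normalize is the older, deprecated spelling):

    import pandas as pd
    from pandas import json_normalize

    response = [{'id': 1, 'weeks': [33, 34]}, {'id': 2, 'weeks': [35]}]
    meta_fields = ['id']
    df = pd.DataFrame(response)[meta_fields]
    # A record_path of plain values yields a column named 0, which is then renamed
    df_temp = json_normalize(data=response, meta=meta_fields, record_path=['weeks'])
    df_temp.rename(columns={0: 'weeks'}, inplace=True)
    df = pd.merge(df, df_temp, how='left', on=meta_fields)  # one row per (id, week) pair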
219
+     def get_zermelo_substituded_lessons(self, endpoint, fields, startdate, enddate):
+         start = time.time()
+         fields = ','.join(fields)
+
+         # Loop through the data in blocks of 7 days (3600 seconds * 24 hours * 7 days) because the dataset is too big to receive at once
+         df = pd.DataFrame()
+         start_epoch = int(time.mktime(startdate))
+         last_epoch = int(time.mktime(enddate))
+         while start_epoch < last_epoch:
+             try:
+                 if (start_epoch + (3600 * 24 * 7)) > last_epoch:
+                     end_epoch = int(last_epoch)
+                 else:
+                     end_epoch = int(start_epoch + (3600 * 24 * 7))
+
+                 url = '{0}{1}?access_token={2}&fields={3}&start={4}&end={5}'.format(self.url, endpoint, self.access_token, fields, start_epoch, end_epoch)
+                 data = requests.get(url).json()['response']['data']
+
+                 # check that data is not an empty list
+                 if data:
+                     df_new = pd.DataFrame(data)
+                     df_new['changeDescription'] = df_new['changeDescription'].str.replace('\n', '')
+                     df_new['changeDescription'] = df_new['changeDescription'].str.replace('\r', '')
+                     df = pd.concat([df, df_new])
+
+                     print('Substituded: Start: {}, End: {}, Length: {}'.format(start_epoch, end_epoch, len(df_new)))
+
+                 start_epoch += (3600 * 24 * 7)
+
+             except Exception as e:
+                 print('{} - Error at timestamp {}: {}'.format(time.strftime('%H:%M:%S'), start_epoch, e))
+                 start_epoch += (3600 * 24 * 7)
+
+         # Store the total dataframe to a new csv file
+         df.drop_duplicates(inplace=True)
+         df.reset_index(drop=True, inplace=True)
+         df.index.name = '{0}_id'.format(endpoint)
+         file = '{}{}_{}.csv'.format(self.storage_location, 'substituded_lessons', datetime.datetime.fromtimestamp(mktime(startdate)).strftime('%Y-%m-%d'))
+         df.to_csv(file, sep='|', decimal=',')
+
+         print('Done in {} seconds'.format(time.time() - start))
+
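The 7-day windowing used above, in isolation (pure epoch arithmetic, no API call; the min() form is equivalent to the if/else in the method):

    WEEK = 3600 * 24 * 7
    start_epoch, last_epoch = 0, 3 * WEEK + 1  # example bounds
    while start_epoch < last_epoch:
        end_epoch = min(start_epoch + WEEK, last_epoch)
        # request the block [start_epoch, end_epoch] here
        start_epoch += WEEK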
261
+     def get_zermelo_appointments(self, endpoint, fields, startdate, enddate):
+         start = time.time()
+         fields = ','.join(fields)
+
+         df = pd.DataFrame()
+
+         start_epoch = int(time.mktime(startdate))
+         last_epoch = int(time.mktime(enddate))
+         while start_epoch < last_epoch:
+             try:
+                 if (start_epoch + (3600 * 24 * 7)) > last_epoch:
+                     end_epoch = int(last_epoch)
+                 else:
+                     end_epoch = int(start_epoch + (3600 * 24 * 7))
+                 print(start_epoch, end_epoch)
+                 url = '{0}{1}?access_token={2}&fields={3}&start={4}&end={5}&includeHidden=True{6}&valid=True'.format(self.url, endpoint, self.access_token, fields, start_epoch, end_epoch, self.cancelled_appointments)
+                 data = requests.get(url).json()['response']['data']
+
+                 # check that data is not an empty list
+                 if data:
+                     df_new = pd.DataFrame(data)
+                     df_new['remark'] = df_new['remark'].str.replace('\n', '')
+                     df_new['remark'] = df_new['remark'].str.replace('\r', '')
+                     df = pd.concat([df, df_new])
+
+                     print('Appointments: Start: {}, End: {}, Length: {}'.format(start_epoch, end_epoch, len(df_new)))
+                 # Add one week
+                 start_epoch += (3600 * 24 * 7)
+
+             except Exception as e:
+                 print('{} - Error at timestamp {}: {}'.format(time.strftime('%H:%M:%S'), start_epoch, e))
+                 start_epoch += (3600 * 24 * 7)
+
+         # During summer vacation it can occur that no data call returns anything; the df is empty in this case
+         if len(df) > 0:
+             # Reset some columns from float to int
+             df.loc[:, 'branchOfSchool'].fillna(0, inplace=True)
+             df.loc[:, 'branchOfSchool'] = df.loc[:, 'branchOfSchool'].astype('int64')
+             df.reset_index(inplace=True, drop=True)
+
+             # Extract all the nested layers from the appointments and save them to separate files
+             self.appointments_create_lookup_table(df, 'students', 'userCode', startdate)
+             self.appointments_create_lookup_table(df, 'teachers', 'userCode', startdate)
+             self.appointments_create_lookup_table(df, 'subjects', 'scheduleCode', startdate)
+             self.appointments_create_lookup_table(df, 'groups', 'code', startdate)
+             self.appointments_create_lookup_table(df, 'locations', 'code', startdate)
+             self.appointments_create_lookup_table(df, 'locationsOfBranch', 'id', startdate)
+             self.appointments_create_lookup_table(df, 'groupsInDepartments', 'id', startdate)
+
+             # Store the total dataframe to a new csv file
+             df.drop(columns=['students', 'teachers', 'subjects', 'groups', 'locations', 'locationsOfBranch', 'groupsInDepartments'], inplace=True)
+             df.index.name = '{0}_id'.format(endpoint)
+             file = '{}{}_{}.csv'.format(self.storage_location, 'appointments', datetime.datetime.fromtimestamp(mktime(startdate)).strftime('%Y-%m-%d'))
+
+             df.to_csv(file, sep='|', decimal=',')
+         print('Done in {} seconds'.format(time.time() - start))
+
318
+     def appointments_create_lookup_table(self, df, col_name, link_id, startdate):
+         df = df[['id', col_name]]
+         # Only keep rows with filled arrays
+         df = df[df[col_name].apply(len) > 0]
+         appointments_lookup_df = []
+         for index, row in df.iterrows():
+             appointmentId = row['id']
+             to_link = row[col_name]
+             for item in to_link:
+                 appointments_lookup_df.append({'appointmentsId': appointmentId, link_id: item})
+         df = pd.DataFrame(appointments_lookup_df)
+         file = '{0}{1}.csv'.format(self.storage_location, 'appointments_{}_{}'.format(col_name, datetime.datetime.fromtimestamp(mktime(startdate)).strftime('%Y-%m-%d')))
+         df.index.name = 'appointments_{0}_id'.format(col_name)
+         df.to_csv(file, sep='|', decimal=',')
+
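In effect this explodes a list column into an appointment-to-item link table. A toy sketch of the same transformation (the column names follow the 'teachers'/'userCode' call above):

    import pandas as pd

    df = pd.DataFrame({'id': [10, 11], 'teachers': [['ab', 'cd'], []]})
    rows = []
    for _, row in df[df['teachers'].apply(len) > 0].iterrows():
        for item in row['teachers']:
            rows.append({'appointmentsId': row['id'], 'userCode': item})
    pd.DataFrame(rows)  # rows (10, 'ab') and (10, 'cd'); appointment 11 has no teachers and is dropped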
333
+     def get_zermelo_filtered(self, endpoint: str, fields: List = None, startdate=None, enddate=None, filters: dict = None) -> pd.DataFrame:
+         """
+         :param endpoint: endpoint
+         :param fields: fields to get; if left empty, all fields are returned
+         :param startdate: optional start date to get data from
+         :param enddate: optional end date to get data until
+         :param filters: dict of fields with corresponding values to filter on
+         :return: the requested data as a dataframe
+         """
+         # Loop through the data in blocks of 7 days (3600 seconds * 24 hours * 7 days) because the dataset is too big to receive at once
+         df = pd.DataFrame()
+         url = f'{self.url}{endpoint}'
+         params = {'access_token': self.access_token}
+
+         if fields is not None:
+             params.update({'fields': ','.join(fields)})
+         if filters is not None:
+             params.update(filters)
+
+         if startdate is not None:
+             start_epoch = int(time.mktime(startdate))
+             last_epoch = int(time.mktime(enddate))
+             # the loop window ends 7 days after start_epoch, except when last_epoch is smaller than start + 7 days
+             end_epoch = int(start_epoch + (3600 * 24 * 7)) if (start_epoch + (3600 * 24 * 7)) < last_epoch else last_epoch
+
+             while start_epoch < last_epoch:
+                 try:
+                     # merge params with the loop params
+                     time_params = params | {'start': start_epoch, 'end': end_epoch}
+                     resp = requests.get(url=url, params=time_params)
+                     resp.raise_for_status()
+
+                     data = resp.json()['response']['data']
+
+                     # check that data is not an empty list
+                     if len(data) > 0:
+                         df = pd.concat([df, pd.DataFrame(data)])
+
+                     # move to the next 7-day block
+                     start_epoch += (3600 * 24 * 7)
+                     end_epoch += (3600 * 24 * 7)
+                     # adjust end_epoch so it does not exceed last_epoch
+                     if end_epoch > last_epoch:
+                         end_epoch = last_epoch
+
+                 except requests.exceptions.HTTPError as http_err:
+                     # Stop the loop for certain HTTP errors like 403 or 401
+                     if resp.status_code == 403:
+                         print('{} - 403 Forbidden at timestamp {}: {}'.format(time.strftime('%H:%M:%S'), start_epoch, http_err))
+                         break  # Stop the loop for a 403 error
+                     elif resp.status_code == 401:
+                         print('{} - 401 Unauthorized at timestamp {}: {}'.format(time.strftime('%H:%M:%S'), start_epoch, http_err))
+                         break  # Stop the loop for a 401 error
+
+                     # For other HTTP errors, retry the loop
+                     print('{} - HTTP Error at timestamp {}: {}'.format(time.strftime('%H:%M:%S'), start_epoch, http_err))
+                     start_epoch += (3600 * 24 * 7)  # Move forward to the next block to prevent endless retries
+
+                 except Exception as e:
+                     # Handle other types of exceptions (e.g. network errors)
+                     print('{} - General Error at timestamp {}: {}'.format(time.strftime('%H:%M:%S'), start_epoch, e))
+                     start_epoch += (3600 * 24 * 7)  # Skip this block to avoid an infinite retry
+         else:
+             try:
+                 resp = requests.get(url=url, params=params)
+                 resp.raise_for_status()
+                 data = resp.json()['response']['data']
+                 df = pd.DataFrame(data)
+             except requests.exceptions.HTTPError as http_err:
+                 print('{} - HTTP Error: {}'.format(time.strftime('%H:%M:%S'), http_err))
+             except Exception as e:
+                 print('{} - General Error: {}'.format(time.strftime('%H:%M:%S'), e))
+
+         return df
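A hedged usage sketch of the filtered extract (the filter value is hypothetical; note that the dict-merge operator `|` used above requires Python 3.9+):

    window_start = datetime.date(2023, 8, 1).timetuple()
    window_end = datetime.date(2024, 8, 1).timetuple()
    df = zermelo.get_zermelo_filtered(endpoint='selectedsubjects',
                                      fields=['id', 'subjectCode'],
                                      startdate=window_start,
                                      enddate=window_end,
                                      filters={'exemption': 'false'})  # hypothetical filter value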
@@ -1,6 +1,6 @@
  Metadata-Version: 1.0
  Name: brynq-sdk-zermelo
- Version: 2.0.0
+ Version: 2.1.0
  Summary: Zermelo wrapper from BrynQ
  Home-page: UNKNOWN
  Author: BrynQ
@@ -1,4 +1,6 @@
  setup.py
+ brynq_sdk_zermelo/__init__.py
+ brynq_sdk_zermelo/zermelo.py
  brynq_sdk_zermelo.egg-info/PKG-INFO
  brynq_sdk_zermelo.egg-info/SOURCES.txt
  brynq_sdk_zermelo.egg-info/dependency_links.txt
@@ -0,0 +1 @@
+ brynq_sdk_zermelo
@@ -2,7 +2,7 @@ from setuptools import setup, find_namespace_packages
 
  setup(
      name='brynq_sdk_zermelo',
-     version='2.0.0',
+     version='2.1.0',
      description='Zermelo wrapper from BrynQ',
      long_description='Zermelo wrapper from BrynQ',
      author='BrynQ',