brynq-sdk-zermelo 1.0.2__tar.gz → 2.0.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 1.0
2
2
  Name: brynq_sdk_zermelo
3
- Version: 1.0.2
3
+ Version: 2.0.0
4
4
  Summary: Zermelo wrapper from BrynQ
5
5
  Home-page: UNKNOWN
6
6
  Author: BrynQ
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 1.0
2
2
  Name: brynq-sdk-zermelo
3
- Version: 1.0.2
3
+ Version: 2.0.0
4
4
  Summary: Zermelo wrapper from BrynQ
5
5
  Home-page: UNKNOWN
6
6
  Author: BrynQ
@@ -1,6 +1,4 @@
1
1
  setup.py
2
- brynq_sdk/zermelo/__init__.py
3
- brynq_sdk/zermelo/zermelo.py
4
2
  brynq_sdk_zermelo.egg-info/PKG-INFO
5
3
  brynq_sdk_zermelo.egg-info/SOURCES.txt
6
4
  brynq_sdk_zermelo.egg-info/dependency_links.txt
@@ -1,3 +1,3 @@
1
- brynq-sdk-brynq>=1
1
+ brynq-sdk-brynq>=2
2
2
  pandas<3,>=1
3
3
  requests<=3,>=2
@@ -1,17 +1,16 @@
1
- from setuptools import setup
2
-
1
+ from setuptools import setup, find_namespace_packages
3
2
 
4
3
  setup(
5
4
  name='brynq_sdk_zermelo',
6
- version='1.0.2',
5
+ version='2.0.0',
7
6
  description='Zermelo wrapper from BrynQ',
8
7
  long_description='Zermelo wrapper from BrynQ',
9
8
  author='BrynQ',
10
9
  author_email='support@brynq.com',
11
- packages=["brynq_sdk.zermelo"],
10
+ packages=find_namespace_packages(include=['brynq_sdk*']),
12
11
  license='BrynQ License',
13
12
  install_requires=[
14
- 'brynq-sdk-brynq>=1',
13
+ 'brynq-sdk-brynq>=2',
15
14
  'pandas>=1,<3',
16
15
  'requests>=2,<=3'
17
16
  ],
@@ -1 +0,0 @@
1
- from brynq_sdk.zermelo.zermelo import Zermelo
@@ -1,408 +0,0 @@
1
- from pandas import json_normalize
2
- from time import mktime
3
- from brynq_sdk.brynq import BrynQ
4
- from typing import List, Union
5
- import requests
6
- import pandas as pd
7
- import numpy as np
8
- import datetime
9
- import json
10
- import time
11
- import sys
12
-
13
-
14
- class Zermelo(BrynQ):
15
- def __init__(self, label: Union[str, List], storage_location, initial_zermelo_extract=False, extract_cancelled_appointments=True, debug=False):
16
- """
17
- Extracts data from source based on the entered parameters
18
- For documentation see: https://wartburg.zportal.nl/static/swagger/ & https://zermelo.atlassian.net/wiki/display/DEV/API+Entities
19
- :param storage_location: indicates the location where the extracted data file is saved
20
- :param initial_zermelo_extract: store the extract as a delta file (true) or not (false)
21
- :param extract_cancelled_appointments: doesn't get the cancelled appointments by default. Can be changed to an empty string to get the cancelled appointments
22
- """
23
- super().__init__()
24
- credentials = self.get_system_credential(system='zermelo', label=label)
25
- self.access_token = credentials['token']
26
- self.url = f"https://{credentials['customer']}.zportal.nl/api/v3/"
27
- self.storage_location = storage_location
28
- self.initial_zermelo_extract = initial_zermelo_extract
29
- self.debug = debug
30
- if extract_cancelled_appointments:
31
- self.cancelled_appointments = ''
32
- else:
33
- self.cancelled_appointments = '&cancelled=false'
34
-
35
- def run_all_extracts(self):
36
- # The following endpoints are delivering such huge amounts of data, that these one should be splitted in seperate schoolyears
37
- start_of_data = datetime.date(year=datetime.datetime.today().year, month=8, day=1).timetuple()
38
- end_of_data = datetime.date(year=datetime.datetime.today().year + 2, month=8, day=1).timetuple()
39
- if self.initial_zermelo_extract:
40
- for i in range(1, 7):
41
- start_of_data = datetime.date(year=datetime.datetime.today().year - i, month=8, day=1).timetuple()
42
- end_of_data = datetime.date(year=datetime.datetime.today().year - i + 1, month=7, day=31).timetuple()
43
- # self.get_zermelo_substituded_lessons(endpoint='substitutedlessons', fields=['contract', 'employee', 'appointment', 'start', 'end', 'changeDescription', 'appointmentInstance'],
44
- # startdate=start_of_data, enddate=end_of_data)
45
- self.get_zermelo_appointments(endpoint='appointments', fields=['id', 'start', 'end', 'type', 'remark', 'valid', 'cancelled', 'modified',
46
- 'moved', 'changeDescription', 'branch', 'branchOfSchool', 'created', 'lastModified',
47
- 'hidden', 'appointmentInstance', 'new', 'teachers', 'students', 'subjects', 'groups',
48
- 'locations', 'locationsOfBranch', 'groupsInDepartments'],
49
- startdate=start_of_data, enddate=end_of_data)
50
- elif datetime.datetime.today().month <= 7:
51
- start_of_data = datetime.date(year=datetime.datetime.today().year - 1, month=8, day=1).timetuple()
52
- end_of_data = datetime.date(year=datetime.datetime.today().year + 1, month=8, day=1).timetuple()
53
- self.get_zermelo_appointments(endpoint='appointments', fields=['id', 'start', 'end', 'type', 'remark', 'valid', 'cancelled', 'modified',
54
- 'moved', 'changeDescription', 'branch', 'branchOfSchool', 'created', 'lastModified',
55
- 'hidden', 'appointmentInstance', 'new', 'teachers', 'students', 'subjects', 'groups',
56
- 'locations', 'locationsOfBranch', 'groupsInDepartments'],
57
- startdate=start_of_data, enddate=end_of_data)
58
- else:
59
- self.get_zermelo_appointments(endpoint='appointments', fields=['id', 'start', 'end', 'type', 'remark', 'valid', 'cancelled', 'modified',
60
- 'moved', 'changeDescription', 'branch', 'branchOfSchool', 'created', 'lastModified',
61
- 'hidden', 'appointmentInstance', 'new', 'teachers', 'students', 'subjects', 'groups',
62
- 'locations', 'locationsOfBranch', 'groupsInDepartments'],
63
- startdate=start_of_data, enddate=end_of_data)
64
-
65
- self.get_zermelo(endpoint='branches', fields=['code', 'name'])
66
- self.get_zermelo(endpoint='branchesofschools', fields=['id', 'schoolInSchoolYear', 'branch', 'name'])
67
- self.get_zermelo(endpoint='choosableindepartments', fields=['id', 'subject', 'departmentOfBranch', 'departmentOfBranchCode', 'sectionOfBranch', 'clockHoursPerLesson', 'teachingLevelManually',
68
- 'teachingLevel', 'subjectType', 'subjectCode', 'subjectName', 'scheduleCode', 'subjectScheduleCode', 'lessonDemand', 'lessonHoursInClassPeriods'],
69
- nested=True, nested_fields=['lessonHoursInClassPeriods'])
70
- self.get_zermelo(endpoint='classperiods', fields=['id', 'name', 'schoolInSchoolYear', 'weeks'], nested=True, nested_fields=['weeks'])
71
- self.get_zermelo(endpoint='contracts', fields=['id', 'start', 'end', 'employee', 'defaultFunctionCategory', 'teacherTeam', 'clockHoursGeneralTasks', 'clockHoursGeneralTasksManually',
72
- 'clockHoursTasks', 'clockHoursProfessionalDevelopmentManually', 'clockHoursProfessionalDevelopment', 'clockHoursNet', 'lessonsMax', 'type',
73
- 'yearFraction', 'fteYearLeave', 'ftePermanent', 'fteTemporary', 'fteNet', 'clockHoursGross', 'clockHoursBalance', 'clockHoursLessonsMax',
74
- 'lessonReducingTasks', 'taskSpace', 'taskBalance', 'lessonSpace', 'mainBranchOfSchool', 'school', 'schoolName', 'schoolYear', 'firstName',
75
- 'lastName', 'prefix', 'clockHoursLessons'])
76
- self.get_zermelo(endpoint='departmentsofbranches', fields=['id', 'code', 'yearOfEducation', 'branchOfSchool', 'clockHoursPerLesson', 'schoolInSchoolYearId', 'schoolInSchoolYearName', 'studentCount', 'prognosticStudentCount'])
77
- self.get_zermelo(endpoint='employees', fields=['userCode', 'commencementTeaching', 'commencementSchool', 'prefix', 'gender', 'dateOfBirth', 'firstName', 'lastName', 'street', 'houseNumber', 'postalCode', 'city'])
78
- self.get_zermelo(endpoint='groups', fields=['id', 'code'])
79
- self.get_zermelo(endpoint='groupindepartments', fields=['id', 'departmentOfBranch', 'name', 'isMainGroup', 'isMentorGroup', 'extendedName'])
80
- self.get_zermelo(endpoint='holidays', fields=['id', 'schoolInSchoolYear', 'name', 'start', 'end'])
81
- self.get_zermelo(endpoint='jobs', fields=['id', 'contract', 'functionCategory', 'employmentType', 'start', 'end', 'fteReal', 'fteManually', 'fte', 'type', 'employee', 'clockHoursGross'])
82
- self.get_zermelo(endpoint='jobextensions', fields=['id', 'contract', 'start', 'end', 'fteReal', 'lessonsAndTasks', 'total', 'employee', 'fte', 'generalTasks', 'professionalDevelopment', 'personalBudget'])
83
- self.get_zermelo(endpoint='leaves', fields=['id', 'contract', 'leaveType', 'leaveTypeName', 'start', 'end', 'total', 'leaveApproved', 'employee', 'fteReal'])
84
- self.get_zermelo(endpoint='leavetypes', fields=['id', 'name', 'fixed', 'affectsPersonalBudget'])
85
- self.get_zermelo(endpoint='locations', fields=['code'])
86
- self.get_zermelo(endpoint='locationofbranches', fields=['id', 'name', 'parentteachernightCapacity', 'courseCapacity', 'branchOfSchool'])
87
- self.get_zermelo(endpoint='plannedlessons', fields=['id', 'clockHoursPerLesson', 'clockHoursPerLessonManually', 'plannedGroups', 'lessonDemand', 'branchOfSchool', 'departmentOfBranches',
88
- 'lessonHoursInClassPeriods', 'subjects', 'sectionOfBranches', 'maxTeachingLevel', 'regularTeachingAssignments',
89
- 'prognosticStudentsPerTeacherCount', 'expectedTeacherCount', 'privateComment', 'publicComment'],
90
- nested=True, nested_fields=['plannedGroups', 'departmentOfBranches', 'subjects', 'sectionOfBranches', 'regularTeachingAssignments', 'lessonHoursInClassPeriods'])
91
- self.get_zermelo(endpoint='plannedgroups', fields=['id', 'choosableInDepartment', 'groupInDepartment', 'teachingLevel', 'subjectCode', 'groupInDepartmentName',
92
- 'groupInDepartmentIsMainGroup', 'groupInDepartmentIsMentorGroup', 'groupInDepartmentExtendedName', 'name', 'rank'])
93
- self.get_zermelo(endpoint='schools', fields=['id', 'name', 'brin'])
94
- self.get_zermelo(endpoint='schoolsinschoolyears', fields=['id', 'school', 'year', 'project', 'archived', 'projectName', 'schoolName', 'name'])
95
- self.get_zermelo(endpoint='sectionassignments', fields=['contract', 'id', 'lessonHoursFirstDegree', 'lessonHoursSecondDegree', 'sectionOfBranch'])
96
- selected_subject = self.get_zermelo_filtered(endpoint='selectedsubjects',
97
- fields=['id', 'subjectSelection', 'choosableInDepartment', 'alternativeChoosableInDepartment', 'manualLessonInvolvement',
98
- 'exemption', 'studentInDepartment', 'subjectCode', 'subject', 'segmentCode', 'lessonInvolvement'],
99
- startdate=start_of_data,
100
- enddate=end_of_data)
101
- selected_subject.drop_duplicates(inplace=True)
102
- selected_subject.reset_index(drop=True, inplace=True)
103
- selected_subject.index.name = '{0}_id'.format('selectedsubjects')
104
- file = '{0}{1}.csv'.format(self.storage_location, 'selectedsubjects')
105
- selected_subject.to_csv(file, sep='|', decimal=',')
106
-
107
- self.get_zermelo(endpoint='sections', fields=['id', 'abbreviation', 'name', 'sectionOfBranches'], nested=True, nested_fields=['sectionOfBranches'])
108
- self.get_zermelo(endpoint='students', fields=['dateOfBirth', 'email', 'street', 'houseNumber', 'postalCode', 'city', 'lastName', 'prefix',
109
- 'firstName', 'lwoo', 'userCode', 'studentInDepartments'], nested=True, nested_fields=['studentInDepartments'])
110
- self.get_zermelo(endpoint='studentsindepartments', fields=['id', 'student', 'departmentOfBranch', 'groupInDepartments', 'mainGroup'])
111
- self.get_zermelo(endpoint='subjectselections', fields=['id', 'selectedSubjects', 'studentCode', 'departmentOfBranch'])
112
- self.get_zermelo(endpoint='subjectselectionsubjects', fields=['id', 'code', 'name', 'scheduleCode'])
113
- self.get_zermelo(endpoint='taskassignments', fields=['branchOfSchool', 'contract', 'employee', 'contract', 'hours', 'hoursReplacement', 'taskGroup', 'taskInBranchOfSchool',
114
- 'type', 'start', 'end'])
115
- self.get_zermelo(endpoint='tasks', fields=['abbreviation', 'id', 'name', 'taskGroup', 'taskGroupAbbreviation'])
116
- self.get_zermelo(endpoint='taskgroups', fields=['abbreviation', 'description', 'id', 'name'])
117
- self.get_zermelo(endpoint='tasksinbranchofschool', fields=['branchOfSchool', 'clockHoursAssigned', 'clockHoursBalance', 'id', 'maxHours', 'task', 'taskAbbreviation'])
118
- self.get_zermelo(endpoint='teacherteams', fields=['id', 'name', 'branchOfSchool', 'departmentOfBranches'], nested=True, nested_fields=['departmentOfBranches'])
119
- self.get_zermelo(endpoint='teachingassignments', fields=['id', 'contract', 'plannedLesson', 'type', 'regular', 'lessonHoursInClassPeriodsManually', 'startWeek', 'endWeek',
120
- 'employee', 'regularContract', 'teachingQualificationStatus', 'lessonHoursNet', 'clockHoursPerLesson', 'clockHoursTotal',
121
- 'sectionOfBranches', 'publicComment', 'privateComment', 'clockHoursAlgorithm', 'replacements',
122
- 'lessonHoursInClassPeriods', 'plannedGroups'],
123
- nested=True, nested_fields=['lessonHoursInClassPeriods', 'plannedGroups', 'sectionOfBranches', 'replacements'])
124
- self.get_zermelo(endpoint='teachingqualifications', fields=['id', 'employee', 'choosable', 'startWeek', 'endWeek', 'diploma', 'teachingLevel', 'choosableAbbreviation', 'status', 'name'])
125
- self.get_zermelo(endpoint='workforceparameters', fields=['defaultclockhoursperlesson', 'id', 'schoolInSchoolYear'])
126
-
127
- def get_zermelo(self, endpoint, fields, nested=False, nested_fields=[]):
128
- """
129
- Database in Zermelo is divided in different endpoints which consist of fields. Some fields are nested, which
130
- means that some data lines have a subdivision.
131
- :param endpoint: name of the endpoint. Not case-sensitive
132
- :param fields: make a selection of the desired fields. Selection of the field(s) is case-sensitive
133
- :param nested: field is nested or not
134
- :param nested_fields: select nested fields
135
- :return: returns error when extract didn't succeed
136
- """
137
- try:
138
- print('{0} - Started with {1}'.format(time.strftime('%H:%M:%S'), endpoint))
139
- url_fields = ','.join(fields)
140
- url = '{0}{1}?access_token={2}&fields={3}'.format(self.url, endpoint, self.access_token, url_fields)
141
-
142
- if nested:
143
- # Get the response without any transformation
144
- response = requests.get(url).json()['response']['data']
145
-
146
- # From all the fields, hold only the meta_fields (the not nested fields)
147
- meta_fields = fields.copy()
148
- for nested_field in nested_fields:
149
- meta_fields.remove(nested_field)
150
-
151
- # From the initial response, create a dataframe with only the meta_fields
152
- df = pd.DataFrame(response)
153
- df = df[meta_fields]
154
-
155
- # Set the columns in df as the same type as in the original df. Sometimes, an empty field will change the column type in df_temp
156
- # to object while the dtype in the original df is int or float. This will give an error when merging
157
- existing_field_types = dict(df.dtypes)
158
- for column in df:
159
- if column in existing_field_types:
160
- existing_dtype = existing_field_types[column]
161
- if existing_dtype == 'int64' or existing_dtype == 'float64':
162
- df[column] = df[column].fillna(0)
163
- df[column] = df[column].astype(existing_dtype)
164
-
165
- # Loop through the nested_fields, create a dataframe for each nested field and join the result to the initial dataframe
166
- for nested_field in nested_fields:
167
- # If the nested_field hold a key, value pair, then the record_prefix is usable. Only a value give a TypeError. Catch this error and rename the column
168
- try:
169
- df_temp = pd.io.json.json_normalize(data=response, meta=meta_fields, record_path=[nested_field], record_prefix='{}_'.format(nested_field))
170
- except TypeError:
171
- df_temp = pd.io.json.json_normalize(data=response, meta=meta_fields, record_path=[nested_field])
172
- df_temp.rename(columns={0: nested_field}, inplace=True)
173
- # Set the columns in df_temp as the same type as in the original df. Sometimes, an empty field will change the column type in df_temp
174
- # to object while the dtype in the original df is int or float. This will give an error when merging
175
- existing_field_types = dict(df.dtypes)
176
- for column in df_temp:
177
- if column in existing_field_types:
178
- existing_dtype = existing_field_types[column]
179
- if existing_dtype == 'int64' or existing_dtype == 'float64':
180
- df_temp[column] = df_temp[column].fillna(0)
181
- df_temp[column] = df_temp[column].astype(existing_dtype)
182
- # Merge the initial dataframe and the new one
183
- df = pd.merge(df, df_temp, how='left', on=meta_fields)
184
- data = df
185
- else:
186
- init_response = json.loads(requests.get(url).content)
187
- status = init_response['response']['status']
188
- if status == 200:
189
- data = pd.DataFrame(init_response['response']['data'])
190
-
191
- # Check each column if the column only holds integers. If yes, and the type is a Float, set type to float. Otherwise, this gives problems in QLik Sense (2 becomes 2.0)
192
- for column in data.columns:
193
- try:
194
- if data.loc[:, column].dtype == np.float64 or data.loc[:, column].dtype == np.int64:
195
- data.loc[:, column].fillna(0, inplace=True)
196
- else:
197
- data.loc[:, column].fillna('', inplace=True)
198
- column_name = 'check_{}'.format(column)
199
- data.loc[:, column_name] = data.apply(lambda x: 'int64' if x[column].is_integer() else 'float', axis=1)
200
- if 'float' in data.loc[:, column_name].values:
201
- pass
202
- else:
203
- data.loc[:, column] = data.loc[:, column].astype('int64')
204
- del data[column_name]
205
- except Exception as e:
206
- continue
207
-
208
- else:
209
- data = init_response['response']['message']
210
- print(data)
211
-
212
- data.index.name = '{0}_id'.format(endpoint)
213
- file = '{0}{1}.csv'.format(self.storage_location, endpoint)
214
- data.to_csv(file, sep='|', decimal=',')
215
- print('{0} - {1} saved'.format(time.strftime('%H:%M:%S'), endpoint))
216
- except Exception as e:
217
- exc_type, exc_obj, exc_tb = sys.exc_info()
218
- error = str(e)[:400].replace('\'', '').replace('\"', '') + ' | Line: {}'.format(exc_tb.tb_lineno)
219
- return error
220
-
221
- def get_zermelo_substituded_lessons(self, endpoint, fields, startdate, enddate):
222
- start = time.time()
223
- fields = ','.join(fields)
224
-
225
- # Loop through the data per 3 days (3600 seconds * 24 hours * 3 days) because the dataset is too big to receive in once. Start three years back
226
- df = pd.DataFrame()
227
- start_epoch = int(time.mktime(startdate))
228
- last_epoch = int(time.mktime(enddate))
229
- while start_epoch < last_epoch:
230
- try:
231
- if (start_epoch + (3600 * 24 * 7)) > last_epoch:
232
- end_epoch = int(last_epoch)
233
- else:
234
- end_epoch = int(start_epoch + (3600 * 24 * 7))
235
-
236
- url = '{0}{1}?access_token={2}&fields={3}&start={4}&end={5}'.format(self.url, endpoint, self.access_token, fields, start_epoch, end_epoch)
237
- data = requests.get(url).json()['response']['data']
238
-
239
- # checks if data is not empty list
240
- if data:
241
- df_new = pd.DataFrame(data)
242
- df_new['changeDescription'] = df_new['changeDescription'].str.replace('\n', '')
243
- df_new['changeDescription'] = df_new['changeDescription'].str.replace('\r', '')
244
- df = pd.concat([df, df_new])
245
-
246
- print('Substituded: Start: {}, End: {}, Length: {}'.format(start_epoch, end_epoch, len(df_new)))
247
-
248
- start_epoch += (3600 * 24 * 7)
249
-
250
- except Exception as e:
251
- print('{} - Error at timestamp {}: {}'.format(time.strftime('%H:%M:%S'), start_epoch, e))
252
- start_epoch += (3600 * 24 * 7)
253
-
254
- # Store the total dataframe to a new csv file
255
- df.drop_duplicates(inplace=True)
256
- df.reset_index(drop=True, inplace=True)
257
- df.index.name = '{0}_id'.format(endpoint)
258
- file = '{}{}_{}.csv'.format(self.storage_location, 'substituded_lessons', datetime.datetime.fromtimestamp(mktime(startdate)).strftime('%Y-%m-%d'))
259
- df.to_csv(file, sep='|', decimal=',')
260
-
261
- print('Done in {} seconds'.format(time.time() - start))
262
-
263
- def get_zermelo_appointments(self, endpoint, fields, startdate, enddate):
264
- start = time.time()
265
- fields = ','.join(fields)
266
-
267
- df = pd.DataFrame()
268
-
269
- start_epoch = int(time.mktime(startdate))
270
- last_epoch = int(time.mktime(enddate))
271
- while start_epoch < last_epoch:
272
- try:
273
- if (start_epoch + (3600 * 24 * 7)) > last_epoch:
274
- end_epoch = int(last_epoch)
275
- else:
276
- end_epoch = int(start_epoch + (3600 * 24 * 7))
277
- print(start_epoch, end_epoch)
278
- url = '{0}{1}?access_token={2}&fields={3}&start={4}&end={5}&includeHidden=True{6}&valid=True'.format(self.url, endpoint, self.access_token, fields, start_epoch, end_epoch, self.cancelled_appointments)
279
- data = requests.get(url).json()['response']['data']
280
-
281
- # checks if data is not empty list
282
- if data:
283
- df_new = pd.DataFrame(data)
284
- df_new['remark'] = df_new['remark'].str.replace('\n', '')
285
- df_new['remark'] = df_new['remark'].str.replace('\r', '')
286
- df = pd.concat([df, df_new])
287
-
288
- print('Appointments: Start: {}, End: {}, Length: {}'.format(start_epoch, end_epoch, len(df_new)))
289
- # Add one week
290
- start_epoch += (3600 * 24 * 7)
291
-
292
- except Exception as e:
293
- print('{} - Error at timestamp {}: {}'.format(time.strftime('%H:%M:%S'), start_epoch, e))
294
- start_epoch += (3600 * 24 * 7)
295
-
296
- # During summer vacation, it can occur that no data call is executed. The df is empty in this case
297
- if len(df) > 0:
298
- # Reset some columns from Float to Int
299
- df.loc[:, 'branchOfSchool'].fillna(0, inplace=True)
300
- df.loc[:, 'branchOfSchool'] = df.loc[:, 'branchOfSchool'].astype('int64')
301
- df.reset_index(inplace=True, drop=True)
302
-
303
- # Subtract all the nested layers from the appointments and save to separate files
304
- self.appointments_create_lookup_table(df, 'students', 'userCode', startdate)
305
- self.appointments_create_lookup_table(df, 'teachers', 'userCode', startdate)
306
- self.appointments_create_lookup_table(df, 'subjects', 'scheduleCode', startdate)
307
- self.appointments_create_lookup_table(df, 'groups', 'code', startdate)
308
- self.appointments_create_lookup_table(df, 'locations', 'code', startdate)
309
- self.appointments_create_lookup_table(df, 'locationsOfBranch', 'id', startdate)
310
- self.appointments_create_lookup_table(df, 'groupsInDepartments', 'id', startdate)
311
-
312
- # Store the total dataframe to a new csv file
313
- df.drop(columns=['students', 'teachers', 'subjects', 'groups', 'locations', 'locationsOfBranch', 'groupsInDepartments'], inplace=True)
314
- df.index.name = '{0}_id'.format(endpoint)
315
- file = '{}{}_{}.csv'.format(self.storage_location, 'appointments', datetime.datetime.fromtimestamp(mktime(startdate)).strftime('%Y-%m-%d'))
316
-
317
- df.to_csv(file, sep='|', decimal=',')
318
- print('Done in {} seconds'.format(time.time() - start))
319
-
320
- def appointments_create_lookup_table(self, df, col_name, link_id, startdate):
321
- df = df[['id', col_name]]
322
- # Only hold rows whith filled arrays
323
- df = df[df[col_name].apply(len) > 0]
324
- appointments_lookup_df = []
325
- for index, row in df.iterrows():
326
- appointmentId = row['id']
327
- to_link = row[col_name]
328
- for item in to_link:
329
- appointments_lookup_df.append({'appointmentsId': appointmentId, link_id: item})
330
- df = pd.DataFrame(appointments_lookup_df)
331
- file = '{0}{1}.csv'.format(self.storage_location, 'appointments_{}_{}'.format(col_name, datetime.datetime.fromtimestamp(mktime(startdate)).strftime('%Y-%m-%d')))
332
- df.index.name = 'appointments_{0}_id'.format(col_name)
333
- df.to_csv(file, sep='|', decimal=',')
334
-
335
- def get_zermelo_filtered(self, endpoint: str, fields: List = None, startdate=None, enddate=None, filters: dict = None) -> pd.DataFrame:
336
- """
337
- :param endpoint: endpoint
338
- :param fields: fields to get, if left empty, all fields are returned
339
- :param startdate: optional startdate to get data from
340
- :param enddate: optional enddate to get data from
341
- :param filters: dict of fields with corresponding values to filter
342
- :return:
343
- """
344
- # Loop through the data per 7 days (3600 seconds * 24 hours * 7 days) because the dataset is too big to receive at once.
345
- df = pd.DataFrame()
346
- url = f'{self.url}{endpoint}'
347
- params = {'access_token': self.access_token}
348
-
349
- if fields is not None:
350
- params.update({'fields': ','.join(fields)})
351
- if filters is not None:
352
- params.update(filters)
353
-
354
- if startdate is not None:
355
- start_epoch = int(time.mktime(startdate))
356
- last_epoch = int(time.mktime(enddate))
357
- # loop epoch is 7 days from start_date except when last_epoch is smaller than start + 7 days
358
- end_epoch = int(start_epoch + (3600 * 24 * 7)) if (start_epoch + (3600 * 24 * 7)) < last_epoch else last_epoch
359
-
360
- while start_epoch < last_epoch:
361
- try:
362
- # merge params with loop params
363
- time_params = params | {'start': start_epoch, 'end': end_epoch}
364
- resp = requests.get(url=url, params=time_params)
365
- resp.raise_for_status()
366
-
367
- data = resp.json()['response']['data']
368
-
369
- # checks if data is not empty list
370
- if len(data) > 0:
371
- df = pd.concat([df, pd.DataFrame(data)])
372
-
373
- # move to next 7-day block
374
- start_epoch += (3600 * 24 * 7)
375
- end_epoch += (3600 * 24 * 7)
376
- # Adjust end_epoch to not exceed last_epoch
377
- if end_epoch > last_epoch:
378
- end_epoch = last_epoch
379
-
380
- except requests.exceptions.HTTPError as http_err:
381
- # Stop the loop for certain HTTP errors like 403 or 401
382
- if resp.status_code == 403:
383
- print('{} - 403 Forbidden at timestamp {}: {}'.format(time.strftime('%H:%M:%S'), start_epoch, http_err))
384
- break # Stop the loop for 403 error
385
- elif resp.status_code == 401:
386
- print('{} - 401 Unauthorized at timestamp {}: {}'.format(time.strftime('%H:%M:%S'), start_epoch, http_err))
387
- break # Stop the loop for 401 error
388
-
389
- # For other HTTP errors, retry the loop
390
- print('{} - HTTP Error at timestamp {}: {}'.format(time.strftime('%H:%M:%S'), start_epoch, http_err))
391
- start_epoch += (3600 * 24 * 7) # Move forward to next block to prevent endless retry
392
-
393
- except Exception as e:
394
- # Handle other types of exceptions (e.g., network errors)
395
- print('{} - General Error at timestamp {}: {}'.format(time.strftime('%H:%M:%S'), start_epoch, e))
396
- start_epoch += (3600 * 24 * 7) # Skip this block to avoid infinite retry
397
- else:
398
- try:
399
- resp = requests.get(url=url, params=params)
400
- resp.raise_for_status()
401
- data = resp.json()['response']['data']
402
- df = pd.DataFrame(data)
403
- except requests.exceptions.HTTPError as http_err:
404
- print('{} - HTTP Error: {}'.format(time.strftime('%H:%M:%S'), http_err))
405
- except Exception as e:
406
- print('{} - General Error: {}'.format(time.strftime('%H:%M:%S'), e))
407
-
408
- return df