brynq-sdk-bob 2.6.2.dev4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,76 @@
+ import base64
+ import re
+ from typing import Union, List, Optional, Literal
+ import pandas as pd
+ import requests
+ import os
+ from brynq_sdk_brynq import BrynQ
+ from brynq_sdk_functions import Functions
+ from .bank import Bank
+ from .company import Company
+ from .documents import CustomDocuments
+ from .employment import Employment
+ from .named_lists import NamedLists
+ from .payments import Payments
+ from .people import People
+ from .salaries import Salaries
+ from .timeoff import TimeOff
+ from .work import Work
+ from .custom_tables import CustomTables
+
+ class Bob(BrynQ):
+     def __init__(self, system_type: Optional[Literal['source', 'target']] = None, test_environment: bool = True, debug: bool = False, target_system: Optional[str] = None):
+         super().__init__()
+         self.timeout = 3600
+         self.headers = self._get_request_headers(system_type)
+         if test_environment:
+             self.base_url = "https://api.sandbox.hibob.com/v1/"
+         else:
+             self.base_url = "https://api.hibob.com/v1/"
+         self.session = requests.Session()
+         self.session.headers.update(self.headers)
+         self.people = People(self)
+         self.salaries = Salaries(self)
+         self.work = Work(self)
+         self.bank = Bank(self)
+         self.employment = Employment(self)
+         self.payments = Payments(self)
+         self.time_off = TimeOff(self)
+         self.documents = CustomDocuments(self)
+         self.companies = Company(self)
+         self.named_lists = NamedLists(self)
+         self.custom_tables = CustomTables(self)
+         self.data_interface_id = os.getenv("DATA_INTERFACE_ID")
+         self.debug = debug
+
+     def _get_request_headers(self, system_type):
+         credentials = self.interfaces.credentials.get(system='bob', system_type=system_type)
+         if isinstance(credentials, list):
+             credentials = credentials[0]
+
+         auth_token = base64.b64encode(f"{credentials.get('data').get('User ID')}:{credentials.get('data').get('API Token')}".encode()).decode('utf-8')
+         headers = {
+             "accept": "application/json",
+             "Authorization": f"Basic {auth_token}",
+             "Partner-Token": "001Vg00000A6FY6IAN"
+         }
+
+         return headers
+
+     def get_paginated_result(self, request: requests.Request) -> List:
+         has_next_page = True
+         result_data = []
+         while has_next_page:
+             prepped = request.prepare()
+             prepped.headers.update(self.session.headers)
+             resp = self.session.send(prepped, timeout=self.timeout)
+             resp.raise_for_status()
+             response_data = resp.json()
+             result_data += response_data['results']
+             next_cursor = response_data.get('response_metadata').get('next_cursor')
+             # A missing next_cursor means there is no next page; the explicit check is clearer than relying on None being falsy.
+             has_next_page = next_cursor is not None
+             if has_next_page:
+                 request.params.update({"cursor": next_cursor})
+
+         return result_data
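For orientation, a minimal usage sketch of the client above. It assumes a 'bob' credential set is already configured in BrynQ and that DATA_INTERFACE_ID is set in the environment; the import path brynq_sdk_bob is an assumption based on the distribution name.

import requests
from brynq_sdk_bob import Bob  # import path assumed from the distribution name

bob = Bob(system_type='source', test_environment=True)

# Any GET endpoint that returns {"results": [...], "response_metadata": {"next_cursor": ...}}
# can be drained via get_paginated_result by passing an unprepared requests.Request.
request = requests.Request(method='GET', url=f"{bob.base_url}bulk/people/employment")
rows = bob.get_paginated_result(request)
print(len(rows))
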
brynq_sdk_bob/bank.py ADDED
@@ -0,0 +1,31 @@
+ import pandas as pd
+ from brynq_sdk_functions import Functions
+ from .schemas.bank import BankSchema
+
+ import time
+ from tqdm import tqdm
+
+ class Bank:
+     def __init__(self, bob):
+         self.bob = bob
+         self.schema = BankSchema
+
+     def get(self, person_ids: pd.Series, field_selection: list[str] = []) -> tuple[pd.DataFrame, pd.DataFrame]:
+         data = []
+         for person_id in tqdm(person_ids, desc="Fetching bank accounts"):
+             resp = self.bob.session.get(url=f"{self.bob.base_url}people/{person_id}/bank-accounts", timeout=self.bob.timeout)
+             resp.raise_for_status()
+             temp_data = resp.json()['values']
+             # When an employee has one or more bank accounts, the response is a list of dictionaries.
+             for account in temp_data:
+                 account['employee_id'] = person_id
+             data += temp_data
+
+             # The rate limit is 50 requests per minute.
+             time.sleep(1.3)
+
+         df = pd.DataFrame(data)
+
+         valid_banks, invalid_banks = Functions.validate_data(df=df, schema=BankSchema, debug=True)
+
+         return valid_banks, invalid_banks
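A short usage sketch for the Bank helper above; the person IDs are placeholders and the client construction mirrors the earlier example.

import pandas as pd
from brynq_sdk_bob import Bob  # import path assumed

bob = Bob(test_environment=True)
person_ids = pd.Series(["12345", "67890"])  # placeholder Bob employee IDs
valid_banks, invalid_banks = bob.bank.get(person_ids=person_ids)
# Each row carries the bank-account fields returned by Bob plus the added employee_id column.
print(valid_banks.head())
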
@@ -0,0 +1,23 @@
+ import pandas as pd
+
+
+ class Company:
+     def __init__(self, bob):
+         self.bob = bob
+
+     def get_variable_values(self, list_name: str = None) -> dict:
+         values = {}
+
+         if list_name is not None:
+             resp = self.bob.session.get(url=f"{self.bob.base_url}company/named-lists/{list_name}", timeout=self.bob.timeout)
+             resp.raise_for_status()
+             data = resp.json()
+             values.update({data["name"]: [value['id'] for value in data['values']]})
+         else:
+             resp = self.bob.session.get(url=f"{self.bob.base_url}company/named-lists", timeout=self.bob.timeout)
+             resp.raise_for_status()
+             data = resp.json()
+             for list_key, list_data in data.items():
+                 values.update({list_key: [value['id'] for value in list_data['values']]})
+
+         return values
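A sketch of how get_variable_values is typically called: without arguments it returns a dict keyed by list name, with a list_name it returns only that list's value IDs. The list name used here is a placeholder.

from brynq_sdk_bob import Bob  # import path assumed

bob = Bob(test_environment=True)

all_lists = bob.companies.get_variable_values()             # {"<list name>": [<value id>, ...], ...}
one_list = bob.companies.get_variable_values("workSites")   # "workSites" is a placeholder list name
print(list(all_lists.keys()))
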
@@ -0,0 +1,75 @@
+ import pandas as pd
+ from brynq_sdk_functions import Functions
+ from .schemas.custom_tables import CustomTableSchema, CustomTableMetadataSchema
+
+
+ class CustomTables:
+     def __init__(self, bob):
+         self.bob = bob
+         self.schema = CustomTableSchema
+
+     def get(self, employee_id: str, custom_table_id: str) -> tuple[pd.DataFrame, pd.DataFrame]:
+         """
+         Get custom table data for an employee
+
+         Args:
+             employee_id: The employee ID
+             custom_table_id: The custom table ID
+
+         Returns:
+             A tuple of (valid_data, invalid_data) as pandas DataFrames
+         """
+         resp = self.bob.session.get(url=f"{self.bob.base_url}people/custom-tables/{employee_id}/{custom_table_id}", timeout=self.bob.timeout)
+         resp.raise_for_status()
+         data = resp.json()
+
+         # Normalize the nested JSON response
+         df = pd.json_normalize(
+             data,
+             record_path=['values']
+         )
+
+         df['employee_id'] = employee_id
+         valid_data, invalid_data = Functions.validate_data(df=df, schema=self.schema, debug=True)
+
+         return valid_data, invalid_data
+
+     def get_metadata(self) -> tuple[pd.DataFrame, pd.DataFrame]:
+         """
+         Get metadata for all custom tables
+
+         Returns:
+             A tuple of (valid_data, invalid_data) as pandas DataFrames containing table and column metadata
+         """
+         url = f"{self.bob.base_url}people/custom-tables/metadata"
+         resp = self.bob.session.get(url=url, timeout=self.bob.timeout)
+         resp.raise_for_status()
+         data = resp.json()
+
+         # Flatten the nested structure: one row per column, with the table info repeated
+         rows = []
+         for table in data.get('tables', []):
+             table_info = {
+                 'table_id': table.get('id'),
+                 'table_name': table.get('name'),
+                 'table_category': table.get('category'),
+                 'table_description': table.get('description')
+             }
+
+             for column in table.get('columns', []):
+                 row = {
+                     **table_info,
+                     'column_id': column.get('id'),
+                     'column_name': column.get('name'),
+                     'column_description': column.get('description'),
+                     'column_mandatory': column.get('mandatory'),
+                     'column_type': column.get('type')
+                 }
+                 rows.append(row)
+
+         df = pd.DataFrame(rows)
+
+         # Validate against the metadata schema
+         valid_data, invalid_data = Functions.validate_data(df=df, schema=CustomTableMetadataSchema, debug=True)
+
+         return valid_data, invalid_data
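A sketch combining the two methods above: get_metadata to discover table IDs, then get to pull one employee's rows from a chosen table. The IDs below are placeholders.

from brynq_sdk_bob import Bob  # import path assumed

bob = Bob(test_environment=True)

tables_meta, invalid_meta = bob.custom_tables.get_metadata()
print(tables_meta.head())  # one row per table column, with the table info repeated

# Placeholder IDs; take real values from the metadata frame above.
valid_rows, invalid_rows = bob.custom_tables.get(employee_id="12345", custom_table_id="category_123")
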
@@ -0,0 +1,47 @@
+ from datetime import datetime
+ from io import BytesIO
+
+ import pandas as pd
+ from brynq_sdk_functions import Functions
+
+
+ class CustomDocuments:
+     def __init__(self, bob):
+         self.bob = bob
+         # self.headers_upload = self.bob.headers.copy()
+         # self.headers_upload['Content-Type'] = 'multipart/form-data'
+         # self.headers_upload['Accept'] = 'application/json'
+
+     def get(self, person_id: str) -> pd.DataFrame:
+         resp = self.bob.session.get(url=f"{self.bob.base_url}docs/people/{person_id}", timeout=self.bob.timeout)
+         resp.raise_for_status()
+         data = resp.json()['documents']
+         df = pd.DataFrame(data)
+         # data = self.bob.get_paginated_result(request)
+         # df = pd.json_normalize(
+         #     data,
+         #     record_path='changes',
+         #     meta=['employeeId']
+         # )
+         df = self.bob.rename_camel_columns_to_snake_case(df)
+         # valid_documents, invalid_documents = Functions.validate_data(df=df, schema=DocumentsSchema, debug=True)
+
+         return df
+
+     def get_folders(self) -> dict:
+         resp = self.bob.session.get(url=f"{self.bob.base_url}docs/folders/metadata", timeout=self.bob.timeout)
+         resp.raise_for_status()
+         data = resp.json()
+
+         return data
+
+     def create(self,
+                person_id: str,
+                folder_id: str,
+                file_name: str,
+                file_object: BytesIO):
+         files = {"file": (file_name, file_object, "application/pdf")}
+         resp = self.bob.session.post(url=f"{self.bob.base_url}docs/people/{person_id}/folders/{folder_id}/upload",
+                                      files=files,
+                                      timeout=self.bob.timeout)
+         resp.raise_for_status()
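A sketch of uploading a PDF with CustomDocuments.create; the folder ID would normally come from get_folders, and the IDs and file name below are placeholders.

from io import BytesIO
from brynq_sdk_bob import Bob  # import path assumed

bob = Bob(test_environment=True)

folders = bob.documents.get_folders()   # inspect this to find the target folder ID
with open("payslip.pdf", "rb") as f:    # placeholder local file
    bob.documents.create(person_id="12345",
                         folder_id="folder_abc",   # placeholder folder ID
                         file_name="payslip.pdf",
                         file_object=BytesIO(f.read()))
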
@@ -0,0 +1,25 @@
+ import pandas as pd
+ import requests
+
+ from brynq_sdk_functions import Functions
+
+ from .schemas.employment import EmploymentSchema
+
+
+ class Employment:
+     def __init__(self, bob):
+         self.bob = bob
+         self.schema = EmploymentSchema
+
+     def get(self) -> tuple[pd.DataFrame, pd.DataFrame]:
+         request = requests.Request(method='GET',
+                                    url=f"{self.bob.base_url}bulk/people/employment")
+         data = self.bob.get_paginated_result(request)
+         df = pd.json_normalize(
+             data,
+             record_path='values',
+             meta=['employeeId']
+         )
+         valid_contracts, invalid_contracts = Functions.validate_data(df=df, schema=self.schema, debug=True)
+
+         return valid_contracts, invalid_contracts
@@ -0,0 +1,37 @@
+ from datetime import datetime
+ import pandas as pd
+ from brynq_sdk_functions import Functions
+ from .schemas.named_lists import NamedListSchema
+
+
+ class NamedLists:
+     def __init__(self, bob):
+         self.bob = bob
+         self.schema = NamedListSchema
+
+     def get(self) -> tuple[pd.DataFrame, pd.DataFrame]:
+         """
+         Get all company named lists from Bob.
+
+         Every list value is flattened into its own row, with the name of the
+         list it belongs to stored in the "type" column.
+
+         Returns:
+             A tuple of (valid_data, invalid_data) as pandas DataFrames
+         """
+         url = f"{self.bob.base_url}company/named-lists/"
+         resp = self.bob.session.get(url=url, timeout=self.bob.timeout)
+         resp.raise_for_status()
+         data = resp.json()
+
+         df = pd.DataFrame([
+             {**item, "type": key}
+             for key, group in data.items()
+             for item in group["values"]
+         ])
+
+         # Normalize the nested JSON response
+         # df = pd.DataFrame(data.get('values'))
+         valid_data, invalid_data = Functions.validate_data(df=df, schema=NamedListSchema, debug=True)
+
+         return valid_data, invalid_data
@@ -0,0 +1,161 @@
+ import pandas as pd
+ from typing import Optional, List
+ from brynq_sdk_functions import Functions
+ from .schemas.payments import VariablePaymentSchema, ActualPaymentsSchema
+
+ import time
+ from tqdm import tqdm
+
+
+ class Payments:
+     def __init__(self, bob):
+         self.bob = bob
+         self.schema = VariablePaymentSchema
+
+     def _apply_named_list_mappings(self, df: pd.DataFrame) -> pd.DataFrame:
+         """Apply named list ID-to-value mappings to dataframe columns."""
+         if df.empty:
+             return df
+
+         # Fetch named lists from the Bob API
+         resp_named_lists = self.bob.session.get(
+             url=f"{self.bob.base_url}company/named-lists",
+             timeout=self.bob.timeout,
+             headers=self.bob.headers
+         )
+         named_lists = resp_named_lists.json()
+
+         # Transform named_lists to create id-to-value mappings for each field
+         named_lists = {
+             key.split('.')[-1]: {item['id']: item['value'] for item in value['values']}
+             for key, value in named_lists.items()
+         }
+
+         for field in df.columns:
+             # Fields in the response and in the named lists have different building blocks,
+             # but they both end with the same last block.
+             field_df = field.split('.')[-1].split('work_')[-1]
+             if field_df in named_lists.keys() and field_df not in ['site']:
+                 mapping = named_lists[field_df]
+                 df[field] = df[field].apply(
+                     lambda v: [mapping.get(x, x) for x in v] if isinstance(v, list) else mapping.get(v, v)
+                 )
+
+         return df
+
+     def get(self, person_ids: List[str]) -> tuple[pd.DataFrame, pd.DataFrame]:
+         df = pd.DataFrame()
+         for person_id in tqdm(person_ids, desc="Fetching variable payments"):
+             resp = self.bob.session.get(url=f"{self.bob.base_url}people/{person_id}/variable", timeout=self.bob.timeout)
+             resp.raise_for_status()
+             data = resp.json()
+             temp_df = pd.json_normalize(
+                 data, record_path='values'
+             )
+             temp_df['employee_id'] = person_id
+             df = pd.concat([df, temp_df])
+
+             # The rate limit is 50 requests per minute.
+             time.sleep(1.3)
+
+         df = df.reset_index(drop=True)
+
+         # Apply named list mappings
+         df = self._apply_named_list_mappings(df)
+
+         valid_payments, invalid_payments = Functions.validate_data(df=df, schema=self.schema, debug=True)
+         return valid_payments, invalid_payments
+
+     def get_actual_payments(
+             self,
+             limit: int = 200,
+             employee_ids: Optional[List[str]] = None,
+             pay_date_from: Optional[str] = None,
+             pay_date_to: Optional[str] = None
+     ) -> tuple[pd.DataFrame, pd.DataFrame]:
+         """
+         Search for actual payments with optional employee and pay date filters.
+         This method auto-paginates until all results are fetched.
+
+         See Bob API: https://apidocs.hibob.com/reference/post_people-actual-payments-search
+         See Pagination: https://apidocs.hibob.com/docs/pagination
+
+         Args:
+             limit (int): Number of records per page (default: 200, which is also the maximum).
+             employee_ids (Optional[List[str]]): Filter by employee IDs.
+             pay_date_from (Optional[str]): Inclusive start date filter (YYYY-MM-DD).
+             pay_date_to (Optional[str]): Inclusive end date filter (YYYY-MM-DD).
+
+         Returns:
+             tuple: (valid_payments DataFrame, invalid_payments DataFrame)
+         """
+         base_payload = {
+             "pagination": {
+                 "limit": limit
+             }
+         }
+
+         filters = []
+         if employee_ids:
+             filters.append({
+                 "fieldPath": "employeeId",
+                 "operator": "equals",
+                 "values": employee_ids
+             })
+         if pay_date_from:
+             filters.append({
+                 "fieldPath": "payDate",
+                 "operator": "greaterThanOrEquals",
+                 "value": pay_date_from
+             })
+         if pay_date_to:
+             filters.append({
+                 "fieldPath": "payDate",
+                 "operator": "lessThanOrEquals",
+                 "value": pay_date_to
+             })
+
+         if filters:
+             base_payload["filters"] = filters
+
+         all_results = []
+         next_cursor = None
+
+         while True:
+             payload = dict(base_payload)
+             payload["pagination"] = dict(base_payload["pagination"])
+             if next_cursor:
+                 payload["pagination"]["cursor"] = next_cursor
+
+             resp = self.bob.session.post(
+                 url=f"{self.bob.base_url}people/actual-payments/search",
+                 json=payload,
+                 timeout=self.bob.timeout
+             )
+             resp.raise_for_status()
+             data = resp.json()
+
+             page_results = data.get('results') or []
+             if page_results:
+                 all_results.extend(page_results)
+
+             next_cursor = (data.get('response_metadata') or {}).get('next_cursor')
+             if not next_cursor:
+                 break
+
+         if not all_results:
+             empty_df = pd.DataFrame()
+             return empty_df, empty_df
+
+         df = pd.json_normalize(all_results)
+
+         # Apply named list mappings
+         df = self._apply_named_list_mappings(df)
+
+         valid_payments, invalid_payments = Functions.validate_data(
+             df=df,
+             schema=ActualPaymentsSchema,
+             debug=True
+         )
+
+         return valid_payments, invalid_payments
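A sketch of the actual-payments search above, filtered to one pay period; dates are inclusive and formatted YYYY-MM-DD, and the employee IDs are placeholders.

from brynq_sdk_bob import Bob  # import path assumed

bob = Bob(test_environment=True)

valid_payments, invalid_payments = bob.payments.get_actual_payments(
    employee_ids=["12345", "67890"],   # placeholder IDs; omit to fetch all employees
    pay_date_from="2024-01-01",
    pay_date_to="2024-01-31"
)
# Pagination is handled internally, so the frames contain every matching payment.
print(len(valid_payments), "valid rows,", len(invalid_payments), "invalid rows")
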
@@ -0,0 +1,71 @@
+ import pandas as pd
+ from brynq_sdk_functions import Functions
+ from .schemas.people import PeopleSchema
+
+ class History:
+     def __init__(self, bob):
+         self.bob = bob
+         self.schema = PeopleSchema
+         self.field_name_in_body, self.field_name_in_response, self.endpoint_to_response = self._init_fields()
+
+     def get(self, additional_fields: list[str] = [], field_selection: list[str] = []) -> tuple[pd.DataFrame, pd.DataFrame]:
+         """
+         Get people from Bob
+
+         Args:
+             additional_fields (list[str]): Additional fields to get (not defined in the schema)
+             field_selection (list[str]): Fields to get (defined in the schema); if not provided, all fields are returned
+         """
+         # resp = self.bob.session.get(url=f"{self.bob.base_url}profiles", timeout=self.bob.timeout)
+         body_fields = list(set(self.field_name_in_body + additional_fields))
+         response_fields = list(set(self.field_name_in_response + additional_fields))
+
+         if field_selection:
+             body_fields = [field for field in body_fields if field in field_selection]
+             response_fields = [self.endpoint_to_response.get(field) for field in field_selection if field in self.endpoint_to_response]
+
+         # The default field set is limited, so a search call is needed to retrieve additional fields.
+         resp_additional_fields = self.bob.session.post(url=f"{self.bob.base_url}people/search",
+                                                        json={
+                                                            "fields": body_fields,
+                                                            "filters": []
+                                                        },
+                                                        timeout=self.bob.timeout)
+         json_response = resp_additional_fields.json()
+         df = pd.json_normalize(json_response['employees'])
+         df = df[[col for col in response_fields if col in df.columns]]
+         # Validate the selected columns against PeopleSchema
+         valid_people, invalid_people = Functions.validate_data(df=df, schema=PeopleSchema, debug=True)
+         return valid_people, invalid_people
+
+
+     def _init_fields(self) -> tuple[list[str], list[str], dict[str, str]]:
+         resp_fields = self.bob.session.get(
+             url=f"{self.bob.base_url}company/people/fields",
+             timeout=self.bob.timeout,
+             headers=self.bob.headers
+         )
+         fields = resp_fields.json()
+         field_name_in_body = [field.get('id') for field in fields]
+         field_name_in_response = [field['jsonPath'] for field in fields]
+         endpoint_to_response = {field['id']: field['jsonPath'] for field in fields}
+         return field_name_in_body, field_name_in_response, endpoint_to_response
+
+     def _get_employee_id_to_person_id_mapping(self) -> tuple[pd.DataFrame, pd.DataFrame]:
+         employee_id_in_company = "work.employeeIdInCompany"
+         person_id = "root.id"
+
+         body_fields = [employee_id_in_company, person_id]
+         response_fields = [self.endpoint_to_response.get(field) for field in body_fields if field in self.endpoint_to_response]
+
+         resp_additional_fields = self.bob.session.post(url=f"{self.bob.base_url}people/search",
+                                                        json={
+                                                            "fields": body_fields,
+                                                            "filters": []
+                                                        },
+                                                        timeout=self.bob.timeout)
+         df = pd.json_normalize(resp_additional_fields.json()['employees'])
+         df = df[[col for col in response_fields if col in df.columns]]
+         # Validate the selected columns against PeopleSchema
+         valid_people, invalid_people = Functions.validate_data(df=df, schema=PeopleSchema, debug=True)
+         return valid_people, invalid_people
@@ -0,0 +1,104 @@
+ import pandas as pd
+ from typing import Optional, List
+ from brynq_sdk_functions import Functions
+ from brynq_sdk_functions import BrynQPanderaDataFrameModel
+ from .bank import Bank
+ from .employment import Employment
+ from .salaries import Salaries
+ from .schemas.people import PeopleSchema
+ from .work import Work
+ from .custom_tables import CustomTables
+
+
+ class People:
+     def __init__(self, bob):
+         self.bob = bob
+         self.salaries = Salaries(bob)
+         self.employment = Employment(bob)
+         self.bank = Bank(bob)
+         self.work = Work(bob)
+         self.custom_tables = CustomTables(bob)
+         self.schema = PeopleSchema
+
+
+     # Build API fields using column metadata if present (api_field), otherwise use the column (alias) name
+     def __build_api_fields(self, schema_model: BrynQPanderaDataFrameModel) -> list[str]:
+         schema = schema_model.to_schema()
+         return [
+             ((getattr(col, "metadata", None) or {}).get("api_field")) or col_name
+             for col_name, col in schema.columns.items()
+         ]
+
+     def get(self, schema_custom_fields: Optional[BrynQPanderaDataFrameModel] = None, employee_ids: Optional[List[str]] = None, show_inactive: bool = False) -> tuple[pd.DataFrame, pd.DataFrame]:
+         core_fields = self.__build_api_fields(PeopleSchema)
+         custom_fields = self.__build_api_fields(schema_custom_fields) if schema_custom_fields is not None else []
+         fields = core_fields + custom_fields
+
+         # Build filters based on employee_ids if provided
+         filters = []
+         if employee_ids is not None:
+             filters = [
+                 {
+                     "fieldPath": "root.id",
+                     "operator": "equals",
+                     "values": employee_ids
+                 }
+             ]
+
+         resp = self.bob.session.post(url=f"{self.bob.base_url}people/search",
+                                      json={
+                                          "fields": fields,
+                                          "filters": filters,
+                                          "showInactive": show_inactive,
+                                          # "humanReadable": "REPLACE"
+                                      },
+                                      timeout=self.bob.timeout)
+         resp.raise_for_status()
+         df = pd.json_normalize(resp.json()['employees'])
+         if df.empty and employee_ids is not None and resp.status_code == 200:
+             raise Exception(f"No employees found in HiBob for employee_ids: {employee_ids}")
+
+         df = df.loc[:, ~df.columns.str.contains('value')]
+
+         # Normalize separators in incoming data: convert '/' to '.' to match schema aliases
+         df.columns = df.columns.str.replace('/', '.', regex=False)
+
+         # Many fields are returned by Bob as IDs only. Those IDs are mapped to names via the named-lists endpoint.
+         resp_named_lists = self.bob.session.get(url=f"{self.bob.base_url}company/named-lists", timeout=self.bob.timeout, headers=self.bob.headers)
+         named_lists = resp_named_lists.json()
+         # save json to file
+         # import json
+         # with open('named_lists.json', 'w') as f:
+         #     json.dump(named_lists, f, indent=4)
+
+         # Transform named_lists to create id-to-value mappings for each field
+         named_lists = {key.split('.')[-1]: {item['id']: item['value'] for item in value['values']} for key, value in named_lists.items()}
+
+         deviating_named_list_cols_mapping = {
+             'payroll.employment.type': 'payrollEmploymentType',
+             'home.familyStatus': 'familystatus',
+             'personal.nationality': 'nationalities',
+         }
+
+         for field in df.columns:
+             # Fields in the response and in the named lists have different building blocks (e.g. people.payroll.entitlement. or people.entitlement.), but they end with the same last block.
+             field_df = field.split('.')[-1].split('work_')[-1]
+
+             # Check if this field has a deviating mapping
+             named_list_key = deviating_named_list_cols_mapping.get(field, field_df)
+
+             if named_list_key in named_lists.keys() and named_list_key not in ['site']:
+                 mapping = named_lists[named_list_key]
+                 df[field] = df[field].apply(
+                     lambda v: [mapping.get(x, x) for x in v] if isinstance(v, list) else mapping.get(v, v)
+                 )
+
+         if schema_custom_fields is not None:
+             valid_people, invalid_people_custom = Functions.validate_data(df=df, schema=schema_custom_fields, debug=True)
+         else:
+             valid_people = df
+             invalid_people_custom = pd.DataFrame()
+
+         valid_people, invalid_people = Functions.validate_data(df=valid_people, schema=PeopleSchema, debug=True)
+
+         return valid_people, pd.concat([invalid_people, invalid_people_custom])
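Finally, a sketch of People.get. A custom BrynQPanderaDataFrameModel can additionally be passed via schema_custom_fields, in which case its columns (or their api_field metadata) are appended to the requested fields; the employee ID below is a placeholder.

from brynq_sdk_bob import Bob  # import path assumed

bob = Bob(test_environment=True)

# Core schema fields only, active employees:
valid_people, invalid_people = bob.people.get()

# Restrict to specific employees and include inactive ones:
valid_subset, invalid_subset = bob.people.get(employee_ids=["12345"], show_inactive=True)
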