brynq-sdk-bob 2.4.4__tar.gz → 2.5.1.dev0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {brynq_sdk_bob-2.4.4 → brynq_sdk_bob-2.5.1.dev0}/PKG-INFO +1 -1
- brynq_sdk_bob-2.5.1.dev0/brynq_sdk_bob/__init__.py +349 -0
- {brynq_sdk_bob-2.4.4 → brynq_sdk_bob-2.5.1.dev0}/brynq_sdk_bob/payments.py +12 -10
- {brynq_sdk_bob-2.4.4 → brynq_sdk_bob-2.5.1.dev0}/brynq_sdk_bob/people.py +11 -35
- brynq_sdk_bob-2.5.1.dev0/brynq_sdk_bob/salaries.py +38 -0
- {brynq_sdk_bob-2.4.4 → brynq_sdk_bob-2.5.1.dev0}/brynq_sdk_bob/schemas/employment.py +1 -1
- {brynq_sdk_bob-2.4.4 → brynq_sdk_bob-2.5.1.dev0}/brynq_sdk_bob/schemas/payments.py +3 -2
- brynq_sdk_bob-2.5.1.dev0/brynq_sdk_bob/schemas/people.py +285 -0
- brynq_sdk_bob-2.5.1.dev0/brynq_sdk_bob/schemas/salary.py +49 -0
- {brynq_sdk_bob-2.4.4 → brynq_sdk_bob-2.5.1.dev0}/brynq_sdk_bob/schemas/timeoff.py +13 -13
- {brynq_sdk_bob-2.4.4 → brynq_sdk_bob-2.5.1.dev0}/brynq_sdk_bob/schemas/work.py +8 -8
- {brynq_sdk_bob-2.4.4 → brynq_sdk_bob-2.5.1.dev0}/brynq_sdk_bob.egg-info/PKG-INFO +1 -1
- {brynq_sdk_bob-2.4.4 → brynq_sdk_bob-2.5.1.dev0}/setup.py +1 -1
- brynq_sdk_bob-2.4.4/brynq_sdk_bob/__init__.py +0 -73
- brynq_sdk_bob-2.4.4/brynq_sdk_bob/salaries.py +0 -24
- brynq_sdk_bob-2.4.4/brynq_sdk_bob/schemas/people.py +0 -100
- brynq_sdk_bob-2.4.4/brynq_sdk_bob/schemas/salary.py +0 -25
- {brynq_sdk_bob-2.4.4 → brynq_sdk_bob-2.5.1.dev0}/brynq_sdk_bob/bank.py +0 -0
- {brynq_sdk_bob-2.4.4 → brynq_sdk_bob-2.5.1.dev0}/brynq_sdk_bob/company.py +0 -0
- {brynq_sdk_bob-2.4.4 → brynq_sdk_bob-2.5.1.dev0}/brynq_sdk_bob/custom_tables.py +0 -0
- {brynq_sdk_bob-2.4.4 → brynq_sdk_bob-2.5.1.dev0}/brynq_sdk_bob/documents.py +0 -0
- {brynq_sdk_bob-2.4.4 → brynq_sdk_bob-2.5.1.dev0}/brynq_sdk_bob/employment.py +0 -0
- {brynq_sdk_bob-2.4.4 → brynq_sdk_bob-2.5.1.dev0}/brynq_sdk_bob/named_lists.py +0 -0
- {brynq_sdk_bob-2.4.4 → brynq_sdk_bob-2.5.1.dev0}/brynq_sdk_bob/payroll_history.py +0 -0
- {brynq_sdk_bob-2.4.4 → brynq_sdk_bob-2.5.1.dev0}/brynq_sdk_bob/schemas/__init__.py +0 -0
- {brynq_sdk_bob-2.4.4 → brynq_sdk_bob-2.5.1.dev0}/brynq_sdk_bob/schemas/bank.py +0 -0
- {brynq_sdk_bob-2.4.4 → brynq_sdk_bob-2.5.1.dev0}/brynq_sdk_bob/schemas/custom_tables.py +0 -0
- {brynq_sdk_bob-2.4.4 → brynq_sdk_bob-2.5.1.dev0}/brynq_sdk_bob/schemas/named_lists.py +0 -0
- {brynq_sdk_bob-2.4.4 → brynq_sdk_bob-2.5.1.dev0}/brynq_sdk_bob/schemas/payroll_history.py +0 -0
- {brynq_sdk_bob-2.4.4 → brynq_sdk_bob-2.5.1.dev0}/brynq_sdk_bob/timeoff.py +0 -0
- {brynq_sdk_bob-2.4.4 → brynq_sdk_bob-2.5.1.dev0}/brynq_sdk_bob/work.py +0 -0
- {brynq_sdk_bob-2.4.4 → brynq_sdk_bob-2.5.1.dev0}/brynq_sdk_bob.egg-info/SOURCES.txt +0 -0
- {brynq_sdk_bob-2.4.4 → brynq_sdk_bob-2.5.1.dev0}/brynq_sdk_bob.egg-info/dependency_links.txt +0 -0
- {brynq_sdk_bob-2.4.4 → brynq_sdk_bob-2.5.1.dev0}/brynq_sdk_bob.egg-info/not-zip-safe +0 -0
- {brynq_sdk_bob-2.4.4 → brynq_sdk_bob-2.5.1.dev0}/brynq_sdk_bob.egg-info/requires.txt +0 -0
- {brynq_sdk_bob-2.4.4 → brynq_sdk_bob-2.5.1.dev0}/brynq_sdk_bob.egg-info/top_level.txt +0 -0
- {brynq_sdk_bob-2.4.4 → brynq_sdk_bob-2.5.1.dev0}/setup.cfg +0 -0
brynq_sdk_bob/__init__.py
@@ -0,0 +1,349 @@
+import base64
+import re
+import inspect
+from typing import Union, List, Optional, Literal
+import pandas as pd
+import requests
+import os
+from brynq_sdk_brynq import BrynQ
+from brynq_sdk_functions import Functions
+from .bank import Bank
+from .company import Company
+from .documents import CustomDocuments
+from .employment import Employment
+from .named_lists import NamedLists
+from .payments import Payments
+from .people import People
+from .salaries import Salaries
+from .timeoff import TimeOff
+from .work import Work
+from .custom_tables import CustomTables
+from .payroll_history import History
+
+class Bob(BrynQ):
+    def __init__(self, system_type: Optional[Literal['source', 'target']] = None, test_environment: bool = True, debug: bool = False, target_system: str = None):
+        super().__init__()
+        self.timeout = 3600
+        self.headers = self._get_request_headers(system_type)
+        if test_environment:
+            self.base_url = "https://api.sandbox.hibob.com/v1/"
+        else:
+            self.base_url = "https://api.hibob.com/v1/"
+        self.session = requests.Session()
+        self.session.headers.update(self.headers)
+        self.people = People(self)
+        self.salaries = Salaries(self)
+        self.work = Work(self)
+        self.bank = Bank(self)
+        self.employment = Employment(self)
+        self.payments = Payments(self)
+        self.time_off = TimeOff(self)
+        self.documents = CustomDocuments(self)
+        self.companies = Company(self)
+        self.named_lists = NamedLists(self)
+        self.custom_tables = CustomTables(self)
+        self.payroll_history = History(self)
+        self.data_interface_id = os.getenv("DATA_INTERFACE_ID")
+        self.debug = debug
+        self.bob_dir = "bob_data"  # Directory to save Bob data files
+        self.setup_schema_endpoint_mapping()
+
+    def _get_request_headers(self, system_type):
+        credentials = self.interfaces.credentials.get(system='bob', system_type=system_type)
+        auth_token = base64.b64encode(f"{credentials.get('data').get('User ID')}:{credentials.get('data').get('API Token')}".encode()).decode('utf-8')
+        headers = {
+            "accept": "application/json",
+            "Authorization": f"Basic {auth_token}",
+            "Partner-Token": "001Vg00000A6FY6IAN"
+        }
+
+        return headers
+
+    def get_paginated_result(self, request: requests.Request) -> List:
+        has_next_page = True
+        result_data = []
+        while has_next_page:
+            prepped = request.prepare()
+            prepped.headers.update(self.session.headers)
+            resp = self.session.send(prepped, timeout=self.timeout)
+            resp.raise_for_status()
+            response_data = resp.json()
+            result_data += response_data['results']
+            next_cursor = response_data.get('response_metadata').get('next_cursor')
+            # If there is no next page, set has_next_page to False; we could rely on the falsy value of None, but this is more readable
+            has_next_page = next_cursor is not None
+            if has_next_page:
+                request.params.update({"cursor": next_cursor})
+
+        return result_data
+
+    # Used in conjunction with the scenario SDK: it collects all schemas and corresponding fields and passes them to get_data_for_schemas, which needs this mapping to resolve each schema name to the corresponding endpoint.
+    def setup_schema_endpoint_mapping(self):
+        self.schema_endpoint_map = {
+            "PeopleSchema": self.people,
+            "SalarySchema": self.salaries,
+            "WorkSchema": self.work,
+            "BankSchema": self.bank,
+            "EmploymentSchema": self.employment,
+            "VariablePaymentSchema": self.payments,
+            "ActualPaymentsSchema": self.payments,
+            "TimeOffSchema": self.time_off,
+            "TimeOffBalanceSchema": self.time_off,
+            "PayrollHistorySchema": self.payroll_history,
+            "CustomTableSchema": self.custom_tables,
+            "CustomTableMetadataSchema": self.custom_tables,
+            "NamedListSchema": self.named_lists,
+            # Note: DocumentsSchema and CompanySchema don't have corresponding schema classes yet,
+            # but keeping them for backward compatibility
+            "DocumentsSchema": self.documents,
+            "CompanySchema": self.companies,
+        }
+
+    def get_data_for_schemas(self, schemas: dict[str, set], save_dir=None) -> dict:
+        """
+        Get data for each schema using the schema-to-fields mapping from the scenario SDK.
+
+        This method integrates with the BrynQ scenario SDK to retrieve data based on schema
+        definitions. It automatically maps schema names to the appropriate Bob API endpoints
+        and retrieves only the fields specified in the schema-to-fields mapping.
+
+        NOTE:
+        "endpoint_obj" is just a variable that represents the specific Bob API client (or "endpoint") for a given type of data.
+        For example, if you want people data, endpoint_obj would be self.people.
+        If you want salary data, endpoint_obj would be self.salaries, and so on.
+        Each of these endpoint objects knows how to fetch data for its specific schema/table from Bob.
+        So "endpoint_obj" is basically a shortcut to the right part of the Bob SDK that knows how to get the data you want.
+
+        Args:
+            schemas: Dictionary mapping schema names to sets of fields.
+                Example: {'PeopleSchema': {'firstName', 'lastName', 'email'},
+                          'WorkSchema': {'title', 'department', 'site'}}
+            save_dir: Optional directory path to save parquet files. Can be a string or path object
+                (e.g., os.path.join(self.basedir, "data", "bob_to_zenegy")). If None, files are not saved to disk.
+
+        Returns:
+            Dictionary with results for each schema containing:
+            - 'dataframe': The retrieved data as a pandas DataFrame
+            - 'filepath': Path where the data was saved as a parquet file (None if save_dir is None)
+            - 'fields': List of fields that were requested
+            - 'status_message': Status message about field retrieval
+            - 'status_level': Status level (INFO/WARNING/ERROR)
+
+        Integration with the scenario SDK:
+        This method is designed to work seamlessly with the BrynQ scenario SDK:
+        1. Use scenarios.get_schema_field_mapping() to get the schema-to-fields mapping
+        2. Pass the mapping to this method to retrieve data
+        3. The method automatically handles endpoint mapping and field selection
+        4. Field tracking shows exactly which requested fields were returned vs missing
+
+        Example usage:
+            # Initialize Bob SDK
+            bob = Bob(system_type='source')
+
+            # Get schema-to-fields mapping from scenarios
+            schema_fields = bob.interfaces.scenarios.get_schema_field_mapping()
+
+            # Get data for specific schemas
+            results = bob.get_data_for_schemas({
+                'PeopleSchema': schema_fields['PeopleSchema'],
+                'WorkSchema': schema_fields['WorkSchema']
+            }, save_dir=os.path.join('data', 'bob_to_zenegy'))
+
+            # Access results and status messages
+            for schema_name, result in results.items():
+                print(f"Schema: {schema_name}")
+                print(f"Status: {result['status_message']}")
+                print(f"Level: {result['status_level']}")
+                print(f"Data shape: {result['dataframe'].shape}")
+                print(f"Saved to: {result['filepath']}")
+
+            # Process the data
+            people_data = results['PeopleSchema']['dataframe']
+            work_data = results['WorkSchema']['dataframe']
+
+            # Example with a path object
+            custom_path = os.path.join('data', 'bob_to_zenegy')
+            results_with_path = bob.get_data_for_schemas({
+                'PeopleSchema': schema_fields['PeopleSchema']
+            }, save_dir=custom_path)
+        """
+        results = {}
+
+        # Validate input
+        if not schemas:
+            print("Warning: No schemas provided")
+            return results
+
+        # Process each schema
+        for schema_name, fields in schemas.items():
+            # Validate schema name and fields
+            if not schema_name:
+                print("Warning: Empty schema name provided, skipping")
+                continue
+
+            if not fields:
+                print(f"Warning: No fields provided for schema '{schema_name}', skipping")
+                continue
+
+            # Get the endpoint/service for this schema
+            endpoint_obj = self.schema_endpoint_map.get(schema_name)
+
+            if endpoint_obj is None:
+                print(f"Warning: No endpoint found for schema '{schema_name}'. Available schemas: {list(self.schema_endpoint_map.keys())}")
+                continue
+
+            try:
+                # Get data using the service endpoint
+                df_bob, status_message, status_level = self._handle_endpoint(endpoint_obj, list(fields), schema_name)
+            except Exception as e:
+                print(f"Error processing schema '{schema_name}': {str(e)}")
+                results[schema_name] = {
+                    'dataframe': pd.DataFrame(),
+                    'filepath': None,
+                    'fields': list(fields),
+                    'status_message': f"Error processing schema '{schema_name}': {str(e)}",
+                    'status_level': 'ERROR'
+                }
+                continue
+
+            # Save the result
+            if save_dir:
+                filename = f"bob_{schema_name.replace(' ', '_')}.parquet"
+                output_dir = save_dir if save_dir is not None else self.bob_dir
+                os.makedirs(output_dir, exist_ok=True)
+                filepath = os.path.join(output_dir, filename)
+                df_bob.to_parquet(filepath)
+            else:
+                filepath = None
+
+            results[schema_name] = {
+                'dataframe': df_bob,
+                'filepath': filepath,
+                'fields': list(fields),
+                'status_message': status_message,
+                'status_level': status_level
+            }
+        return results
+
+    def _handle_endpoint(self, endpoint_obj, body_fields: List[str], schema_name: str) -> tuple[pd.DataFrame, str, str]:
+        """
+        Handle data retrieval for a given endpoint object (e.g., self.people, self.work, etc.).
+
+        Args:
+            endpoint_obj: The endpoint object responsible for fetching data for a specific schema.
+                For example, this could be self.people, self.work, self.salaries, etc.
+                (Think of these as "API clients" or "data access classes" for each schema/table.)
+            body_fields: List of fields to retrieve
+            schema_name: Name of the schema being processed
+
+        Returns:
+            tuple[pd.DataFrame, str, str]: DataFrame, status message, and status level
+        """
+        get_method = endpoint_obj.get
+
+        # Check if the method accepts a field_selection parameter
+        sig = inspect.signature(get_method)
+        if 'field_selection' in sig.parameters and 'person_ids' not in sig.parameters:
+            bob_data_valid, _ = get_method(field_selection=body_fields)
+        # elif 'person_id' in sig.parameters:
+        #     bob_data_valid, _ = self._fetch_data_with_person_id(get_method)
+        # elif 'person_ids' in sig.parameters and 'field_selection' in sig.parameters:
+        #     bob_data_valid, _ = self._fetch_data_with_person_ids(get_method, body_fields)
+        else:
+            bob_data_valid, _ = get_method()
+        df_bob = pd.DataFrame(bob_data_valid)
+
+        # Track field retrieval success/failure and handle missing fields
+        status_message, status_level = self._log_field_retrieval_status(df_bob, body_fields, schema_name)
+
+        return df_bob, status_message, status_level
+
+    def _log_field_retrieval_status(self, df_bob: pd.DataFrame, body_fields: List[str], schema_name: str) -> tuple[str, str]:
+        """
+        Checks whether the data returned from the Bob API actually contains all the fields you asked for.
+
+        This function counts how many fields you requested (body_fields)
+        and how many columns you actually got back in the DataFrame (df_bob).
+
+        - If the numbers differ, some fields you wanted are missing from the result.
+        - If the numbers match, you got everything you asked for.
+        - If the DataFrame is empty, the Bob API returned no data at all.
+
+        Args:
+            df_bob: The DataFrame you got back from the Bob API (could be empty or missing columns).
+            body_fields: The list of field names you told the API you wanted.
+            schema_name: The name of the schema/table you were trying to get.
+
+        Returns:
+            tuple[str, str]:
+            - A human-readable status message (for logs or debugging).
+            - A status level string: "DEBUG" (all good or minor mismatch) or "ERROR" (no data at all).
+        """
+        if not df_bob.empty:
+            requested_count = len(body_fields)
+            returned_count = len(df_bob.columns)
+
+            if requested_count != returned_count:
+                status_message = (f"Schema '{schema_name}' [INFO]:\n"
+                                  f"Requested {requested_count} fields, got {returned_count} fields\n"
+                                  f"Total records: {len(df_bob)}")
+                return status_message, "DEBUG"
+            else:
+                status_message = (f"Schema '{schema_name}': All {requested_count} requested fields "
+                                  f"successfully retrieved from Bob API ({len(df_bob)} records)")
+                return status_message, "DEBUG"
+        else:
+            return f"Schema '{schema_name}' [ERROR]: No data returned from Bob API", "ERROR"
+
+    def initialize_person_id_mapping(self) -> pd.DataFrame:
+        """
+        Creates a mapping DataFrame between Bob's internal person ID (`root.id`) and the employee ID in the company
+        (`work.employeeIdInCompany`).
+
+        This is a utility function for situations where you need to join or map data between endpoints/scenarios that use different
+        identifiers for people. In scenarios, `root.id` may be used as the primary key, but some Bob API endpoints require the employee ID.
+        This function helps you convert between them.
+
+        Note:
+        - This is NOT required for the Bob SDK to function, but is a convenience tool you can call from the interface
+          whenever you need to perform such a mapping.
+        - The mapping is especially useful when you have data from other sources (e.g., payroll, HRIS exports) that use
+          employee IDs, and you want to join or compare them with data from Bob, which often uses person IDs.
+
+        Returns:
+            pd.DataFrame: A DataFrame with two columns:
+            - 'person_id': The unique person identifier in Bob (formerly `root.id`)
+            - 'employee_id_in_company': The employee ID as used in your company (formerly `work.employeeIdInCompany`)
+
+            If no people are found, returns an empty DataFrame with these columns.
+
+        Example:
+            >>> df = sdk.initialize_person_id_mapping()
+            >>> # Now you can merge/join on 'person_id' or 'employee_id_in_company' as needed
+        """
+        # Only fetch the two fields needed for the mapping
+        field_selection = ['work.employeeIdInCompany', 'root.id']
+
+        # Use the Bob SDK to get the people data with just those fields
+        valid_people, _ = self.people.get(field_selection=field_selection)
+
+        # The SDK renames:
+        #   root.id -> id
+        #   work.employeeIdInCompany -> work_employee_id_in_company
+
+        if not valid_people.empty:
+            # Rename columns to standard names for mapping
+            valid_people = valid_people.rename(
+                columns={
+                    'id': 'person_id',
+                    'work_employee_id_in_company': 'employee_id_in_company'
+                }
+            )
+            self.person_id_to_employee_id_in_company = valid_people[['person_id', 'employee_id_in_company']].copy()
+        else:
+            # Return an empty DataFrame with the expected columns if no data
+            self.person_id_to_employee_id_in_company = pd.DataFrame(
+                columns=['person_id', 'employee_id_in_company']
+            )
+        return self.person_id_to_employee_id_in_company
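Illustrative only (not part of the package): the new initialize_person_id_mapping helper is meant for joins like the sketch below, where payroll_df is a hypothetical external frame keyed on the company employee ID.

    import pandas as pd
    from brynq_sdk_bob import Bob

    bob = Bob(system_type='source')
    id_map = bob.initialize_person_id_mapping()

    # Hypothetical external data keyed on the company employee ID
    payroll_df = pd.DataFrame({'employee_id_in_company': ['1001', '1002'],
                               'net_pay': [3200.0, 2950.0]})

    # Attach Bob's internal person_id so the rows can be joined with other Bob endpoints
    merged = payroll_df.merge(id_map, on='employee_id_in_company', how='left')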
brynq_sdk_bob/payments.py
@@ -9,17 +9,19 @@ class Payments:
         self.bob = bob
         self.schema = VariablePaymentSchema
 
-    def get(self,
-        …
-        data
-        …
+    def get(self, person_ids: List[str]) -> (pd.DataFrame, pd.DataFrame):
+        df = pd.DataFrame()
+        for person_id in person_ids:
+            resp = self.bob.session.get(url=f"{self.bob.base_url}people/{person_id}/variable", timeout=self.bob.timeout)
+            resp.raise_for_status()
+            data = resp.json()
+            df = pd.concat([df, pd.json_normalize(
+                data,
+                record_path='values'
+            )])
+            df['employee_id'] = person_id
+        df = df.reset_index(drop=True)
         valid_payments, invalid_payments = Functions.validate_data(df=df, schema=self.schema, debug=True)
-
         return valid_payments, invalid_payments
 
     def get_actual_payments(
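Since get now takes explicit person IDs, a minimal calling sketch (the ID is hypothetical, not from the package):

    from brynq_sdk_bob import Bob

    bob = Bob(system_type='source')
    # One GET per person; the per-person frames are concatenated and validated against VariablePaymentSchema
    valid_payments, invalid_payments = bob.payments.get(person_ids=['12345'])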
brynq_sdk_bob/people.py
@@ -1,6 +1,7 @@
 import pandas as pd
-import …
+from typing import Optional
 from brynq_sdk_functions import Functions
+from brynq_sdk_functions import BrynQPanderaDataFrameModel
 from .bank import Bank
 from .employment import Employment
 from .salaries import Salaries
@@ -127,7 +128,7 @@ class People:
 
             if isinstance(sample_value, dict):
                 # Flatten nested structure
-                nested_df = pd.json_normalize(df_result[col].tolist())
+                nested_df = pd.json_normalize(df_result[col].tolist(), max_level=10)
 
                 # Rename columns to include the original column name as prefix
                 nested_df.columns = [f"{col}.{subcol}" for subcol in nested_df.columns]
@@ -228,7 +229,7 @@ class People:
                 "filters": []
             },
             timeout=self.bob.timeout)
-        df = pd.json_normalize(resp_additional_fields.json()['employees'])
+        df = pd.json_normalize(resp_additional_fields.json()['employees'], max_level=10)
 
         # Validate payroll types if requested
         valid_payroll_types = []
@@ -246,6 +247,9 @@ class People:
             # Keep if it's in response_fields
             if col in response_fields:
                 columns_to_keep.append(col)
+            # Or if it starts with any response_field followed by a dot (for nested fields)
+            elif any(col.startswith(field + '.') for field in response_fields):
+                columns_to_keep.append(col)
             # Or if it's a payroll column (original or flattened)
             elif valid_payroll_types:
                 for payroll_type in valid_payroll_types:
@@ -256,36 +260,8 @@ class People:
 
         df = df[columns_to_keep]
 
-        #
-
-        df = df.rename(columns={col: response_to_endpoint[col] for col in df.columns if col in response_to_endpoint})
-
-        # Extract payroll information if requested
-        payroll_dataframes = {}
-        if valid_payroll_types:
-            for payroll_type in valid_payroll_types:
-                # Extract payroll columns into separate DataFrame
-                df_payroll = self._extract_payroll_columns(df, payroll_type)
-                if not df_payroll.empty:
-                    payroll_dataframes[payroll_type] = df_payroll
-
-                # Remove payroll columns from main DataFrame for validation
-                pattern = self.payroll_types[payroll_type]['pattern']
-                payroll_columns = [col for col in df.columns if pattern in col.lower()]
-                if payroll_columns:
-                    df = df.drop(columns=payroll_columns)
-
-        # Validate the data (without payroll information)
-        valid_people, invalid_people = Functions.validate_data(df=df, schema=PeopleSchema, debug=True)
-
-        # Append all payroll information to valid_people if they exist
-        if payroll_dataframes and not valid_people.empty:
-            for payroll_type, df_payroll in payroll_dataframes.items():
-                # Only include payroll data for valid rows
-                df_payroll_valid = df_payroll.loc[valid_people.index]
-                # Append payroll columns to valid_people
-                valid_people = pd.concat([valid_people, df_payroll_valid], axis=1)
-
+        # Normalize separators in incoming data: convert '/' to '.' to match schema aliases
+        df.columns = df.columns.str.replace('/', '.', regex=False)
 
         # A lot of fields from Bob are returned with only IDs. Those fields should be mapped to names, so we need the mapping from the named-lists endpoint.
         resp_named_lists = self.bob.session.get(url=f"{self.bob.base_url}company/named-lists", timeout=self.bob.timeout, headers=self.bob.headers)
@@ -294,7 +270,7 @@ class People:
         # Transform named_lists to create id-to-value mappings for each field
         named_lists = {key.split('.')[-1]: {item['id']: item['value'] for item in value['values']} for key, value in named_lists.items()}
 
-        for field in …
+        for field in df.columns:
             # Fields in the response and in the named-list have different building blocks (e.g. people.payroll.entitlement. or people.entitlement.), but both end with the same last block
             field_df = field.split('.')[-1].split('work_')[-1]
             if field_df in named_lists.keys() and field_df not in ['site']:
@@ -333,7 +309,7 @@ class People:
                 "filters": []
             },
             timeout=self.bob.timeout)
-        df = pd.json_normalize(resp_additional_fields.json()['employees'])
+        df = pd.json_normalize(resp_additional_fields.json()['employees'], max_level=10)
         df = df[[col for col in response_fields if col in df.columns]]
         # Get the valid column names from PeopleSchema
         valid_people, invalid_people = Functions.validate_data(df=df, schema=PeopleSchema, debug=True)
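The max_level=10 and field + '.' changes above work together: pd.json_normalize flattens nested objects into dot-separated columns, so a requested field can reappear only as its flattened children. An illustrative snippet (the sample data is invented):

    import pandas as pd

    employees = [{'root': {'id': '123'},
                  'work': {'title': 'Engineer', 'site': {'name': 'NL'}}}]
    df = pd.json_normalize(employees, max_level=10)
    # Columns come back dot-separated: ['root.id', 'work.title', 'work.site.name']

    response_fields = ['root.id', 'work.site']
    # 'work.site' itself is no longer a column after flattening, but its children are,
    # so keep exact matches plus anything under '<field>.'
    keep = [col for col in df.columns
            if col in response_fields
            or any(col.startswith(field + '.') for field in response_fields)]
    # keep == ['root.id', 'work.site.name']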
brynq_sdk_bob/salaries.py
@@ -0,0 +1,38 @@
+import pandas as pd
+import requests
+from brynq_sdk_functions import Functions
+from .schemas.salary import SalarySchema, SalaryCreateSchema
+
+
+class Salaries:
+    def __init__(self, bob):
+        self.bob = bob
+        self.schema = SalarySchema
+
+    def get(self) -> tuple[pd.DataFrame, pd.DataFrame]:
+        request = requests.Request(method='GET',
+                                   url=f"{self.bob.base_url}bulk/people/salaries",
+                                   params={"limit": 100})
+        data = self.bob.get_paginated_result(request)
+        df = pd.json_normalize(
+            data,
+            record_path='values',
+            meta=['employeeId']
+        )
+        valid_salaries, invalid_salaries = Functions.validate_data(df=df, schema=SalarySchema, debug=True)
+
+        return valid_salaries, invalid_salaries
+
+    def create(self, salary_data: dict) -> requests.Response:
+        nested_data = self.nmbrs.flat_dict_to_nested_dict(salary_data, SalaryCreateSchema)
+        salary_data = SalaryCreateSchema(**nested_data)
+        payload = salary_data.model_dump(exclude_none=True, by_alias=True)
+
+        resp = self.bob.session.post(url=f"{self.bob.base_url}people/{salary_data.employee_id}/salaries", json=payload)
+        resp.raise_for_status()
+        return resp
+
+    def delete(self, employee_id: str, salary_id: str) -> requests.Response:
+        resp = self.bob.session.delete(url=f"{self.bob.base_url}people/{employee_id}/salaries/{salary_id}")
+        resp.raise_for_status()
+        return resp
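A usage sketch for the new Salaries class (IDs hypothetical). Note that create as released references self.nmbrs, which is never set on this class, so only the read and delete paths are sketched:

    from brynq_sdk_bob import Bob

    bob = Bob(system_type='source')

    # Bulk fetch via the paginated bulk endpoint; employeeId is carried along as a meta column
    valid_salaries, invalid_salaries = bob.salaries.get()

    # Remove a single salary entry
    bob.salaries.delete(employee_id='12345', salary_id='1')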
brynq_sdk_bob/schemas/employment.py
@@ -6,7 +6,7 @@ from brynq_sdk_functions import BrynQPanderaDataFrameModel
 
 class EmploymentSchema(BrynQPanderaDataFrameModel):
     id: Series[pd.Int64Dtype] = pa.Field(coerce=True, description="Employment ID", alias="id")
-    employee_id: Series[…
+    employee_id: Series[pd.Int64Dtype] = pa.Field(coerce=True, description="Employee ID", alias="employeeId")
     active_effective_date: Series[DateTime] = pa.Field(coerce=True, description="Active Effective Date", alias="activeEffectiveDate")
     contract: Series[String] = pa.Field(coerce=True, nullable=True, description="Contract", alias="contract")  # has a list of possible values
     creation_date: Series[DateTime] = pa.Field(coerce=True, nullable=True, description="Creation Date", alias="creationDate")
brynq_sdk_bob/schemas/payments.py
@@ -21,8 +21,9 @@ class VariablePaymentSchema(BrynQPanderaDataFrameModel):
     end_effective_date: Series[DateTime] = pa.Field(nullable=True, coerce=True, description="End Effective Date", alias="endEffectiveDate")
     payment_period: Series[String] = pa.Field(coerce=True, description="Payment Period", alias="paymentPeriod")
     effective_date: Series[DateTime] = pa.Field(coerce=True, description="Effective Date", alias="effectiveDate")
-    amount_value: Series[Float] = pa.Field(coerce=True, description="Amount Value", alias="amount.value")
-
+    amount_value: Optional[Series[Float]] = pa.Field(coerce=True, description="Amount Value", alias="amount.value")
+    amount_alternative_value: Optional[Series[Float]] = pa.Field(coerce=True, description="Amount Value", alias="amount")
+    amount_currency: Optional[Series[String]] = pa.Field(coerce=True, description="Amount Currency", alias="amount.currency")
     change_reason: Series[String] = pa.Field(nullable=True, coerce=True, description="Change Reason", alias="change.reason")
     change_changed_by: Series[String] = pa.Field(nullable=True, coerce=True, description="Change Changed By", alias="change.changedBy")
     change_changed_by_id: Series[pd.Int64Dtype] = pa.Field(nullable=True, coerce=True, description="Change Changed By ID", alias="change.changedById")
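For context on the Optional[...] annotations introduced above: in plain pandera, on which these schema models appear to be built, Optional marks a column as non-required, so a payload carrying a flat amount instead of a nested amount.value still validates. A standalone sketch, independent of brynq_sdk_functions:

    import pandas as pd
    import pandera as pa
    from typing import Optional
    from pandera.typing import Series, Float

    class AmountSchema(pa.DataFrameModel):
        # Optional[...] => the column may be absent entirely
        amount_value: Optional[Series[Float]] = pa.Field(coerce=True, alias="amount.value")
        amount: Optional[Series[Float]] = pa.Field(coerce=True, alias="amount")

    AmountSchema.validate(pd.DataFrame({"amount": [100.0]}))        # flat payload passes
    AmountSchema.validate(pd.DataFrame({"amount.value": [100.0]}))  # nested payload passes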