quickbase-extract 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- quickbase_extract/__init__.py +98 -0
- quickbase_extract/api_handlers.py +210 -0
- quickbase_extract/cache_freshness.py +199 -0
- quickbase_extract/cache_manager.py +234 -0
- quickbase_extract/cache_sync.py +74 -0
- quickbase_extract/client.py +61 -0
- quickbase_extract/py.typed +0 -0
- quickbase_extract/report_data.py +253 -0
- quickbase_extract/report_metadata.py +316 -0
- quickbase_extract/utils.py +42 -0
- quickbase_extract-0.1.0.dist-info/METADATA +1735 -0
- quickbase_extract-0.1.0.dist-info/RECORD +14 -0
- quickbase_extract-0.1.0.dist-info/WHEEL +4 -0
- quickbase_extract-0.1.0.dist-info/licenses/LICENSE.txt +21 -0
|
@@ -0,0 +1,1735 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: quickbase-extract
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Extract and cache Quickbase report data with built-in error handling and S3 support
|
|
5
|
+
Project-URL: Homepage, https://github.com/tbrezler/quickbase-extract
|
|
6
|
+
Project-URL: Repository, https://github.com/tbrezler/quickbase-extract.git
|
|
7
|
+
Project-URL: Documentation, https://github.com/tbrezler/quickbase-extract#readme
|
|
8
|
+
Author-email: Tyler Brezler <tbrezler@gmail.com>
|
|
9
|
+
License: MIT
|
|
10
|
+
License-File: LICENSE.txt
|
|
11
|
+
Keywords: caching,data-extraction,etl,quickbase
|
|
12
|
+
Classifier: Development Status :: 3 - Alpha
|
|
13
|
+
Classifier: Intended Audience :: Developers
|
|
14
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
15
|
+
Classifier: Programming Language :: Python :: 3
|
|
16
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
17
|
+
Classifier: Topic :: Software Development :: Libraries :: Python Modules
|
|
18
|
+
Requires-Python: >=3.12
|
|
19
|
+
Requires-Dist: boto3>=1.26.0
|
|
20
|
+
Requires-Dist: quickbase-api>=0.3.1
|
|
21
|
+
Provides-Extra: dev
|
|
22
|
+
Requires-Dist: black>=23.0; extra == 'dev'
|
|
23
|
+
Requires-Dist: pytest-cov>=4.0; extra == 'dev'
|
|
24
|
+
Requires-Dist: pytest>=7.0; extra == 'dev'
|
|
25
|
+
Requires-Dist: ruff>=0.1.0; extra == 'dev'
|
|
26
|
+
Description-Content-Type: text/markdown
|
|
27
|
+
|
|
28
|
+
# Quickbase Extract
|
|
29
|
+
|
|
30
|
+
A Python package for efficiently extracting, transforming, and caching data from Quickbase reports with built-in error handling, retry logic, and S3 support for AWS Lambda environments.
|
|
31
|
+
|
|
32
|
+
## Features
|
|
33
|
+
|
|
34
|
+
- **🚀 Parallel Processing** - Fetch multiple reports concurrently for improved performance
|
|
35
|
+
- **💾 Smart Caching** - Local and S3-backed caching to minimize API calls
|
|
36
|
+
- **🔄 Automatic Retries** - Built-in retry logic with exponential backoff for rate limits
|
|
37
|
+
- **☁️ Lambda Ready** - First-class support for AWS Lambda with S3 cache sync
|
|
38
|
+
- **🎯 Type Safe** - Full type hints with TypedDict for better IDE support
|
|
39
|
+
- **📊 Cache Monitoring** - Tools to check cache freshness and manage stale data
|
|
40
|
+
- **🛡️ Robust Error Handling** - Comprehensive error handling with detailed logging
|
|
41
|
+
- **🔍 Data Transformation** - Automatically converts field IDs to human-readable labels
|
|
42
|
+
|
|
43
|
+
## Installation
|
|
44
|
+
|
|
45
|
+
```bash
|
|
46
|
+
pip install quickbase-extract
|
|
47
|
+
```
|
|
48
|
+
|
|
49
|
+
### Requirements
|
|
50
|
+
|
|
51
|
+
- Python 3.12+
|
|
52
|
+
- `quickbase-api` - Quickbase API client
|
|
53
|
+
- `boto3` - AWS SDK (for Lambda/S3 support)
|
|
54
|
+
|
|
55
|
+
## Quick Start
|
|
56
|
+
|
|
57
|
+
### Basic Usage
|
|
58
|
+
|
|
59
|
+
```python
|
|
60
|
+
from quickbase_extract import (
|
|
61
|
+
get_qb_client,
|
|
62
|
+
refresh_all,
|
|
63
|
+
load_report_metadata_batch,
|
|
64
|
+
get_data_parallel
|
|
65
|
+
)
|
|
66
|
+
|
|
67
|
+
# Initialize Quickbase client
|
|
68
|
+
client = get_qb_client(
|
|
69
|
+
realm="your-realm.quickbase.com",
|
|
70
|
+
user_token="YOUR_USER_TOKEN"
|
|
71
|
+
)
|
|
72
|
+
|
|
73
|
+
# Define report configurations
|
|
74
|
+
report_configs = [
|
|
75
|
+
{
|
|
76
|
+
"Description": "active_customers",
|
|
77
|
+
"App": "Sales Tracker",
|
|
78
|
+
"App ID": "bq8xyx9z",
|
|
79
|
+
"Table": "Customers",
|
|
80
|
+
"Report": "Active Customers"
|
|
81
|
+
},
|
|
82
|
+
{
|
|
83
|
+
"Description": "open_deals",
|
|
84
|
+
"App": "Sales Tracker",
|
|
85
|
+
"App ID": "bq8xyx9z",
|
|
86
|
+
"Table": "Opportunities",
|
|
87
|
+
"Report": "Open Deals"
|
|
88
|
+
}
|
|
89
|
+
]
|
|
90
|
+
|
|
91
|
+
# Step 1: Refresh metadata cache (do this once or when reports change)
|
|
92
|
+
refresh_all(client, report_configs)
|
|
93
|
+
|
|
94
|
+
# Step 2: Load metadata from cache
|
|
95
|
+
metadata = load_report_metadata_batch(report_configs)
|
|
96
|
+
|
|
97
|
+
# Step 3: Fetch data for multiple reports in parallel
|
|
98
|
+
descriptions = ["active_customers", "open_deals"]
|
|
99
|
+
data = get_data_parallel(
|
|
100
|
+
client,
|
|
101
|
+
metadata,
|
|
102
|
+
descriptions,
|
|
103
|
+
cache=True # Cache the data for later use
|
|
104
|
+
)
|
|
105
|
+
|
|
106
|
+
# Access the data
|
|
107
|
+
customers = data["active_customers"]
|
|
108
|
+
deals = data["open_deals"]
|
|
109
|
+
|
|
110
|
+
print(f"Found {len(customers)} active customers")
|
|
111
|
+
print(f"Found {len(deals)} open deals")
|
|
112
|
+
```
|
|
113
|
+
|
|
114
|
+
### Single Report Fetch
|
|
115
|
+
|
|
116
|
+
```python
|
|
117
|
+
from quickbase_extract import get_data, load_data
|
|
118
|
+
|
|
119
|
+
# Fetch a single report
|
|
120
|
+
customer_data = get_data(
|
|
121
|
+
client,
|
|
122
|
+
metadata,
|
|
123
|
+
"active_customers",
|
|
124
|
+
cache=True
|
|
125
|
+
)
|
|
126
|
+
|
|
127
|
+
# Later, load from cache without API call
|
|
128
|
+
cached_data = load_data(
|
|
129
|
+
metadata,
|
|
130
|
+
"active_customers"
|
|
131
|
+
)
|
|
132
|
+
```
|
|
133
|
+
|
|
134
|
+
## Report Configuration
|
|
135
|
+
|
|
136
|
+
### Configuration Structure
|
|
137
|
+
|
|
138
|
+
Each report configuration is a dictionary with the following keys:
|
|
139
|
+
|
|
140
|
+
```python
|
|
141
|
+
{
|
|
142
|
+
"Description": "unique_identifier", # Unique key to reference this report
|
|
143
|
+
"App": "App Display Name", # Quickbase app name (for organization)
|
|
144
|
+
"App ID": "bq8xyx9z", # Quickbase app ID (required for API calls)
|
|
145
|
+
"Table": "Table Name", # Table name in Quickbase
|
|
146
|
+
"Report": "Report Name" # Report name within the table
|
|
147
|
+
}
|
|
148
|
+
```
|
|
149
|
+
|
|
150
|
+
### Basic Example
|
|
151
|
+
|
|
152
|
+
```python
|
|
153
|
+
# config/reports.py
|
|
154
|
+
"""Quickbase report configurations."""
|
|
155
|
+
|
|
156
|
+
import os
|
|
157
|
+
|
|
158
|
+
# Load app IDs from environment variables
|
|
159
|
+
SALES_APP_ID = os.environ.get("QB_SALES_APP_ID", "bq8xyx9z")
|
|
160
|
+
|
|
161
|
+
REPORTS = [
|
|
162
|
+
{
|
|
163
|
+
"Description": "active_customers",
|
|
164
|
+
"App": "Sales Tracker",
|
|
165
|
+
"App ID": SALES_APP_ID,
|
|
166
|
+
"Table": "Customers",
|
|
167
|
+
"Report": "Active Customers"
|
|
168
|
+
},
|
|
169
|
+
{
|
|
170
|
+
"Description": "open_deals",
|
|
171
|
+
"App": "Sales Tracker",
|
|
172
|
+
"App ID": SALES_APP_ID,
|
|
173
|
+
"Table": "Opportunities",
|
|
174
|
+
"Report": "Open Deals"
|
|
175
|
+
},
|
|
176
|
+
{
|
|
177
|
+
"Description": "recent_orders",
|
|
178
|
+
"App": "Sales Tracker",
|
|
179
|
+
"App ID": SALES_APP_ID,
|
|
180
|
+
"Table": "Orders",
|
|
181
|
+
"Report": "Last 30 Days"
|
|
182
|
+
}
|
|
183
|
+
]
|
|
184
|
+
```
|
|
185
|
+
|
|
186
|
+
### Pattern 1: Single App, Multiple Tables (DRY Approach)
|
|
187
|
+
|
|
188
|
+
When all reports come from the same app and use the same report name:
|
|
189
|
+
|
|
190
|
+
```python
|
|
191
|
+
# config/reports.py
|
|
192
|
+
"""Customer Portal Quickbase report configurations."""
|
|
193
|
+
|
|
194
|
+
import os
|
|
195
|
+
|
|
196
|
+
# Load app ID from environment
|
|
197
|
+
CUSTOMER_PORTAL_APP_ID = os.environ.get("QB_CUSTOMER_PORTAL_APP_ID", "bq8xyz123")
|
|
198
|
+
|
|
199
|
+
# Default values for all reports
|
|
200
|
+
_DEFAULTS = {
|
|
201
|
+
"App": "Customer Portal",
|
|
202
|
+
"App ID": CUSTOMER_PORTAL_APP_ID,
|
|
203
|
+
"Report": "Python" # All reports use "Python" report
|
|
204
|
+
}
|
|
205
|
+
|
|
206
|
+
# List of tables to fetch
|
|
207
|
+
_TABLES = [
|
|
208
|
+
"Customers",
|
|
209
|
+
"Orders",
|
|
210
|
+
"Products",
|
|
211
|
+
"Invoices",
|
|
212
|
+
"Payments",
|
|
213
|
+
"Shipping Addresses",
|
|
214
|
+
"Support Tickets",
|
|
215
|
+
"Reviews",
|
|
216
|
+
"Promotions",
|
|
217
|
+
]
|
|
218
|
+
|
|
219
|
+
# Generate report configs (Description and Table match)
|
|
220
|
+
REPORTS = [
|
|
221
|
+
{"Description": table, "Table": table, **_DEFAULTS}
|
|
222
|
+
for table in _TABLES
|
|
223
|
+
]
|
|
224
|
+
|
|
225
|
+
|
|
226
|
+
def get_reports():
|
|
227
|
+
"""Return the list of Quickbase report configurations.
|
|
228
|
+
|
|
229
|
+
Returns:
|
|
230
|
+
List of dicts containing report description, app, table, and report info.
|
|
231
|
+
|
|
232
|
+
Example:
|
|
233
|
+
>>> reports = get_reports()
|
|
234
|
+
>>> print(reports[0])
|
|
235
|
+
{
|
|
236
|
+
'Description': 'Customers',
|
|
237
|
+
'Table': 'Customers',
|
|
238
|
+
'App': 'Customer Portal',
|
|
239
|
+
'App ID': 'bq8xyz123',
|
|
240
|
+
'Report': 'Python'
|
|
241
|
+
}
|
|
242
|
+
"""
|
|
243
|
+
return REPORTS
|
|
244
|
+
```
|
|
245
|
+
|
|
246
|
+
### Pattern 2: Multiple Apps - Grouped by App
|
|
247
|
+
|
|
248
|
+
For better organization when dealing with many apps:
|
|
249
|
+
|
|
250
|
+
```python
|
|
251
|
+
# config/reports.py
|
|
252
|
+
"""Organized report configurations by application."""
|
|
253
|
+
|
|
254
|
+
import os
|
|
255
|
+
|
|
256
|
+
# Load app IDs
|
|
257
|
+
SALES_APP_ID = os.environ.get("QB_SALES_APP_ID", "bq8abc123")
|
|
258
|
+
HR_APP_ID = os.environ.get("QB_HR_APP_ID", "bq9def456")
|
|
259
|
+
INVENTORY_APP_ID = os.environ.get("QB_INVENTORY_APP_ID", "bq7ghi789")
|
|
260
|
+
|
|
261
|
+
def _create_report(description, app, app_id, table, report="Python"):
|
|
262
|
+
"""Helper to create report config dict."""
|
|
263
|
+
return {
|
|
264
|
+
"Description": description,
|
|
265
|
+
"App": app,
|
|
266
|
+
"App ID": app_id,
|
|
267
|
+
"Table": table,
|
|
268
|
+
"Report": report
|
|
269
|
+
}
|
|
270
|
+
|
|
271
|
+
# Sales reports
|
|
272
|
+
SALES_REPORTS = [
|
|
273
|
+
_create_report("customers", "Sales", SALES_APP_ID, "Customers", "Active"),
|
|
274
|
+
_create_report("orders", "Sales", SALES_APP_ID, "Orders", "All Orders"),
|
|
275
|
+
_create_report("invoices", "Sales", SALES_APP_ID, "Invoices", "Unpaid"),
|
|
276
|
+
]
|
|
277
|
+
|
|
278
|
+
# HR reports
|
|
279
|
+
HR_REPORTS = [
|
|
280
|
+
_create_report("employees", "HR", HR_APP_ID, "Employees", "Active"),
|
|
281
|
+
_create_report("timesheets", "HR", HR_APP_ID, "Timesheets", "Current Period"),
|
|
282
|
+
_create_report("benefits", "HR", HR_APP_ID, "Benefits", "All"),
|
|
283
|
+
]
|
|
284
|
+
|
|
285
|
+
# Inventory reports
|
|
286
|
+
INVENTORY_REPORTS = [
|
|
287
|
+
_create_report("products", "Inventory", INVENTORY_APP_ID, "Products"),
|
|
288
|
+
_create_report("warehouses", "Inventory", INVENTORY_APP_ID, "Warehouses"),
|
|
289
|
+
_create_report("stock_levels", "Inventory", INVENTORY_APP_ID, "Stock Levels"),
|
|
290
|
+
]
|
|
291
|
+
|
|
292
|
+
# Combine all reports
|
|
293
|
+
REPORTS = SALES_REPORTS + HR_REPORTS + INVENTORY_REPORTS
|
|
294
|
+
|
|
295
|
+
|
|
296
|
+
def get_reports(app=None):
|
|
297
|
+
"""Get report configurations, optionally filtered by app.
|
|
298
|
+
|
|
299
|
+
Args:
|
|
300
|
+
app: Optional app name to filter by (e.g., "Sales", "HR")
|
|
301
|
+
|
|
302
|
+
Returns:
|
|
303
|
+
List of report configuration dicts.
|
|
304
|
+
"""
|
|
305
|
+
if app is None:
|
|
306
|
+
return REPORTS
|
|
307
|
+
|
|
308
|
+
return [r for r in REPORTS if r["App"] == app]
|
|
309
|
+
|
|
310
|
+
|
|
311
|
+
def get_report_by_description(description):
|
|
312
|
+
"""Get a single report config by description.
|
|
313
|
+
|
|
314
|
+
Args:
|
|
315
|
+
description: Report description to find
|
|
316
|
+
|
|
317
|
+
Returns:
|
|
318
|
+
Report config dict or None if not found.
|
|
319
|
+
"""
|
|
320
|
+
return next((r for r in REPORTS if r["Description"] == description), None)
|
|
321
|
+
```
|
|
322
|
+
|
|
323
|
+
### Using Configurations
|
|
324
|
+
|
|
325
|
+
```python
|
|
326
|
+
# main.py
|
|
327
|
+
"""Main application using report configurations."""
|
|
328
|
+
|
|
329
|
+
from quickbase_extract import (
|
|
330
|
+
get_qb_client,
|
|
331
|
+
refresh_all,
|
|
332
|
+
load_report_metadata_batch,
|
|
333
|
+
get_data_parallel
|
|
334
|
+
)
|
|
335
|
+
from config.reports import get_reports
|
|
336
|
+
import os
|
|
337
|
+
|
|
338
|
+
# Initialize client
|
|
339
|
+
client = get_qb_client(
|
|
340
|
+
realm=os.environ["QB_REALM"],
|
|
341
|
+
user_token=os.environ["QB_USER_TOKEN"]
|
|
342
|
+
)
|
|
343
|
+
|
|
344
|
+
# Get all report configurations
|
|
345
|
+
report_configs = get_reports()
|
|
346
|
+
|
|
347
|
+
# One-time: Refresh metadata (run when reports change)
|
|
348
|
+
if os.environ.get("REFRESH_METADATA") == "true":
|
|
349
|
+
refresh_all(client, report_configs)
|
|
350
|
+
|
|
351
|
+
# Load metadata
|
|
352
|
+
metadata = load_report_metadata_batch(report_configs)
|
|
353
|
+
|
|
354
|
+
# Fetch data for specific reports
|
|
355
|
+
descriptions = ["customers", "orders", "invoices"]
|
|
356
|
+
data = get_data_parallel(client, metadata, descriptions, cache=True)
|
|
357
|
+
|
|
358
|
+
# Process data
|
|
359
|
+
for desc, records in data.items():
|
|
360
|
+
print(f"{desc}: {len(records)} records")
|
|
361
|
+
```
|
|
362
|
+
|
|
363
|
+
### Environment Configuration
|
|
364
|
+
|
|
365
|
+
Create a `.env` file for local development:
|
|
366
|
+
|
|
367
|
+
```bash
|
|
368
|
+
# .env
|
|
369
|
+
QB_REALM=example.quickbase.com
|
|
370
|
+
QB_USER_TOKEN=b5xy8x_abc123_token_here
|
|
371
|
+
|
|
372
|
+
# App IDs
|
|
373
|
+
QB_SALES_APP_ID=bq8abc123
|
|
374
|
+
QB_HR_APP_ID=bq9def456
|
|
375
|
+
QB_INVENTORY_APP_ID=bq7ghi789
|
|
376
|
+
|
|
377
|
+
# Optional
|
|
378
|
+
REFRESH_METADATA=false
|
|
379
|
+
QUICKBASE_CACHE_ROOT=./.quickbase-cache/dev
|
|
380
|
+
ENV=dev
|
|
381
|
+
```
|
|
382
|
+
|
|
383
|
+
Load with python-dotenv (not a dependency of this package; install it separately with `pip install python-dotenv`):
|
|
384
|
+
|
|
385
|
+
```python
|
|
386
|
+
# main.py
|
|
387
|
+
from dotenv import load_dotenv
|
|
388
|
+
load_dotenv() # Load .env file
|
|
389
|
+
|
|
390
|
+
import os
|
|
391
|
+
from config.reports import get_reports
|
|
392
|
+
|
|
393
|
+
# Now environment variables are available
|
|
394
|
+
reports = get_reports()
|
|
395
|
+
```
|
|
396
|
+
|
|
397
|
+
### Best Practices
|
|
398
|
+
|
|
399
|
+
1. **Use descriptive Description keys**
|
|
400
|
+
- Use lowercase with underscores: `"active_customers"` not `"Active Customers"`
|
|
401
|
+
- Make them unique and memorable
|
|
402
|
+
- They double as dictionary keys when reading results: `data["active_customers"]`
|
|
403
|
+
|
|
404
|
+
2. **Store App IDs in environment variables**
|
|
405
|
+
- Never hardcode credentials or IDs in source code
|
|
406
|
+
- Use `.env` for local development
|
|
407
|
+
- Use Lambda environment variables or Secrets Manager for production
|
|
408
|
+
|
|
409
|
+
3. **Keep configurations in a separate module**
|
|
410
|
+
- Easy to maintain and update
|
|
411
|
+
- Can be imported by multiple scripts
|
|
412
|
+
- Version control friendly
|
|
413
|
+
|
|
414
|
+
4. **Group related reports**
|
|
415
|
+
- By app for multi-app projects
|
|
416
|
+
- By function (e.g., all billing reports)
|
|
417
|
+
- Makes it easier to run subsets
|
|
418
|
+
|
|
419
|
+
5. **Document your reports**
|
|
420
|
+
```python
|
|
421
|
+
REPORTS = [
|
|
422
|
+
{
|
|
423
|
+
"Description": "active_customers",
|
|
424
|
+
"App": "Sales",
|
|
425
|
+
"App ID": SALES_APP_ID,
|
|
426
|
+
"Table": "Customers",
|
|
427
|
+
"Report": "Active",
|
|
428
|
+
# Optional: Add custom metadata
|
|
429
|
+
"notes": "Customers with activity in last 90 days",
|
|
430
|
+
"refresh_frequency": "daily"
|
|
431
|
+
}
|
|
432
|
+
]
|
|
433
|
+
```
|
|
434
|
+
|
|
435
|
+
6. **Validate configurations on startup**
|
|
436
|
+
```python
|
|
437
|
+
def validate_reports(reports):
|
|
438
|
+
"""Validate report configurations."""
|
|
439
|
+
required_keys = ["Description", "App", "App ID", "Table", "Report"]
|
|
440
|
+
descriptions = set()
|
|
441
|
+
|
|
442
|
+
for report in reports:
|
|
443
|
+
# Check required keys
|
|
444
|
+
missing = [k for k in required_keys if k not in report]
|
|
445
|
+
if missing:
|
|
446
|
+
raise ValueError(f"Report missing keys {missing}: {report}")
|
|
447
|
+
|
|
448
|
+
# Check for duplicate descriptions
|
|
449
|
+
desc = report["Description"]
|
|
450
|
+
if desc in descriptions:
|
|
451
|
+
raise ValueError(f"Duplicate description: {desc}")
|
|
452
|
+
descriptions.add(desc)
|
|
453
|
+
|
|
454
|
+
return True
|
|
455
|
+
|
|
456
|
+
# Use it
|
|
457
|
+
from config.reports import get_reports
|
|
458
|
+
reports = get_reports()
|
|
459
|
+
validate_reports(reports)
|
|
460
|
+
```
|
|
461
|
+
|
|
462
|
+
### Dynamic Configuration (Advanced)
|
|
463
|
+
|
|
464
|
+
For very large or dynamic report lists:
|
|
465
|
+
|
|
466
|
+
```python
|
|
467
|
+
# config/reports.py
|
|
468
|
+
"""Dynamic report configuration from database or API."""
|
|
469
|
+
|
|
470
|
+
import os
|
|
471
|
+
import json
|
|
472
|
+
from pathlib import Path
|
|
473
|
+
|
|
474
|
+
def load_reports_from_file(filepath):
|
|
475
|
+
"""Load report configs from JSON file.
|
|
476
|
+
|
|
477
|
+
Args:
|
|
478
|
+
filepath: Path to JSON file with report configs
|
|
479
|
+
|
|
480
|
+
Returns:
|
|
481
|
+
List of report configuration dicts.
|
|
482
|
+
"""
|
|
483
|
+
with open(filepath) as f:
|
|
484
|
+
return json.load(f)
|
|
485
|
+
|
|
486
|
+
|
|
487
|
+
def load_reports_from_database(connection_string):
|
|
488
|
+
"""Load report configs from database.
|
|
489
|
+
|
|
490
|
+
Args:
|
|
491
|
+
connection_string: Database connection string
|
|
492
|
+
|
|
493
|
+
Returns:
|
|
494
|
+
List of report configuration dicts.
|
|
495
|
+
"""
|
|
496
|
+
import psycopg2
|
|
497
|
+
|
|
498
|
+
conn = psycopg2.connect(connection_string)
|
|
499
|
+
cursor = conn.cursor()
|
|
500
|
+
|
|
501
|
+
cursor.execute("""
|
|
502
|
+
SELECT description, app_name, app_id, table_name, report_name
|
|
503
|
+
FROM quickbase_report_configs
|
|
504
|
+
WHERE active = true
|
|
505
|
+
""")
|
|
506
|
+
|
|
507
|
+
reports = []
|
|
508
|
+
for row in cursor.fetchall():
|
|
509
|
+
reports.append({
|
|
510
|
+
"Description": row[0],
|
|
511
|
+
"App": row[1],
|
|
512
|
+
"App ID": row[2],
|
|
513
|
+
"Table": row[3],
|
|
514
|
+
"Report": row[4]
|
|
515
|
+
})
|
|
516
|
+
|
|
517
|
+
conn.close()
|
|
518
|
+
return reports
|
|
519
|
+
|
|
520
|
+
|
|
521
|
+
def get_reports():
|
|
522
|
+
"""Get reports from configured source.
|
|
523
|
+
|
|
524
|
+
Checks for reports in this order:
|
|
525
|
+
1. JSON file (if REPORTS_FILE env var set)
|
|
526
|
+
2. Database (if DATABASE_URL env var set)
|
|
527
|
+
3. Hardcoded defaults
|
|
528
|
+
"""
|
|
529
|
+
# Try JSON file
|
|
530
|
+
reports_file = os.environ.get("REPORTS_FILE")
|
|
531
|
+
if reports_file and Path(reports_file).exists():
|
|
532
|
+
return load_reports_from_file(reports_file)
|
|
533
|
+
|
|
534
|
+
# Try database
|
|
535
|
+
db_url = os.environ.get("DATABASE_URL")
|
|
536
|
+
if db_url:
|
|
537
|
+
return load_reports_from_database(db_url)
|
|
538
|
+
|
|
539
|
+
# Fallback to defaults
|
|
540
|
+
return [
|
|
541
|
+
# ... hardcoded reports ...
|
|
542
|
+
]
|
|
543
|
+
```
|
|
544
|
+
|
|
545
|
+
## Architecture
|
|
546
|
+
|
|
547
|
+
### How It Works
|
|
548
|
+
|
|
549
|
+
```
|
|
550
|
+
┌─────────────────┐
|
|
551
|
+
│ Report Config │
|
|
552
|
+
└────────┬────────┘
|
|
553
|
+
│
|
|
554
|
+
▼
|
|
555
|
+
┌─────────────────────┐ ┌──────────────┐
|
|
556
|
+
│ Fetch Metadata │─────▶│ Cache (JSON) │
|
|
557
|
+
│ - Table ID │ │ Local or S3 │
|
|
558
|
+
│ - Field mappings │ └──────────────┘
|
|
559
|
+
│ - Report filters │
|
|
560
|
+
└─────────┬───────────┘
|
|
561
|
+
│
|
|
562
|
+
▼
|
|
563
|
+
┌─────────────────────┐ ┌──────────────┐
|
|
564
|
+
│ Fetch Data │─────▶│ Cache (JSON) │
|
|
565
|
+
│ - Query Quickbase │ │ Local or S3 │
|
|
566
|
+
│ - Transform records │ └──────────────┘
|
|
567
|
+
│ - Apply labels │
|
|
568
|
+
└─────────────────────┘
|
|
569
|
+
```
|
|
570
|
+
|
|
571
|
+
### Cache Structure
|
|
572
|
+
|
|
573
|
+
```
|
|
574
|
+
.quickbase-cache/
|
|
575
|
+
├── report_metadata/
|
|
576
|
+
│ └── sales_tracker/
|
|
577
|
+
│ ├── customers_active_customers.json
|
|
578
|
+
│ └── opportunities_open_deals.json
|
|
579
|
+
└── report_data/
|
|
580
|
+
└── sales_tracker/
|
|
581
|
+
├── customers_active_customers_data.json
|
|
582
|
+
└── opportunities_open_deals_data.json
|
|
583
|
+
```
|
|
584
|
+
|
|
585
|
+
## Configuration
|
|
586
|
+
|
|
587
|
+
### Environment Variables
|
|
588
|
+
|
|
589
|
+
| Variable | Description | Default |
|
|
590
|
+
|----------|-------------|---------|
|
|
591
|
+
| `QUICKBASE_CACHE_ROOT` | Local cache directory | `.quickbase-cache/dev` |
|
|
592
|
+
| `ENV` | Environment name (dev/prod) | `dev` |
|
|
593
|
+
| `AWS_LAMBDA_FUNCTION_NAME` | Set by Lambda (auto-detected) | - |
|
|
594
|
+
| `CACHE_BUCKET` | S3 bucket for Lambda cache | - |
|
|
595
|
+
|
|
596
|
+
### Custom Cache Location
|
|
597
|
+
|
|
598
|
+
```python
|
|
599
|
+
from quickbase_extract import CacheManager
|
|
600
|
+
|
|
601
|
+
# Use custom cache directory
|
|
602
|
+
cache_mgr = CacheManager(cache_root="/path/to/cache")
|
|
603
|
+
|
|
604
|
+
# Or set via environment
|
|
605
|
+
import os
|
|
606
|
+
os.environ["QUICKBASE_CACHE_ROOT"] = "/path/to/cache"
|
|
607
|
+
```
|
|
608
|
+
|
|
609
|
+
## AWS Lambda Deployment
|
|
610
|
+
|
|
611
|
+
### Setup
|
|
612
|
+
|
|
613
|
+
1. **Set environment variables in Lambda:**
|
|
614
|
+
```
|
|
615
|
+
CACHE_BUCKET=my-quickbase-cache-bucket
|
|
616
|
+
ENV=prod
|
|
617
|
+
```
|
|
618
|
+
|
|
619
|
+
2. **Lambda handler example:**
|
|
620
|
+
|
|
621
|
+
```python
|
|
622
|
+
from quickbase_extract import (
|
|
623
|
+
get_qb_client,
|
|
624
|
+
sync_from_s3_once,
|
|
625
|
+
load_report_metadata_batch,
|
|
626
|
+
get_data_parallel
|
|
627
|
+
)
|
|
628
|
+
import os
|
|
629
|
+
|
|
630
|
+
# Initialize client (reuse across warm starts)
|
|
631
|
+
client = get_qb_client(
|
|
632
|
+
realm=os.environ["QB_REALM"],
|
|
633
|
+
user_token=os.environ["QB_USER_TOKEN"]
|
|
634
|
+
)
|
|
635
|
+
|
|
636
|
+
# Load configs
|
|
637
|
+
report_configs = [...] # Your configs
|
|
638
|
+
|
|
639
|
+
def lambda_handler(event, context):
|
|
640
|
+
# Sync cache from S3 on cold start
|
|
641
|
+
sync_from_s3_once()
|
|
642
|
+
|
|
643
|
+
# Load metadata from cache
|
|
644
|
+
metadata = load_report_metadata_batch(report_configs)
|
|
645
|
+
|
|
646
|
+
# Fetch fresh data
|
|
647
|
+
descriptions = event.get("reports", ["active_customers"])
|
|
648
|
+
data = get_data_parallel(
|
|
649
|
+
client,
|
|
650
|
+
metadata,
|
|
651
|
+
descriptions,
|
|
652
|
+
cache=True # Will sync to S3
|
|
653
|
+
)
|
|
654
|
+
|
|
655
|
+
return {
|
|
656
|
+
"statusCode": 200,
|
|
657
|
+
"body": f"Fetched {len(data)} reports"
|
|
658
|
+
}
|
|
659
|
+
```
|
|
660
|
+
|
|
661
|
+
### S3 Bucket Structure
|
|
662
|
+
|
|
663
|
+
```
|
|
664
|
+
my-quickbase-cache-bucket/
|
|
665
|
+
├── dev/
|
|
666
|
+
│ ├── report_metadata/...
|
|
667
|
+
│ └── report_data/...
|
|
668
|
+
└── prod/
|
|
669
|
+
├── report_metadata/...
|
|
670
|
+
└── report_data/...
|
|
671
|
+
```
|
|
672
|
+
|
|
673
|
+
## API Reference
|
|
674
|
+
|
|
675
|
+
### Client Management
|
|
676
|
+
|
|
677
|
+
#### `get_qb_client(realm, user_token, cache=True)`
|
|
678
|
+
|
|
679
|
+
Create or retrieve a cached Quickbase client.
|
|
680
|
+
|
|
681
|
+
**Parameters:**
|
|
682
|
+
- `realm` (str): Quickbase realm (e.g., "example.quickbase.com")
|
|
683
|
+
- `user_token` (str): Quickbase user token
|
|
684
|
+
- `cache` (bool): Whether to cache and reuse client (default: True)
|
|
685
|
+
|
|
686
|
+
**Returns:** Quickbase API client instance
|
|
687
|
+
|
|
688
|
+
**Raises:**
|
|
689
|
+
- `ValueError`: If realm or token is empty
|
|
690
|
+
|
|
691
|
+
```python
|
|
692
|
+
client = get_qb_client("example.quickbase.com", "YOUR_TOKEN")
|
|
693
|
+
```
|
|
694
|
+
|
|
695
|
+
### Metadata Operations
|
|
696
|
+
|
|
697
|
+
#### `refresh_all(client, report_configs, cache_root=None)`
|
|
698
|
+
|
|
699
|
+
Fetch and cache metadata for all configured reports.
|
|
700
|
+
|
|
701
|
+
**Parameters:**
|
|
702
|
+
- `client`: Quickbase API client
|
|
703
|
+
- `report_configs` (list[dict]): List of report configurations
|
|
704
|
+
- `cache_root` (Path, optional): Custom cache directory
|
|
705
|
+
|
|
706
|
+
```python
|
|
707
|
+
refresh_all(client, report_configs)
|
|
708
|
+
```
|
|
709
|
+
|
|
710
|
+
#### `load_report_metadata_batch(report_configs, cache_root=None)`
|
|
711
|
+
|
|
712
|
+
Load metadata for all reports from cache.
|
|
713
|
+
|
|
714
|
+
**Returns:** Dict mapping report descriptions to metadata
|
|
715
|
+
|
|
716
|
+
```python
|
|
717
|
+
metadata = load_report_metadata_batch(report_configs)
|
|
718
|
+
# Returns: {"active_customers": {...}, "open_deals": {...}}
|
|
719
|
+
```
|
|
720
|
+
|
|
721
|
+
### Data Operations
|
|
722
|
+
|
|
723
|
+
#### `get_data(client, report_metadata, report_desc, cache=False, cache_root=None)`
|
|
724
|
+
|
|
725
|
+
Fetch data for a single report.
|
|
726
|
+
|
|
727
|
+
**Parameters:**
|
|
728
|
+
- `client`: Quickbase API client
|
|
729
|
+
- `report_metadata` (dict): Metadata from `load_report_metadata_batch()`
|
|
730
|
+
- `report_desc` (str): Report description key
|
|
731
|
+
- `cache` (bool): Whether to cache the data (default: False)
|
|
732
|
+
- `cache_root` (Path, optional): Custom cache directory
|
|
733
|
+
|
|
734
|
+
**Returns:** List of record dicts with field labels as keys
|
|
735
|
+
|
|
736
|
+
```python
|
|
737
|
+
customers = get_data(client, metadata, "active_customers", cache=True)
|
|
738
|
+
# Returns: [{"Name": "Alice", "Email": "alice@example.com"}, ...]
|
|
739
|
+
```
|
|
740
|
+
|
|
741
|
+
#### `get_data_parallel(client, report_metadata, report_descriptions, cache=False, cache_root=None, max_workers=8)`
|
|
742
|
+
|
|
743
|
+
Fetch data for multiple reports in parallel.
|
|
744
|
+
|
|
745
|
+
**Parameters:**
|
|
746
|
+
- `client`: Quickbase API client
|
|
747
|
+
- `report_metadata` (dict): Metadata from `load_report_metadata_batch()`
|
|
748
|
+
- `report_descriptions` (list[str]): List of report description keys
|
|
749
|
+
- `cache` (bool): Whether to cache the data (default: False)
|
|
750
|
+
- `cache_root` (Path, optional): Custom cache directory
|
|
751
|
+
- `max_workers` (int): Maximum concurrent threads (default: 8)
|
|
752
|
+
|
|
753
|
+
**Returns:** Dict mapping report descriptions to data lists
|
|
754
|
+
|
|
755
|
+
```python
|
|
756
|
+
data = get_data_parallel(
|
|
757
|
+
client,
|
|
758
|
+
metadata,
|
|
759
|
+
["active_customers", "open_deals"],
|
|
760
|
+
cache=True,
|
|
761
|
+
max_workers=4
|
|
762
|
+
)
|
|
763
|
+
```
|
|
764
|
+
|
|
765
|
+
#### `load_data(report_metadata, report_desc, cache_root=None)`
|
|
766
|
+
|
|
767
|
+
Load cached data for a single report.
|
|
768
|
+
|
|
769
|
+
```python
|
|
770
|
+
customers = load_data(metadata, "active_customers")
|
|
771
|
+
```
|
|
772
|
+
|
|
773
|
+
#### `load_data_batch(report_metadata, report_descriptions, cache_root=None)`
|
|
774
|
+
|
|
775
|
+
Load cached data for multiple reports.
|
|
776
|
+
|
|
777
|
+
```python
|
|
778
|
+
data = load_data_batch(metadata, ["active_customers", "open_deals"])
|
|
779
|
+
```
|
|
780
|
+
|
|
781
|
+
### Cache Management
|
|
782
|
+
|
|
783
|
+
#### `sync_from_s3_once(force=False)`
|
|
784
|
+
|
|
785
|
+
Download cache from S3 to /tmp on Lambda cold start.
|
|
786
|
+
|
|
787
|
+
**Parameters:**
|
|
788
|
+
- `force` (bool): Force sync even if already synced (default: False)
|
|
789
|
+
|
|
790
|
+
```python
|
|
791
|
+
sync_from_s3_once() # On cold start
|
|
792
|
+
sync_from_s3_once(force=True) # Force re-sync
|
|
793
|
+
```
|
|
794
|
+
|
|
795
|
+
#### `is_cache_synced()`
|
|
796
|
+
|
|
797
|
+
Check if cache has been synced in this invocation.
|
|
798
|
+
|
|
799
|
+
```python
|
|
800
|
+
if not is_cache_synced():
|
|
801
|
+
print("Cache needs syncing")
|
|
802
|
+
```
|
|
803
|
+
|
|
804
|
+
### Cache Monitoring
|
|
805
|
+
|
|
806
|
+
#### `check_cache_freshness(threshold_hours=36, cache_root=None)`
|
|
807
|
+
|
|
808
|
+
Check for stale cache files.
|
|
809
|
+
|
|
810
|
+
**Parameters:**
|
|
811
|
+
- `threshold_hours` (float): Files older than this are stale (default: 36)
|
|
812
|
+
- `cache_root` (Path, optional): Custom cache directory
|
|
813
|
+
|
|
814
|
+
**Returns:** List of stale file info dicts
|
|
815
|
+
|
|
816
|
+
```python
|
|
817
|
+
stale_files = check_cache_freshness(threshold_hours=24)
|
|
818
|
+
if stale_files:
|
|
819
|
+
print(f"Found {len(stale_files)} stale files")
|
|
820
|
+
```
|
|
821
|
+
|
|
822
|
+
#### `get_cache_summary(cache_root=None)`
|
|
823
|
+
|
|
824
|
+
Get summary statistics for cache directory.
|
|
825
|
+
|
|
826
|
+
**Returns:** Dict with total files, size, oldest/newest file info
|
|
827
|
+
|
|
828
|
+
```python
|
|
829
|
+
summary = get_cache_summary()
|
|
830
|
+
print(f"Cache: {summary['total_files']} files, {summary['total_size_mb']} MB")
|
|
831
|
+
print(f"Oldest: {summary['oldest_file']} ({summary['oldest_age_hours']}h old)")
|
|
832
|
+
```
|
|
833
|
+
|
|
834
|
+
### Error Handling
|
|
835
|
+
|
|
836
|
+
#### `handle_query(client, table_id, *, select=None, where=None, sort_by=None, group_by=None, options=None, description="", max_retries=3)`
|
|
837
|
+
|
|
838
|
+
Execute a Quickbase query with retry logic.
|
|
839
|
+
|
|
840
|
+
```python
|
|
841
|
+
from quickbase_extract import handle_query
|
|
842
|
+
|
|
843
|
+
result = handle_query(
|
|
844
|
+
client,
|
|
845
|
+
"tblABC123",
|
|
846
|
+
select=[3, 6, 7],
|
|
847
|
+
where="{8.EX.'Active'}",
|
|
848
|
+
description="active customers",
|
|
849
|
+
max_retries=3
|
|
850
|
+
)
|
|
851
|
+
```
|
|
852
|
+
|
|
853
|
+
#### `handle_upsert(client, table_id, data, description="", max_retries=3)`
|
|
854
|
+
|
|
855
|
+
Execute a Quickbase upsert with retry logic.
|
|
856
|
+
|
|
857
|
+
```python
|
|
858
|
+
from quickbase_extract import handle_upsert
|
|
859
|
+
|
|
860
|
+
records = [{"6": {"value": "Alice"}, "7": {"value": "alice@example.com"}}]
|
|
861
|
+
result = handle_upsert(
|
|
862
|
+
client,
|
|
863
|
+
"tblABC123",
|
|
864
|
+
records,
|
|
865
|
+
description="customer records"
|
|
866
|
+
)
|
|
867
|
+
```
|
|
868
|
+
|
|
869
|
+
#### `handle_delete(client, table_id, where, description="", max_retries=3)`
|
|
870
|
+
|
|
871
|
+
Execute a Quickbase delete with retry logic.
|
|
872
|
+
|
|
873
|
+
```python
|
|
874
|
+
from quickbase_extract import handle_delete
|
|
875
|
+
|
|
876
|
+
deleted = handle_delete(
|
|
877
|
+
client,
|
|
878
|
+
"tblABC123",
|
|
879
|
+
where="{8.EX.'Inactive'}",
|
|
880
|
+
description="inactive customers"
|
|
881
|
+
)
|
|
882
|
+
print(f"Deleted {deleted} records")
|
|
883
|
+
```
|
|
884
|
+
|
|
885
|
+
## Development
|
|
886
|
+
|
|
887
|
+
### Setup
|
|
888
|
+
|
|
889
|
+
```bash
|
|
890
|
+
# Clone repository
|
|
891
|
+
git clone https://github.com/tbrezler/quickbase-extract.git
|
|
892
|
+
cd quickbase-extract
|
|
893
|
+
|
|
894
|
+
# Create virtual environment
|
|
895
|
+
python -m venv venv
|
|
896
|
+
source venv/bin/activate # On Windows: venv\Scripts\activate
|
|
897
|
+
|
|
898
|
+
# Install in development mode with test dependencies
|
|
899
|
+
pip install -e ".[dev]"
|
|
900
|
+
```
|
|
901
|
+
|
|
902
|
+
### Running Tests
|
|
903
|
+
|
|
904
|
+
```bash
|
|
905
|
+
# Run all tests
|
|
906
|
+
pytest
|
|
907
|
+
|
|
908
|
+
# Run with coverage
|
|
909
|
+
pytest --cov=quickbase_extract --cov-report=html
|
|
910
|
+
|
|
911
|
+
# Run specific test file
|
|
912
|
+
pytest tests/test_report_data.py
|
|
913
|
+
|
|
914
|
+
# Run specific test
|
|
915
|
+
pytest tests/test_report_data.py::TestGetData::test_get_data_with_cache
|
|
916
|
+
```
|
|
917
|
+
|
|
918
|
+
### Code Quality
|
|
919
|
+
|
|
920
|
+
```bash
|
|
921
|
+
# Format code
|
|
922
|
+
black quickbase_extract tests
|
|
923
|
+
|
|
924
|
+
# Lint
|
|
925
|
+
flake8 quickbase_extract tests
|
|
926
|
+
|
|
927
|
+
# Type check
|
|
928
|
+
mypy quickbase_extract
|
|
929
|
+
```
|
|
930
|
+
|
|
931
|
+
## Cache Freshness Thresholds
|
|
932
|
+
|
|
933
|
+
Different cache types have different recommended freshness thresholds:
|
|
934
|
+
|
|
935
|
+
| Cache Type | Default | Recommended | Reason |
|
|
936
|
+
|------------|---------|-------------|--------|
|
|
937
|
+
| Metadata | 168 hours (7 days) | Manual refresh | Table structure rarely changes |
|
|
938
|
+
| Data | 24 hours (1 day) | Varies by use case | Data changes frequently |
|
|
939
|
+
|
|
940
|
+
```python
|
|
941
|
+
from quickbase_extract.cache_freshness import (
|
|
942
|
+
DEFAULT_METADATA_STALE_HOURS,
|
|
943
|
+
DEFAULT_DATA_STALE_HOURS
|
|
944
|
+
)
|
|
945
|
+
|
|
946
|
+
# Check metadata freshness (rarely changes)
|
|
947
|
+
stale_metadata = check_cache_freshness(
|
|
948
|
+
threshold_hours=DEFAULT_METADATA_STALE_HOURS
|
|
949
|
+
)
|
|
950
|
+
|
|
951
|
+
# Check data freshness (changes often)
|
|
952
|
+
stale_data = check_cache_freshness(
|
|
953
|
+
threshold_hours=DEFAULT_DATA_STALE_HOURS
|
|
954
|
+
)
|
|
955
|
+
```
|
|
956
|
+
|
|
957
|
+
## Best Practices
|
|
958
|
+
|
|
959
|
+
### 1. Metadata Refresh Strategy
|
|
960
|
+
|
|
961
|
+
Metadata (table structure, field mappings) changes infrequently. Only refresh when:
|
|
962
|
+
- Adding new reports
|
|
963
|
+
- Report configurations change
|
|
964
|
+
- Field definitions change
|
|
965
|
+
|
|
966
|
+
```python
|
|
967
|
+
# Manual metadata refresh (not in production loop)
|
|
968
|
+
if metadata_changed:
|
|
969
|
+
refresh_all(client, report_configs)
|
|
970
|
+
```
|
|
971
|
+
|
|
972
|
+
### 2. Data Caching Strategy
|
|
973
|
+
|
|
974
|
+
For Lambda, cache data during the function execution to avoid repeated API calls:
|
|
975
|
+
|
|
976
|
+
```python
|
|
977
|
+
# Good: Fetch once, cache, reuse
|
|
978
|
+
metadata = load_report_metadata_batch(report_configs)
|
|
979
|
+
data = get_data_parallel(client, metadata, descriptions, cache=True)
|
|
980
|
+
|
|
981
|
+
# Later in same invocation
|
|
982
|
+
cached_data = load_data_batch(metadata, descriptions)
|
|
983
|
+
```
|
|
984
|
+
|
|
985
|
+
### 3. Rate Limit Management
|
|
986
|
+
|
|
987
|
+
Adjust `max_workers` based on your Quickbase API rate limits:
|
|
988
|
+
|
|
989
|
+
```python
|
|
990
|
+
# Conservative (better for rate limits)
|
|
991
|
+
data = get_data_parallel(client, metadata, descriptions, max_workers=4)
|
|
992
|
+
|
|
993
|
+
# Aggressive (faster but may hit rate limits)
|
|
994
|
+
data = get_data_parallel(client, metadata, descriptions, max_workers=16)
|
|
995
|
+
```
|
|
996
|
+
|
|
997
|
+
### 4. Error Handling
|
|
998
|
+
|
|
999
|
+
All operations include retry logic for rate limits (429 errors) but fail fast on other errors:
|
|
1000
|
+
|
|
1001
|
+
```python
|
|
1002
|
+
from quickbase_extract import QuickbaseOperationError
|
|
1003
|
+
|
|
1004
|
+
try:
|
|
1005
|
+
data = get_data(client, metadata, "report_name")
|
|
1006
|
+
except QuickbaseOperationError as e:
|
|
1007
|
+
print(f"Operation {e.operation} failed: {e.details}")
|
|
1008
|
+
except KeyError:
|
|
1009
|
+
print("Report not found in metadata")
|
|
1010
|
+
```
|
|
1011
|
+
|
|
1012
|
+
## Troubleshooting
|
|
1013
|
+
|
|
1014
|
+
### Issue: "Report metadata not found"
|
|
1015
|
+
|
|
1016
|
+
**Solution:** Run `refresh_all()` to cache metadata first:
|
|
1017
|
+
|
|
1018
|
+
```python
|
|
1019
|
+
refresh_all(client, report_configs)
|
|
1020
|
+
metadata = load_report_metadata_batch(report_configs)
|
|
1021
|
+
```
|
|
1022
|
+
|
|
1023
|
+
### Issue: "Rate limit exceeded" (429 errors)
|
|
1024
|
+
|
|
1025
|
+
**Solution:** Reduce `max_workers` or increase `max_retries`:
|
|
1026
|
+
|
|
1027
|
+
```python
|
|
1028
|
+
# Reduce concurrency
|
|
1029
|
+
data = get_data_parallel(client, metadata, descriptions, max_workers=2)
|
|
1030
|
+
|
|
1031
|
+
# Increase max retries
|
|
1032
|
+
from quickbase_extract import handle_query
|
|
1033
|
+
result = handle_query(client, table_id, max_retries=5)
|
|
1034
|
+
```
|
|
1035
|
+
|
|
1036
|
+
### Issue: Lambda "Cache not synced from S3"
|
|
1037
|
+
|
|
1038
|
+
**Solution:** Ensure `CACHE_BUCKET` is set and the bucket exists:
|
|
1039
|
+
|
|
1040
|
+
```python
|
|
1041
|
+
import os
|
|
1042
|
+
print(os.environ.get("CACHE_BUCKET")) # Should not be None
|
|
1043
|
+
|
|
1044
|
+
# Call sync explicitly
|
|
1045
|
+
from quickbase_extract import sync_from_s3_once
|
|
1046
|
+
sync_from_s3_once()
|
|
1047
|
+
```
|
|
1048
|
+
|
|
1049
|
+
### Issue: "Cache directory does not exist"
|
|
1050
|
+
|
|
1051
|
+
**Solution:** The cache directory is created automatically, but ensure the parent directory is writable:
|
|
1052
|
+
|
|
1053
|
+
```python
|
|
1054
|
+
from quickbase_extract import CacheManager
|
|
1055
|
+
import os
|
|
1056
|
+
|
|
1057
|
+
cache_path = os.path.expanduser("~/.quickbase-cache")
|
|
1058
|
+
cache_mgr = CacheManager(cache_root=cache_path)
|
|
1059
|
+
```
|
|
1060
|
+
|
|
1061
|
+
## Advanced Usage
|
|
1062
|
+
|
|
1063
|
+
### Custom Report Configurations
|
|
1064
|
+
|
|
1065
|
+
#### Using Field IDs Instead of Report Names
|
|
1066
|
+
|
|
1067
|
+
```python
|
|
1068
|
+
from quickbase_extract import handle_query
|
|
1069
|
+
|
|
1070
|
+
# Query specific fields directly without a report
|
|
1071
|
+
result = handle_query(
|
|
1072
|
+
client,
|
|
1073
|
+
table_id="tblABC123",
|
|
1074
|
+
select=[3, 6, 7, 8], # Field IDs
|
|
1075
|
+
where="{8.EX.'Active'}AND{12.GT.'2024-01-01'}",
|
|
1076
|
+
sort_by=[{"fieldId": 6, "order": "ASC"}],
|
|
1077
|
+
description="custom query"
|
|
1078
|
+
)
|
|
1079
|
+
|
|
1080
|
+
data = result["data"]
|
|
1081
|
+
```
|
|
1082
|
+
|
|
1083
|
+
#### Dynamic Report Filtering
|
|
1084
|
+
|
|
1085
|
+
```python
|
|
1086
|
+
def get_filtered_customers(client, metadata, status, min_date):
|
|
1087
|
+
"""Fetch customers with dynamic filters."""
|
|
1088
|
+
info = metadata["active_customers"]
|
|
1089
|
+
|
|
1090
|
+
# Build custom filter
|
|
1091
|
+
custom_filter = f"{{8.EX.'{status}'}}AND{{12.GT.'{min_date}'}}"
|
|
1092
|
+
|
|
1093
|
+
result = handle_query(
|
|
1094
|
+
client,
|
|
1095
|
+
info["table_id"],
|
|
1096
|
+
select=info["fields"],
|
|
1097
|
+
where=custom_filter,
|
|
1098
|
+
sort_by=info["report"]["query"]["sortBy"],
|
|
1099
|
+
description=f"customers_{status}_{min_date}"
|
|
1100
|
+
)
|
|
1101
|
+
|
|
1102
|
+
return result["data"]
|
|
1103
|
+
|
|
1104
|
+
# Usage
|
|
1105
|
+
active = get_filtered_customers(client, metadata, "Active", "2024-01-01")
|
|
1106
|
+
```
|
|
1107
|
+
|
|
1108
|
+
### Batch Processing with Progress Tracking
|
|
1109
|
+
|
|
1110
|
+
```python
|
|
1111
|
+
from quickbase_extract import get_data
|
|
1112
|
+
import logging
|
|
1113
|
+
|
|
1114
|
+
logging.basicConfig(level=logging.INFO)
|
|
1115
|
+
logger = logging.getLogger(__name__)
|
|
1116
|
+
|
|
1117
|
+
def process_reports_with_progress(client, metadata, descriptions):
|
|
1118
|
+
"""Process multiple reports with progress tracking."""
|
|
1119
|
+
results = {}
|
|
1120
|
+
total = len(descriptions)
|
|
1121
|
+
|
|
1122
|
+
for i, desc in enumerate(descriptions, 1):
|
|
1123
|
+
logger.info(f"Processing {i}/{total}: {desc}")
|
|
1124
|
+
|
|
1125
|
+
try:
|
|
1126
|
+
data = get_data(client, metadata, desc, cache=True)
|
|
1127
|
+
results[desc] = {
|
|
1128
|
+
"status": "success",
|
|
1129
|
+
"records": len(data),
|
|
1130
|
+
"data": data
|
|
1131
|
+
}
|
|
1132
|
+
logger.info(f"✓ {desc}: {len(data)} records")
|
|
1133
|
+
except Exception as e:
|
|
1134
|
+
results[desc] = {
|
|
1135
|
+
"status": "error",
|
|
1136
|
+
"error": str(e)
|
|
1137
|
+
}
|
|
1138
|
+
logger.error(f"✗ {desc}: {e}")
|
|
1139
|
+
|
|
1140
|
+
return results
|
|
1141
|
+
|
|
1142
|
+
# Usage
|
|
1143
|
+
results = process_reports_with_progress(
|
|
1144
|
+
client,
|
|
1145
|
+
metadata,
|
|
1146
|
+
["customers", "orders", "products"]
|
|
1147
|
+
)
|
|
1148
|
+
```
|
|
1149
|
+
|
|
1150
|
+
### Incremental Data Updates
|
|
1151
|
+
|
|
1152
|
+
```python
|
|
1153
|
+
from datetime import datetime, timedelta
|
|
1154
|
+
from quickbase_extract import handle_query, handle_upsert
|
|
1155
|
+
|
|
1156
|
+
def sync_recent_changes(client, source_table_id, target_table_id):
|
|
1157
|
+
"""Sync only records modified in last 24 hours."""
|
|
1158
|
+
yesterday = (datetime.now() - timedelta(days=1)).strftime("%Y-%m-%d")
|
|
1159
|
+
|
|
1160
|
+
# Fetch recent changes
|
|
1161
|
+
result = handle_query(
|
|
1162
|
+
client,
|
|
1163
|
+
source_table_id,
|
|
1164
|
+
where=f"{{2.AFT.'{yesterday}'}}", # Date Modified (built-in field 2) after yesterday
|
|
1165
|
+
description="recent changes"
|
|
1166
|
+
)
|
|
1167
|
+
|
|
1168
|
+
if result["data"]:
|
|
1169
|
+
# Transform and upsert
|
|
1170
|
+
records = [transform_record(r) for r in result["data"]]
|
|
1171
|
+
handle_upsert(
|
|
1172
|
+
client,
|
|
1173
|
+
target_table_id,
|
|
1174
|
+
records,
|
|
1175
|
+
description="sync recent changes"
|
|
1176
|
+
)
|
|
1177
|
+
print(f"Synced {len(records)} recent changes")
|
|
1178
|
+
else:
|
|
1179
|
+
print("No recent changes found")
|
|
1180
|
+
```
|
|
1181
|
+
|
|
1182
|
+
### Multi-Environment Configuration
|
|
1183
|
+
|
|
1184
|
+
```python
|
|
1185
|
+
import os
|
|
1186
|
+
from quickbase_extract import get_qb_client, CacheManager
|
|
1187
|
+
|
|
1188
|
+
class QuickbaseConfig:
|
|
1189
|
+
"""Environment-aware Quickbase configuration."""
|
|
1190
|
+
|
|
1191
|
+
def __init__(self, env=None):
|
|
1192
|
+
self.env = env or os.environ.get("ENV", "dev")
|
|
1193
|
+
self.config = self._load_config()
|
|
1194
|
+
|
|
1195
|
+
def _load_config(self):
|
|
1196
|
+
configs = {
|
|
1197
|
+
"dev": {
|
|
1198
|
+
"realm": "dev-realm.quickbase.com",
|
|
1199
|
+
"token": os.environ.get("QB_TOKEN_DEV"),
|
|
1200
|
+
"cache_root": "./.quickbase-cache/dev"
|
|
1201
|
+
},
|
|
1202
|
+
"staging": {
|
|
1203
|
+
"realm": "staging-realm.quickbase.com",
|
|
1204
|
+
"token": os.environ.get("QB_TOKEN_STAGING"),
|
|
1205
|
+
"cache_root": "./.quickbase-cache/staging"
|
|
1206
|
+
},
|
|
1207
|
+
"prod": {
|
|
1208
|
+
"realm": "prod-realm.quickbase.com",
|
|
1209
|
+
"token": os.environ.get("QB_TOKEN_PROD"),
|
|
1210
|
+
"cache_root": "/tmp/quickbase-cache"
|
|
1211
|
+
}
|
|
1212
|
+
}
|
|
1213
|
+
return configs[self.env]
|
|
1214
|
+
|
|
1215
|
+
def get_client(self):
|
|
1216
|
+
return get_qb_client(
|
|
1217
|
+
realm=self.config["realm"],
|
|
1218
|
+
user_token=self.config["token"]
|
|
1219
|
+
)
|
|
1220
|
+
|
|
1221
|
+
def get_cache_manager(self):
|
|
1222
|
+
return CacheManager(cache_root=self.config["cache_root"])
|
|
1223
|
+
|
|
1224
|
+
# Usage
|
|
1225
|
+
config = QuickbaseConfig(env="prod")
|
|
1226
|
+
client = config.get_client()
|
|
1227
|
+
cache_mgr = config.get_cache_manager()
|
|
1228
|
+
```
|
|
1229
|
+
|
|
1230
|
+
### Data Transformation Pipeline
|
|
1231
|
+
|
|
1232
|
+
```python
|
|
1233
|
+
from quickbase_extract import get_data
|
|
1234
|
+
from typing import List, Dict, Callable
|
|
1235
|
+
|
|
1236
|
+
class DataPipeline:
|
|
1237
|
+
"""Pipeline for transforming Quickbase data."""
|
|
1238
|
+
|
|
1239
|
+
def __init__(self, client, metadata):
|
|
1240
|
+
self.client = client
|
|
1241
|
+
self.metadata = metadata
|
|
1242
|
+
self.transformers: List[Callable] = []
|
|
1243
|
+
|
|
1244
|
+
def add_transformer(self, func: Callable):
|
|
1245
|
+
"""Add a transformation function to the pipeline."""
|
|
1246
|
+
self.transformers.append(func)
|
|
1247
|
+
return self
|
|
1248
|
+
|
|
1249
|
+
def execute(self, report_desc: str) -> List[Dict]:
|
|
1250
|
+
"""Execute pipeline for a report."""
|
|
1251
|
+
# Fetch data
|
|
1252
|
+
data = get_data(self.client, self.metadata, report_desc)
|
|
1253
|
+
|
|
1254
|
+
# Apply transformations
|
|
1255
|
+
for transformer in self.transformers:
|
|
1256
|
+
data = transformer(data)
|
|
1257
|
+
|
|
1258
|
+
return data
|
|
1259
|
+
|
|
1260
|
+
# Example transformers
|
|
1261
|
+
def filter_active(data):
|
|
1262
|
+
"""Keep only active records."""
|
|
1263
|
+
return [r for r in data if r.get("Status") == "Active"]
|
|
1264
|
+
|
|
1265
|
+
def add_full_name(data):
|
|
1266
|
+
"""Add computed full name field."""
|
|
1267
|
+
for record in data:
|
|
1268
|
+
first = record.get("First Name", "")
|
|
1269
|
+
last = record.get("Last Name", "")
|
|
1270
|
+
record["Full Name"] = f"{first} {last}".strip()
|
|
1271
|
+
return data
|
|
1272
|
+
|
|
1273
|
+
def convert_dates(data):
|
|
1274
|
+
"""Convert date strings to datetime objects."""
|
|
1275
|
+
from dateutil import parser
|
|
1276
|
+
for record in data:
|
|
1277
|
+
if "Date Created" in record:
|
|
1278
|
+
record["Date Created"] = parser.parse(record["Date Created"])
|
|
1279
|
+
return data
|
|
1280
|
+
|
|
1281
|
+
# Usage
|
|
1282
|
+
pipeline = (
|
|
1283
|
+
DataPipeline(client, metadata)
|
|
1284
|
+
.add_transformer(filter_active)
|
|
1285
|
+
.add_transformer(add_full_name)
|
|
1286
|
+
.add_transformer(convert_dates)
|
|
1287
|
+
)
|
|
1288
|
+
|
|
1289
|
+
customers = pipeline.execute("active_customers")
|
|
1290
|
+
```
|
|
1291
|
+
|
|
1292
|
+
## Performance Optimization
|
|
1293
|
+
|
|
1294
|
+
### Benchmarking Your Setup
|
|
1295
|
+
|
|
1296
|
+
```python
|
|
1297
|
+
import time
|
|
1298
|
+
from quickbase_extract import get_data_parallel
|
|
1299
|
+
|
|
1300
|
+
def benchmark_parallel_fetch(client, metadata, descriptions, workers_list):
|
|
1301
|
+
"""Test different worker counts to find optimal setting."""
|
|
1302
|
+
results = {}
|
|
1303
|
+
|
|
1304
|
+
for workers in workers_list:
|
|
1305
|
+
start = time.time()
|
|
1306
|
+
data = get_data_parallel(
|
|
1307
|
+
client,
|
|
1308
|
+
metadata,
|
|
1309
|
+
descriptions,
|
|
1310
|
+
max_workers=workers
|
|
1311
|
+
)
|
|
1312
|
+
elapsed = time.time() - start
|
|
1313
|
+
|
|
1314
|
+
total_records = sum(len(d) for d in data.values())
|
|
1315
|
+
results[workers] = {
|
|
1316
|
+
"time": elapsed,
|
|
1317
|
+
"records": total_records,
|
|
1318
|
+
"records_per_second": total_records / elapsed
|
|
1319
|
+
}
|
|
1320
|
+
|
|
1321
|
+
print(f"Workers={workers}: {elapsed:.2f}s, {total_records} records")
|
|
1322
|
+
|
|
1323
|
+
return results
|
|
1324
|
+
|
|
1325
|
+
# Usage
|
|
1326
|
+
descriptions = ["customers", "orders", "products", "invoices"]
|
|
1327
|
+
results = benchmark_parallel_fetch(
|
|
1328
|
+
client,
|
|
1329
|
+
metadata,
|
|
1330
|
+
descriptions,
|
|
1331
|
+
workers_list=[2, 4, 8, 16]
|
|
1332
|
+
)
|
|
1333
|
+
|
|
1334
|
+
# Find optimal worker count
|
|
1335
|
+
optimal = max(results.items(), key=lambda x: x[1]["records_per_second"])
|
|
1336
|
+
print(f"Optimal workers: {optimal[0]} ({optimal[1]['records_per_second']:.0f} rec/sec)")
|
|
1337
|
+
```
|
|
1338
|
+
|
|
1339
|
+
### Memory-Efficient Processing for Large Datasets
|
|
1340
|
+
|
|
1341
|
+
```python
|
|
1342
|
+
from quickbase_extract import handle_query
|
|
1343
|
+
import json
|
|
1344
|
+
|
|
1345
|
+
def fetch_large_dataset_in_chunks(client, table_id, chunk_size=1000):
|
|
1346
|
+
"""Fetch large datasets in chunks to avoid memory issues."""
|
|
1347
|
+
skip = 0
|
|
1348
|
+
all_data = []
|
|
1349
|
+
|
|
1350
|
+
while True:
|
|
1351
|
+
result = handle_query(
|
|
1352
|
+
client,
|
|
1353
|
+
table_id,
|
|
1354
|
+
options={"skip": skip, "top": chunk_size},
|
|
1355
|
+
description=f"chunk at {skip}"
|
|
1356
|
+
)
|
|
1357
|
+
|
|
1358
|
+
data = result["data"]
|
|
1359
|
+
if not data:
|
|
1360
|
+
break
|
|
1361
|
+
|
|
1362
|
+
all_data.extend(data)
|
|
1363
|
+
print(f"Fetched {len(all_data)} records so far...")
|
|
1364
|
+
|
|
1365
|
+
if len(data) < chunk_size:
|
|
1366
|
+
break # Last chunk
|
|
1367
|
+
|
|
1368
|
+
skip += chunk_size
|
|
1369
|
+
|
|
1370
|
+
return all_data
|
|
1371
|
+
|
|
1372
|
+
# Stream to file instead of loading in memory
|
|
1373
|
+
def stream_to_file(client, table_id, output_file):
|
|
1374
|
+
"""Stream large dataset directly to file."""
|
|
1375
|
+
skip = 0
|
|
1376
|
+
chunk_size = 1000
|
|
1377
|
+
|
|
1378
|
+
with open(output_file, 'w') as f:
|
|
1379
|
+
f.write('[')
|
|
1380
|
+
first_chunk = True
|
|
1381
|
+
|
|
1382
|
+
while True:
|
|
1383
|
+
result = handle_query(
|
|
1384
|
+
client,
|
|
1385
|
+
table_id,
|
|
1386
|
+
options={"skip": skip, "top": chunk_size}
|
|
1387
|
+
)
|
|
1388
|
+
|
|
1389
|
+
data = result["data"]
|
|
1390
|
+
if not data:
|
|
1391
|
+
break
|
|
1392
|
+
|
|
1393
|
+
# Write chunk
|
|
1394
|
+
if not first_chunk:
|
|
1395
|
+
f.write(',')
|
|
1396
|
+
f.write(json.dumps(data)[1:-1]) # Remove outer brackets
|
|
1397
|
+
first_chunk = False
|
|
1398
|
+
|
|
1399
|
+
if len(data) < chunk_size:
|
|
1400
|
+
break
|
|
1401
|
+
|
|
1402
|
+
skip += chunk_size
|
|
1403
|
+
|
|
1404
|
+
f.write(']')
|
|
1405
|
+
```
|
|
1406
|
+
|
|
1407
|
+
### Caching Strategies
|
|
1408
|
+
|
|
1409
|
+
```python
|
|
1410
|
+
from quickbase_extract import load_data, get_data
|
|
1411
|
+
from datetime import datetime, timedelta
|
|
1412
|
+
import os
|
|
1413
|
+
|
|
1414
|
+
def get_data_with_ttl(client, metadata, report_desc, ttl_hours=24):
|
|
1415
|
+
"""Get data from cache if fresh, otherwise fetch new."""
|
|
1416
|
+
from quickbase_extract.cache_manager import get_cache_manager
|
|
1417
|
+
|
|
1418
|
+
cache_mgr = get_cache_manager()
|
|
1419
|
+
info = metadata[report_desc]
|
|
1420
|
+
data_path = cache_mgr.get_data_path(
|
|
1421
|
+
info["app_name"],
|
|
1422
|
+
info["table_name"],
|
|
1423
|
+
info["report_name"]
|
|
1424
|
+
)
|
|
1425
|
+
|
|
1426
|
+
# Check if cache exists and is fresh
|
|
1427
|
+
if data_path.exists():
|
|
1428
|
+
mtime = datetime.fromtimestamp(data_path.stat().st_mtime)
|
|
1429
|
+
age = datetime.now() - mtime
|
|
1430
|
+
|
|
1431
|
+
if age < timedelta(hours=ttl_hours):
|
|
1432
|
+
print(f"Using cached data ({age.total_seconds() / 3600:.1f}h old)")
|
|
1433
|
+
return load_data(metadata, report_desc)
|
|
1434
|
+
|
|
1435
|
+
# Cache miss or stale, fetch new
|
|
1436
|
+
print("Fetching fresh data from Quickbase")
|
|
1437
|
+
return get_data(client, metadata, report_desc, cache=True)
|
|
1438
|
+
|
|
1439
|
+
# Usage
|
|
1440
|
+
customers = get_data_with_ttl(client, metadata, "customers", ttl_hours=6)
|
|
1441
|
+
```
|
|
1442
|
+
|
|
1443
|
+
## Real-World Use Cases
|
|
1444
|
+
|
|
1445
|
+
### Use Case 1: Daily Sales Report
|
|
1446
|
+
|
|
1447
|
+
```python
|
|
1448
|
+
from quickbase_extract import *
|
|
1449
|
+
from datetime import datetime
|
|
1450
|
+
import smtplib
|
|
1451
|
+
from email.mime.text import MIMEText
|
|
1452
|
+
|
|
1453
|
+
def generate_daily_sales_report(client, metadata):
|
|
1454
|
+
"""Generate and email daily sales report."""
|
|
1455
|
+
# Fetch today's data
|
|
1456
|
+
today = datetime.now().strftime("%Y-%m-%d")
|
|
1457
|
+
|
|
1458
|
+
orders = get_data(client, metadata, "todays_orders")
|
|
1459
|
+
revenue = sum(float(o.get("Order Total", 0)) for o in orders)
|
|
1460
|
+
|
|
1461
|
+
# Generate report
|
|
1462
|
+
report = f"""
|
|
1463
|
+
Daily Sales Report - {today}
|
|
1464
|
+
================================
|
|
1465
|
+
Total Orders: {len(orders)}
|
|
1466
|
+
Total Revenue: ${revenue:,.2f}
|
|
1467
|
+
Average Order: ${revenue / len(orders) if orders else 0:,.2f}
|
|
1468
|
+
|
|
1469
|
+
Top 5 Orders:
|
|
1470
|
+
"""
|
|
1471
|
+
|
|
1472
|
+
top_orders = sorted(orders, key=lambda x: float(x.get("Order Total", 0)), reverse=True)[:5]
|
|
1473
|
+
for i, order in enumerate(top_orders, 1):
|
|
1474
|
+
report += f"\n{i}. Order #{order['Order ID']}: ${order['Order Total']}"
|
|
1475
|
+
|
|
1476
|
+
# Email report
|
|
1477
|
+
send_email_report(report, "sales-team@company.com")
|
|
1478
|
+
|
|
1479
|
+
return {"orders": len(orders), "revenue": revenue}
|
|
1480
|
+
|
|
1481
|
+
def send_email_report(body, to_email):
|
|
1482
|
+
"""Send email report."""
|
|
1483
|
+
msg = MIMEText(body)
|
|
1484
|
+
msg['Subject'] = f"Daily Sales Report - {datetime.now().strftime('%Y-%m-%d')}"
|
|
1485
|
+
msg['From'] = "quickbase-bot@company.com"
|
|
1486
|
+
msg['To'] = to_email
|
|
1487
|
+
|
|
1488
|
+
# Send email (configure SMTP settings)
|
|
1489
|
+
# ...
|
|
1490
|
+
```
|
|
1491
|
+
|
|
1492
|
+
### Use Case 2: Data Warehouse ETL
|
|
1493
|
+
|
|
1494
|
+
```python
|
|
1495
|
+
from quickbase_extract import *
|
|
1496
|
+
import psycopg2
|
|
1497
|
+
from datetime import datetime
|
|
1498
|
+
|
|
1499
|
+
class QuickbaseToPostgresETL:
|
|
1500
|
+
"""ETL pipeline from Quickbase to PostgreSQL."""
|
|
1501
|
+
|
|
1502
|
+
def __init__(self, qb_client, pg_conn_string):
|
|
1503
|
+
self.qb_client = qb_client
|
|
1504
|
+
self.pg_conn = psycopg2.connect(pg_conn_string)
|
|
1505
|
+
|
|
1506
|
+
def extract(self, metadata, report_descs):
|
|
1507
|
+
"""Extract data from Quickbase."""
|
|
1508
|
+
return get_data_parallel(
|
|
1509
|
+
self.qb_client,
|
|
1510
|
+
metadata,
|
|
1511
|
+
report_descs,
|
|
1512
|
+
cache=False # Always fresh for ETL
|
|
1513
|
+
)
|
|
1514
|
+
|
|
1515
|
+
def transform(self, data):
|
|
1516
|
+
"""Transform data for warehouse schema."""
|
|
1517
|
+
transformed = {}
|
|
1518
|
+
|
|
1519
|
+
for report_desc, records in data.items():
|
|
1520
|
+
# Clean and transform each record
|
|
1521
|
+
cleaned = []
|
|
1522
|
+
for record in records:
|
|
1523
|
+
cleaned_record = {
|
|
1524
|
+
# Normalize field names
|
|
1525
|
+
k.lower().replace(" ", "_"): v
|
|
1526
|
+
for k, v in record.items()
|
|
1527
|
+
}
|
|
1528
|
+
# Add metadata
|
|
1529
|
+
cleaned_record["_extracted_at"] = datetime.now().isoformat()
|
|
1530
|
+
cleaned_record["_source"] = report_desc
|
|
1531
|
+
cleaned.append(cleaned_record)
|
|
1532
|
+
|
|
1533
|
+
transformed[report_desc] = cleaned
|
|
1534
|
+
|
|
1535
|
+
return transformed
|
|
1536
|
+
|
|
1537
|
+
def load(self, data, schema="quickbase"):
|
|
1538
|
+
"""Load data into PostgreSQL."""
|
|
1539
|
+
cursor = self.pg_conn.cursor()
|
|
1540
|
+
|
|
1541
|
+
for table_name, records in data.items():
|
|
1542
|
+
if not records:
|
|
1543
|
+
continue
|
|
1544
|
+
|
|
1545
|
+
# Create/truncate table
|
|
1546
|
+
columns = list(records[0].keys())
|
|
1547
|
+
col_defs = ", ".join([f'"{col}" TEXT' for col in columns])
|
|
1548
|
+
|
|
1549
|
+
cursor.execute(f'CREATE SCHEMA IF NOT EXISTS {schema}')
|
|
1550
|
+
cursor.execute(f'DROP TABLE IF EXISTS {schema}.{table_name}')
|
|
1551
|
+
cursor.execute(f'CREATE TABLE {schema}.{table_name} ({col_defs})')
|
|
1552
|
+
|
|
1553
|
+
# Bulk insert
|
|
1554
|
+
for record in records:
|
|
1555
|
+
placeholders = ", ".join(["%s" for _ in columns])
|
|
1556
|
+
values = [record.get(col) for col in columns]
|
|
1557
|
+
cursor.execute(
|
|
1558
|
+
f'INSERT INTO {schema}.{table_name} VALUES ({placeholders})',
|
|
1559
|
+
values
|
|
1560
|
+
)
|
|
1561
|
+
|
|
1562
|
+
self.pg_conn.commit()
|
|
1563
|
+
print(f"Loaded {len(records)} records into {schema}.{table_name}")
|
|
1564
|
+
|
|
1565
|
+
cursor.close()
|
|
1566
|
+
|
|
1567
|
+
def run(self, metadata, report_descs):
|
|
1568
|
+
"""Run full ETL pipeline."""
|
|
1569
|
+
print("Starting ETL pipeline...")
|
|
1570
|
+
|
|
1571
|
+
# Extract
|
|
1572
|
+
print("Extracting...")
|
|
1573
|
+
data = self.extract(metadata, report_descs)
|
|
1574
|
+
|
|
1575
|
+
# Transform
|
|
1576
|
+
print("Transforming...")
|
|
1577
|
+
transformed = self.transform(data)
|
|
1578
|
+
|
|
1579
|
+
# Load
|
|
1580
|
+
print("Loading...")
|
|
1581
|
+
self.load(transformed)
|
|
1582
|
+
|
|
1583
|
+
print("ETL complete!")
|
|
1584
|
+
|
|
1585
|
+
return {
|
|
1586
|
+
"tables_loaded": len(transformed),
|
|
1587
|
+
"total_records": sum(len(records) for records in transformed.values())
|
|
1588
|
+
}
|
|
1589
|
+
|
|
1590
|
+
# Usage
|
|
1591
|
+
etl = QuickbaseToPostgresETL(
|
|
1592
|
+
qb_client=client,
|
|
1593
|
+
pg_conn_string="postgresql://user:pass@localhost/warehouse"
|
|
1594
|
+
)
|
|
1595
|
+
|
|
1596
|
+
result = etl.run(
|
|
1597
|
+
metadata,
|
|
1598
|
+
["customers", "orders", "products"]
|
|
1599
|
+
)
|
|
1600
|
+
```
|
|
1601
|
+
|
|
1602
|
+
### Use Case 3: Automated Data Quality Checks
|
|
1603
|
+
|
|
1604
|
+
```python
|
|
1605
|
+
from quickbase_extract import get_data
|
|
1606
|
+
from typing import List, Dict, Any
|
|
1607
|
+
|
|
1608
|
+
class DataQualityChecker:
|
|
1609
|
+
"""Run data quality checks on Quickbase data."""
|
|
1610
|
+
|
|
1611
|
+
def __init__(self, client, metadata):
|
|
1612
|
+
self.client = client
|
|
1613
|
+
self.metadata = metadata
|
|
1614
|
+
self.issues = []
|
|
1615
|
+
|
|
1616
|
+
def check_required_fields(self, report_desc, required_fields):
|
|
1617
|
+
"""Check that required fields are not empty."""
|
|
1618
|
+
data = get_data(self.client, self.metadata, report_desc)
|
|
1619
|
+
|
|
1620
|
+
for i, record in enumerate(data):
|
|
1621
|
+
for field in required_fields:
|
|
1622
|
+
if not record.get(field):
|
|
1623
|
+
self.issues.append({
|
|
1624
|
+
"report": report_desc,
|
|
1625
|
+
"record_index": i,
|
|
1626
|
+
"record_id": record.get("Record ID#"),
|
|
1627
|
+
"issue": f"Missing required field: {field}"
|
|
1628
|
+
})
|
|
1629
|
+
|
|
1630
|
+
def check_duplicates(self, report_desc, unique_field):
|
|
1631
|
+
"""Check for duplicate values in unique fields."""
|
|
1632
|
+
data = get_data(self.client, self.metadata, report_desc)
|
|
1633
|
+
|
|
1634
|
+
seen = {}
|
|
1635
|
+
for i, record in enumerate(data):
|
|
1636
|
+
value = record.get(unique_field)
|
|
1637
|
+
if value in seen:
|
|
1638
|
+
self.issues.append({
|
|
1639
|
+
"report": report_desc,
|
|
1640
|
+
"record_index": i,
|
|
1641
|
+
"record_id": record.get("Record ID#"),
|
|
1642
|
+
"issue": f"Duplicate {unique_field}: {value} (also at index {seen[value]})"
|
|
1643
|
+
})
|
|
1644
|
+
else:
|
|
1645
|
+
seen[value] = i
|
|
1646
|
+
|
|
1647
|
+
def check_value_range(self, report_desc, field, min_val=None, max_val=None):
|
|
1648
|
+
"""Check that numeric values are within expected range."""
|
|
1649
|
+
data = get_data(self.client, self.metadata, report_desc)
|
|
1650
|
+
|
|
1651
|
+
for i, record in enumerate(data):
|
|
1652
|
+
value = record.get(field)
|
|
1653
|
+
if value is not None:
|
|
1654
|
+
try:
|
|
1655
|
+
num_value = float(value)
|
|
1656
|
+
if min_val is not None and num_value < min_val:
|
|
1657
|
+
self.issues.append({
|
|
1658
|
+
"report": report_desc,
|
|
1659
|
+
"record_index": i,
|
|
1660
|
+
"record_id": record.get("Record ID#"),
|
|
1661
|
+
"issue": f"{field} below minimum: {value} < {min_val}"
|
|
1662
|
+
})
|
|
1663
|
+
if max_val is not None and num_value > max_val:
|
|
1664
|
+
self.issues.append({
|
|
1665
|
+
"report": report_desc,
|
|
1666
|
+
"record_index": i,
|
|
1667
|
+
"record_id": record.get("Record ID#"),
|
|
1668
|
+
"issue": f"{field} above maximum: {value} > {max_val}"
|
|
1669
|
+
})
|
|
1670
|
+
except ValueError:
|
|
1671
|
+
self.issues.append({
|
|
1672
|
+
"report": report_desc,
|
|
1673
|
+
"record_index": i,
|
|
1674
|
+
"record_id": record.get("Record ID#"),
|
|
1675
|
+
"issue": f"{field} is not numeric: {value}"
|
|
1676
|
+
})
|
|
1677
|
+
|
|
1678
|
+
def generate_report(self):
|
|
1679
|
+
"""Generate data quality report."""
|
|
1680
|
+
if not self.issues:
|
|
1681
|
+
return "✓ All data quality checks passed!"
|
|
1682
|
+
|
|
1683
|
+
report = f"Found {len(self.issues)} data quality issues:\n\n"
|
|
1684
|
+
for issue in self.issues:
|
|
1685
|
+
report += f"- Record {issue['record_id']} in {issue['report']}: {issue['issue']}\n"
|
|
1686
|
+
|
|
1687
|
+
return report
|
|
1688
|
+
|
|
1689
|
+
# Usage
|
|
1690
|
+
checker = DataQualityChecker(client, metadata)
|
|
1691
|
+
|
|
1692
|
+
# Run checks
|
|
1693
|
+
checker.check_required_fields("customers", ["Name", "Email"])
|
|
1694
|
+
checker.check_duplicates("customers", "Email")
|
|
1695
|
+
checker.check_value_range("orders", "Order Total", min_val=0, max_val=1000000)
|
|
1696
|
+
|
|
1697
|
+
# Get report
|
|
1698
|
+
print(checker.generate_report())
|
|
1699
|
+
```
|
|
1700
|
+
|
|
1701
|
+
## FAQ
|
|
1702
|
+
|
|
1703
|
+
### Q: How do I handle Quickbase API rate limits?
|
|
1704
|
+
|
|
1705
|
+
**A:** The package automatically retries on 429 (rate limit) errors with exponential backoff. You can also:
|
|
1706
|
+
- Reduce `max_workers` in parallel operations
|
|
1707
|
+
- Increase `max_retries` for more attempts
|
|
1708
|
+
- Cache data aggressively to reduce API calls
|
|
1709
|
+
|
|
1710
|
+
```python
|
|
1711
|
+
# Conservative approach
|
|
1712
|
+
data = get_data_parallel(client, metadata, descriptions, max_workers=2)
|
|
1713
|
+
|
|
1714
|
+
# More retries
|
|
1715
|
+
result = handle_query(client, table_id, max_retries=5)
|
|
1716
|
+
```
|
|
1717
|
+
|
|
1718
|
+
### Q: What's the difference between cache=True and loading from cache?
|
|
1719
|
+
|
|
1720
|
+
**A:**
|
|
1721
|
+
- `get_data(..., cache=True)` - Fetches from API and saves to cache
|
|
1722
|
+
- `load_data(...)` - Loads from cache only (no API call)
|
|
1723
|
+
|
|
1724
|
+
```python
|
|
1725
|
+
# Scenario 1: Fresh data needed
|
|
1726
|
+
data = get_data(client, metadata, "customers", cache=True) # API call + cache
|
|
1727
|
+
# Later in same execution
|
|
1728
|
+
data = load_data(metadata, "customers") # From cache, no API call
|
|
1729
|
+
|
|
1730
|
+
# Scenario 2: Cache-first approach
|
|
1731
|
+
try:
|
|
1732
|
+
data = load_data(metadata, "customers") # Try cache first
|
|
1733
|
+
except FileNotFoundError:
|
|
1734
|
+
data = get_data(client, metadata, "customers", cache=True) # Fallback to API
|
|
1735
|
+
```
|