quickbase-extract 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,316 @@
1
+ """Quickbase report metadata fetching and caching.
2
+
3
+ Retrieves table and report metadata from Quickbase (field mappings, report
4
+ configurations, filters) and caches them as JSON files for use by report_data.py.
5
+ """
6
+
7
+ import json
8
+ import logging
9
+ import time
10
+ from concurrent.futures import ThreadPoolExecutor, as_completed
11
+
12
+ from quickbase_extract.cache_manager import get_cache_manager
13
+ from quickbase_extract.utils import find_report, normalize_name
14
+
15
+ logger = logging.getLogger(__name__)
16
+
17
+
18
def fetch_report_data(client, app_id: str, table_name: str, report_name: str) -> dict:
    """Fetch report metadata from the Quickbase API.

    Resolves the table ID for ``table_name``, retrieves the table's field
    label -> field ID map and its report list, then fetches the full report
    object whose name equals ``report_name`` exactly.

    Args:
        client: Quickbase API client. Must provide ``get_table_id``,
            ``get_field_label_id_map``, ``get_reports`` and ``get_report``.
        app_id: The Quickbase application ID.
        table_name: Name of the table containing the report.
        report_name: Name of the report to fetch (exact match).

    Returns:
        Dict containing:
            - table_id: The table's Quickbase ID
            - field_label: Mapping returned by ``get_field_label_id_map``
            - report_id: The report's Quickbase ID
            - report: Full report object returned by ``get_report``

    Raises:
        ValueError: If report_name is not found in the specified table.
        Exception: Whatever the client raises when an API call fails.
    """
    table_id = client.get_table_id(app_id, table_name=table_name)
    field_label = client.get_field_label_id_map(table_id)
    reports = client.get_reports(table_id)

    # First report whose name matches; None means "no such report".
    report_id = next((r["id"] for r in reports if r["name"] == report_name), None)

    # Compare against None explicitly: a plain truthiness test would
    # misreport a matching report with a falsy id (0, "") as missing.
    if report_id is None:
        available = [r["name"] for r in reports]
        raise ValueError(
            f"Report '{report_name}' not found in table '{table_name}'. "
            f"Available reports: {available}"
        )

    report = client.get_report(table_id, report_id=report_id)

    return {
        "table_id": table_id,
        "field_label": field_label,
        "report_id": report_id,
        "report": report,
    }
61
+
62
+
63
def get_report_metadata(client, report_config: dict, cache_root=None) -> None:
    """Fetch one report's metadata from Quickbase and write it to the cache.

    Reads the app, table and report names from ``report_config``, fetches
    the table ID, field map and report object via ``fetch_report_data``,
    and writes the combined record as indented JSON to the path supplied by
    the cache manager. The app, table and report names are stored in their
    normalized form, and the report's query ``fields`` and ``filter`` are
    duplicated at the top level of the record.

    Args:
        client: Quickbase API client.
        report_config: Dict with keys:
            - App: Application display name
            - App ID: Quickbase application ID
            - Table: Table name within the application
            - Report: Report name within the table
        cache_root: Optional cache root path, forwarded to
            ``get_cache_manager``.

    Returns:
        None. The metadata is written to the cache as a JSON file.

    Raises:
        KeyError: If report_config is missing a required key, or the
            fetched report has no ``query`` / ``fields`` / ``filter`` entry.
        ValueError: If the report is not found in the specified table.
        Exception: If any Quickbase API call fails.

    Example:
        >>> config = {
        ...     "App": "Sales Tracker",
        ...     "App ID": "bq8xyx9z",
        ...     "Table": "Opportunities",
        ...     "Report": "Open Deals"
        ... }
        >>> get_report_metadata(qb_client, config)
    """
    # Keys are read in this fixed order so the first missing one is reported.
    app_label, app_key, table_label, report_label = (
        report_config[key] for key in ("App", "App ID", "Table", "Report")
    )

    logger.info(f"Fetching {app_label}: {table_label} - {report_label}")

    fetched = fetch_report_data(client, app_key, table_label, report_label)

    # Normalized names double as the cache path components below.
    norm_app = normalize_name(app_label)
    norm_table = normalize_name(table_label)
    norm_report = normalize_name(report_label)

    query = fetched["report"]["query"]

    # Key order is kept stable because it determines the JSON layout on disk.
    record = {
        "app_name": norm_app,
        "table_name": norm_table,
        "table_id": fetched["table_id"],
        "field_label": fetched["field_label"],
        "report_name": norm_report,
        "report_id": fetched["report_id"],
        "report": fetched["report"],
        "fields": query["fields"],
        "filter": query["filter"],
    }

    manager = get_cache_manager(cache_root=cache_root)
    target = manager.get_metadata_path(norm_app, norm_table, norm_report)
    serialized = json.dumps(record, indent=4)
    manager.write_file(target, serialized)
127
+
128
+
129
def get_report_metadata_parallel(client, report_configs: list[dict], cache_root=None, max_workers: int = 8) -> None:
    """Fetch and cache metadata for many reports using a thread pool.

    Submits one ``get_report_metadata`` call per config and consumes the
    results as they complete. The approach is fail-fast: on the first task
    that raises, tasks that have not started yet are cancelled, the failure
    is logged, and the exception is re-raised. Tasks already running are
    not interrupted; leaving the executor's ``with`` block waits for them
    before the exception reaches the caller.

    Args:
        client: Quickbase API client. It is shared by all worker threads,
            so it should be safe for concurrent use.
        report_configs: List of dicts, each containing:
            - App: Application display name
            - App ID: Quickbase application ID
            - Table: Table name within the application
            - Report: Report name within the table
        cache_root: Optional cache root path, forwarded to
            ``get_report_metadata``.
        max_workers: Maximum number of concurrent threads. Default is 8.
            Adjust based on API rate limits and system resources.

    Returns:
        None. Each report's metadata is written to the cache as a JSON file.
        An empty ``report_configs`` logs a warning and returns immediately.

    Raises:
        ValueError: If any report is not found in its specified table.
        KeyError: If any report_config is missing required keys.
        Exception: The first exception observed from a worker task.

    Example:
        >>> configs = [
        ...     {
        ...         "App": "Sales Tracker",
        ...         "App ID": "bq8xyx9z",
        ...         "Table": "Opportunities",
        ...         "Report": "Open Deals"
        ...     },
        ...     {
        ...         "App": "Sales Tracker",
        ...         "App ID": "bq8xyx9z",
        ...         "Table": "Contacts",
        ...         "Report": "Active Contacts"
        ...     }
        ... ]
        >>> get_report_metadata_parallel(qb_client, configs)
    """
    if not report_configs:
        logger.warning("No report configs provided, nothing to fetch")
        return

    total_reports = len(report_configs)
    logger.info(f"Starting parallel fetch for {total_reports} reports with {max_workers} workers")

    with ThreadPoolExecutor(max_workers=max_workers) as pool:
        # Map each future to a readable "App:Table:Report" label for logging.
        labels = {}
        for cfg in report_configs:
            # Submit first, then build the label, matching the original order.
            task = pool.submit(get_report_metadata, client, cfg, cache_root)
            labels[task] = f"{cfg['App']}:{cfg['Table']}:{cfg['Report']}"

        for finished in as_completed(labels):
            label = labels[finished]
            try:
                # result() re-raises whatever the worker raised; per-report
                # progress is logged inside get_report_metadata itself.
                finished.result()
            except Exception as e:
                # Drop queued work so no further fetches start.
                pool.shutdown(wait=False, cancel_futures=True)
                logger.error(f"Failed to fetch metadata for {label}: {e}")
                raise

    logger.info(f"Successfully completed parallel fetch for all {total_reports} reports")
211
+
212
+
213
def load_report_metadata(report_desc: str, report_configs: list[dict], cache_root=None) -> dict:
    """Read one report's cached metadata back from disk.

    Looks up the config whose description equals ``report_desc``, derives
    the cache path from its normalized App / Table / Report names, and
    returns the parsed JSON content of that file.

    Args:
        report_desc: Unique description of a specific table report.
        report_configs: List of report configuration dicts to search.
        cache_root: Optional cache root path, forwarded to
            ``get_cache_manager``.

    Returns:
        The metadata dict decoded from the cached JSON file.

    Raises:
        ValueError: If no report matches the description.
        FileNotFoundError: If cached metadata does not exist.
    """
    matched = find_report(report_configs, report_desc)
    manager = get_cache_manager(cache_root=cache_root)

    # Names are normalized here because they were normalized when saved.
    path_parts = [normalize_name(matched[key]) for key in ("App", "Table", "Report")]
    md_path = manager.get_metadata_path(*path_parts)

    if md_path.exists():
        raw = manager.read_file(md_path)
        return json.loads(raw)

    raise FileNotFoundError(
        f"Report metadata not found for '{report_desc}'. Run refresh_all() first. Expected: {md_path}"
    )
242
+
243
+
244
def load_report_metadata_batch(report_configs: list[dict], cache_root=None) -> dict:
    """Load cached metadata for every configured report.

    Calls ``load_report_metadata`` once per config, in order, keyed by each
    config's 'Description'. If two configs share a description, the later
    entry overwrites the earlier one in the result.

    Args:
        report_configs: List of report configuration dicts, each containing
            a 'Description' key used as the lookup key.
        cache_root: Optional cache root path, forwarded to
            ``load_report_metadata``.

    Returns:
        Dict mapping report descriptions to their metadata dicts. An empty
        ``report_configs`` yields an empty dict.

    Raises:
        KeyError: If a config has no 'Description' key.
        FileNotFoundError: If any report metadata is not cached.

    Example:
        >>> configs = [{"Description": "sales_open_deals", ...}, ...]
        >>> all_metadata = load_report_metadata_batch(configs)
        >>> sales_metadata = all_metadata["sales_open_deals"]
    """
    if not report_configs:
        return {}

    return {
        cfg["Description"]: load_report_metadata(cfg["Description"], report_configs, cache_root=cache_root)
        for cfg in report_configs
    }
276
+
277
+
278
def refresh_all(client, report_configs: list[dict], cache_root=None) -> None:
    """Refresh all report metadata from Quickbase.

    Fetches fresh metadata for every configured report through
    ``get_report_metadata_parallel`` and logs how long the refresh took.

    Args:
        client: Quickbase API client.
        report_configs: List of report configuration dicts with keys:
            - App: Application display name
            - App ID: Quickbase application ID
            - Table: Table name
            - Report: Report name
        cache_root: Optional cache root path, forwarded to
            ``get_report_metadata_parallel``.

    Returns:
        None. Updates cached metadata files on disk. An empty
        ``report_configs`` logs a warning and returns immediately.

    Raises:
        Exception: If any metadata fetch fails. Uses fail-fast behavior.

    Example:
        >>> refresh_all(qb_client, report_configs)
        # INFO: Refreshing metadata for 15 reports
        # INFO: Report metadata refresh completed in 3.421s
    """
    if not report_configs:
        logger.warning("No report configs provided, nothing to refresh")
        return

    logger.info(f"Refreshing metadata for {len(report_configs)} reports")

    # perf_counter is monotonic, so the measured duration cannot be skewed
    # (or go negative) if the system wall clock is adjusted mid-refresh.
    report_md_start = time.perf_counter()

    # Use parallel fetching for performance
    get_report_metadata_parallel(client, report_configs, cache_root=cache_root)

    elapsed = round(time.perf_counter() - report_md_start, 3)
    logger.info(f"Report metadata refresh completed in {elapsed}s")
@@ -0,0 +1,42 @@
1
+ """Utility functions for Quickbase data extraction."""
2
+
3
+ import logging
4
+
5
+ logger = logging.getLogger(__name__)
6
+
7
+
8
def normalize_name(name: str) -> str:
    """Lowercase a display name and replace each space with an underscore.

    Only the space character is rewritten; every other character is kept
    as-is apart from lowercasing, and consecutive spaces each become their
    own underscore.

    Args:
        name: Display name string (e.g., 'Billing Records')

    Returns:
        Normalized string (e.g., 'billing_records')
    """
    lowered = name.lower()
    # Splitting on an explicit " " keeps empty pieces, so every single
    # space maps to exactly one underscore.
    return "_".join(lowered.split(" "))
18
+
19
+
20
def find_report(reports: list[dict], report_desc: str) -> dict:
    """Return the report config whose 'Description' equals ``report_desc``.

    Every entry in ``reports`` is inspected. When several entries match, a
    warning is logged and the earliest one is returned.

    Args:
        reports: List of report configuration dicts, each with a
            'Description' key.
        report_desc: Description string to match exactly.

    Returns:
        The first matching report dict (the same object held in ``reports``).

    Raises:
        ValueError: If no report matches the description.
        KeyError: If an entry has no 'Description' key.
    """
    candidates = []
    for entry in reports:
        if entry["Description"] == report_desc:
            candidates.append(entry)

    if candidates:
        if len(candidates) > 1:
            logger.warning(f"Multiple reports match '{report_desc}', using first match")
        return candidates[0]

    # Nothing matched: list what is available to make the error actionable.
    available = [entry["Description"] for entry in reports]
    raise ValueError(f"No report found matching description: '{report_desc}'. Available reports: {available}")