metabase-migration-toolkit 1.0.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
export_metabase.py ADDED
@@ -0,0 +1,466 @@
1
+ """
2
+ Metabase Export Tool
3
+
4
+ This script connects to a source Metabase instance, traverses its collections,
5
+ and exports cards (questions) and dashboards into a structured directory layout.
6
+ It produces a `manifest.json` file that indexes the exported content, which is
7
+ used by the import script.
8
+ """
9
+
10
+ import dataclasses
11
+ import datetime
12
+ import sys
13
+ from pathlib import Path
14
+ from typing import Any
15
+
16
+ from tqdm import tqdm
17
+
18
+ from lib.client import MetabaseAPIError, MetabaseClient
19
+ from lib.config import ExportConfig, get_export_args
20
+ from lib.models import Card, Collection, Dashboard, Manifest, ManifestMeta
21
+ from lib.utils import (
22
+ TOOL_VERSION,
23
+ calculate_checksum,
24
+ sanitize_filename,
25
+ setup_logging,
26
+ write_json_file,
27
+ )
28
+
29
+ # Module-level logger shared by every function and method in this script.
30
+ logger = setup_logging(__name__)
31
+
32
+
33
class MetabaseExporter:
    """Exports collections, cards and dashboards from a source Metabase instance.

    The exporter walks the collection tree, writes one JSON file per
    collection, card and dashboard below ``config.export_dir`` and records
    every exported object in a manifest that is finally written to
    ``manifest.json`` in the export directory.
    """

    def __init__(self, config: ExportConfig):
        """Create the API client and the empty bookkeeping structures.

        Args:
            config: Export settings (source URL, credentials, export directory
                and the include/filter flags read by the other methods).
        """
        self.config = config
        self.client = MetabaseClient(
            base_url=config.source_url,
            username=config.source_username,
            password=config.source_password,
            session_token=config.source_session_token,
            personal_token=config.source_personal_token,
        )
        self.export_dir = Path(config.export_dir)
        self.manifest = self._initialize_manifest()
        # Collection ID -> relative export path of that collection.
        self._collection_path_map: dict[int, str] = {}
        # Collection IDs already visited, so a collection is never written twice.
        self._processed_collections: set[int] = set()
        # Card IDs already written, so a card is never exported twice.
        self._exported_cards: set[int] = set()
        # Kept for backward compatibility; the methods in this class pass the
        # dependency chain as an argument and do not read this attribute.
        self._dependency_chain: list[int] = []

    def _initialize_manifest(self) -> Manifest:
        """Build an empty manifest whose metadata describes this export run.

        Returns:
            A ``Manifest`` holding the source URL, the export timestamp, the
            tool version and the CLI arguments with secrets redacted.
        """
        cli_args = dataclasses.asdict(self.config)
        # Redact secrets so they never end up in the manifest file.
        for secret in ("source_password", "source_session_token", "source_personal_token"):
            if cli_args.get(secret):
                cli_args[secret] = "********"

        # datetime.utcnow() is deprecated since Python 3.12. Dropping tzinfo
        # from an aware UTC timestamp yields the same naive ISO-8601 string.
        export_timestamp = (
            datetime.datetime.now(datetime.timezone.utc).replace(tzinfo=None).isoformat()
        )

        meta = ManifestMeta(
            source_url=self.config.source_url,
            export_timestamp=export_timestamp,
            tool_version=TOOL_VERSION,
            cli_args=cli_args,
        )
        return Manifest(meta=meta)

    def run_export(self):
        """Run the whole export and terminate the process with a status code.

        Exits with status 0 after a successful export, 1 when a
        ``MetabaseAPIError`` is raised and 2 on any other exception. When no
        collection is left to export the method logs a warning and returns
        without writing a manifest and without calling ``sys.exit``.
        """
        logger.info(f"Starting Metabase export from {self.config.source_url}")
        logger.info(f"Export directory: {self.export_dir.resolve()}")

        self.export_dir.mkdir(parents=True, exist_ok=True)

        try:
            logger.info("Fetching source databases...")
            self._fetch_and_store_databases()

            logger.info("Fetching collection tree...")
            collection_tree = self.client.get_collections_tree(
                params={"archived": self.config.include_archived}
            )

            # Filter tree if root_collection_ids are specified. Only the
            # top-level entries of the tree are compared against the IDs.
            if self.config.root_collection_ids:
                collection_tree = [
                    c for c in collection_tree if c.get("id") in self.config.root_collection_ids
                ]
                logger.info(
                    f"Export restricted to root collections: {self.config.root_collection_ids}"
                )

            if not collection_tree:
                logger.warning("No collections found to export.")
                return

            # Process collections recursively
            self._traverse_collections(collection_tree)

            # Write the final manifest file
            manifest_path = self.export_dir / "manifest.json"
            logger.info(f"Writing manifest to {manifest_path}")
            write_json_file(self.manifest, manifest_path)

            # Print summary
            logger.info("=" * 80)
            logger.info("Export Summary:")
            logger.info(f" Collections: {len(self.manifest.collections)}")
            logger.info(f" Cards: {len(self.manifest.cards)}")
            logger.info(f" Dashboards: {len(self.manifest.dashboards)}")
            logger.info(f" Databases: {len(self.manifest.databases)}")
            logger.info("=" * 80)
            logger.info("Export completed successfully.")
            # SystemExit does not derive from Exception, so the handlers
            # below do not intercept this successful exit.
            sys.exit(0)

        except MetabaseAPIError as e:
            logger.error(f"A Metabase API error occurred: {e}", exc_info=True)
            sys.exit(1)
        except Exception as e:
            logger.error(f"An unexpected error occurred: {e}", exc_info=True)
            sys.exit(2)

    def _fetch_and_store_databases(self):
        """Fetch all databases from the source and store ``id -> name`` in the manifest.

        Accepts either a ``{"data": [...]}`` envelope or a plain list; any
        other response shape is logged and treated as "no databases".
        """
        databases_response = self.client.get_databases()

        # Handle different response formats
        if isinstance(databases_response, dict) and "data" in databases_response:
            databases = databases_response["data"]
        elif isinstance(databases_response, list):
            databases = databases_response
        else:
            logger.error(f"Unexpected databases response format: {type(databases_response)}")
            logger.debug(f"Response: {databases_response}")
            databases = []

        self.manifest.databases = {db["id"]: db["name"] for db in databases}
        logger.info(f"Found {len(self.manifest.databases)} databases.")

    def _traverse_collections(
        self, collections: list[dict], parent_path: str = "", parent_id: int | None = None
    ):
        """Recursively walk the collection tree and export every collection.

        Args:
            collections: Collection dicts at the current level of the tree.
            parent_path: Relative export path of the parent collection
                (empty for the top level).
            parent_id: ID of the parent collection, or ``None`` when unknown;
                in that case the parent is derived from the ``location`` field.
        """
        for collection_data in tqdm(collections, desc="Processing Collections"):
            collection_id = collection_data.get("id")

            # Skip personal collections unless explicitly included
            if collection_data.get("personal_owner_id") and collection_id not in (
                self.config.root_collection_ids or []
            ):
                logger.info(
                    f"Skipping personal collection '{collection_data['name']}' (ID: {collection_id})"
                )
                continue

            # Handle "root" collection which is a special case
            if isinstance(collection_id, str) and collection_id == "root":
                logger.info("Processing root collection content...")
                current_path = "collections"
                self._process_collection_items("root", current_path)
            elif isinstance(collection_id, int):
                if collection_id in self._processed_collections:
                    continue
                self._processed_collections.add(collection_id)

                sanitized_name = sanitize_filename(collection_data["name"])
                current_path = f"{parent_path}/{sanitized_name}".lstrip("/")
                self._collection_path_map[collection_id] = current_path

                # Extract parent_id from location field if not provided.
                # Location format: "/24/25/" means parent is 25, grandparent is 24
                actual_parent_id = parent_id
                location = (collection_data.get("location") or "").strip("/")
                if actual_parent_id is None and location:
                    try:
                        actual_parent_id = int(location.split("/")[-1])
                    except ValueError:
                        # Keep parent_id as None, but leave a trace instead of
                        # swallowing the malformed location silently.
                        logger.debug(
                            f"Could not parse parent ID from location '{collection_data['location']}'"
                        )

                collection_obj = Collection(
                    id=collection_id,
                    name=collection_data["name"],
                    description=collection_data.get("description"),
                    slug=collection_data.get("slug"),
                    parent_id=actual_parent_id,
                    personal_owner_id=collection_data.get("personal_owner_id"),
                    path=current_path,
                )
                self.manifest.collections.append(collection_obj)

                # Write collection metadata file
                collection_meta_path = self.export_dir / current_path / "_collection.json"
                write_json_file(collection_data, collection_meta_path)

                logger.info(
                    f"Processing collection '{collection_data['name']}' (ID: {collection_id})"
                )
                self._process_collection_items(collection_id, current_path)

                # Recurse into children, passing current collection_id as parent
                if "children" in collection_data and collection_data["children"]:
                    self._traverse_collections(
                        collection_data["children"], current_path, collection_id
                    )

    def _process_collection_items(self, collection_id: Any, base_path: str):
        """Fetch the items of one collection and export its cards and dashboards.

        Args:
            collection_id: Integer collection ID, or ``"root"`` for the root
                collection.
            base_path: Relative export path of the collection.

        A ``MetabaseAPIError`` is logged and ends processing of this
        collection only; it is not re-raised.
        """
        try:
            params = {"models": ["card", "dashboard"], "archived": self.config.include_archived}
            items_response = self.client.get_collection_items(collection_id, params)

            # Accept both a {"data": [...]} envelope and a plain list, the same
            # two shapes _fetch_and_store_databases handles; calling .get() on
            # a list would raise an AttributeError that nothing here catches.
            if isinstance(items_response, dict):
                items = items_response.get("data") or []
            elif isinstance(items_response, list):
                items = items_response
            else:
                logger.error(
                    f"Unexpected collection items response format: {type(items_response)}"
                )
                items = []

            if not items:
                logger.debug(f"No items found in collection {collection_id}")
                return

            for item in items:
                model = item.get("model")
                if model == "card":
                    self._export_card_with_dependencies(item["id"], base_path)
                elif model == "dashboard" and self.config.include_dashboards:
                    self._export_dashboard(item["id"], base_path)

        except MetabaseAPIError as e:
            logger.error(f"Failed to process items for collection {collection_id}: {e}")

    @staticmethod
    def _extract_card_dependencies(card_data: dict) -> set[int]:
        """Return the IDs of the cards that ``card_data`` is built on.

        A dependency is a ``"source-table"`` value of the form ``"card__123"``
        found in the card's ``dataset_query["query"]``, in any of its
        ``"joins"``, or in a nested ``"source-query"`` of either. Missing or
        ``None`` sections (for example a card without ``dataset_query``, or a
        native query without a ``"query"`` key) yield no dependencies instead
        of raising.

        Args:
            card_data: Card payload as returned by the API.

        Returns:
            The set of referenced card IDs; empty when there are none.
        """
        dependencies: set[int] = set()

        def record(source_table: Any, in_join: bool) -> None:
            """Add the card ID encoded in ``source_table`` if it is a card reference."""
            if not (isinstance(source_table, str) and source_table.startswith("card__")):
                return
            try:
                # removeprefix strips only the leading marker; replace() would
                # also strip later occurrences and could fabricate an ID.
                dependencies.add(int(source_table.removeprefix("card__")))
            except ValueError:
                if in_join:
                    logger.warning(f"Invalid card reference in join: {source_table}")
                else:
                    logger.warning(f"Invalid card reference format: {source_table}")

        dataset_query = card_data.get("dataset_query")
        if not isinstance(dataset_query, dict):
            return dependencies

        # Iterative walk over the query and every nested source-query.
        pending: list[Any] = [dataset_query.get("query")]
        while pending:
            query = pending.pop()
            if not isinstance(query, dict):
                continue

            record(query.get("source-table"), in_join=False)
            pending.append(query.get("source-query"))

            for join in query.get("joins") or []:
                if not isinstance(join, dict):
                    continue
                record(join.get("source-table"), in_join=True)
                pending.append(join.get("source-query"))

        return dependencies

    def _export_card_with_dependencies(
        self,
        card_id: int,
        base_path: str,
        dependency_chain: list[int] | None = None,
        card_data: dict | None = None,
    ):
        """Export a card after recursively exporting every card it depends on.

        Args:
            card_id: The ID of the card to export.
            base_path: Relative export path of the collection the card is
                written under.
            dependency_chain: Card IDs currently being resolved, used to
                detect circular dependencies.
            card_data: Optional pre-fetched card payload; when given, the card
                is not fetched again.

        A ``MetabaseAPIError`` while fetching the card or one of its
        dependencies is logged and not re-raised.
        """
        # Skip if already exported
        if card_id in self._exported_cards:
            logger.debug(f"Card {card_id} already exported, skipping")
            return

        # Initialize dependency chain if not provided
        if dependency_chain is None:
            dependency_chain = []

        # Check for circular dependencies
        if card_id in dependency_chain:
            chain_str = " -> ".join(str(c) for c in dependency_chain + [card_id])
            logger.warning(f"Circular dependency detected: {chain_str}. Breaking cycle.")
            return

        # New list, so sibling branches never see each other's chain.
        current_chain = dependency_chain + [card_id]

        try:
            if card_data is None:
                logger.debug(f"Fetching card {card_id} to check dependencies")
                card_data = self.client.get_card(card_id)

            dependencies = self._extract_card_dependencies(card_data)

            if dependencies:
                logger.info(
                    f"Card {card_id} ('{card_data.get('name', 'Unknown')}') depends on cards: {sorted(dependencies)}"
                )

            # Export dependencies first so they precede this card in the manifest.
            for dep_id in sorted(dependencies):
                if dep_id in self._exported_cards:
                    continue

                logger.info(
                    f" -> Exporting dependency: Card {dep_id} (required by Card {card_id})"
                )

                # Fetch the dependency to determine which collection it lives in.
                try:
                    dep_card_data = self.client.get_card(dep_id)
                except MetabaseAPIError as e:
                    logger.error(f" Failed to fetch dependency card {dep_id}: {e}")
                    logger.warning(
                        f" Card {card_id} may fail to import due to missing dependency {dep_id}"
                    )
                    continue

                dep_collection_id = dep_card_data.get("collection_id")
                if dep_collection_id and dep_collection_id in self._collection_path_map:
                    dep_base_path = self._collection_path_map[dep_collection_id]
                else:
                    # Use a special "dependencies" folder for cards outside the export scope
                    dep_base_path = "dependencies"
                    logger.info(
                        f" Card {dep_id} is outside export scope, placing in '{dep_base_path}' folder"
                    )

                if dep_card_data.get("archived", False) and not self.config.include_archived:
                    logger.warning(
                        f" Card {dep_id} is archived but --include-archived not set. Exporting anyway due to dependency."
                    )

                # Hand over the payload just fetched so the recursive call
                # does not request the same card a second time.
                self._export_card_with_dependencies(
                    dep_id, dep_base_path, current_chain, card_data=dep_card_data
                )

            # Now export the card itself
            self._export_card(card_id, base_path, card_data)

        except MetabaseAPIError as e:
            logger.error(f"Failed to fetch card {card_id} for dependency analysis: {e}")

    def _export_card(self, card_id: int, base_path: str, card_data: dict | None = None):
        """Write one card to disk and register it in the manifest.

        Args:
            card_id: The ID of the card to export.
            base_path: Relative export path of the collection the card is
                written under.
            card_data: Optional pre-fetched card payload (avoids a redundant
                API call).

        Cards without ``dataset_query`` or without a database ID are skipped
        with a warning. Any exception is logged and not re-raised, so one
        broken card does not abort the export.
        """
        # Skip if already exported
        if card_id in self._exported_cards:
            logger.debug(f"Card {card_id} already exported, skipping")
            return

        try:
            logger.debug(f"Exporting card ID {card_id}")

            # Fetch card data if not provided
            if card_data is None:
                card_data = self.client.get_card(card_id)

            if not card_data.get("dataset_query"):
                logger.warning(
                    f"Card ID {card_id} ('{card_data['name']}') has no dataset_query. Skipping."
                )
                return

            db_id = card_data.get("database_id") or card_data["dataset_query"].get("database")
            if db_id is None:
                logger.warning(
                    f"Card ID {card_id} ('{card_data['name']}') has no database ID. Skipping."
                )
                return

            card_slug = sanitize_filename(card_data["name"])
            file_path_str = f"{base_path}/cards/card_{card_id}_{card_slug}.json"
            file_path = self.export_dir / file_path_str

            write_json_file(card_data, file_path)
            checksum = calculate_checksum(file_path)

            card_obj = Card(
                id=card_id,
                name=card_data["name"],
                collection_id=card_data.get("collection_id"),
                database_id=db_id,
                file_path=file_path_str,
                checksum=checksum,
                archived=card_data.get("archived", False),
            )
            self.manifest.cards.append(card_obj)

            # Mark as exported only after the file and manifest entry exist.
            self._exported_cards.add(card_id)

            logger.info(f" -> Exported Card: '{card_data['name']}' (ID: {card_id})")

        except MetabaseAPIError as e:
            logger.error(f"Failed to export card ID {card_id}: {e}")
        except Exception as e:
            # Best effort: keep exporting other cards, but keep the traceback
            # so the unexpected failure can be diagnosed.
            logger.error(
                f"An unexpected error occurred while exporting card ID {card_id}: {e}",
                exc_info=True,
            )

    def _export_dashboard(self, dashboard_id: int, base_path: str):
        """Write one dashboard to disk and register it in the manifest.

        Args:
            dashboard_id: The ID of the dashboard to export.
            base_path: Relative export path of the collection the dashboard
                is written under.

        Any exception is logged and not re-raised, so one broken dashboard
        does not abort the export.
        """
        try:
            logger.debug(f"Exporting dashboard ID {dashboard_id}")
            dashboard_data = self.client.get_dashboard(dashboard_id)

            dash_slug = sanitize_filename(dashboard_data["name"])
            file_path_str = f"{base_path}/dashboards/dash_{dashboard_id}_{dash_slug}.json"
            file_path = self.export_dir / file_path_str

            write_json_file(dashboard_data, file_path)
            checksum = calculate_checksum(file_path)

            # Card IDs referenced by the dashboard's dashcards; entries without
            # a card_id are left out.
            card_ids = [
                dashcard["card_id"]
                for dashcard in dashboard_data.get("dashcards") or []
                if dashcard.get("card_id")
            ]

            dashboard_obj = Dashboard(
                id=dashboard_id,
                name=dashboard_data["name"],
                collection_id=dashboard_data.get("collection_id"),
                ordered_cards=card_ids,
                file_path=file_path_str,
                checksum=checksum,
                archived=dashboard_data.get("archived", False),
            )
            self.manifest.dashboards.append(dashboard_obj)
            logger.info(f" -> Exported Dashboard: '{dashboard_data['name']}' (ID: {dashboard_id})")

        except MetabaseAPIError as e:
            logger.error(f"Failed to export dashboard ID {dashboard_id}: {e}")
        except Exception as e:
            # Best effort: keep exporting other dashboards, but keep the
            # traceback so the unexpected failure can be diagnosed.
            logger.error(
                f"An unexpected error occurred while exporting dashboard ID {dashboard_id}: {e}",
                exc_info=True,
            )
456
+
457
def main() -> None:
    """Parse the export arguments, configure logging and run the export.

    ``MetabaseExporter.run_export`` terminates the process itself through
    ``sys.exit`` on success and on failure.
    """
    config = get_export_args()
    # NOTE(review): setup_logging receives __name__ at module level and
    # config.log_level here - confirm the helper's signature covers both uses.
    setup_logging(config.log_level)
    MetabaseExporter(config).run_export()


if __name__ == "__main__":
    main()