mdb-cli 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
mdb/mdb.py ADDED
@@ -0,0 +1,1214 @@
1
+ """Unified CLI entry point for mdb: push, pull subcommands."""
2
+
3
+ import argparse
4
+ import csv
5
+ import os
6
+ import sqlite3
7
+ import sys
8
+ from mdb.models import FileMarkers, FeedConflict
9
+ from mdb.parser import find_tap_markers, find_feed_markers, find_adjacent_table, parse_markdown_table, resolve_scope_path, parse_header_types_with_annotation_flags, parse_cast_types, resolve_column_types, parse_post_pull_tap_query, parse_pre_push_feed_query
10
+ from mdb.formatter import format_markdown_table
11
+ from mdb.puller import execute_tap_query, ingest_feed_table, cleanup_stale_tables
12
+ from mdb.validators import validate_dql, validate_dml
13
+ from mdb.discovery import resolve_paths
14
+ from mdb.atomic import atomic_write
15
+ from mdb.filelock import batch_lock
16
+
17
+
18
+ # ---------------------------------------------------------------------------
19
+ # Helpers (T002, T003, T004)
20
+ # ---------------------------------------------------------------------------
21
+
22
def _collect_file_markers(filepath, include_taps, include_feeds):
    """Read one markdown file and gather the markers the caller asked for.

    Args:
        filepath: Path of the markdown file; it is read as UTF-8 text.
        include_taps: When truthy, tap markers are located with
            find_tap_markers; otherwise the tap list is empty.
        include_feeds: When truthy, feed markers are located with
            find_feed_markers; otherwise the feed list is empty.

    Returns:
        A FileMarkers carrying the path, raw content, split lines and both
        marker lists, or None when the file could not be read (the reason
        is printed to stderr).
    """
    problem = None
    content = None
    try:
        with open(filepath, "r", encoding="utf-8") as handle:
            content = handle.read()
    except UnicodeDecodeError:
        problem = "File is not valid UTF-8"
    except FileNotFoundError:
        problem = "File not found"
    except OSError as exc:
        problem = f"{exc}"

    # Any read failure is reported once and signalled to the caller as None.
    if problem is not None:
        print(f"Error [{filepath}]: {problem}", file=sys.stderr)
        return None

    taps = find_tap_markers(content) if include_taps else []
    feeds = find_feed_markers(content) if include_feeds else []
    return FileMarkers(
        filepath=filepath,
        content=content,
        lines=content.splitlines(),
        tap_markers=taps,
        feed_markers=feeds,
    )
51
+
52
+
53
def _print_summary(files_processed, markers_found, succeeded, failed):
    """Write the closing one-line run summary to stdout."""
    counts = f"Processed {files_processed} files, {markers_found} markers: "
    outcome = f"{succeeded} succeeded, {failed} failed"
    print(counts + outcome)
59
+
60
+
61
def _discover_files(include_arg: str | None) -> list[str]:
    """Turn the raw --include value into the list of files to process.

    Args:
        include_arg: The --include string exactly as given on the command
            line (whitespace-separated patterns), or None when absent.

    Returns:
        The file list produced by resolve_paths for those patterns. A
        missing or blank argument falls back to the single pattern '**'.
        (Ordering, de-duplication and path form are whatever resolve_paths
        provides.)
    """
    has_patterns = bool(include_arg and include_arg.strip())
    globs = include_arg.split() if has_patterns else ["**"]

    matched, notes = resolve_paths(globs)
    # Discovery warnings are surfaced on stderr but never abort the run.
    for note in notes:
        print(note, file=sys.stderr)
    return matched
79
+
80
+
81
+ def _normalize_include_for_hash(include_arg: str | None) -> str | None:
82
+ """Normalize include_arg for database path hashing.
83
+
84
+ Returns None if include_arg is unset, empty, or equivalent to the default '**'.
85
+ Otherwise returns the raw include_arg for hashing.
86
+ """
87
+ if not include_arg or not include_arg.strip():
88
+ return None
89
+ tokens = include_arg.split()
90
+ if tokens == ["**"]:
91
+ return None
92
+ return include_arg
93
+
94
+
95
+ # ---------------------------------------------------------------------------
96
+ # Conflict detection (T005)
97
+ # ---------------------------------------------------------------------------
98
+
99
def _detect_feed_conflicts(all_file_markers, include_paths=None):
    """Find feed tables that more than one file tries to write.

    Every feed marker with a known table name is bucketed under the pair
    (resolved database path, table name). A bucket fed from two or more
    distinct files is a conflict; several markers inside one file are not.

    Args:
        all_file_markers: Iterable of FileMarkers to inspect.
        include_paths: Forwarded to resolve_scope_path.

    Returns:
        A list of FeedConflict, one per conflicted bucket, in the order the
        buckets were first encountered.
    """
    sites_by_target = {}  # (resolved_path, table_name) -> [(filepath, line_number), ...]
    scope_by_target = {}  # remembers the scope of the last marker seen per target

    for file_markers in all_file_markers:
        for feed in file_markers.feed_markers:
            if feed.table_name is not None:
                db_path = resolve_scope_path(
                    feed.scope, file_markers.filepath, include_paths
                )
                target = (db_path, feed.table_name)
                if target not in sites_by_target:
                    sites_by_target[target] = []
                sites_by_target[target].append(
                    (file_markers.filepath, feed.line_number)
                )
                scope_by_target[target] = feed.scope

    return [
        FeedConflict(
            scope=scope_by_target[target],
            resolved_path=target[0],
            table_name=target[1],
            conflicting=sites,
        )
        for target, sites in sites_by_target.items()
        if len({source for source, _ in sites}) > 1
    ]
130
+
131
+
132
def _conflicted_keys(conflicts):
    """Collect the (resolved_path, table_name) pair of every conflict into a set."""
    return {(item.resolved_path, item.table_name) for item in conflicts}
135
+
136
+
137
+ # ---------------------------------------------------------------------------
138
+ # Push subcommand (T006, T007)
139
+ # ---------------------------------------------------------------------------
140
+
141
def _run_push(filepaths, compaction="full", verbose=False, include_paths=None):
    """Run the push subcommand while holding the batch lock on all files.

    The actual work (ingesting 🌀 feed tables, then refreshing 💎 tap
    tables) is done by _run_push_locked; its exit code is returned as is.
    """
    with batch_lock(filepaths):
        return _run_push_locked(
            filepaths,
            compaction=compaction,
            verbose=verbose,
            include_paths=include_paths,
        )
145
+
146
+
147
def _run_push_locked(filepaths, compaction="full", verbose=False, include_paths=None):
    """Run both push phases over files whose lock the caller already holds.

    Phase one ingests every 🌀 feed table into its database, after
    cross-file conflict detection and stale-table cleanup. Phase two runs
    every 💎 tap query and writes the resulting table back into the
    markdown file, replacing the adjacent table or inserting a new one.

    Args:
        filepaths: Markdown files to process.
        compaction: Forwarded to format_markdown_table.
        verbose: When true, an unchanged feed table is reported as SKIP
            rather than OK.
        include_paths: Forwarded to resolve_scope_path and
            _detect_feed_conflicts.

    Returns:
        1 when at least one file or marker failed, otherwise 0.
    """

    def flag(headline, where, reason):
        # Status line goes to stdout, the matching plain error to stderr.
        print(f"{headline} - {reason} [{where}]")
        print(f"Error [{where}]: {reason}", file=sys.stderr)

    pull_ok = pull_bad = push_ok = push_bad = 0
    files_seen = 0

    # Parse every file first so conflicts can be judged across files.
    parsed_files = []
    for path in filepaths:
        parsed = _collect_file_markers(path, include_taps=True, include_feeds=True)
        files_seen += 1
        if parsed is None:
            pull_bad += 1
        else:
            parsed_files.append(parsed)

    # --- Pull phase: conflicts, cleanup, then ingest of all 🌀 markers ---
    conflicts = _detect_feed_conflicts(parsed_files, include_paths)
    blocked = _conflicted_keys(conflicts)
    for clash in conflicts:
        sites = " and ".join(f"{fp}:{ln}" for fp, ln in clash.conflicting)
        print(
            f"CONFLICT {clash.scope} 💾 {clash.table_name} - "
            f"conflicting writes from {sites}"
        )
        # Every participating marker counts as one failure.
        pull_bad += len(clash.conflicting)

    # Table names declared per database, used for orphan cleanup (FR-006, FR-013).
    declared = {}
    for parsed in parsed_files:
        for feed in parsed.feed_markers:
            if feed.table_name is not None:
                db = resolve_scope_path(feed.scope, parsed.filepath, include_paths)
                declared.setdefault(db, set()).add(feed.table_name)

    # Cleanup happens before any write (FR-013).
    for db, names in declared.items():
        cleanup_stale_tables(db, names)

    for parsed in parsed_files:
        for feed in parsed.feed_markers:
            db = resolve_scope_path(feed.scope, parsed.filepath, include_paths)
            where = f"{parsed.filepath}:{feed.line_number}"

            if feed.table_name is None:
                flag(
                    f"FAIL {feed.scope} 🌀 (unknown)",
                    where,
                    f"Could not parse table name from query: {feed.raw_query}",
                )
                pull_bad += 1
                continue

            # Conflicted targets were already reported and counted above.
            if (db, feed.table_name) in blocked:
                continue

            tag = f"{feed.scope} 🌀 {feed.table_name}"

            table, _ = find_adjacent_table(
                parsed.lines, feed.line_number, feed.end_line_number
            )
            if table is None:
                flag(f"WARN {tag}", where, "No markdown table found after marker")
                pull_bad += 1
                continue

            col_names, col_types, was_annotated, type_warnings, type_errors = \
                parse_header_types_with_annotation_flags(table.columns)

            for note in type_warnings:
                print(f"WARN {note} [{where}]", file=sys.stderr)

            if type_errors:
                for problem in type_errors:
                    flag(f"FAIL {tag}", where, problem)
                pull_bad += 1
                continue

            cast_types, cast_errors = parse_cast_types(feed.raw_query)
            if cast_errors:
                for problem in cast_errors:
                    flag(f"FAIL {tag}", where, problem)
                pull_bad += 1
                continue

            final_types = resolve_column_types(
                col_types, cast_types, len(col_names), was_annotated
            )

            try:
                outcome = ingest_feed_table(
                    db_path=db,
                    table_name=feed.table_name,
                    columns=col_names,
                    rows=table.rows,
                    column_types=final_types,
                )
                if outcome.success:
                    if not outcome.skipped:
                        print(f"OK {tag} ({outcome.rows_written} rows) [{where}]")
                    elif verbose:
                        print(f"SKIP {tag} (unchanged) [{where}]")
                    else:
                        print(f"OK {tag} ({len(table.rows)} rows) [{where}]")
                    pull_ok += 1
                else:
                    flag(f"FAIL {tag}", where, outcome.error)
                    pull_bad += 1
            except Exception as exc:
                flag(f"FAIL {tag}", where, exc)
                pull_bad += 1

    # --- Push phase: execute all 💎 markers ---
    pending = {}  # filepath -> (FileMarkers, [(start_idx, end_idx, new_lines), ...])
    for parsed in parsed_files:
        edits = []
        for tap in parsed.tap_markers:
            db = resolve_scope_path(tap.scope, parsed.filepath, include_paths)
            where = f"{parsed.filepath}:{tap.line_number}"
            headline = f"FAIL {tap.scope} 💎 {tap.raw_query}"

            problem = validate_dql(tap.raw_query)
            if problem:
                flag(headline, where, problem)
                push_bad += 1
                continue

            table, _ = find_adjacent_table(
                parsed.lines, tap.line_number, tap.end_line_number
            )

            outcome = execute_tap_query(db, tap.raw_query)
            if not outcome.success:
                flag(headline, where, outcome.error)
                push_bad += 1
                continue

            rendered = format_markdown_table(
                outcome.columns, outcome.rows, compaction=compaction
            ).split("\n")

            if table is None:
                # Dangling tap marker: insert a blank line plus the new table
                # right after the marker/fence. end = start - 1 makes the
                # slice assignment below a pure insertion.
                anchor = tap.end_line_number or tap.line_number
                edits.append((anchor, anchor - 1, [""] + rendered))
            else:
                edits.append((table.start_line - 1, table.end_line - 1, rendered))

            print(f"OK {tap.scope} 💎 {tap.raw_query} ({len(outcome.rows)} rows) [{where}]")
            push_ok += 1

        if edits:
            pending[parsed.filepath] = (parsed, edits)

    # Apply edits bottom-up so earlier line indices stay valid.
    for target_path, (parsed, edits) in pending.items():
        buffer = list(parsed.lines)
        for start, stop, fresh in sorted(edits, key=lambda e: e[0], reverse=True):
            buffer[start:stop + 1] = fresh
        trailer = "\n" if parsed.content.endswith("\n") else ""
        atomic_write(target_path, "\n".join(buffer) + trailer)

    succeeded = pull_ok + push_ok
    failed = pull_bad + push_bad
    _print_summary(files_seen, succeeded + failed, succeeded, failed)
    return 1 if failed > 0 else 0
400
+
401
+
402
+ # ---------------------------------------------------------------------------
403
+ # Pull subcommand (T009, T010)
404
+ # ---------------------------------------------------------------------------
405
+
406
def _run_pull(filepaths, verbose=False, include_paths=None):
    """Run the pull subcommand: load every 🌀 feed table into its database.

    Files are parsed up front so cross-file conflicts can be detected.
    Stale tables are then cleaned up per database, and finally each
    non-conflicted feed marker's adjacent markdown table is ingested.

    Args:
        filepaths: Markdown files to process.
        verbose: When true, an unchanged table is reported as SKIP rather
            than OK.
        include_paths: Forwarded to resolve_scope_path and
            _detect_feed_conflicts.

    Returns:
        1 when at least one file or marker failed, otherwise 0.
    """

    def flag(headline, where, reason):
        # Status line goes to stdout, the matching plain error to stderr.
        print(f"{headline} - {reason} [{where}]")
        print(f"Error [{where}]: {reason}", file=sys.stderr)

    succeeded = failed = 0
    files_seen = 0

    # Gather markers from every file before touching any database.
    parsed_files = []
    for path in filepaths:
        parsed = _collect_file_markers(path, include_taps=False, include_feeds=True)
        files_seen += 1
        if parsed is None:
            failed += 1
        else:
            parsed_files.append(parsed)

    # Cross-file conflicts: report them and count each participating marker.
    conflicts = _detect_feed_conflicts(parsed_files, include_paths)
    blocked = _conflicted_keys(conflicts)
    for clash in conflicts:
        sites = " and ".join(f"{fp}:{ln}" for fp, ln in clash.conflicting)
        print(
            f"CONFLICT {clash.scope} 💾 {clash.table_name} - "
            f"conflicting writes from {sites}"
        )
        failed += len(clash.conflicting)

    # Table names declared per database, used for orphan cleanup (FR-006, FR-013).
    declared = {}
    for parsed in parsed_files:
        for feed in parsed.feed_markers:
            if feed.table_name is not None:
                db = resolve_scope_path(feed.scope, parsed.filepath, include_paths)
                declared.setdefault(db, set()).add(feed.table_name)

    # Cleanup happens before any write (FR-013).
    for db, names in declared.items():
        cleanup_stale_tables(db, names)

    for parsed in parsed_files:
        for feed in parsed.feed_markers:
            db = resolve_scope_path(feed.scope, parsed.filepath, include_paths)
            where = f"{parsed.filepath}:{feed.line_number}"

            if feed.table_name is None:
                flag(
                    f"FAIL {feed.scope} 🌀 (unknown)",
                    where,
                    f"Could not parse table name from query: {feed.raw_query}",
                )
                failed += 1
                continue

            # Conflicted targets were already reported as CONFLICT above.
            if (db, feed.table_name) in blocked:
                continue

            tag = f"{feed.scope} 🌀 {feed.table_name}"

            table, _ = find_adjacent_table(
                parsed.lines, feed.line_number, feed.end_line_number
            )
            if table is None:
                flag(f"WARN {tag}", where, "No markdown table found after marker")
                failed += 1
                continue

            # Header annotations: warnings are informational, errors skip the marker.
            col_names, col_types, was_annotated, type_warnings, type_errors = \
                parse_header_types_with_annotation_flags(table.columns)

            for note in type_warnings:
                print(f"WARN {note} [{where}]", file=sys.stderr)

            if type_errors:
                for problem in type_errors:
                    flag(f"FAIL {tag}", where, problem)
                failed += 1
                continue

            # Cast types come from the marker's query text.
            cast_types, cast_errors = parse_cast_types(feed.raw_query)
            if cast_errors:
                for problem in cast_errors:
                    flag(f"FAIL {tag}", where, problem)
                failed += 1
                continue

            final_types = resolve_column_types(
                col_types, cast_types, len(col_names), was_annotated
            )

            try:
                outcome = ingest_feed_table(
                    db_path=db,
                    table_name=feed.table_name,
                    columns=col_names,
                    rows=table.rows,
                    column_types=final_types,
                )
                if outcome.success:
                    if not outcome.skipped:
                        print(f"OK {tag} ({outcome.rows_written} rows) [{where}]")
                    elif verbose:
                        print(f"SKIP {tag} (unchanged) [{where}]")
                    else:
                        print(f"OK {tag} ({len(table.rows)} rows) [{where}]")
                    succeeded += 1
                else:
                    flag(f"FAIL {tag}", where, outcome.error)
                    failed += 1
            except Exception as exc:
                flag(f"FAIL {tag}", where, exc)
                failed += 1

    _print_summary(files_seen, succeeded + failed, succeeded, failed)
    return 1 if failed > 0 else 0
591
+
592
+
593
+ # ---------------------------------------------------------------------------
594
+ # Post-pull-tap-query subcommand (027-pull-query)
595
+ # ---------------------------------------------------------------------------
596
+
597
def _run_pull_with_tap_query(query, include_paths, cwd, verbose=False):
    """Entry point for "pull, then query": discover files, lock them, delegate.

    Args:
        query: Raw query argument; parse_post_pull_tap_query splits it into
            a scope name and the SQL text.
        include_paths: Raw --include value used for file discovery. A
            normalized form is what gets passed on to the locked worker.
        cwd: Accepted but not used by this function.
        verbose: Forwarded to the locked worker.

    Returns:
        1 when no files were discovered, otherwise the exit code of
        _run_pull_with_tap_query_locked.
    """
    scope_name, sql_query = parse_post_pull_tap_query(query)
    filepaths = _discover_files(include_paths)

    if not filepaths:
        complaint = (
            f"Error: No markers found for scope '{scope_name}'"
            if scope_name
            else "Error: No data sources found for default scope"
        )
        print(complaint, file=sys.stderr)
        return 1

    hash_include = _normalize_include_for_hash(include_paths)
    with batch_lock(filepaths):
        return _run_pull_with_tap_query_locked(
            filepaths, scope_name, sql_query, hash_include, verbose
        )
615
+
616
+
617
def _run_pull_with_tap_query_locked(filepaths, scope_name, sql_query, include_paths, verbose=False):
    """Ingest one scope's feed tables, run a read-only query, print CSV.

    The caller is expected to already hold the lock on ``filepaths``.

    Args:
        filepaths: Markdown files to scan for 🌀 feed markers.
        scope_name: Scope to select. A marker matches when its stripped
            scope equals this value, so "" selects only markers with a
            blank scope.
        sql_query: SQL to execute after ingest; it must pass validate_dql.
        include_paths: Forwarded to resolve_scope_path and
            _detect_feed_conflicts.
        verbose: Accepted for signature parity; not used in this function.

    Returns:
        0 after the result has been written to stdout as CSV. 1 when no
        marker matches the scope, any cross-file conflict exists, any
        marker fails to ingest, the query is rejected, or it fails to run.
        All diagnostics go to stderr so stdout carries only CSV.
    """
    # Keep, per file, only the feed markers that belong to the requested scope.
    filtered_fm = []
    for filepath in filepaths:
        fm = _collect_file_markers(filepath, include_taps=False, include_feeds=True)
        if fm is None:
            # Unreadable file: _collect_file_markers has already reported it.
            continue
        matching = [m for m in fm.feed_markers if m.scope.strip() == scope_name]
        if matching:
            filtered_fm.append(FileMarkers(
                filepath=fm.filepath,
                content=fm.content,
                lines=fm.lines,
                tap_markers=[],
                feed_markers=matching,
            ))

    if not filtered_fm:
        if scope_name:
            print(f"Error: No markers found for scope '{scope_name}'", file=sys.stderr)
        else:
            print("Error: No data sources found for default scope", file=sys.stderr)
        return 1

    # Report EVERY conflict before giving up, so the user can fix them all
    # in one pass (previously only the first one was printed).
    conflicts = _detect_feed_conflicts(filtered_fm, include_paths)
    for conflict in conflicts:
        locations = " and ".join(f"{fp}:{ln}" for fp, ln in conflict.conflicting)
        print(
            f"CONFLICT {conflict.scope} \U0001f4be {conflict.table_name} - "
            f"conflicting writes from {locations}",
            file=sys.stderr,
        )
    if conflicts:
        return 1

    # Table names declared per database, used for orphan cleanup (FR-006, FR-013).
    discovered_by_db = {}
    for fm in filtered_fm:
        for marker in fm.feed_markers:
            if marker.table_name is None:
                continue
            resolved = resolve_scope_path(marker.scope, fm.filepath, include_paths)
            discovered_by_db.setdefault(resolved, set()).add(marker.table_name)

    # Orphan cleanup BEFORE writes (FR-013).
    for db_path, discovered_tables in discovered_by_db.items():
        cleanup_stale_tables(db_path, discovered_tables)

    # Pull phase: feed markers to DB. No per-marker conflict check is needed
    # here because any conflict has already caused an early return.
    pull_failed = 0
    for fm in filtered_fm:
        for marker in fm.feed_markers:
            resolved = resolve_scope_path(marker.scope, fm.filepath, include_paths)
            where = f"{fm.filepath}:{marker.line_number}"

            if marker.table_name is None:
                msg = f"Could not parse table name from query: {marker.raw_query}"
                print(f"Error [{where}]: {msg}", file=sys.stderr)
                pull_failed += 1
                continue

            table, _ = find_adjacent_table(
                fm.lines, marker.line_number, marker.end_line_number
            )
            if table is None:
                print(f"Error [{where}]: No markdown table found after marker", file=sys.stderr)
                pull_failed += 1
                continue

            col_names, col_types, was_annotated, type_warnings, type_errors = \
                parse_header_types_with_annotation_flags(table.columns)

            for warn in type_warnings:
                print(f"WARN {warn} [{where}]", file=sys.stderr)

            if type_errors:
                for err in type_errors:
                    print(f"Error [{where}]: {err}", file=sys.stderr)
                pull_failed += 1
                continue

            cast_types, cast_errors = parse_cast_types(marker.raw_query)
            if cast_errors:
                for err in cast_errors:
                    print(f"Error [{where}]: {err}", file=sys.stderr)
                pull_failed += 1
                continue

            final_types = resolve_column_types(
                col_types, cast_types, len(col_names), was_annotated
            )

            try:
                result = ingest_feed_table(
                    db_path=resolved,
                    table_name=marker.table_name,
                    columns=col_names,
                    rows=table.rows,
                    column_types=final_types,
                )
                if not result.success:
                    print(f"Error [{where}]: {result.error}", file=sys.stderr)
                    pull_failed += 1
            except Exception as e:
                print(f"Error [{where}]: {e}", file=sys.stderr)
                pull_failed += 1

    # The query only makes sense against a completely ingested scope.
    if pull_failed > 0:
        return 1

    # The scope's database is resolved without a source file ("").
    target_db_path = resolve_scope_path(scope_name, "", include_paths)

    # Read-only validation; only the tail of the validator message is echoed.
    validation_error = validate_dql(sql_query)
    if validation_error:
        print(f"Error: Only SELECT queries are permitted. Found: {validation_error.split(': ')[-1]}", file=sys.stderr)
        return 1

    result = execute_tap_query(target_db_path, sql_query)
    if not result.success:
        print(f"Error: {result.error}", file=sys.stderr)
        return 1

    # CSV output: header row first, then the data rows.
    writer = csv.writer(sys.stdout)
    writer.writerow(result.columns)
    writer.writerows(result.rows)

    return 0
771
+
772
+
773
+ # ---------------------------------------------------------------------------
774
+ # Pre-push-feed-query subcommand (028-push-query)
775
+ # ---------------------------------------------------------------------------
776
+
777
def _run_push_with_feed_query(scope_name, sql_query, include_paths, compaction, cwd, verbose=False):
    """Entry point for the pre-push-feed-query pipeline: discover, lock, delegate.

    Args:
        scope_name: Scope whose markers take part; only used here to choose
            the "nothing found" message.
        sql_query: SQL text handed to the locked worker.
        include_paths: Raw --include value used for file discovery. A
            normalized form is what gets passed on to the locked worker.
        compaction: Forwarded to the locked worker.
        cwd: Accepted but not used by this function.
        verbose: Forwarded to the locked worker.

    Returns:
        1 when no files were discovered, otherwise the exit code of
        _run_push_with_feed_query_locked.
    """
    filepaths = _discover_files(include_paths)

    if not filepaths:
        complaint = (
            f"Error: No markers found for scope '{scope_name}'"
            if scope_name
            else "Error: No data sources found for default scope"
        )
        print(complaint, file=sys.stderr)
        return 1

    hash_include = _normalize_include_for_hash(include_paths)
    with batch_lock(filepaths):
        return _run_push_with_feed_query_locked(
            filepaths, scope_name, sql_query, hash_include, compaction, verbose
        )
792
+
793
+
794
+ def _run_push_with_feed_query_locked(filepaths, scope_name, sql_query, include_paths, compaction, verbose=False):
795
+ """Execute pre-push-feed-query with all filepaths already locked."""
796
+ # Collect both feed and tap markers from all discovered files
797
+ all_fm = []
798
+ for filepath in filepaths:
799
+ fm = _collect_file_markers(filepath, include_taps=True, include_feeds=True)
800
+ if fm is None:
801
+ continue
802
+ all_fm.append(fm)
803
+
804
+ # Scope filtering: filter both 🌀 and 💎 markers by scope
805
+ filtered_fm = []
806
+ for fm in all_fm:
807
+ filtered_feed = []
808
+ filtered_tap = []
809
+ for marker in fm.feed_markers:
810
+ if scope_name == "":
811
+ if marker.scope.strip() == "":
812
+ filtered_feed.append(marker)
813
+ else:
814
+ if marker.scope.strip() == scope_name:
815
+ filtered_feed.append(marker)
816
+ for marker in fm.tap_markers:
817
+ if scope_name == "":
818
+ if marker.scope.strip() == "":
819
+ filtered_tap.append(marker)
820
+ else:
821
+ if marker.scope.strip() == scope_name:
822
+ filtered_tap.append(marker)
823
+ if filtered_feed or filtered_tap:
824
+ filtered_fm.append(FileMarkers(
825
+ filepath=fm.filepath,
826
+ content=fm.content,
827
+ lines=fm.lines,
828
+ tap_markers=filtered_tap,
829
+ feed_markers=filtered_feed,
830
+ ))
831
+
832
+ # Check for feed markers (needed for pull phase)
833
+ has_feed_markers = any(fm.feed_markers for fm in filtered_fm)
834
+ if not has_feed_markers:
835
+ if scope_name:
836
+ print(f"Error: No markers found for scope '{scope_name}'", file=sys.stderr)
837
+ else:
838
+ print("Error: No data sources found for default scope", file=sys.stderr)
839
+ return 1
840
+
841
+ # Conflict detection on feed markers
842
+ conflicts = _detect_feed_conflicts(filtered_fm, include_paths)
843
+ if conflicts:
844
+ for conflict in conflicts:
845
+ parts = [f"{fp}:{ln}" for fp, ln in conflict.conflicting]
846
+ locations = " and ".join(parts)
847
+ print(
848
+ f"CONFLICT {conflict.scope} \U0001f4be {conflict.table_name} - "
849
+ f"conflicting writes from {locations}",
850
+ file=sys.stderr,
851
+ )
852
+ return 1
853
+
854
+ # Collect declared table names per db_path for orphan cleanup (FR-006, FR-013)
855
+ discovered_by_db = {}
856
+ for fm in filtered_fm:
857
+ for marker in fm.feed_markers:
858
+ if marker.table_name is None:
859
+ continue
860
+ resolved = resolve_scope_path(marker.scope, fm.filepath, include_paths)
861
+ discovered_by_db.setdefault(resolved, set()).add(marker.table_name)
862
+
863
+ # Orphan cleanup BEFORE writes (FR-013)
864
+ for db_path, discovered_tables in discovered_by_db.items():
865
+ cleanup_stale_tables(db_path, discovered_tables)
866
+
867
+ # Pull phase: feed 🌀 marker data to DB (re-pull from markdown)
868
+ pull_failed = 0
869
+ for fm in filtered_fm:
870
+ for marker in fm.feed_markers:
871
+ resolved = resolve_scope_path(marker.scope, fm.filepath, include_paths)
872
+
873
+ if marker.table_name is None:
874
+ msg = f"Could not parse table name from query: {marker.raw_query}"
875
+ print(f"Error [{fm.filepath}:{marker.line_number}]: {msg}", file=sys.stderr)
876
+ pull_failed += 1
877
+ continue
878
+
879
+ table, table_line_idx = find_adjacent_table(
880
+ fm.lines, marker.line_number, marker.end_line_number
881
+ )
882
+ if table is None:
883
+ msg = "No markdown table found after marker"
884
+ print(f"Error [{fm.filepath}:{marker.line_number}]: {msg}", file=sys.stderr)
885
+ pull_failed += 1
886
+ continue
887
+
888
+ col_names, col_types, was_annotated, type_warnings, type_errors = \
889
+ parse_header_types_with_annotation_flags(table.columns)
890
+
891
+ for warn in type_warnings:
892
+ print(f"WARN {warn} [{fm.filepath}:{marker.line_number}]", file=sys.stderr)
893
+
894
+ if type_errors:
895
+ for err in type_errors:
896
+ print(f"Error [{fm.filepath}:{marker.line_number}]: {err}", file=sys.stderr)
897
+ pull_failed += 1
898
+ continue
899
+
900
+ cast_types, cast_errors = parse_cast_types(marker.raw_query)
901
+
902
+ if cast_errors:
903
+ for err in cast_errors:
904
+ print(f"Error [{fm.filepath}:{marker.line_number}]: {err}", file=sys.stderr)
905
+ pull_failed += 1
906
+ continue
907
+
908
+ final_types = resolve_column_types(
909
+ col_types, cast_types, len(col_names), was_annotated
910
+ )
911
+
912
+ try:
913
+ result = ingest_feed_table(
914
+ db_path=resolved,
915
+ table_name=marker.table_name,
916
+ columns=col_names,
917
+ rows=table.rows,
918
+ column_types=final_types,
919
+ )
920
+ if result.success and result.skipped and verbose:
921
+ print(
922
+ f"SKIP {marker.scope} 🌀 {marker.table_name} "
923
+ f"(unchanged) [{fm.filepath}:{marker.line_number}]",
924
+ file=sys.stderr,
925
+ )
926
+ elif not result.success:
927
+ print(f"Error [{fm.filepath}:{marker.line_number}]: {result.error}", file=sys.stderr)
928
+ pull_failed += 1
929
+ except Exception as e:
930
+ print(f"Error [{fm.filepath}:{marker.line_number}]: {e}", file=sys.stderr)
931
+ pull_failed += 1
932
+
933
+ if pull_failed > 0:
934
+ return 1
935
+
936
+ # Determine target DB path
937
+ target_db_path = resolve_scope_path(scope_name, "", include_paths)
938
+
939
+ # Execute mutative query in a single transaction
940
+ conn = None
941
+ try:
942
+ conn = sqlite3.connect(target_db_path)
943
+ conn.execute("BEGIN TRANSACTION")
944
+ for stmt in sql_query.split(";"):
945
+ stripped = stmt.strip()
946
+ if stripped:
947
+ conn.execute(stripped)
948
+ conn.commit()
949
+ except Exception as e:
950
+ if conn:
951
+ conn.rollback()
952
+ print(f"Error: {e}", file=sys.stderr)
953
+ return 1
954
+ finally:
955
+ if conn:
956
+ conn.close()
957
+
958
+ # Push-back phase: re-execute queries for both 💎 and 🌀 markers
959
+ file_replacements = {}
960
+
961
+ for fm in filtered_fm:
962
+ replacements = []
963
+
964
+ # Push-back 💎 tap markers
965
+ for marker in fm.tap_markers:
966
+ db_path = resolve_scope_path(marker.scope, fm.filepath, include_paths)
967
+
968
+ validation_error = validate_dql(marker.raw_query)
969
+ if validation_error:
970
+ print(
971
+ f"Error [{fm.filepath}:{marker.line_number}]: {validation_error}",
972
+ file=sys.stderr,
973
+ )
974
+ continue
975
+
976
+ table, table_line_idx = find_adjacent_table(
977
+ fm.lines, marker.line_number, marker.end_line_number
978
+ )
979
+
980
+ result = execute_tap_query(db_path, marker.raw_query)
981
+ if not result.success:
982
+ print(
983
+ f"Error [{fm.filepath}:{marker.line_number}]: {result.error}",
984
+ file=sys.stderr,
985
+ )
986
+ continue
987
+
988
+ new_table_str = format_markdown_table(
989
+ result.columns, result.rows, compaction=compaction
990
+ )
991
+ new_table_lines = new_table_str.split("\n")
992
+
993
+ if table is None:
994
+ # Dangling tap marker: insert new table after marker/fence
995
+ insert_after = marker.end_line_number if marker.end_line_number else marker.line_number
996
+ insert_idx = insert_after # 0-based index after marker line
997
+ new_lines = [""] + new_table_lines # blank line before table
998
+ replacements.append((insert_idx, insert_idx - 1, new_lines))
999
+ else:
1000
+ table_start_idx = table.start_line - 1
1001
+ table_end_idx = table.end_line - 1
1002
+ replacements.append((table_start_idx, table_end_idx, new_table_lines))
1003
+
1004
+ # Push-back 🌀 feed markers (conditionally bidirectional)
1005
+ for marker in fm.feed_markers:
1006
+ db_path = resolve_scope_path(marker.scope, fm.filepath, include_paths)
1007
+
1008
+ table, table_line_idx = find_adjacent_table(
1009
+ fm.lines, marker.line_number, marker.end_line_number
1010
+ )
1011
+ if table is None:
1012
+ continue
1013
+
1014
+ # Re-execute the marker's original SELECT query against mutated DB
1015
+ result = execute_tap_query(db_path, marker.raw_query)
1016
+ if not result.success:
1017
+ print(
1018
+ f"Error [{fm.filepath}:{marker.line_number}]: {result.error}",
1019
+ file=sys.stderr,
1020
+ )
1021
+ continue
1022
+
1023
+ new_table_str = format_markdown_table(
1024
+ result.columns, result.rows, compaction=compaction
1025
+ )
1026
+ new_table_lines = new_table_str.split("\n")
1027
+
1028
+ table_start_idx = table.start_line - 1
1029
+ table_end_idx = table.end_line - 1
1030
+ replacements.append((table_start_idx, table_end_idx, new_table_lines))
1031
+
1032
+ if replacements:
1033
+ file_replacements[fm.filepath] = (fm, replacements)
1034
+
1035
+ # Write all modified files
1036
+ for filepath, (fm, replacements) in file_replacements.items():
1037
+ replacements.sort(key=lambda r: r[0], reverse=True)
1038
+ lines = list(fm.lines)
1039
+ for start_idx, end_idx, new_lines in replacements:
1040
+ lines[start_idx:end_idx + 1] = new_lines
1041
+
1042
+ new_content = "\n".join(lines)
1043
+ if fm.content.endswith("\n"):
1044
+ new_content += "\n"
1045
+ atomic_write(filepath, new_content)
1046
+
1047
+ return 0
1048
+
1049
+
1050
+ # ---------------------------------------------------------------------------
1051
+ # ASCII art banner (009-ascii-art-help)
1052
+ # ---------------------------------------------------------------------------
1053
+
1054
# Banner printed by ``mdb init``: a small block-element icon on the left and
# the FIGlet "standard" rendering of "mdb" on the right.  Kept as a raw string
# because the lettering contains backslashes; ``.strip()`` drops the leading
# and trailing newlines that only exist to keep the literal readable.
MDB_ASCII_ART = r"""
▒▒██▓▓█▒▙                 _ _
▒▒██▓▓█▒▒   _ __ ___   __| | |__
▒▒▒▒▒▒▒▒▒  | '_ ` _ \ / _` | '_ \
▒███████▒  | | | | | | (_| | |_) |
▒▓▓▓▓▓▓▓▒  |_| |_| |_|\__,_|_.__/
""".strip()
1061
+
1062
+
1063
+ # ---------------------------------------------------------------------------
1064
+ # Main entry point (T001)
1065
+ # ---------------------------------------------------------------------------
1066
+
1067
def _format_examples(examples):
    """Render (command, description) pairs as an aligned ``examples:`` epilog.

    The epilog is shown through ``argparse.RawDescriptionHelpFormatter``, which
    prints it verbatim, so the column alignment has to be produced here.  The
    description column starts two spaces past the longest command.
    """
    width = max(len(command) for command, _ in examples) + 2
    rows = [
        f"  {command:<{width}}{description}"
        for command, description in examples
    ]
    return "\n".join(["examples:", *rows]) + "\n"


def _add_include_argument(subparser):
    """Attach the shared ``-i/--include`` option to a subcommand parser."""
    subparser.add_argument(
        "-i", "--include",
        type=str,
        default=None,
        help="Space-separated glob patterns to restrict file discovery (default: **)",
    )


def _build_arg_parser():
    """Create the top-level ``mdb`` parser with its push, pull and init subcommands."""
    parser = argparse.ArgumentParser(
        prog="mdb",
        description="Push and pull markdown tables with SQLite databases.",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog=_format_examples([
            ("mdb push", "pull 🌀, then push 💎"),
            ('mdb push -i "docs/**"', "restrict to docs/ (recursive)"),
            ('mdb push -i "docs/*"', "restrict to docs/ (shallow)"),
            ('mdb pull "SELECT * FROM t"', "query markdown data (CSV output)"),
            ('mdb pull -i "docs/**" "SELECT ..."', "restrict discovery to docs/"),
            ('mdb push "INSERT INTO t ..."', "mutate + push back"),
            ('mdb push "scope 🌀 DELETE ..."', "scoped mutate + push back"),
            ('mdb pull "scope 💎 SELECT ..."', "scoped query"),
        ]),
    )

    subparsers = parser.add_subparsers(dest="subcommand")

    # push subcommand
    push_parser = subparsers.add_parser(
        "push",
        help="Process all markers: feed (🌀) first, then tap (💎). Optionally run a mutative query.",
    )
    _add_include_argument(push_parser)
    push_parser.add_argument(
        "-c", "--compaction",
        choices=["full", "fit"],
        default="full",
        help="Table compaction mode: full (no padding, default) or fit (aligned columns)",
    )
    push_parser.add_argument(
        "-v", "--verbose",
        action="store_true",
        default=False,
        help="Show detailed output including skipped tables",
    )
    push_parser.add_argument(
        "query",
        nargs="?",
        default=None,
        help='Optional: DML query or "<scope_name> 🌀 <dml_query>"',
    )

    # pull subcommand (027-pull-query: query-based interface)
    pull_parser = subparsers.add_parser(
        "pull",
        help="Query markdown table data via SQL",
    )
    _add_include_argument(pull_parser)
    pull_parser.add_argument(
        "query",
        help='SQL query or "<scope_name> 💎 <sql_query>"',
    )

    # init subcommand (014-init-command, 045-init-skill-path)
    init_parser = subparsers.add_parser(
        "init",
        help="Install the mdb skill into the current project",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog=_format_examples([
            ("mdb init", "write to .mdb/skills/mdb/SKILL.md (default)"),
            ("mdb init .claude/skills", "write to .claude/skills/mdb/SKILL.md"),
            ("mdb init path/to/skills", "write to path/to/skills/mdb/SKILL.md"),
            ("mdb init --force", "overwrite without prompting"),
        ]),
    )
    init_parser.add_argument(
        "dir",
        nargs="?",
        default=".mdb/skills",
        metavar="DIR",
        help="Parent directory for the skill file (default: .mdb/skills). "
             "The tool writes to DIR/mdb/SKILL.md.",
    )
    init_parser.add_argument(
        "--force",
        action="store_true",
        help="Overwrite existing skill file without prompting",
    )

    return parser


def main(argv=None):
    """Entry point for the unified mdb CLI tool.

    Args:
        argv: Argument list to parse. ``None`` lets argparse fall back to
            ``sys.argv[1:]``.

    Returns:
        The exit code to hand to ``sys.exit``: 2 when no subcommand is given
        (help is printed instead), 1 for an empty or non-DML query or a failed
        ``init``, 0 when ``init`` succeeds or a plain push finds no markdown
        files, and otherwise whatever the selected subcommand's runner returns.
    """
    parser = _build_arg_parser()
    args = parser.parse_args(argv)

    if args.subcommand is None:
        parser.print_help()
        return 2

    # Init subcommand: dispatch BEFORE resolve_paths() since init
    # does not use path resolution (014-init-command)
    if args.subcommand == "init":
        print(MDB_ASCII_ART)
        print()
        # Imported lazily so push/pull do not pay for loading the init module.
        from mdb.init import init_skill
        result = init_skill(target_dir=args.dir, force=args.force)
        return 0 if result.success else 1

    # Pull subcommand: dispatch BEFORE resolve_paths() since pull
    # uses its own discovery logic (027-pull-query)
    if args.subcommand == "pull":
        if not args.query.strip():
            print("Error: Query argument is empty", file=sys.stderr)
            return 1
        return _run_pull_with_tap_query(args.query, args.include, os.getcwd())

    # Push subcommand: uses its own discovery logic (028-push-query)
    if args.subcommand == "push":
        if args.query is None:
            # Standard push: discover files, run pull-then-push
            filepaths = _discover_files(args.include)
            if not filepaths:
                print("No markdown files found")
                return 0

            normalized_include = _normalize_include_for_hash(args.include)
            return _run_push(
                filepaths,
                compaction=args.compaction,
                verbose=args.verbose,
                include_paths=normalized_include,
            )

        # Empty query validation (FR-011): an explicit but blank query is a
        # user error, not a request for a standard push.
        if args.query.strip() == "":
            print("Error: Query argument is empty", file=sys.stderr)
            return 1

        # Push-query mode: parse scope, validate DML, run pipeline
        scope_name, sql_query = parse_pre_push_feed_query(args.query)
        dml_error = validate_dml(sql_query)
        if dml_error:
            print(f"Error: {dml_error}", file=sys.stderr)
            return 1
        return _run_push_with_feed_query(
            scope_name,
            sql_query,
            args.include,
            args.compaction,
            os.getcwd(),
            verbose=args.verbose,
        )

    # Defensive fallback: argparse only admits the subcommands handled above.
    return 2
1211
+
1212
+
1213
# Script entry: run the CLI and surface its return value as the exit status.
if __name__ == "__main__":
    raise SystemExit(main())