opencode-skills-collection 2.0.0 → 2.0.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (90) hide show
  1. package/bundled-skills/.antigravity-install-manifest.json +6 -1
  2. package/bundled-skills/docs/integrations/jetski-cortex.md +3 -3
  3. package/bundled-skills/docs/integrations/jetski-gemini-loader/README.md +1 -1
  4. package/bundled-skills/docs/maintainers/repo-growth-seo.md +3 -3
  5. package/bundled-skills/docs/maintainers/skills-update-guide.md +1 -1
  6. package/bundled-skills/docs/users/bundles.md +1 -1
  7. package/bundled-skills/docs/users/claude-code-skills.md +1 -1
  8. package/bundled-skills/docs/users/gemini-cli-skills.md +1 -1
  9. package/bundled-skills/docs/users/getting-started.md +1 -1
  10. package/bundled-skills/docs/users/kiro-integration.md +1 -1
  11. package/bundled-skills/docs/users/usage.md +4 -4
  12. package/bundled-skills/docs/users/visual-guide.md +4 -4
  13. package/bundled-skills/manage-skills/SKILL.md +187 -0
  14. package/bundled-skills/monte-carlo-monitor-creation/SKILL.md +222 -0
  15. package/bundled-skills/monte-carlo-monitor-creation/references/comparison-monitor.md +426 -0
  16. package/bundled-skills/monte-carlo-monitor-creation/references/custom-sql-monitor.md +207 -0
  17. package/bundled-skills/monte-carlo-monitor-creation/references/metric-monitor.md +292 -0
  18. package/bundled-skills/monte-carlo-monitor-creation/references/table-monitor.md +231 -0
  19. package/bundled-skills/monte-carlo-monitor-creation/references/validation-monitor.md +404 -0
  20. package/bundled-skills/monte-carlo-prevent/SKILL.md +252 -0
  21. package/bundled-skills/monte-carlo-prevent/references/TROUBLESHOOTING.md +23 -0
  22. package/bundled-skills/monte-carlo-prevent/references/parameters.md +32 -0
  23. package/bundled-skills/monte-carlo-prevent/references/workflows.md +478 -0
  24. package/bundled-skills/monte-carlo-push-ingestion/SKILL.md +363 -0
  25. package/bundled-skills/monte-carlo-push-ingestion/references/anomaly-detection.md +87 -0
  26. package/bundled-skills/monte-carlo-push-ingestion/references/custom-lineage.md +203 -0
  27. package/bundled-skills/monte-carlo-push-ingestion/references/direct-http-api.md +207 -0
  28. package/bundled-skills/monte-carlo-push-ingestion/references/prerequisites.md +150 -0
  29. package/bundled-skills/monte-carlo-push-ingestion/references/push-lineage.md +160 -0
  30. package/bundled-skills/monte-carlo-push-ingestion/references/push-metadata.md +158 -0
  31. package/bundled-skills/monte-carlo-push-ingestion/references/push-query-logs.md +219 -0
  32. package/bundled-skills/monte-carlo-push-ingestion/references/validation.md +257 -0
  33. package/bundled-skills/monte-carlo-push-ingestion/scripts/sample_verify.py +357 -0
  34. package/bundled-skills/monte-carlo-push-ingestion/scripts/templates/bigquery/collect_and_push_lineage.py +70 -0
  35. package/bundled-skills/monte-carlo-push-ingestion/scripts/templates/bigquery/collect_and_push_metadata.py +65 -0
  36. package/bundled-skills/monte-carlo-push-ingestion/scripts/templates/bigquery/collect_and_push_query_logs.py +70 -0
  37. package/bundled-skills/monte-carlo-push-ingestion/scripts/templates/bigquery/collect_lineage.py +214 -0
  38. package/bundled-skills/monte-carlo-push-ingestion/scripts/templates/bigquery/collect_metadata.py +160 -0
  39. package/bundled-skills/monte-carlo-push-ingestion/scripts/templates/bigquery/collect_query_logs.py +164 -0
  40. package/bundled-skills/monte-carlo-push-ingestion/scripts/templates/bigquery/push_lineage.py +198 -0
  41. package/bundled-skills/monte-carlo-push-ingestion/scripts/templates/bigquery/push_metadata.py +193 -0
  42. package/bundled-skills/monte-carlo-push-ingestion/scripts/templates/bigquery/push_query_logs.py +207 -0
  43. package/bundled-skills/monte-carlo-push-ingestion/scripts/templates/bigquery-iceberg/collect_and_push_metadata.py +71 -0
  44. package/bundled-skills/monte-carlo-push-ingestion/scripts/templates/bigquery-iceberg/collect_and_push_query_logs.py +64 -0
  45. package/bundled-skills/monte-carlo-push-ingestion/scripts/templates/bigquery-iceberg/collect_metadata.py +253 -0
  46. package/bundled-skills/monte-carlo-push-ingestion/scripts/templates/bigquery-iceberg/collect_query_logs.py +149 -0
  47. package/bundled-skills/monte-carlo-push-ingestion/scripts/templates/bigquery-iceberg/push_metadata.py +190 -0
  48. package/bundled-skills/monte-carlo-push-ingestion/scripts/templates/bigquery-iceberg/push_query_logs.py +208 -0
  49. package/bundled-skills/monte-carlo-push-ingestion/scripts/templates/databricks/collect_and_push_lineage.py +83 -0
  50. package/bundled-skills/monte-carlo-push-ingestion/scripts/templates/databricks/collect_and_push_metadata.py +77 -0
  51. package/bundled-skills/monte-carlo-push-ingestion/scripts/templates/databricks/collect_and_push_query_logs.py +83 -0
  52. package/bundled-skills/monte-carlo-push-ingestion/scripts/templates/databricks/collect_lineage.py +240 -0
  53. package/bundled-skills/monte-carlo-push-ingestion/scripts/templates/databricks/collect_metadata.py +212 -0
  54. package/bundled-skills/monte-carlo-push-ingestion/scripts/templates/databricks/collect_query_logs.py +204 -0
  55. package/bundled-skills/monte-carlo-push-ingestion/scripts/templates/databricks/push_lineage.py +192 -0
  56. package/bundled-skills/monte-carlo-push-ingestion/scripts/templates/databricks/push_metadata.py +178 -0
  57. package/bundled-skills/monte-carlo-push-ingestion/scripts/templates/databricks/push_query_logs.py +200 -0
  58. package/bundled-skills/monte-carlo-push-ingestion/scripts/templates/hive/collect_and_push_lineage.py +119 -0
  59. package/bundled-skills/monte-carlo-push-ingestion/scripts/templates/hive/collect_and_push_metadata.py +119 -0
  60. package/bundled-skills/monte-carlo-push-ingestion/scripts/templates/hive/collect_and_push_query_logs.py +117 -0
  61. package/bundled-skills/monte-carlo-push-ingestion/scripts/templates/hive/collect_lineage.py +265 -0
  62. package/bundled-skills/monte-carlo-push-ingestion/scripts/templates/hive/collect_metadata.py +313 -0
  63. package/bundled-skills/monte-carlo-push-ingestion/scripts/templates/hive/collect_query_logs.py +284 -0
  64. package/bundled-skills/monte-carlo-push-ingestion/scripts/templates/hive/push_lineage.py +309 -0
  65. package/bundled-skills/monte-carlo-push-ingestion/scripts/templates/hive/push_metadata.py +245 -0
  66. package/bundled-skills/monte-carlo-push-ingestion/scripts/templates/hive/push_query_logs.py +255 -0
  67. package/bundled-skills/monte-carlo-push-ingestion/scripts/templates/redshift/collect_and_push_lineage.py +78 -0
  68. package/bundled-skills/monte-carlo-push-ingestion/scripts/templates/redshift/collect_and_push_metadata.py +80 -0
  69. package/bundled-skills/monte-carlo-push-ingestion/scripts/templates/redshift/collect_and_push_query_logs.py +88 -0
  70. package/bundled-skills/monte-carlo-push-ingestion/scripts/templates/redshift/collect_lineage.py +235 -0
  71. package/bundled-skills/monte-carlo-push-ingestion/scripts/templates/redshift/collect_metadata.py +219 -0
  72. package/bundled-skills/monte-carlo-push-ingestion/scripts/templates/redshift/collect_query_logs.py +239 -0
  73. package/bundled-skills/monte-carlo-push-ingestion/scripts/templates/redshift/push_lineage.py +178 -0
  74. package/bundled-skills/monte-carlo-push-ingestion/scripts/templates/redshift/push_metadata.py +178 -0
  75. package/bundled-skills/monte-carlo-push-ingestion/scripts/templates/redshift/push_query_logs.py +196 -0
  76. package/bundled-skills/monte-carlo-push-ingestion/scripts/templates/snowflake/collect_and_push_lineage.py +154 -0
  77. package/bundled-skills/monte-carlo-push-ingestion/scripts/templates/snowflake/collect_and_push_metadata.py +137 -0
  78. package/bundled-skills/monte-carlo-push-ingestion/scripts/templates/snowflake/collect_and_push_query_logs.py +137 -0
  79. package/bundled-skills/monte-carlo-push-ingestion/scripts/templates/snowflake/collect_lineage.py +349 -0
  80. package/bundled-skills/monte-carlo-push-ingestion/scripts/templates/snowflake/collect_metadata.py +329 -0
  81. package/bundled-skills/monte-carlo-push-ingestion/scripts/templates/snowflake/collect_query_logs.py +254 -0
  82. package/bundled-skills/monte-carlo-push-ingestion/scripts/templates/snowflake/push_lineage.py +307 -0
  83. package/bundled-skills/monte-carlo-push-ingestion/scripts/templates/snowflake/push_metadata.py +228 -0
  84. package/bundled-skills/monte-carlo-push-ingestion/scripts/templates/snowflake/push_query_logs.py +248 -0
  85. package/bundled-skills/monte-carlo-push-ingestion/scripts/test_template_sdk_usage.py +340 -0
  86. package/bundled-skills/monte-carlo-validation-notebook/SKILL.md +685 -0
  87. package/bundled-skills/monte-carlo-validation-notebook/scripts/generate_notebook_url.py +141 -0
  88. package/bundled-skills/monte-carlo-validation-notebook/scripts/resolve_dbt_schema.py +161 -0
  89. package/package.json +1 -1
  90. package/skills_index.json +503 -61
@@ -0,0 +1,357 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ Monte Carlo Push Ingestion — Verification Helper
4
+
5
+ Queries the Monte Carlo GraphQL API to verify that pushed metadata, lineage, and
6
+ query logs are visible in the platform.
7
+
8
+ Prerequisites:
9
+ pip install requests
10
+
11
+ Set environment variables:
12
+ MCD_ID — GraphQL API key ID (from getmontecarlo.com/settings/api)
13
+ MCD_TOKEN — GraphQL API key secret
14
+ MCD_RESOURCE_UUID — Your MC warehouse/resource UUID
15
+
16
+ Usage:
17
+ python sample_verify.py \
18
+ --full-table-id "analytics:public.orders" \
19
+ --check-schema \
20
+ --check-metrics \
21
+ --check-detectors \
22
+ --check-lineage \
23
+ --expected-sources "<customers-table-mcon>" "<raw-orders-table-mcon>"
24
+ """
25
+
26
+ import argparse
27
+ import json
28
+ import os
29
+ import sys
30
+ from datetime import datetime, timedelta, timezone
31
+
32
+ import requests
33
+
34
GRAPHQL_URL = "https://api.getmontecarlo.com/graphql"


def graphql(query: str, variables: dict, key_id: str, key_token: str) -> dict:
    """POST a single GraphQL operation to ``GRAPHQL_URL`` and return its ``data`` member.

    Args:
        query: GraphQL document (query or mutation) to send.
        variables: Values for the variables declared by ``query``.
        key_id: Sent as the ``x-mcd-id`` request header.
        key_token: Sent as the ``x-mcd-token`` request header.

    Returns:
        The value stored under ``"data"`` in the decoded JSON response.

    Raises:
        requests.HTTPError: Propagated from ``raise_for_status()`` on a
            non-success HTTP status.
        RuntimeError: When the response body contains an ``"errors"`` key;
            the message is the pretty-printed error list.
    """
    auth_headers = {
        "x-mcd-id": key_id,
        "x-mcd-token": key_token,
        "Content-Type": "application/json",
    }
    payload = {"query": query, "variables": variables}

    response = requests.post(GRAPHQL_URL, json=payload, headers=auth_headers, timeout=30)
    response.raise_for_status()

    decoded = response.json()
    # GraphQL reports failures in the body, not only through the HTTP status.
    if "errors" in decoded:
        raise RuntimeError(json.dumps(decoded["errors"], indent=2))
    return decoded["data"]
54
+
55
+
56
+ # ---------------------------------------------------------------------------
57
+ # Step 1: Resolve MCON from fullTableId
58
+ # ---------------------------------------------------------------------------
59
+
60
def get_table_mcon(full_table_id: str, dw_id: str, key_id: str, key_token: str) -> str:
    """Look up the MCON of a table from its fullTableId and warehouse UUID.

    Args:
        full_table_id: Table identifier passed as ``fullTableId``.
        dw_id: Warehouse/resource UUID passed as ``dwId``.
        key_id: API key ID forwarded to ``graphql``.
        key_token: API key secret forwarded to ``graphql``.

    Returns:
        The ``mcon`` field of the table returned by ``getTable``.

    Raises:
        ValueError: When ``getTable`` comes back empty or null.
    """
    lookup_query = """query GetTable($fullTableId: String!, $dwId: UUID!) {
      getTable(fullTableId: $fullTableId, dwId: $dwId) {
        mcon fullTableId displayName
      }
    }"""
    lookup_vars = {"fullTableId": full_table_id, "dwId": dw_id}

    match = graphql(lookup_query, lookup_vars, key_id, key_token).get("getTable")
    if not match:
        raise ValueError(f"Table not found: {full_table_id} in resource {dw_id}")

    resolved = match["mcon"]
    print(f" Resolved: {match['fullTableId']} → MCON: {resolved}")
    return resolved
76
+
77
+
78
+ # ---------------------------------------------------------------------------
79
+ # Step 2: Verify schema (columns)
80
+ # ---------------------------------------------------------------------------
81
+
82
def verify_schema(mcon: str, expected_fields: list[str], key_id: str, key_token: str) -> bool:
    """Check that the table's column names include every name in ``expected_fields``.

    Only the first schema version edge returned by the API is inspected.
    Column names are compared case-insensitively.

    Args:
        mcon: MCON of the table to inspect.
        expected_fields: Column names that must be present; an empty list
            skips the comparison and only prints the schema summary.
        key_id: API key ID forwarded to ``graphql``.
        key_token: API key secret forwarded to ``graphql``.

    Returns:
        ``False`` when no schema version is found or an expected column is
        missing, ``True`` otherwise.
    """
    data = graphql(
        """query GetSchema($mcon: String!) {
          getTable(mcon: $mcon) {
            versions {
              edges {
                node {
                  fields { name fieldType }
                }
              }
            }
          }
        }""",
        {"mcon": mcon},
        key_id, key_token,
    )
    # Each level may come back as an explicit JSON null, which a
    # ``.get(key, default)`` does not guard against, so fall back with ``or``.
    table = data.get("getTable") or {}
    edges = (table.get("versions") or {}).get("edges") or []
    if not edges:
        print(" WARN: no schema versions found")
        return False

    fields = ((edges[0] or {}).get("node") or {}).get("fields") or []
    got_names = {f["name"].lower() for f in fields}
    preview = ", ".join(f["name"] for f in fields[:8])
    ellipsis = "..." if len(fields) > 8 else ""
    print(f" Schema: {len(fields)} column(s) — {preview}{ellipsis}")

    if expected_fields:
        missing = [e for e in expected_fields if e.lower() not in got_names]
        if missing:
            print(f" FAIL: missing columns: {missing}")
            return False
        print(" PASS: all expected columns present")
    return True
113
+
114
+
115
+ # ---------------------------------------------------------------------------
116
+ # Step 3: Verify volume/freshness metrics
117
+ # ---------------------------------------------------------------------------
118
+
119
def verify_metrics(mcon: str, key_id: str, key_token: str) -> None:
    """Print the newest sample of the row-count and last-changed metrics.

    Queries ``getMetricsV4`` over the trailing seven days for
    ``total_row_count`` and ``total_row_count_last_changed_on`` and prints
    the sample with the greatest ``measurementTimestamp``. A truthy
    last-changed value is converted with ``float()`` and shown as a UTC
    timestamp; everything else is printed as ``value (at timestamp)``.

    Args:
        mcon: MCON of the table to inspect.
        key_id: API key ID forwarded to ``graphql``.
        key_token: API key secret forwarded to ``graphql``.
    """
    metrics_query = """query GetMetrics($mcon: String!, $metricName: String!, $start: DateTime!, $end: DateTime!) {
      getMetricsV4(dwId: null, mcon: $mcon, metricName: $metricName,
                   startTime: $start, endTime: $end) {
        metricsJson
      }
    }"""
    window_end = datetime.now(tz=timezone.utc)
    window_start = window_end - timedelta(days=7)

    for metric_name in ("total_row_count", "total_row_count_last_changed_on"):
        request_vars = {
            "mcon": mcon,
            "metricName": metric_name,
            "start": window_start.isoformat(),
            "end": window_end.isoformat(),
        }
        response = graphql(metrics_query, request_vars, key_id, key_token)

        encoded_samples = (response.get("getMetricsV4") or {}).get("metricsJson")
        if not encoded_samples:
            print(f" {metric_name}: no data")
            continue

        # metricsJson is a JSON document embedded as a string.
        samples = json.loads(encoded_samples)
        if not samples:
            print(f" {metric_name}: no data points")
            continue

        # Samples without a timestamp sort first because "" is the smallest string.
        newest = max(samples, key=lambda sample: sample.get("measurementTimestamp") or "")
        value = newest.get("value")
        measured_at = newest.get("measurementTimestamp")

        if metric_name == "total_row_count_last_changed_on" and value:
            changed_at = datetime.fromtimestamp(float(value), tz=timezone.utc)
            print(f" {metric_name}: {changed_at.strftime('%Y-%m-%dT%H:%M:%SZ')}")
            continue
        print(f" {metric_name}: {value} (at {measured_at})")
151
+
152
+
153
+ # ---------------------------------------------------------------------------
154
+ # Step 3b: Verify detector status (freshness + volume)
155
+ # ---------------------------------------------------------------------------
156
+
157
def verify_detectors(mcon: str, key_id: str, key_token: str) -> None:
    """Print the status of the table's freshness and volume detectors.

    Reads ``thresholds.freshness.status`` and ``thresholds.size.status``
    from ``getTable``. A detector with no ``status`` key is shown as
    ``not available``. When a status is ``no data`` or ``training`` a hint
    about the amount of pushed data the detector needs is printed as well.

    Args:
        mcon: MCON of the table to inspect.
        key_id: API key ID forwarded to ``graphql``.
        key_token: API key secret forwarded to ``graphql``.
    """
    detector_query = """query GetDetectors($mcon: String!) {
      getTable(mcon: $mcon) {
        thresholds {
          freshness { status }
          size { status }
        }
      }
    }"""
    response = graphql(detector_query, {"mcon": mcon}, key_id, key_token)
    thresholds = (response.get("getTable") or {}).get("thresholds") or {}

    statuses = {
        detector: (thresholds.get(detector) or {}).get("status", "not available")
        for detector in ("freshness", "size")
    }

    # Both status lines are printed before either hint.
    print(f" Freshness detector: {statuses['freshness']}")
    print(f" Volume detector: {statuses['size']}")

    not_ready = ("no data", "training")
    if statuses["freshness"] in not_ready:
        print(" ↳ Freshness needs 7+ pushes with changed last_update_time over ~2 weeks")
    if statuses["size"] in not_ready:
        print(" ↳ Volume needs 10-48 samples over ~42 days (push hourly, consistently)")
182
+
183
+
184
+ # ---------------------------------------------------------------------------
185
+ # Step 4: Verify table lineage (upstream)
186
+ # ---------------------------------------------------------------------------
187
+
188
def verify_table_lineage(
    mcon: str,
    expected_source_mcons: list[str],
    key_id: str,
    key_token: str,
) -> bool:
    """Check that the expected source MCONs appear in the one-hop upstream lineage.

    A source counts as found when its MCON appears either in
    ``connectedNodes`` or in any edge's ``directlyConnectedMcons``.

    Args:
        mcon: MCON of the destination table.
        expected_source_mcons: MCONs that must be upstream of ``mcon``; an
            empty list skips the comparison.
        key_id: API key ID forwarded to ``graphql``.
        key_token: API key secret forwarded to ``graphql``.

    Returns:
        ``True`` when nothing is expected or every expected MCON is found,
        ``False`` otherwise.
    """
    data = graphql(
        """query GetLineage($mcon: String!) {
          getTableLineage(mcon: $mcon, direction: "upstream", hops: 1) {
            connectedNodes { mcon displayName objectType }
            flattenedEdges { directlyConnectedMcons }
          }
        }""",
        {"mcon": mcon},
        key_id, key_token,
    )
    lineage = data.get("getTableLineage") or {}
    # A ``.get(key, [])`` default is not used when the API returns an
    # explicit null, so normalise with ``or`` before iterating.
    nodes = lineage.get("connectedNodes") or []
    edges = lineage.get("flattenedEdges") or []

    connected = {n["mcon"] for n in nodes}
    flat = {m for e in edges for m in (e.get("directlyConnectedMcons") or [])}
    all_found = connected | flat
    print(f" Upstream nodes: {len(connected)}")

    if not expected_source_mcons:
        return True
    missing = [s for s in expected_source_mcons if s not in all_found]
    if missing:
        print(f" FAIL: missing sources: {missing}")
        return False
    print(" PASS: all expected sources present")
    return True
218
+
219
+
220
+ # ---------------------------------------------------------------------------
221
+ # Step 5: Verify column lineage
222
+ # ---------------------------------------------------------------------------
223
+
224
def verify_column_lineage(
    source_mcon: str,
    source_column: str,
    expected_dest_mcon: str,
    expected_dest_column: str,
    key_id: str,
    key_token: str,
) -> bool:
    """Check that ``source_column`` flows into ``expected_dest_column`` on ``expected_dest_mcon``.

    Only the first page (``pageSize: 1000``) of
    ``getDerivedTablesPartialLineage`` is inspected. Column names are
    compared exactly (case-sensitively).

    Args:
        source_mcon: MCON of the source table.
        source_column: Column on the source table.
        expected_dest_mcon: MCON of the table expected downstream.
        expected_dest_column: Column expected on the destination table.
        key_id: API key ID forwarded to ``graphql``.
        key_token: API key secret forwarded to ``graphql``.

    Returns:
        ``True`` when a matching destination table and column are found,
        ``False`` otherwise.
    """
    data = graphql(
        """query GetColLineage($mcon: String!, $column: String!) {
          getDerivedTablesPartialLineage(mcon: $mcon, column: $column, pageSize: 1000) {
            destinations {
              table { mcon displayName }
              columns { columnName }
            }
          }
        }""",
        {"mcon": source_mcon, "column": source_column},
        key_id, key_token,
    )
    # Normalise explicit nulls at every level so an empty lineage is
    # reported as FAIL instead of raising.
    destinations = (data.get("getDerivedTablesPartialLineage") or {}).get("destinations") or []
    for dest in destinations:
        table = dest.get("table") or {}
        if table.get("mcon") != expected_dest_mcon:
            continue
        cols = {c["columnName"] for c in (dest.get("columns") or [])}
        if expected_dest_column in cols:
            print(f" PASS: {source_column} → {table.get('displayName')}.{expected_dest_column}")
            return True
    print(f" FAIL: {source_column} → {expected_dest_mcon}.{expected_dest_column} not found")
    return False
254
+
255
+
256
+ # ---------------------------------------------------------------------------
257
+ # Step 6: Verify query logs
258
+ # ---------------------------------------------------------------------------
259
+
260
def verify_query_logs(
    mcon: str,
    start_time: datetime,
    end_time: datetime,
    key_id: str,
    key_token: str,
) -> None:
    """Print the total read and write query counts for a table in a time window.

    Pages through ``getAggregatedQueries`` 200 entries at a time and sums
    ``queryCount`` across all pages, once for ``read`` and once for ``write``.

    Args:
        mcon: MCON of the table to inspect.
        start_time: Start of the window; sent as ``start_time.isoformat()``.
        end_time: End of the window; sent as ``end_time.isoformat()``.
        key_id: API key ID forwarded to ``graphql``.
        key_token: API key secret forwarded to ``graphql``.
    """
    query = """query GetQueries($mcon: String!, $type: String!, $start: DateTime!, $end: DateTime!, $after: String) {
      getAggregatedQueries(mcon: $mcon, queryType: $type,
                           startTime: $start, endTime: $end,
                           first: 200, after: $after) {
        edges { node { queryHash queryCount lastSeen } }
        pageInfo { hasNextPage endCursor }
      }
    }"""
    for query_type in ("read", "write"):
        cursor = None
        total = 0
        while True:
            data = graphql(
                query,
                {"mcon": mcon, "type": query_type,
                 "start": start_time.isoformat(), "end": end_time.isoformat(),
                 "after": cursor},
                key_id, key_token,
            )
            result = data.get("getAggregatedQueries") or {}
            # Explicit nulls bypass ``.get`` defaults, so normalise with ``or``.
            total += sum(
                ((e or {}).get("node") or {}).get("queryCount") or 0
                for e in (result.get("edges") or [])
            )
            page = result.get("pageInfo") or {}
            if not page.get("hasNextPage"):
                break
            next_cursor = page.get("endCursor")
            # A missing or unchanged cursor would request the same page
            # forever; stop instead of looping.
            if not next_cursor or next_cursor == cursor:
                break
            cursor = next_cursor
        print(f" {query_type} queries: {total}")
293
+
294
+
295
+ # ---------------------------------------------------------------------------
296
+ # CLI
297
+ # ---------------------------------------------------------------------------
298
+
299
def main() -> None:
    """Parse CLI arguments and run the requested verification checks.

    Credentials and the resource UUID default to the ``MCD_ID``,
    ``MCD_TOKEN`` and ``MCD_RESOURCE_UUID`` environment variables. The table
    is identified either by ``--mcon`` or by ``--full-table-id`` together
    with ``--resource-uuid``.

    Exits with status 1 when credentials or the resource UUID are missing,
    or when a schema or lineage check reports failure, so the script can
    gate automation. Exits with status 2 (argparse) when neither
    ``--full-table-id`` nor ``--mcon`` is given.
    """
    parser = argparse.ArgumentParser(description="Verify Monte Carlo push-ingested data via GraphQL")
    parser.add_argument("--key-id", default=os.environ.get("MCD_ID"))
    parser.add_argument("--key-token", default=os.environ.get("MCD_TOKEN"))
    parser.add_argument("--resource-uuid", default=os.environ.get("MCD_RESOURCE_UUID"), required=False)
    parser.add_argument(
        "--full-table-id",
        help="e.g. analytics:public.orders (required unless --mcon is given)",
    )
    parser.add_argument("--mcon", help="Use MCON directly instead of resolving from fullTableId")
    parser.add_argument("--check-schema", action="store_true")
    parser.add_argument("--check-metrics", action="store_true")
    parser.add_argument("--check-detectors", action="store_true", help="Check freshness/volume detector status")
    parser.add_argument("--check-lineage", action="store_true")
    parser.add_argument("--check-query-logs", action="store_true")
    parser.add_argument("--expected-fields", nargs="*", default=[])
    parser.add_argument("--expected-sources", nargs="*", default=[], help="Source MCONs for lineage check")
    parser.add_argument("--lookback-hours", type=int, default=24, help="For query log check (default: 24)")
    args = parser.parse_args()

    # The table ID is only needed to resolve an MCON, so it is redundant
    # when the caller already supplies one.
    if not args.full_table_id and not args.mcon:
        parser.error("one of --full-table-id or --mcon is required")

    if not args.key_id or not args.key_token:
        print("ERROR: Provide --key-id/--key-token or set MCD_ID/MCD_TOKEN", file=sys.stderr)
        sys.exit(1)

    print(f"\n{'='*60}")
    print(f"Verifying: {args.full_table_id or args.mcon}")
    print(f"{'='*60}")

    mcon = args.mcon
    if not mcon:
        if not args.resource_uuid:
            print("ERROR: --resource-uuid required when --mcon is not provided", file=sys.stderr)
            sys.exit(1)
        mcon = get_table_mcon(args.full_table_id, args.resource_uuid, args.key_id, args.key_token)

    # Names of checks that returned False; drives the process exit status.
    failed_checks = []

    if args.check_schema:
        print("\n[Schema]")
        if not verify_schema(mcon, args.expected_fields, args.key_id, args.key_token):
            failed_checks.append("schema")

    if args.check_metrics:
        print("\n[Metrics]")
        verify_metrics(mcon, args.key_id, args.key_token)

    if args.check_detectors:
        print("\n[Detectors]")
        verify_detectors(mcon, args.key_id, args.key_token)

    if args.check_lineage:
        print("\n[Table Lineage]")
        if not verify_table_lineage(mcon, args.expected_sources, args.key_id, args.key_token):
            failed_checks.append("lineage")

    if args.check_query_logs:
        print("\n[Query Logs]")
        end = datetime.now(tz=timezone.utc)
        start = end - timedelta(hours=args.lookback_hours)
        verify_query_logs(mcon, start, end, args.key_id, args.key_token)

    print("\nDone.")

    if failed_checks:
        print(f"ERROR: failed checks: {', '.join(failed_checks)}", file=sys.stderr)
        sys.exit(1)


if __name__ == "__main__":
    main()
@@ -0,0 +1,70 @@
1
+ """
2
+ BigQuery — Lineage Collection and Push (combined)
3
+ ===================================================
4
+ Imports ``collect()`` from ``collect_lineage`` and ``push()`` from
5
+ ``push_lineage``, runs both in sequence.
6
+
7
+ Substitution points (search for "← SUBSTITUTE"):
8
+ - BIGQUERY_PROJECT_ID : GCP project ID to collect from
9
+ - BIGQUERY_REGION : BigQuery region for INFORMATION_SCHEMA queries (e.g. "us", "eu")
10
+ - LOOKBACK_HOURS : how far back to scan job history (default 24 h)
11
+ - MCD_INGEST_ID / MCD_INGEST_TOKEN : Monte Carlo API credentials
12
+ - MCD_RESOURCE_UUID : UUID of the BigQuery connection in Monte Carlo
13
+
14
+ Prerequisites:
15
+ pip install google-cloud-bigquery pycarlo
16
+ """
17
+
18
+ from __future__ import annotations
19
+
20
+ import argparse
21
+ import os
22
+
23
+ from collect_lineage import collect, LOOKBACK_HOURS
24
+ from push_lineage import push, _BATCH_SIZE
25
+
26
+
27
def main() -> None:
    """Parse CLI arguments, then run lineage ``collect()`` followed by ``push()``.

    Connection settings default to the ``BIGQUERY_PROJECT_ID``,
    ``BIGQUERY_REGION``, ``MCD_RESOURCE_UUID``, ``MCD_INGEST_ID`` and
    ``MCD_INGEST_TOKEN`` environment variables. The path given as
    ``--output-file`` is passed to ``collect()`` as its output and to
    ``push()`` as its input.

    Exits through ``parser.error`` when the project ID, resource UUID, key
    ID or key token is missing or empty.
    """
    parser = argparse.ArgumentParser(description="Push BigQuery lineage to Monte Carlo")
    parser.add_argument("--project-id", default=os.getenv("BIGQUERY_PROJECT_ID"))  # ← SUBSTITUTE
    parser.add_argument("--region", default=os.getenv("BIGQUERY_REGION", "us"))  # ← SUBSTITUTE
    parser.add_argument("--resource-uuid", default=os.getenv("MCD_RESOURCE_UUID"))
    parser.add_argument("--key-id", default=os.getenv("MCD_INGEST_ID"))
    parser.add_argument("--key-token", default=os.getenv("MCD_INGEST_TOKEN"))
    parser.add_argument("--lookback-hours", type=int, default=LOOKBACK_HOURS)
    parser.add_argument("--output-file", default="lineage_output.json")
    parser.add_argument("--push-result-file", default="lineage_push_result.json")
    parser.add_argument(
        "--batch-size",
        type=int,
        default=_BATCH_SIZE,
        help=f"Max events per push batch (default: {_BATCH_SIZE})",
    )
    args = parser.parse_args()

    # Truthiness rather than ``is None``: an environment variable that is set
    # but empty yields "" and must be rejected here, not later by a remote call.
    required = ["project_id", "resource_uuid", "key_id", "key_token"]
    missing = [k for k in required if not getattr(args, k)]
    if missing:
        parser.error(f"Missing required arguments/env vars: {missing}")

    # Step 1: Collect
    collect(
        project_id=args.project_id,
        region=args.region,
        lookback_hours=args.lookback_hours,
        output_file=args.output_file,
    )

    # Step 2: Push
    push(
        input_file=args.output_file,
        resource_uuid=args.resource_uuid,
        key_id=args.key_id,
        key_token=args.key_token,
        batch_size=args.batch_size,
        output_file=args.push_result_file,
    )


if __name__ == "__main__":
    main()
@@ -0,0 +1,65 @@
1
+ """
2
+ BigQuery — Metadata Collection and Push (combined)
3
+ ===================================================
4
+ Imports ``collect()`` from ``collect_metadata`` and ``push()`` from
5
+ ``push_metadata``, runs both in sequence.
6
+
7
+ Substitution points (search for "← SUBSTITUTE"):
8
+ - BIGQUERY_PROJECT_ID : GCP project ID to collect from
9
+ - GOOGLE_APPLICATION_CREDENTIALS : path to service-account JSON key file
10
+ - MCD_INGEST_ID / MCD_INGEST_TOKEN : Monte Carlo API credentials
11
+ - MCD_RESOURCE_UUID : UUID of the BigQuery connection in Monte Carlo
12
+ - DATASET_EXCLUSIONS : datasets to skip (informational / system datasets)
13
+
14
+ Prerequisites:
15
+ pip install google-cloud-bigquery pycarlo
16
+ """
17
+
18
+ from __future__ import annotations
19
+
20
+ import argparse
21
+ import os
22
+
23
+ from collect_metadata import collect
24
+ from push_metadata import push, _BATCH_SIZE
25
+
26
+
27
def main() -> None:
    """Parse CLI arguments, then run metadata ``collect()`` followed by ``push()``.

    Connection settings default to the ``BIGQUERY_PROJECT_ID``,
    ``MCD_RESOURCE_UUID``, ``MCD_INGEST_ID`` and ``MCD_INGEST_TOKEN``
    environment variables. The path given as ``--output-file`` is passed to
    ``collect()`` as its output and to ``push()`` as its input.

    Exits through ``parser.error`` when the project ID, resource UUID, key
    ID or key token is missing or empty.
    """
    parser = argparse.ArgumentParser(description="Push BigQuery metadata to Monte Carlo")
    parser.add_argument("--project-id", default=os.getenv("BIGQUERY_PROJECT_ID"))  # ← SUBSTITUTE
    parser.add_argument("--resource-uuid", default=os.getenv("MCD_RESOURCE_UUID"))
    parser.add_argument("--key-id", default=os.getenv("MCD_INGEST_ID"))
    parser.add_argument("--key-token", default=os.getenv("MCD_INGEST_TOKEN"))
    parser.add_argument("--output-file", default="metadata_output.json")
    parser.add_argument("--push-result-file", default="metadata_push_result.json")
    parser.add_argument(
        "--batch-size",
        type=int,
        default=_BATCH_SIZE,
        help=f"Max assets per push batch (default: {_BATCH_SIZE})",
    )
    args = parser.parse_args()

    # Name the required arguments explicitly instead of excluding the
    # optional ones, so a future optional flag that defaults to None is not
    # reported as missing. Truthiness also rejects empty-string env vars.
    required = ["project_id", "resource_uuid", "key_id", "key_token"]
    missing = [k for k in required if not getattr(args, k)]
    if missing:
        parser.error(f"Missing required arguments/env vars: {missing}")

    # Step 1: Collect
    collect(
        project_id=args.project_id,
        output_file=args.output_file,
    )

    # Step 2: Push
    push(
        input_file=args.output_file,
        resource_uuid=args.resource_uuid,
        key_id=args.key_id,
        key_token=args.key_token,
        batch_size=args.batch_size,
        output_file=args.push_result_file,
    )


if __name__ == "__main__":
    main()
@@ -0,0 +1,70 @@
1
+ """
2
+ BigQuery — Query Log Collection and Push (combined)
3
+ =====================================================
4
+ Imports ``collect()`` from ``collect_query_logs`` and ``push()`` from
5
+ ``push_query_logs``, runs both in sequence.
6
+
7
+ Substitution points (search for "← SUBSTITUTE"):
8
+ - BIGQUERY_PROJECT_ID : GCP project ID to collect query logs from
9
+ - GOOGLE_APPLICATION_CREDENTIALS : path to service-account JSON key file
10
+ - LOOKBACK_HOURS : how many hours back to collect (default 25, skip last 1 h)
11
+ - MCD_INGEST_ID / MCD_INGEST_TOKEN : Monte Carlo API credentials
12
+ - MCD_RESOURCE_UUID : UUID of the BigQuery connection in Monte Carlo
13
+
14
+ Prerequisites:
15
+ pip install google-cloud-bigquery pycarlo
16
+ """
17
+
18
+ from __future__ import annotations
19
+
20
+ import argparse
21
+ import os
22
+
23
+ from collect_query_logs import collect, LOOKBACK_HOURS, LOOKBACK_LAG_HOURS
24
+ from push_query_logs import push, _BATCH_SIZE
25
+
26
+
27
def main() -> None:
    """Parse CLI arguments, then run query-log ``collect()`` followed by ``push()``.

    Connection settings default to the ``BIGQUERY_PROJECT_ID``,
    ``MCD_RESOURCE_UUID``, ``MCD_INGEST_ID`` and ``MCD_INGEST_TOKEN``
    environment variables; the lookback window and lag default to the
    constants imported from ``collect_query_logs``. The path given as
    ``--output-file`` is passed to ``collect()`` as its output and to
    ``push()`` as its input.

    Exits through ``parser.error`` when the project ID, resource UUID, key
    ID or key token is missing or empty.
    """
    parser = argparse.ArgumentParser(description="Push BigQuery query logs to Monte Carlo")
    parser.add_argument("--project-id", default=os.getenv("BIGQUERY_PROJECT_ID"))  # ← SUBSTITUTE
    parser.add_argument("--resource-uuid", default=os.getenv("MCD_RESOURCE_UUID"))
    parser.add_argument("--key-id", default=os.getenv("MCD_INGEST_ID"))
    parser.add_argument("--key-token", default=os.getenv("MCD_INGEST_TOKEN"))
    parser.add_argument("--lookback-hours", type=int, default=LOOKBACK_HOURS)
    parser.add_argument("--lookback-lag-hours", type=int, default=LOOKBACK_LAG_HOURS)
    parser.add_argument("--output-file", default="query_logs_output.json")
    parser.add_argument("--push-result-file", default="query_logs_push_result.json")
    parser.add_argument(
        "--batch-size",
        type=int,
        default=_BATCH_SIZE,
        help=f"Max entries per push batch (default: {_BATCH_SIZE})",
    )
    args = parser.parse_args()

    # Truthiness rather than ``is None``: an environment variable that is set
    # but empty yields "" and must be rejected here, not later by a remote call.
    required = ["project_id", "resource_uuid", "key_id", "key_token"]
    missing = [k for k in required if not getattr(args, k)]
    if missing:
        parser.error(f"Missing required arguments/env vars: {missing}")

    # Step 1: Collect
    collect(
        project_id=args.project_id,
        lookback_hours=args.lookback_hours,
        lookback_lag_hours=args.lookback_lag_hours,
        output_file=args.output_file,
    )

    # Step 2: Push
    push(
        input_file=args.output_file,
        resource_uuid=args.resource_uuid,
        key_id=args.key_id,
        key_token=args.key_token,
        batch_size=args.batch_size,
        output_file=args.push_result_file,
    )


if __name__ == "__main__":
    main()