opencode-skills-collection 2.0.0 → 2.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (90) hide show
  1. package/bundled-skills/.antigravity-install-manifest.json +6 -1
  2. package/bundled-skills/docs/integrations/jetski-cortex.md +3 -3
  3. package/bundled-skills/docs/integrations/jetski-gemini-loader/README.md +1 -1
  4. package/bundled-skills/docs/maintainers/repo-growth-seo.md +3 -3
  5. package/bundled-skills/docs/maintainers/skills-update-guide.md +1 -1
  6. package/bundled-skills/docs/users/bundles.md +1 -1
  7. package/bundled-skills/docs/users/claude-code-skills.md +1 -1
  8. package/bundled-skills/docs/users/gemini-cli-skills.md +1 -1
  9. package/bundled-skills/docs/users/getting-started.md +1 -1
  10. package/bundled-skills/docs/users/kiro-integration.md +1 -1
  11. package/bundled-skills/docs/users/usage.md +4 -4
  12. package/bundled-skills/docs/users/visual-guide.md +4 -4
  13. package/bundled-skills/manage-skills/SKILL.md +187 -0
  14. package/bundled-skills/monte-carlo-monitor-creation/SKILL.md +222 -0
  15. package/bundled-skills/monte-carlo-monitor-creation/references/comparison-monitor.md +426 -0
  16. package/bundled-skills/monte-carlo-monitor-creation/references/custom-sql-monitor.md +207 -0
  17. package/bundled-skills/monte-carlo-monitor-creation/references/metric-monitor.md +292 -0
  18. package/bundled-skills/monte-carlo-monitor-creation/references/table-monitor.md +231 -0
  19. package/bundled-skills/monte-carlo-monitor-creation/references/validation-monitor.md +404 -0
  20. package/bundled-skills/monte-carlo-prevent/SKILL.md +252 -0
  21. package/bundled-skills/monte-carlo-prevent/references/TROUBLESHOOTING.md +23 -0
  22. package/bundled-skills/monte-carlo-prevent/references/parameters.md +32 -0
  23. package/bundled-skills/monte-carlo-prevent/references/workflows.md +478 -0
  24. package/bundled-skills/monte-carlo-push-ingestion/SKILL.md +363 -0
  25. package/bundled-skills/monte-carlo-push-ingestion/references/anomaly-detection.md +87 -0
  26. package/bundled-skills/monte-carlo-push-ingestion/references/custom-lineage.md +203 -0
  27. package/bundled-skills/monte-carlo-push-ingestion/references/direct-http-api.md +207 -0
  28. package/bundled-skills/monte-carlo-push-ingestion/references/prerequisites.md +150 -0
  29. package/bundled-skills/monte-carlo-push-ingestion/references/push-lineage.md +160 -0
  30. package/bundled-skills/monte-carlo-push-ingestion/references/push-metadata.md +158 -0
  31. package/bundled-skills/monte-carlo-push-ingestion/references/push-query-logs.md +219 -0
  32. package/bundled-skills/monte-carlo-push-ingestion/references/validation.md +257 -0
  33. package/bundled-skills/monte-carlo-push-ingestion/scripts/sample_verify.py +357 -0
  34. package/bundled-skills/monte-carlo-push-ingestion/scripts/templates/bigquery/collect_and_push_lineage.py +70 -0
  35. package/bundled-skills/monte-carlo-push-ingestion/scripts/templates/bigquery/collect_and_push_metadata.py +65 -0
  36. package/bundled-skills/monte-carlo-push-ingestion/scripts/templates/bigquery/collect_and_push_query_logs.py +70 -0
  37. package/bundled-skills/monte-carlo-push-ingestion/scripts/templates/bigquery/collect_lineage.py +214 -0
  38. package/bundled-skills/monte-carlo-push-ingestion/scripts/templates/bigquery/collect_metadata.py +160 -0
  39. package/bundled-skills/monte-carlo-push-ingestion/scripts/templates/bigquery/collect_query_logs.py +164 -0
  40. package/bundled-skills/monte-carlo-push-ingestion/scripts/templates/bigquery/push_lineage.py +198 -0
  41. package/bundled-skills/monte-carlo-push-ingestion/scripts/templates/bigquery/push_metadata.py +193 -0
  42. package/bundled-skills/monte-carlo-push-ingestion/scripts/templates/bigquery/push_query_logs.py +207 -0
  43. package/bundled-skills/monte-carlo-push-ingestion/scripts/templates/bigquery-iceberg/collect_and_push_metadata.py +71 -0
  44. package/bundled-skills/monte-carlo-push-ingestion/scripts/templates/bigquery-iceberg/collect_and_push_query_logs.py +64 -0
  45. package/bundled-skills/monte-carlo-push-ingestion/scripts/templates/bigquery-iceberg/collect_metadata.py +253 -0
  46. package/bundled-skills/monte-carlo-push-ingestion/scripts/templates/bigquery-iceberg/collect_query_logs.py +149 -0
  47. package/bundled-skills/monte-carlo-push-ingestion/scripts/templates/bigquery-iceberg/push_metadata.py +190 -0
  48. package/bundled-skills/monte-carlo-push-ingestion/scripts/templates/bigquery-iceberg/push_query_logs.py +208 -0
  49. package/bundled-skills/monte-carlo-push-ingestion/scripts/templates/databricks/collect_and_push_lineage.py +83 -0
  50. package/bundled-skills/monte-carlo-push-ingestion/scripts/templates/databricks/collect_and_push_metadata.py +77 -0
  51. package/bundled-skills/monte-carlo-push-ingestion/scripts/templates/databricks/collect_and_push_query_logs.py +83 -0
  52. package/bundled-skills/monte-carlo-push-ingestion/scripts/templates/databricks/collect_lineage.py +240 -0
  53. package/bundled-skills/monte-carlo-push-ingestion/scripts/templates/databricks/collect_metadata.py +212 -0
  54. package/bundled-skills/monte-carlo-push-ingestion/scripts/templates/databricks/collect_query_logs.py +204 -0
  55. package/bundled-skills/monte-carlo-push-ingestion/scripts/templates/databricks/push_lineage.py +192 -0
  56. package/bundled-skills/monte-carlo-push-ingestion/scripts/templates/databricks/push_metadata.py +178 -0
  57. package/bundled-skills/monte-carlo-push-ingestion/scripts/templates/databricks/push_query_logs.py +200 -0
  58. package/bundled-skills/monte-carlo-push-ingestion/scripts/templates/hive/collect_and_push_lineage.py +119 -0
  59. package/bundled-skills/monte-carlo-push-ingestion/scripts/templates/hive/collect_and_push_metadata.py +119 -0
  60. package/bundled-skills/monte-carlo-push-ingestion/scripts/templates/hive/collect_and_push_query_logs.py +117 -0
  61. package/bundled-skills/monte-carlo-push-ingestion/scripts/templates/hive/collect_lineage.py +265 -0
  62. package/bundled-skills/monte-carlo-push-ingestion/scripts/templates/hive/collect_metadata.py +313 -0
  63. package/bundled-skills/monte-carlo-push-ingestion/scripts/templates/hive/collect_query_logs.py +284 -0
  64. package/bundled-skills/monte-carlo-push-ingestion/scripts/templates/hive/push_lineage.py +309 -0
  65. package/bundled-skills/monte-carlo-push-ingestion/scripts/templates/hive/push_metadata.py +245 -0
  66. package/bundled-skills/monte-carlo-push-ingestion/scripts/templates/hive/push_query_logs.py +255 -0
  67. package/bundled-skills/monte-carlo-push-ingestion/scripts/templates/redshift/collect_and_push_lineage.py +78 -0
  68. package/bundled-skills/monte-carlo-push-ingestion/scripts/templates/redshift/collect_and_push_metadata.py +80 -0
  69. package/bundled-skills/monte-carlo-push-ingestion/scripts/templates/redshift/collect_and_push_query_logs.py +88 -0
  70. package/bundled-skills/monte-carlo-push-ingestion/scripts/templates/redshift/collect_lineage.py +235 -0
  71. package/bundled-skills/monte-carlo-push-ingestion/scripts/templates/redshift/collect_metadata.py +219 -0
  72. package/bundled-skills/monte-carlo-push-ingestion/scripts/templates/redshift/collect_query_logs.py +239 -0
  73. package/bundled-skills/monte-carlo-push-ingestion/scripts/templates/redshift/push_lineage.py +178 -0
  74. package/bundled-skills/monte-carlo-push-ingestion/scripts/templates/redshift/push_metadata.py +178 -0
  75. package/bundled-skills/monte-carlo-push-ingestion/scripts/templates/redshift/push_query_logs.py +196 -0
  76. package/bundled-skills/monte-carlo-push-ingestion/scripts/templates/snowflake/collect_and_push_lineage.py +154 -0
  77. package/bundled-skills/monte-carlo-push-ingestion/scripts/templates/snowflake/collect_and_push_metadata.py +137 -0
  78. package/bundled-skills/monte-carlo-push-ingestion/scripts/templates/snowflake/collect_and_push_query_logs.py +137 -0
  79. package/bundled-skills/monte-carlo-push-ingestion/scripts/templates/snowflake/collect_lineage.py +349 -0
  80. package/bundled-skills/monte-carlo-push-ingestion/scripts/templates/snowflake/collect_metadata.py +329 -0
  81. package/bundled-skills/monte-carlo-push-ingestion/scripts/templates/snowflake/collect_query_logs.py +254 -0
  82. package/bundled-skills/monte-carlo-push-ingestion/scripts/templates/snowflake/push_lineage.py +307 -0
  83. package/bundled-skills/monte-carlo-push-ingestion/scripts/templates/snowflake/push_metadata.py +228 -0
  84. package/bundled-skills/monte-carlo-push-ingestion/scripts/templates/snowflake/push_query_logs.py +248 -0
  85. package/bundled-skills/monte-carlo-push-ingestion/scripts/test_template_sdk_usage.py +340 -0
  86. package/bundled-skills/monte-carlo-validation-notebook/SKILL.md +685 -0
  87. package/bundled-skills/monte-carlo-validation-notebook/scripts/generate_notebook_url.py +141 -0
  88. package/bundled-skills/monte-carlo-validation-notebook/scripts/resolve_dbt_schema.py +161 -0
  89. package/package.json +1 -1
  90. package/skills_index.json +503 -61
@@ -0,0 +1,196 @@
1
+ """
2
+ Redshift — Query Log Push (push-only)
3
+ =======================================
4
+ Reads a JSON manifest file produced by collect_query_logs.py and pushes the query
5
+ log entries to Monte Carlo via the push ingestion API, with configurable batching
6
+ to keep compressed payloads under 1 MB.
7
+
8
+ Substitution points (search for "← SUBSTITUTE"):
9
+ - MCD_INGEST_ID / MCD_INGEST_TOKEN : Monte Carlo API credentials
10
+ - MCD_RESOURCE_UUID : UUID of the Redshift connection in Monte Carlo
11
+ - PUSH_BATCH_SIZE : number of entries per API call (default 100)
12
+
13
+ Prerequisites:
14
+ pip install pycarlo python-dateutil
15
+ """
16
+
17
+ from __future__ import annotations
18
+
19
+ import argparse
20
+ import json
21
+ import logging
22
+ import os
23
+ from concurrent.futures import ThreadPoolExecutor, as_completed
24
+ from datetime import datetime, timezone
25
+ from typing import Any
26
+
27
+ from dateutil.parser import isoparse
28
+ from pycarlo.core import Client, Session
29
+ from pycarlo.features.ingestion import IngestionService
30
+ from pycarlo.features.ingestion.models import QueryLogEntry
31
+
32
# Configure logging at import time so the INFO-level progress messages
# emitted through ``log`` carry a timestamp and level.
logging.basicConfig(level=logging.INFO, format="%(asctime)s %(levelname)s %(message)s")
log = logging.getLogger(__name__)

# Value sent as ``log_type`` with every batch pushed by this script.
LOG_TYPE = "redshift"
# Number of entries per API call when the caller does not override it.
DEFAULT_BATCH_SIZE = 100  # ← SUBSTITUTE: conservative default to stay under 1 MB compressed

# Truncate query_text longer than this to prevent 413 errors.
# Some SQL statements (e.g., generated by BI tools) can be 100KB+ and blow up
# compressed payloads even at small batch sizes.
_MAX_QUERY_TEXT_LEN = 10_000
42
+
43
+
44
def _build_query_log_entries(entry_dicts: list[dict[str, Any]]) -> list[QueryLogEntry]:
    """Turn the raw manifest dicts into ``QueryLogEntry`` objects.

    For every dict:

    * ``query_text`` defaults to ``""`` and is cut to ``_MAX_QUERY_TEXT_LEN``
      characters (plus a ``"... [TRUNCATED]"`` marker) when it is longer, to
      avoid 413 Request Too Large responses.
    * ``database_name`` and ``elapsed_time_us`` are forwarded through
      ``extra`` when present and not ``None``; ``extra`` is ``None`` when
      neither is set.
    * ``start_time`` / ``end_time`` are parsed with ``isoparse`` when truthy,
      otherwise passed as ``None``.

    The number of truncated statements is logged once at the end.
    """
    marker = "... [TRUNCATED]"
    passthrough_keys = ("database_name", "elapsed_time_us")

    built = []
    clipped = 0
    for record in entry_dicts:
        sql = record.get("query_text") or ""
        if len(sql) > _MAX_QUERY_TEXT_LEN:
            sql = sql[:_MAX_QUERY_TEXT_LEN] + marker
            clipped += 1

        # Only carry optional fields that actually have a value.
        extra_fields = {
            key: record[key]
            for key in passthrough_keys
            if record.get(key) is not None
        }

        started = record.get("start_time")
        finished = record.get("end_time")

        entry = QueryLogEntry(
            query_id=record.get("query_id"),
            query_text=sql,
            start_time=isoparse(started) if started else None,
            end_time=isoparse(finished) if finished else None,
            user=record.get("user"),
            extra=extra_fields if extra_fields else None,
        )
        built.append(entry)

    if clipped > 0:
        log.info("Truncated %d query text(s) exceeding %d chars", clipped, _MAX_QUERY_TEXT_LEN)
    return built
78
+
79
+
80
def push(
    manifest_path: str,
    resource_uuid: str,
    key_id: str,
    key_token: str,
    batch_size: int = DEFAULT_BATCH_SIZE,
) -> dict[str, Any]:
    """Read a collect manifest and push its query log entries to Monte Carlo in batches.

    Args:
        manifest_path: Path to a JSON manifest containing an ``"entries"`` list.
        resource_uuid: Forwarded as ``resource_uuid`` on every
            ``send_query_logs`` call.
        key_id: Ingestion key ID used to build the pycarlo ``Session``.
        key_token: Ingestion key token used to build the pycarlo ``Session``.
        batch_size: Maximum number of entries sent per API call; must be >= 1.

    Returns:
        A summary dict (resource UUID, log type, invocation IDs in batch
        order, timestamp and counts). The same dict is written as JSON to a
        ``*_push_result.json`` file next to the manifest.

    Raises:
        ValueError: If ``batch_size`` is smaller than 1.
        KeyError: If the manifest has no ``"entries"`` key.
        Exception: Any error raised while pushing a batch is logged and
            re-raised; no push-result file is written in that case.
    """
    # Fail early with a clear message instead of an obscure error from
    # range() (batch_size == 0) or ThreadPoolExecutor (batch_size < 0).
    if batch_size < 1:
        raise ValueError(f"batch_size must be a positive integer, got {batch_size!r}")

    with open(manifest_path, encoding="utf-8") as fh:
        manifest = json.load(fh)

    entry_dicts: list[dict[str, Any]] = manifest["entries"]
    entries = _build_query_log_entries(entry_dicts)
    log.info("Loaded %d query log entries from %s", len(entries), manifest_path)

    if not entries:
        log.info("No query log entries to push.")
        summary = {
            "resource_uuid": resource_uuid,
            "log_type": LOG_TYPE,
            "invocation_ids": [],
            "pushed_at": datetime.now(timezone.utc).isoformat(),
            "query_log_count": 0,
            "batch_count": 0,
            "batch_size": batch_size,
        }
        _write_push_result(manifest_path, summary)
        return summary

    # Split into batches
    batches = [entries[i : i + batch_size] for i in range(0, len(entries), batch_size)]
    total_batches = len(batches)

    def _push_batch(batch: list, batch_num: int) -> str | None:
        """Push a single batch using a Session created just for this call."""
        client = Client(session=Session(mcd_id=key_id, mcd_token=key_token, scope="Ingestion"))
        service = IngestionService(mc_client=client)
        result = service.send_query_logs(
            resource_uuid=resource_uuid,
            log_type=LOG_TYPE,
            events=batch,
        )
        invocation_id = service.extract_invocation_id(result)
        log.info(
            "Pushed batch %d/%d (%d entries) — invocation_id=%s",
            batch_num,
            total_batches,
            len(batch),
            invocation_id,
        )
        return invocation_id

    # Push batches in parallel (each call builds its own pycarlo Session).
    max_workers = min(4, total_batches)
    # Pre-sized so results can be stored by batch index even though the
    # futures complete in arbitrary order.
    invocation_ids: list[str | None] = [None] * total_batches

    with ThreadPoolExecutor(max_workers=max_workers) as pool:
        futures = {
            pool.submit(_push_batch, batch, i + 1): i
            for i, batch in enumerate(batches)
        }
        for future in as_completed(futures):
            idx = futures[future]
            try:
                invocation_ids[idx] = future.result()
            except Exception as exc:
                log.error("ERROR pushing batch %d: %s", idx + 1, exc)
                raise

    log.info("All %d batches pushed (%d workers)", total_batches, max_workers)

    summary = {
        "resource_uuid": resource_uuid,
        "log_type": LOG_TYPE,
        "invocation_ids": invocation_ids,
        "pushed_at": datetime.now(timezone.utc).isoformat(),
        "query_log_count": len(entries),
        "batch_count": total_batches,
        "batch_size": batch_size,
        "lookback_hours": manifest.get("lookback_hours"),
        "lookback_lag_hours": manifest.get("lookback_lag_hours"),
    }
    _write_push_result(manifest_path, summary)
    return summary


def _push_result_path(manifest_path: str) -> str:
    """Return the push-result path that sits next to ``manifest_path``.

    ``foo.json`` maps to ``foo_push_result.json``. Only the final extension is
    considered, so ``.json`` inside directory names is left alone, and a path
    without a ``.json`` extension gets the suffix appended so the result can
    never overwrite the manifest itself.
    """
    root, ext = os.path.splitext(manifest_path)
    if ext.lower() == ".json":
        return f"{root}_push_result.json"
    return f"{manifest_path}_push_result.json"


def _write_push_result(manifest_path: str, summary: dict[str, Any]) -> str:
    """Write ``summary`` as JSON next to the manifest and return the path used."""
    push_manifest_path = _push_result_path(manifest_path)
    with open(push_manifest_path, "w", encoding="utf-8") as fh:
        json.dump(summary, fh, indent=2)
    log.info("Push result written to %s", push_manifest_path)
    return push_manifest_path
170
+
171
+
172
def main() -> None:
    """Parse CLI arguments (with environment-variable defaults) and call ``push``.

    Exits through ``parser.error`` when a credential / resource UUID is
    missing or empty, or when ``--batch-size`` is not a positive integer.
    """
    parser = argparse.ArgumentParser(description="Push Redshift query logs to Monte Carlo from manifest")
    parser.add_argument(
        "--manifest",
        default="manifest_query_logs.json",
        help="Path to the JSON manifest to push (default: manifest_query_logs.json)",
    )
    parser.add_argument(
        "--resource-uuid",
        default=os.getenv("MCD_RESOURCE_UUID"),
        help="Monte Carlo resource UUID for this Redshift connection (env: MCD_RESOURCE_UUID)",
    )
    parser.add_argument(
        "--key-id",
        default=os.getenv("MCD_INGEST_ID"),
        help="Monte Carlo ingestion key ID (env: MCD_INGEST_ID)",
    )
    parser.add_argument(
        "--key-token",
        default=os.getenv("MCD_INGEST_TOKEN"),
        help="Monte Carlo ingestion key token (env: MCD_INGEST_TOKEN)",
    )
    parser.add_argument(
        "--batch-size",
        type=int,
        default=DEFAULT_BATCH_SIZE,
        help=f"Max entries per push batch (default: {DEFAULT_BATCH_SIZE})",
    )
    args = parser.parse_args()

    required = ["resource_uuid", "key_id", "key_token"]
    # Falsy check rather than ``is None``: an environment variable that is set
    # but empty must be reported as missing, not passed on as a credential.
    missing = [k for k in required if not getattr(args, k)]
    if missing:
        parser.error(f"Missing required arguments/env vars: {missing}")

    if args.batch_size < 1:
        parser.error(f"--batch-size must be a positive integer, got {args.batch_size}")

    push(
        manifest_path=args.manifest,
        resource_uuid=args.resource_uuid,
        key_id=args.key_id,
        key_token=args.key_token,
        batch_size=args.batch_size,
    )
193
+
194
+
195
+ if __name__ == "__main__":
196
+ main()
@@ -0,0 +1,154 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ Collect lineage from Snowflake and push it to Monte Carlo — combined.
4
+
5
+ Imports ``collect()`` from ``collect_lineage`` and ``push()`` from
6
+ ``push_lineage``, runs both in sequence.
7
+
8
+ Substitution points
9
+ -------------------
10
+ - SNOWFLAKE_ACCOUNT (env) / --account (CLI) : Snowflake account identifier
11
+ - SNOWFLAKE_USER (env) / --user (CLI) : Snowflake username
12
+ - SNOWFLAKE_PASSWORD (env) / --password (CLI) : Snowflake password
13
+ - SNOWFLAKE_WAREHOUSE (env) / --warehouse (CLI) : Snowflake virtual warehouse
14
+ - MCD_INGEST_ID (env) / --key-id (CLI) : Monte Carlo ingestion key ID
15
+ - MCD_INGEST_TOKEN (env) / --key-token (CLI) : Monte Carlo ingestion key token
16
+ - MCD_RESOURCE_UUID (env) / --resource-uuid (CLI) : MC resource UUID for this connection
17
+
18
+ Prerequisites
19
+ -------------
20
+ pip install pycarlo snowflake-connector-python
21
+
22
+ Usage (table-level):
23
+ python collect_and_push_lineage.py \\
24
+ --account <SNOWFLAKE_ACCOUNT> \\
25
+ --user <SNOWFLAKE_USER> \\
26
+ --password <SNOWFLAKE_PASSWORD> \\
27
+ --warehouse <SNOWFLAKE_WAREHOUSE> \\
28
+ --key-id <MCD_INGEST_ID> \\
29
+ --key-token <MCD_INGEST_TOKEN> \\
30
+ --resource-uuid <MCD_RESOURCE_UUID>
31
+
32
+ Usage (column-level):
33
+ python collect_and_push_lineage.py ... --column-lineage
34
+ """
35
+
36
+ from __future__ import annotations
37
+
38
+ import argparse
39
+ import os
40
+
41
+ from collect_lineage import collect, _LOOKBACK_HOURS
42
+ from push_lineage import push, _BATCH_SIZE
43
+
44
+
45
def main() -> None:
    """Command-line entry point: collect Snowflake lineage, then push it.

    Connection settings and Monte Carlo credentials come from CLI flags whose
    defaults are read from the environment. All seven of them are mandatory;
    the parser exits with an error listing the flags that are missing or
    empty. ``collect()`` is given ``--output-file`` as its output and
    ``push()`` is given that same path as its input.
    """
    cli = argparse.ArgumentParser(
        description="Collect Snowflake lineage from ACCOUNT_USAGE and push to Monte Carlo",
    )

    # (flag, environment variable, help text) — order defines both the
    # --help listing and the order of flags in the "missing" error message.
    env_backed = (
        ("--account", "SNOWFLAKE_ACCOUNT", "Snowflake account identifier (env: SNOWFLAKE_ACCOUNT)"),
        ("--user", "SNOWFLAKE_USER", "Snowflake username (env: SNOWFLAKE_USER)"),
        ("--password", "SNOWFLAKE_PASSWORD", "Snowflake password (env: SNOWFLAKE_PASSWORD)"),
        ("--warehouse", "SNOWFLAKE_WAREHOUSE", "Snowflake virtual warehouse (env: SNOWFLAKE_WAREHOUSE)"),
        ("--key-id", "MCD_INGEST_ID", "Monte Carlo ingestion key ID (env: MCD_INGEST_ID)"),
        ("--key-token", "MCD_INGEST_TOKEN", "Monte Carlo ingestion key token (env: MCD_INGEST_TOKEN)"),
        (
            "--resource-uuid",
            "MCD_RESOURCE_UUID",
            "Monte Carlo resource UUID for this Snowflake connection (env: MCD_RESOURCE_UUID)",
        ),
    )
    for flag, env_name, help_text in env_backed:
        cli.add_argument(flag, default=os.environ.get(env_name), help=help_text)

    cli.add_argument(
        "--lookback-hours",
        type=int,
        default=_LOOKBACK_HOURS,
        help=f"Hours of QUERY_HISTORY to scan (default: {_LOOKBACK_HOURS})",
    )
    cli.add_argument(
        "--column-lineage",
        action="store_true",
        help="Push column-level lineage instead of table-level",
    )
    cli.add_argument(
        "--output-file",
        default="lineage_output.json",
        help="Path for the intermediate collect manifest (default: lineage_output.json)",
    )
    cli.add_argument(
        "--push-result-file",
        default="lineage_push_result.json",
        help="Path to write the push result (default: lineage_push_result.json)",
    )
    cli.add_argument(
        "--batch-size",
        type=int,
        default=_BATCH_SIZE,
        help=f"Max events per push batch (default: {_BATCH_SIZE})",
    )
    opts = cli.parse_args()

    # argparse stores "--key-id" as attribute "key_id".
    absent = []
    for flag, _env_name, _help_text in env_backed:
        attr = flag.lstrip("-").replace("-", "_")
        if not getattr(opts, attr):
            absent.append(flag)
    if absent:
        cli.error(f"Missing required arguments: {', '.join(absent)}")

    # Step 1: Collect
    collect(
        account=opts.account,
        user=opts.user,
        password=opts.password,
        warehouse=opts.warehouse,
        lookback_hours=opts.lookback_hours,
        column_lineage=opts.column_lineage,
        output_file=opts.output_file,
    )

    # Step 2: Push
    push(
        input_file=opts.output_file,
        resource_uuid=opts.resource_uuid,
        key_id=opts.key_id,
        key_token=opts.key_token,
        batch_size=opts.batch_size,
        output_file=opts.push_result_file,
    )

    print("Done.")
151
+
152
+
153
+ if __name__ == "__main__":
154
+ main()
@@ -0,0 +1,137 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ Collect table metadata from Snowflake and push it to Monte Carlo — combined.
4
+
5
+ Imports ``collect()`` from ``collect_metadata`` and ``push()`` from
6
+ ``push_metadata``, runs both in sequence.
7
+
8
+ Substitution points
9
+ -------------------
10
+ - SNOWFLAKE_ACCOUNT (env) / --account (CLI) : Snowflake account identifier (e.g. xy12345.us-east-1)
11
+ - SNOWFLAKE_USER (env) / --user (CLI) : Snowflake username
12
+ - SNOWFLAKE_PASSWORD (env) / --password (CLI) : Snowflake password
13
+ - SNOWFLAKE_WAREHOUSE (env) / --warehouse (CLI) : Snowflake virtual warehouse
14
+ - MCD_INGEST_ID (env) / --key-id (CLI) : Monte Carlo ingestion key ID
15
+ - MCD_INGEST_TOKEN (env) / --key-token (CLI) : Monte Carlo ingestion key token
16
+ - MCD_RESOURCE_UUID (env) / --resource-uuid (CLI) : MC resource UUID for this connection
17
+
18
+ Prerequisites
19
+ -------------
20
+ pip install pycarlo snowflake-connector-python
21
+
22
+ Usage
23
+ -----
24
+ python collect_and_push_metadata.py \\
25
+ --account <SNOWFLAKE_ACCOUNT> \\
26
+ --user <SNOWFLAKE_USER> \\
27
+ --password <SNOWFLAKE_PASSWORD> \\
28
+ --warehouse <SNOWFLAKE_WAREHOUSE> \\
29
+ --key-id <MCD_INGEST_ID> \\
30
+ --key-token <MCD_INGEST_TOKEN> \\
31
+ --resource-uuid <MCD_RESOURCE_UUID>
32
+ """
33
+
34
+ import argparse
35
+ import os
36
+
37
+ from collect_metadata import collect
38
+ from push_metadata import push, _BATCH_SIZE
39
+
40
+
41
def main() -> None:
    """Command-line entry point: collect Snowflake table metadata, then push it.

    Every connection / credential flag falls back to an environment variable
    and all seven are mandatory; the parser exits with an error naming the
    flags that are missing or empty. ``collect()`` is given ``--output-file``
    as its output and ``push()`` is given that same path as its input.
    """
    cli = argparse.ArgumentParser(
        description="Collect Snowflake table metadata and push to Monte Carlo",
    )

    # (flag, environment variable, help text) — order defines both the
    # --help listing and the order of flags in the "missing" error message.
    env_backed = (
        (
            "--account",
            "SNOWFLAKE_ACCOUNT",
            "Snowflake account identifier, e.g. xy12345.us-east-1 (env: SNOWFLAKE_ACCOUNT)",  # ← SUBSTITUTE
        ),
        ("--user", "SNOWFLAKE_USER", "Snowflake username (env: SNOWFLAKE_USER)"),  # ← SUBSTITUTE
        ("--password", "SNOWFLAKE_PASSWORD", "Snowflake password (env: SNOWFLAKE_PASSWORD)"),  # ← SUBSTITUTE
        (
            "--warehouse",
            "SNOWFLAKE_WAREHOUSE",
            "Snowflake virtual warehouse (env: SNOWFLAKE_WAREHOUSE)",  # ← SUBSTITUTE
        ),
        ("--key-id", "MCD_INGEST_ID", "Monte Carlo ingestion key ID (env: MCD_INGEST_ID)"),
        ("--key-token", "MCD_INGEST_TOKEN", "Monte Carlo ingestion key token (env: MCD_INGEST_TOKEN)"),
        (
            "--resource-uuid",
            "MCD_RESOURCE_UUID",
            "Monte Carlo resource UUID for this Snowflake connection (env: MCD_RESOURCE_UUID)",
        ),
    )
    for flag, env_name, help_text in env_backed:
        cli.add_argument(flag, default=os.environ.get(env_name), help=help_text)

    cli.add_argument(
        "--output-file",
        default="metadata_output.json",
        help="Path for the intermediate collect manifest (default: metadata_output.json)",
    )
    cli.add_argument(
        "--push-result-file",
        default="metadata_push_result.json",
        help="Path to write the push result (default: metadata_push_result.json)",
    )
    cli.add_argument(
        "--batch-size",
        type=int,
        default=_BATCH_SIZE,
        help=f"Max assets per push batch (default: {_BATCH_SIZE})",
    )
    opts = cli.parse_args()

    # argparse stores "--key-id" as attribute "key_id".
    absent = []
    for flag, _env_name, _help_text in env_backed:
        attr = flag.lstrip("-").replace("-", "_")
        if not getattr(opts, attr):
            absent.append(flag)
    if absent:
        cli.error(f"Missing required arguments: {', '.join(absent)}")

    # Step 1: Collect
    collect(
        account=opts.account,
        user=opts.user,
        password=opts.password,
        warehouse=opts.warehouse,
        output_file=opts.output_file,
    )

    # Step 2: Push
    push(
        input_file=opts.output_file,
        resource_uuid=opts.resource_uuid,
        key_id=opts.key_id,
        key_token=opts.key_token,
        batch_size=opts.batch_size,
        output_file=opts.push_result_file,
    )

    print("Done.")
134
+
135
+
136
+ if __name__ == "__main__":
137
+ main()
@@ -0,0 +1,137 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ Collect query logs from Snowflake and push them to Monte Carlo — combined.
4
+
5
+ Imports ``collect()`` from ``collect_query_logs`` and ``push()`` from
6
+ ``push_query_logs``, runs both in sequence.
7
+
8
+ Substitution points
9
+ -------------------
10
+ - SNOWFLAKE_ACCOUNT (env) / --account (CLI) : Snowflake account identifier
11
+ - SNOWFLAKE_USER (env) / --user (CLI) : Snowflake username
12
+ - SNOWFLAKE_PASSWORD (env) / --password (CLI) : Snowflake password
13
+ - SNOWFLAKE_WAREHOUSE (env) / --warehouse (CLI) : Snowflake virtual warehouse
14
+ - MCD_INGEST_ID (env) / --key-id (CLI) : Monte Carlo ingestion key ID
15
+ - MCD_INGEST_TOKEN (env) / --key-token (CLI) : Monte Carlo ingestion key token
16
+ - MCD_RESOURCE_UUID (env) / --resource-uuid (CLI) : MC resource UUID for this connection
17
+
18
+ Prerequisites
19
+ -------------
20
+ pip install pycarlo snowflake-connector-python
21
+
22
+ Usage
23
+ -----
24
+ python collect_and_push_query_logs.py \\
25
+ --account <SNOWFLAKE_ACCOUNT> \\
26
+ --user <SNOWFLAKE_USER> \\
27
+ --password <SNOWFLAKE_PASSWORD> \\
28
+ --warehouse <SNOWFLAKE_WAREHOUSE> \\
29
+ --key-id <MCD_INGEST_ID> \\
30
+ --key-token <MCD_INGEST_TOKEN> \\
31
+ --resource-uuid <MCD_RESOURCE_UUID>
32
+ """
33
+
34
+ import argparse
35
+ import os
36
+
37
+ from collect_query_logs import collect
38
+ from push_query_logs import push, _BATCH_SIZE
39
+
40
+
41
def main() -> None:
    """Command-line entry point: collect Snowflake query logs, then push them.

    Every connection / credential flag falls back to an environment variable
    and all seven are mandatory; the parser exits with an error naming the
    flags that are missing or empty. ``collect()`` is given ``--output-file``
    as its output and ``push()`` is given that same path as its input.
    """
    cli = argparse.ArgumentParser(
        description="Collect Snowflake query logs from ACCOUNT_USAGE and push to Monte Carlo",
    )

    # (flag, environment variable, help text) — order defines both the
    # --help listing and the order of flags in the "missing" error message.
    env_backed = (
        (
            "--account",
            "SNOWFLAKE_ACCOUNT",
            "Snowflake account identifier, e.g. xy12345.us-east-1 (env: SNOWFLAKE_ACCOUNT)",  # ← SUBSTITUTE
        ),
        ("--user", "SNOWFLAKE_USER", "Snowflake username (env: SNOWFLAKE_USER)"),
        ("--password", "SNOWFLAKE_PASSWORD", "Snowflake password (env: SNOWFLAKE_PASSWORD)"),
        (
            "--warehouse",
            "SNOWFLAKE_WAREHOUSE",
            "Snowflake virtual warehouse (env: SNOWFLAKE_WAREHOUSE)",  # ← SUBSTITUTE
        ),
        ("--key-id", "MCD_INGEST_ID", "Monte Carlo ingestion key ID (env: MCD_INGEST_ID)"),
        ("--key-token", "MCD_INGEST_TOKEN", "Monte Carlo ingestion key token (env: MCD_INGEST_TOKEN)"),
        (
            "--resource-uuid",
            "MCD_RESOURCE_UUID",
            "Monte Carlo resource UUID for this Snowflake connection (env: MCD_RESOURCE_UUID)",
        ),
    )
    for flag, env_name, help_text in env_backed:
        cli.add_argument(flag, default=os.environ.get(env_name), help=help_text)

    cli.add_argument(
        "--output-file",
        default="query_logs_output.json",
        help="Path for the intermediate collect manifest (default: query_logs_output.json)",
    )
    cli.add_argument(
        "--push-result-file",
        default="query_logs_push_result.json",
        help="Path to write the push result (default: query_logs_push_result.json)",
    )
    cli.add_argument(
        "--batch-size",
        type=int,
        default=_BATCH_SIZE,
        help=f"Max entries per push batch (default: {_BATCH_SIZE})",
    )
    opts = cli.parse_args()

    # argparse stores "--key-id" as attribute "key_id".
    absent = []
    for flag, _env_name, _help_text in env_backed:
        attr = flag.lstrip("-").replace("-", "_")
        if not getattr(opts, attr):
            absent.append(flag)
    if absent:
        cli.error(f"Missing required arguments: {', '.join(absent)}")

    # Step 1: Collect
    collect(
        account=opts.account,
        user=opts.user,
        password=opts.password,
        warehouse=opts.warehouse,
        output_file=opts.output_file,
    )

    # Step 2: Push
    push(
        input_file=opts.output_file,
        resource_uuid=opts.resource_uuid,
        key_id=opts.key_id,
        key_token=opts.key_token,
        batch_size=opts.batch_size,
        output_file=opts.push_result_file,
    )

    print("Done.")
134
+
135
+
136
+ if __name__ == "__main__":
137
+ main()