opencode-skills-collection 2.0.0 → 2.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (90) hide show
  1. package/bundled-skills/.antigravity-install-manifest.json +6 -1
  2. package/bundled-skills/docs/integrations/jetski-cortex.md +3 -3
  3. package/bundled-skills/docs/integrations/jetski-gemini-loader/README.md +1 -1
  4. package/bundled-skills/docs/maintainers/repo-growth-seo.md +3 -3
  5. package/bundled-skills/docs/maintainers/skills-update-guide.md +1 -1
  6. package/bundled-skills/docs/users/bundles.md +1 -1
  7. package/bundled-skills/docs/users/claude-code-skills.md +1 -1
  8. package/bundled-skills/docs/users/gemini-cli-skills.md +1 -1
  9. package/bundled-skills/docs/users/getting-started.md +1 -1
  10. package/bundled-skills/docs/users/kiro-integration.md +1 -1
  11. package/bundled-skills/docs/users/usage.md +4 -4
  12. package/bundled-skills/docs/users/visual-guide.md +4 -4
  13. package/bundled-skills/manage-skills/SKILL.md +187 -0
  14. package/bundled-skills/monte-carlo-monitor-creation/SKILL.md +222 -0
  15. package/bundled-skills/monte-carlo-monitor-creation/references/comparison-monitor.md +426 -0
  16. package/bundled-skills/monte-carlo-monitor-creation/references/custom-sql-monitor.md +207 -0
  17. package/bundled-skills/monte-carlo-monitor-creation/references/metric-monitor.md +292 -0
  18. package/bundled-skills/monte-carlo-monitor-creation/references/table-monitor.md +231 -0
  19. package/bundled-skills/monte-carlo-monitor-creation/references/validation-monitor.md +404 -0
  20. package/bundled-skills/monte-carlo-prevent/SKILL.md +252 -0
  21. package/bundled-skills/monte-carlo-prevent/references/TROUBLESHOOTING.md +23 -0
  22. package/bundled-skills/monte-carlo-prevent/references/parameters.md +32 -0
  23. package/bundled-skills/monte-carlo-prevent/references/workflows.md +478 -0
  24. package/bundled-skills/monte-carlo-push-ingestion/SKILL.md +363 -0
  25. package/bundled-skills/monte-carlo-push-ingestion/references/anomaly-detection.md +87 -0
  26. package/bundled-skills/monte-carlo-push-ingestion/references/custom-lineage.md +203 -0
  27. package/bundled-skills/monte-carlo-push-ingestion/references/direct-http-api.md +207 -0
  28. package/bundled-skills/monte-carlo-push-ingestion/references/prerequisites.md +150 -0
  29. package/bundled-skills/monte-carlo-push-ingestion/references/push-lineage.md +160 -0
  30. package/bundled-skills/monte-carlo-push-ingestion/references/push-metadata.md +158 -0
  31. package/bundled-skills/monte-carlo-push-ingestion/references/push-query-logs.md +219 -0
  32. package/bundled-skills/monte-carlo-push-ingestion/references/validation.md +257 -0
  33. package/bundled-skills/monte-carlo-push-ingestion/scripts/sample_verify.py +357 -0
  34. package/bundled-skills/monte-carlo-push-ingestion/scripts/templates/bigquery/collect_and_push_lineage.py +70 -0
  35. package/bundled-skills/monte-carlo-push-ingestion/scripts/templates/bigquery/collect_and_push_metadata.py +65 -0
  36. package/bundled-skills/monte-carlo-push-ingestion/scripts/templates/bigquery/collect_and_push_query_logs.py +70 -0
  37. package/bundled-skills/monte-carlo-push-ingestion/scripts/templates/bigquery/collect_lineage.py +214 -0
  38. package/bundled-skills/monte-carlo-push-ingestion/scripts/templates/bigquery/collect_metadata.py +160 -0
  39. package/bundled-skills/monte-carlo-push-ingestion/scripts/templates/bigquery/collect_query_logs.py +164 -0
  40. package/bundled-skills/monte-carlo-push-ingestion/scripts/templates/bigquery/push_lineage.py +198 -0
  41. package/bundled-skills/monte-carlo-push-ingestion/scripts/templates/bigquery/push_metadata.py +193 -0
  42. package/bundled-skills/monte-carlo-push-ingestion/scripts/templates/bigquery/push_query_logs.py +207 -0
  43. package/bundled-skills/monte-carlo-push-ingestion/scripts/templates/bigquery-iceberg/collect_and_push_metadata.py +71 -0
  44. package/bundled-skills/monte-carlo-push-ingestion/scripts/templates/bigquery-iceberg/collect_and_push_query_logs.py +64 -0
  45. package/bundled-skills/monte-carlo-push-ingestion/scripts/templates/bigquery-iceberg/collect_metadata.py +253 -0
  46. package/bundled-skills/monte-carlo-push-ingestion/scripts/templates/bigquery-iceberg/collect_query_logs.py +149 -0
  47. package/bundled-skills/monte-carlo-push-ingestion/scripts/templates/bigquery-iceberg/push_metadata.py +190 -0
  48. package/bundled-skills/monte-carlo-push-ingestion/scripts/templates/bigquery-iceberg/push_query_logs.py +208 -0
  49. package/bundled-skills/monte-carlo-push-ingestion/scripts/templates/databricks/collect_and_push_lineage.py +83 -0
  50. package/bundled-skills/monte-carlo-push-ingestion/scripts/templates/databricks/collect_and_push_metadata.py +77 -0
  51. package/bundled-skills/monte-carlo-push-ingestion/scripts/templates/databricks/collect_and_push_query_logs.py +83 -0
  52. package/bundled-skills/monte-carlo-push-ingestion/scripts/templates/databricks/collect_lineage.py +240 -0
  53. package/bundled-skills/monte-carlo-push-ingestion/scripts/templates/databricks/collect_metadata.py +212 -0
  54. package/bundled-skills/monte-carlo-push-ingestion/scripts/templates/databricks/collect_query_logs.py +204 -0
  55. package/bundled-skills/monte-carlo-push-ingestion/scripts/templates/databricks/push_lineage.py +192 -0
  56. package/bundled-skills/monte-carlo-push-ingestion/scripts/templates/databricks/push_metadata.py +178 -0
  57. package/bundled-skills/monte-carlo-push-ingestion/scripts/templates/databricks/push_query_logs.py +200 -0
  58. package/bundled-skills/monte-carlo-push-ingestion/scripts/templates/hive/collect_and_push_lineage.py +119 -0
  59. package/bundled-skills/monte-carlo-push-ingestion/scripts/templates/hive/collect_and_push_metadata.py +119 -0
  60. package/bundled-skills/monte-carlo-push-ingestion/scripts/templates/hive/collect_and_push_query_logs.py +117 -0
  61. package/bundled-skills/monte-carlo-push-ingestion/scripts/templates/hive/collect_lineage.py +265 -0
  62. package/bundled-skills/monte-carlo-push-ingestion/scripts/templates/hive/collect_metadata.py +313 -0
  63. package/bundled-skills/monte-carlo-push-ingestion/scripts/templates/hive/collect_query_logs.py +284 -0
  64. package/bundled-skills/monte-carlo-push-ingestion/scripts/templates/hive/push_lineage.py +309 -0
  65. package/bundled-skills/monte-carlo-push-ingestion/scripts/templates/hive/push_metadata.py +245 -0
  66. package/bundled-skills/monte-carlo-push-ingestion/scripts/templates/hive/push_query_logs.py +255 -0
  67. package/bundled-skills/monte-carlo-push-ingestion/scripts/templates/redshift/collect_and_push_lineage.py +78 -0
  68. package/bundled-skills/monte-carlo-push-ingestion/scripts/templates/redshift/collect_and_push_metadata.py +80 -0
  69. package/bundled-skills/monte-carlo-push-ingestion/scripts/templates/redshift/collect_and_push_query_logs.py +88 -0
  70. package/bundled-skills/monte-carlo-push-ingestion/scripts/templates/redshift/collect_lineage.py +235 -0
  71. package/bundled-skills/monte-carlo-push-ingestion/scripts/templates/redshift/collect_metadata.py +219 -0
  72. package/bundled-skills/monte-carlo-push-ingestion/scripts/templates/redshift/collect_query_logs.py +239 -0
  73. package/bundled-skills/monte-carlo-push-ingestion/scripts/templates/redshift/push_lineage.py +178 -0
  74. package/bundled-skills/monte-carlo-push-ingestion/scripts/templates/redshift/push_metadata.py +178 -0
  75. package/bundled-skills/monte-carlo-push-ingestion/scripts/templates/redshift/push_query_logs.py +196 -0
  76. package/bundled-skills/monte-carlo-push-ingestion/scripts/templates/snowflake/collect_and_push_lineage.py +154 -0
  77. package/bundled-skills/monte-carlo-push-ingestion/scripts/templates/snowflake/collect_and_push_metadata.py +137 -0
  78. package/bundled-skills/monte-carlo-push-ingestion/scripts/templates/snowflake/collect_and_push_query_logs.py +137 -0
  79. package/bundled-skills/monte-carlo-push-ingestion/scripts/templates/snowflake/collect_lineage.py +349 -0
  80. package/bundled-skills/monte-carlo-push-ingestion/scripts/templates/snowflake/collect_metadata.py +329 -0
  81. package/bundled-skills/monte-carlo-push-ingestion/scripts/templates/snowflake/collect_query_logs.py +254 -0
  82. package/bundled-skills/monte-carlo-push-ingestion/scripts/templates/snowflake/push_lineage.py +307 -0
  83. package/bundled-skills/monte-carlo-push-ingestion/scripts/templates/snowflake/push_metadata.py +228 -0
  84. package/bundled-skills/monte-carlo-push-ingestion/scripts/templates/snowflake/push_query_logs.py +248 -0
  85. package/bundled-skills/monte-carlo-push-ingestion/scripts/test_template_sdk_usage.py +340 -0
  86. package/bundled-skills/monte-carlo-validation-notebook/SKILL.md +685 -0
  87. package/bundled-skills/monte-carlo-validation-notebook/scripts/generate_notebook_url.py +141 -0
  88. package/bundled-skills/monte-carlo-validation-notebook/scripts/resolve_dbt_schema.py +161 -0
  89. package/package.json +1 -1
  90. package/skills_index.json +503 -61
@@ -0,0 +1,228 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ Push table metadata to Monte Carlo from a JSON manifest — push only.
4
+
5
+ Reads a manifest file produced by ``collect_metadata.py`` and sends the assets
6
+ to Monte Carlo as RelationalAsset events using the pycarlo push ingestion API.
7
+ Large payloads are split into batches to stay under the 1 MB compressed limit.
8
+
9
+ Can be run standalone via CLI or imported (use the ``push()`` function).
10
+
11
+ Substitution points
12
+ -------------------
13
+ - MCD_INGEST_ID (env) / --key-id (CLI) : Monte Carlo ingestion key ID
14
+ - MCD_INGEST_TOKEN (env) / --key-token (CLI) : Monte Carlo ingestion key token
15
+ - MCD_RESOURCE_UUID (env) / --resource-uuid (CLI) : MC resource UUID for this connection
16
+
17
+ Prerequisites
18
+ -------------
19
+ pip install pycarlo
20
+
21
+ Usage
22
+ -----
23
+ python push_metadata.py \\
24
+ --key-id <MCD_INGEST_ID> \\
25
+ --key-token <MCD_INGEST_TOKEN> \\
26
+ --resource-uuid <MCD_RESOURCE_UUID> \\
27
+ --input-file metadata_output.json
28
+ """
29
+
30
+ import argparse
31
+ import json
32
+ import os
33
+ from concurrent.futures import ThreadPoolExecutor, as_completed
34
+ from datetime import datetime, timezone
35
+
36
+ from pycarlo.core import Client, Session
37
+ from pycarlo.features.ingestion import IngestionService
38
+ from pycarlo.features.ingestion.models import (
39
+ AssetField,
40
+ AssetFreshness,
41
+ AssetMetadata,
42
+ AssetVolume,
43
+ RelationalAsset,
44
+ )
45
+
46
# ← SUBSTITUTE: set RESOURCE_TYPE to match your Monte Carlo connection type
# Used as the fallback when the manifest has no "resource_type" key.
RESOURCE_TYPE: str = "snowflake"

# Default cap on assets per push batch — a conservative value meant to keep the
# compressed payload under 1 MB.
# ← SUBSTITUTE: tune based on average asset size (fields per table, description length, etc.)
_BATCH_SIZE: int = 500
52
+
53
+
54
def _asset_from_dict(d: dict) -> RelationalAsset:
    """Build a ``RelationalAsset`` from one entry of the manifest's asset list.

    ``name``, ``database`` and ``schema`` are required keys, as is ``name`` on
    every entry under ``fields``. ``type`` defaults to ``"TABLE"``; all other
    keys are optional.
    """
    columns = []
    for col in d.get("fields", []):
        columns.append(
            AssetField(
                name=col["name"],
                type=col.get("type"),
                description=col.get("description"),
            )
        )

    # Volume and freshness are attached only when the manifest carries a
    # non-empty section for them; otherwise they stay None.
    volume_info = d.get("volume")
    volume = (
        AssetVolume(
            row_count=volume_info.get("row_count"),
            byte_count=volume_info.get("byte_count"),
        )
        if volume_info
        else None
    )

    freshness_info = d.get("freshness")
    freshness = (
        AssetFreshness(last_update_time=freshness_info.get("last_update_time"))
        if freshness_info
        else None
    )

    asset_type = d.get("type", "TABLE")
    metadata = AssetMetadata(
        name=d["name"],
        database=d["database"],
        schema=d["schema"],
        description=d.get("description"),
    )
    return RelationalAsset(
        type=asset_type,
        metadata=metadata,
        fields=columns,
        volume=volume,
        freshness=freshness,
    )
90
+
91
+
92
def push(
    input_file: str,
    resource_uuid: str,
    key_id: str,
    key_token: str,
    batch_size: int = _BATCH_SIZE,
    output_file: str = "metadata_push_result.json",
) -> dict:
    """
    Read a metadata manifest and push assets to Monte Carlo in batches.

    Args:
        input_file: Path to the JSON manifest; assets are read from its
            ``"assets"`` key and the resource type from ``"resource_type"``
            (falling back to ``RESOURCE_TYPE``).
        resource_uuid: Monte Carlo resource UUID passed to ``send_metadata``.
        key_id: Ingestion key ID used to create each pycarlo ``Session``.
        key_token: Ingestion key token used to create each pycarlo ``Session``.
        batch_size: Maximum number of assets per ``send_metadata`` call.
            Must be at least 1.
        output_file: Path the JSON push result is written to.

    Returns:
        The push result dict (also written to ``output_file``), holding the
        invocation ID of each batch in batch order.

    Raises:
        ValueError: If ``batch_size`` is smaller than 1.
        Exception: Any error raised while pushing a batch is printed and
            re-raised; in that case no result file is written.
    """
    # Fail fast with a clear message. Without this check a zero step makes
    # range() raise, and a negative step yields no batches at all, which then
    # surfaces as ThreadPoolExecutor rejecting max_workers=0.
    if batch_size < 1:
        raise ValueError(f"batch_size must be at least 1, got {batch_size!r}")

    with open(input_file) as fh:
        manifest = json.load(fh)

    asset_dicts = manifest.get("assets", [])
    resource_type = manifest.get("resource_type", RESOURCE_TYPE)
    assets = [_asset_from_dict(d) for d in asset_dicts]
    print(f"Loaded {len(assets)} asset(s) from {input_file}")

    # max(..., 1) keeps a single empty batch when the manifest has no assets,
    # so one send_metadata call is still made in that case.
    batches = [
        assets[start : start + batch_size]
        for start in range(0, max(len(assets), 1), batch_size)
    ]
    total_batches = len(batches)

    def _push_batch(batch: list, batch_num: int) -> str | None:
        """Push one batch through a Session created for this call; return its invocation ID."""
        client = Client(session=Session(mcd_id=key_id, mcd_token=key_token, scope="Ingestion"))
        service = IngestionService(mc_client=client)
        result = service.send_metadata(
            resource_uuid=resource_uuid,
            resource_type=resource_type,
            events=batch,
        )
        invocation_id = service.extract_invocation_id(result)
        print(f"  Pushed batch {batch_num}/{total_batches} ({len(batch)} assets) — invocation_id={invocation_id}")
        return invocation_id

    # Push batches in parallel with at most 4 workers; every batch creates its
    # own pycarlo Session inside _push_batch.
    max_workers = min(4, total_batches)
    # Pre-sized so each result lands at its batch index even though batches
    # complete out of order.
    invocation_ids: list[str | None] = [None] * total_batches

    with ThreadPoolExecutor(max_workers=max_workers) as pool:
        futures = {
            pool.submit(_push_batch, batch, i + 1): i
            for i, batch in enumerate(batches)
        }
        for future in as_completed(futures):
            idx = futures[future]
            try:
                invocation_ids[idx] = future.result()
            except Exception as exc:
                print(f"  ERROR pushing batch {idx + 1}: {exc}")
                raise

    print(f"  All {total_batches} batches pushed ({max_workers} workers)")

    push_result = {
        "resource_uuid": resource_uuid,
        "resource_type": resource_type,
        "invocation_ids": invocation_ids,
        "pushed_at": datetime.now(tz=timezone.utc).isoformat(),
        "total_assets": len(assets),
        "batch_count": total_batches,
        "batch_size": batch_size,
    }
    with open(output_file, "w") as fh:
        json.dump(push_result, fh, indent=2)
    print(f"Push result written to {output_file}")

    return push_result
165
+
166
+
167
def main() -> None:
    """CLI entry point: parse arguments, check the credentials, then call ``push()``."""
    parser = argparse.ArgumentParser(
        description="Push Snowflake table metadata from a manifest to Monte Carlo",
    )

    # Credential flags fall back to an environment variable when omitted.
    credential_flags = (
        ("--key-id", "MCD_INGEST_ID", "Monte Carlo ingestion key ID (env: MCD_INGEST_ID)"),
        ("--key-token", "MCD_INGEST_TOKEN", "Monte Carlo ingestion key token (env: MCD_INGEST_TOKEN)"),
        (
            "--resource-uuid",
            "MCD_RESOURCE_UUID",
            "Monte Carlo resource UUID for this Snowflake connection (env: MCD_RESOURCE_UUID)",
        ),
    )
    for flag, env_var, help_text in credential_flags:
        parser.add_argument(flag, default=os.environ.get(env_var), help=help_text)

    parser.add_argument(
        "--input-file",
        default="metadata_output.json",
        help="Path to the collect manifest to read (default: metadata_output.json)",
    )
    parser.add_argument(
        "--output-file",
        default="metadata_push_result.json",
        help="Path to write the push result (default: metadata_push_result.json)",
    )
    parser.add_argument(
        "--batch-size",
        type=int,
        default=_BATCH_SIZE,
        help=f"Max assets per push batch (default: {_BATCH_SIZE})",
    )
    args = parser.parse_args()

    # A credential counts as missing when neither the flag nor its
    # environment variable supplied a non-empty value.
    supplied = {
        "--key-id": args.key_id,
        "--key-token": args.key_token,
        "--resource-uuid": args.resource_uuid,
    }
    missing = [flag for flag, value in supplied.items() if not value]
    if missing:
        parser.error(f"Missing required arguments: {', '.join(missing)}")

    push(
        input_file=args.input_file,
        resource_uuid=args.resource_uuid,
        key_id=args.key_id,
        key_token=args.key_token,
        batch_size=args.batch_size,
        output_file=args.output_file,
    )
    print("Done.")


if __name__ == "__main__":
    main()
@@ -0,0 +1,248 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ Push query logs to Monte Carlo from a JSON manifest — push only.
4
+
5
+ Reads a manifest file produced by ``collect_query_logs.py`` and sends the query
6
+ log entries to Monte Carlo using the pycarlo push ingestion API. Large payloads
7
+ are split into batches to stay under the 1 MB compressed limit.
8
+
9
+ Can be run standalone via CLI or imported (use the ``push()`` function).
10
+
11
+ Substitution points
12
+ -------------------
13
+ - MCD_INGEST_ID (env) / --key-id (CLI) : Monte Carlo ingestion key ID
14
+ - MCD_INGEST_TOKEN (env) / --key-token (CLI) : Monte Carlo ingestion key token
15
+ - MCD_RESOURCE_UUID (env) / --resource-uuid (CLI) : MC resource UUID for this connection
16
+
17
+ Prerequisites
18
+ -------------
19
+ pip install pycarlo
20
+
21
+ Usage
22
+ -----
23
+ python push_query_logs.py \\
24
+ --key-id <MCD_INGEST_ID> \\
25
+ --key-token <MCD_INGEST_TOKEN> \\
26
+ --resource-uuid <MCD_RESOURCE_UUID> \\
27
+ --input-file query_logs_output.json
28
+ """
29
+
30
+ import argparse
31
+ import json
32
+ import os
33
+ from concurrent.futures import ThreadPoolExecutor, as_completed
34
+ from datetime import datetime, timezone
35
+
36
+ from dateutil.parser import isoparse
37
+ from pycarlo.core import Client, Session
38
+ from pycarlo.features.ingestion import IngestionService
39
+ from pycarlo.features.ingestion.models import QueryLogEntry
40
+
41
# ← SUBSTITUTE: set LOG_TYPE to match your warehouse type (query logs use log_type, not resource_type)
# Used as the fallback when the manifest has no "log_type" key.
LOG_TYPE: str = "snowflake"

# Default cap on entries per push batch. Query logs include full SQL text, so
# batches must stay small to keep the compressed payload under the 1 MB limit;
# 50 entries can trigger 413 on active warehouses.
# NOTE(review): the default below is 100, which is above the 50 entries the
# note above warns about — confirm the intended default.
# ← SUBSTITUTE: tune based on average query length
_BATCH_SIZE: int = 100

# query_text longer than this many characters is truncated to prevent 413 errors.
# Some SQL statements (e.g., generated by BI tools) can be 100KB+ and blow up
# compressed payloads even at small batch sizes.
_MAX_QUERY_TEXT_LEN: int = 10_000
54
+
55
+
56
def _build_query_log_entries(queries: list[dict]) -> list[QueryLogEntry]:
    """Turn the manifest's query dicts into ``QueryLogEntry`` objects.

    Query text longer than ``_MAX_QUERY_TEXT_LEN`` characters is cut to that
    length and suffixed with ``"... [TRUNCATED]"``; the number of truncated
    entries is printed once at the end. ``warehouse`` and ``bytes_scanned``
    are carried in the entry's ``extra`` dict when present.
    """
    result = []
    n_truncated = 0
    for record in queries:
        # A missing or null query_text becomes the empty string.
        sql = record.get("query_text") or ""
        # Truncate very long SQL to prevent 413 Request Too Large
        if len(sql) > _MAX_QUERY_TEXT_LEN:
            sql = sql[:_MAX_QUERY_TEXT_LEN] + "... [TRUNCATED]"
            n_truncated += 1

        extra = {}
        warehouse = record.get("warehouse")
        if warehouse is not None:
            extra["warehouse_name"] = warehouse
        scanned = record.get("bytes_scanned")
        if scanned is not None:
            extra["bytes_scanned"] = int(scanned)

        started = record.get("start_time")
        ended = record.get("end_time")
        row_count = record.get("rows_produced")
        result.append(
            QueryLogEntry(
                start_time=isoparse(started) if started else None,
                end_time=isoparse(ended) if ended else None,
                query_text=sql,
                query_id=record.get("query_id"),
                user=record.get("user"),
                returned_rows=None if row_count is None else int(row_count),
                # An empty extra dict is sent as None.
                extra=extra if extra else None,
            )
        )

    if n_truncated:
        print(f"  Truncated {n_truncated} query text(s) exceeding {_MAX_QUERY_TEXT_LEN} chars")
    return result
95
+
96
+
97
def push(
    input_file: str,
    resource_uuid: str,
    key_id: str,
    key_token: str,
    batch_size: int = _BATCH_SIZE,
    output_file: str = "query_logs_push_result.json",
) -> dict:
    """
    Read a query log manifest and push entries to Monte Carlo in batches.

    Args:
        input_file: Path to the JSON manifest; queries are read from its
            ``"queries"`` key and the log type from ``"log_type"`` (falling
            back to ``LOG_TYPE``).
        resource_uuid: Monte Carlo resource UUID passed to ``send_query_logs``.
        key_id: Ingestion key ID used to create each pycarlo ``Session``.
        key_token: Ingestion key token used to create each pycarlo ``Session``.
        batch_size: Maximum number of entries per ``send_query_logs`` call.
            Must be at least 1.
        output_file: Path the JSON push result is written to.

    Returns:
        The push result dict (also written to ``output_file``), holding the
        invocation ID of each batch in batch order. When the manifest has no
        queries nothing is pushed and the result has an empty ID list.

    Raises:
        ValueError: If ``batch_size`` is smaller than 1.
        Exception: Any error raised while pushing a batch is printed and
            re-raised; in that case no result file is written.
    """
    # Fail fast with a clear message. Without this check a zero step makes
    # range() raise, and a negative step yields no batches at all, which then
    # surfaces as ThreadPoolExecutor rejecting max_workers=0.
    if batch_size < 1:
        raise ValueError(f"batch_size must be at least 1, got {batch_size!r}")

    with open(input_file) as fh:
        manifest = json.load(fh)

    queries = manifest.get("queries", [])
    log_type = manifest.get("log_type", LOG_TYPE)
    entries = _build_query_log_entries(queries)
    print(f"Loaded {len(entries)} query log entry/entries from {input_file}")

    def _write_result(invocation_ids: list, total_entries: int, batch_count: int) -> dict:
        """Build the push result dict, write it to output_file as JSON, and return it."""
        push_result = {
            "resource_uuid": resource_uuid,
            "log_type": log_type,
            "invocation_ids": invocation_ids,
            "pushed_at": datetime.now(tz=timezone.utc).isoformat(),
            "total_entries": total_entries,
            "batch_count": batch_count,
            "batch_size": batch_size,
        }
        with open(output_file, "w") as fh:
            json.dump(push_result, fh, indent=2)
        return push_result

    # Nothing to send: still record an (empty) result file and return early.
    if not entries:
        print("No query log entries to push.")
        return _write_result([], 0, 0)

    batches = [
        entries[start : start + batch_size]
        for start in range(0, len(entries), batch_size)
    ]
    total_batches = len(batches)

    def _push_batch(batch: list, batch_num: int) -> str | None:
        """Push one batch through a Session created for this call; return its invocation ID."""
        client = Client(session=Session(mcd_id=key_id, mcd_token=key_token, scope="Ingestion"))
        service = IngestionService(mc_client=client)
        result = service.send_query_logs(
            resource_uuid=resource_uuid,
            log_type=log_type,
            events=batch,
        )
        invocation_id = service.extract_invocation_id(result)
        print(f"  Pushed batch {batch_num}/{total_batches} ({len(batch)} entries) — invocation_id={invocation_id}")
        return invocation_id

    # Push batches in parallel with at most 4 workers; every batch creates its
    # own pycarlo Session inside _push_batch.
    max_workers = min(4, total_batches)
    # Pre-sized so each result lands at its batch index even though batches
    # complete out of order.
    invocation_ids: list[str | None] = [None] * total_batches

    with ThreadPoolExecutor(max_workers=max_workers) as pool:
        futures = {
            pool.submit(_push_batch, batch, i + 1): i
            for i, batch in enumerate(batches)
        }
        for future in as_completed(futures):
            idx = futures[future]
            try:
                invocation_ids[idx] = future.result()
            except Exception as exc:
                print(f"  ERROR pushing batch {idx + 1}: {exc}")
                raise

    print(f"  All {total_batches} batches pushed ({max_workers} workers)")

    push_result = _write_result(invocation_ids, len(entries), total_batches)
    print(f"Push result written to {output_file}")

    return push_result
185
+
186
+
187
def main() -> None:
    """CLI entry point: parse arguments, check the credentials, then call ``push()``."""
    parser = argparse.ArgumentParser(
        description="Push Snowflake query logs from a manifest to Monte Carlo",
    )

    # Credential flags fall back to an environment variable when omitted.
    credential_flags = (
        ("--key-id", "MCD_INGEST_ID", "Monte Carlo ingestion key ID (env: MCD_INGEST_ID)"),
        ("--key-token", "MCD_INGEST_TOKEN", "Monte Carlo ingestion key token (env: MCD_INGEST_TOKEN)"),
        (
            "--resource-uuid",
            "MCD_RESOURCE_UUID",
            "Monte Carlo resource UUID for this Snowflake connection (env: MCD_RESOURCE_UUID)",
        ),
    )
    for flag, env_var, help_text in credential_flags:
        parser.add_argument(flag, default=os.environ.get(env_var), help=help_text)

    parser.add_argument(
        "--input-file",
        default="query_logs_output.json",
        help="Path to the collect manifest to read (default: query_logs_output.json)",
    )
    parser.add_argument(
        "--output-file",
        default="query_logs_push_result.json",
        help="Path to write the push result (default: query_logs_push_result.json)",
    )
    parser.add_argument(
        "--batch-size",
        type=int,
        default=_BATCH_SIZE,
        help=f"Max entries per push batch (default: {_BATCH_SIZE})",
    )
    args = parser.parse_args()

    # A credential counts as missing when neither the flag nor its
    # environment variable supplied a non-empty value.
    supplied = {
        "--key-id": args.key_id,
        "--key-token": args.key_token,
        "--resource-uuid": args.resource_uuid,
    }
    missing = [flag for flag, value in supplied.items() if not value]
    if missing:
        parser.error(f"Missing required arguments: {', '.join(missing)}")

    push(
        input_file=args.input_file,
        resource_uuid=args.resource_uuid,
        key_id=args.key_id,
        key_token=args.key_token,
        batch_size=args.batch_size,
        output_file=args.output_file,
    )
    print("Done.")


if __name__ == "__main__":
    main()