graflag-runner 1.0.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,11 @@
1
+ Metadata-Version: 2.4
2
+ Name: graflag_runner
3
+ Version: 1.0.0
4
+ Summary: Framework for executing graph anomaly detection methods with resource monitoring
5
+ Author: GraFlag Team
6
+ Requires-Python: >=3.7
7
+ Requires-Dist: psutil>=5.8.0
8
+ Dynamic: author
9
+ Dynamic: requires-dist
10
+ Dynamic: requires-python
11
+ Dynamic: summary
@@ -0,0 +1,36 @@
1
"""
GraFlag Runner - Framework for executing graph anomaly detection methods.

This package provides:
- MethodRunner: Main execution wrapper with resource monitoring
- ResourceMonitor: Real-time CPU, memory, and GPU tracking
- ResultWriter: Simple API for methods to save standardized results
- StreamableArray: Wrapper for memory-efficient streaming of large arrays
- subprocess_utils: Utilities for running subprocesses with real-time output
- logging: Simple logging functions (debug, info, warning, error, critical, exception)
"""

from .runner import MethodRunner
from .results import ResultWriter
from .streaming import StreamableArray, stream_write_json
from .subprocess_utils import (
    run_with_realtime_output,
    run_command_list,
    save_output_to_file
)
from .logging_utils import debug, info, warning, error, critical, exception

__version__ = "1.0.0"

# Public API.  Fix: StreamableArray and stream_write_json were imported and
# advertised in the module docstring but missing from __all__, which broke
# ``from graflag_runner import *`` for the streaming helpers.
__all__ = [
    "MethodRunner",
    "ResultWriter",
    "StreamableArray",
    "stream_write_json",
    "run_with_realtime_output",
    "run_command_list",
    "save_output_to_file",
    "debug",
    "info",
    "warning",
    "error",
    "critical",
    "exception",
]
@@ -0,0 +1,6 @@
1
"""Module entry point so the package can run via ``python -m graflag_runner``."""

from .runner import main

if __name__ == "__main__":
    # Delegate straight to the runner's CLI entry point.
    main()
@@ -0,0 +1,11 @@
1
+ Metadata-Version: 2.4
2
+ Name: graflag_runner
3
+ Version: 1.0.0
4
+ Summary: Framework for executing graph anomaly detection methods with resource monitoring
5
+ Author: GraFlag Team
6
+ Requires-Python: >=3.7
7
+ Requires-Dist: psutil>=5.8.0
8
+ Dynamic: author
9
+ Dynamic: requires-dist
10
+ Dynamic: requires-python
11
+ Dynamic: summary
@@ -0,0 +1,25 @@
1
+ __init__.py
2
+ __main__.py
3
+ logging_utils.py
4
+ monitor.py
5
+ results.py
6
+ runner.py
7
+ setup.py
8
+ streaming.py
9
+ subprocess_utils.py
10
+ write_utils.py
11
+ ./__init__.py
12
+ ./__main__.py
13
+ ./logging_utils.py
14
+ ./monitor.py
15
+ ./results.py
16
+ ./runner.py
17
+ ./streaming.py
18
+ ./subprocess_utils.py
19
+ ./write_utils.py
20
+ graflag_runner.egg-info/PKG-INFO
21
+ graflag_runner.egg-info/SOURCES.txt
22
+ graflag_runner.egg-info/dependency_links.txt
23
+ graflag_runner.egg-info/entry_points.txt
24
+ graflag_runner.egg-info/requires.txt
25
+ graflag_runner.egg-info/top_level.txt
@@ -0,0 +1,2 @@
1
+ [console_scripts]
2
+ graflag-run = graflag_runner.runner:main
@@ -0,0 +1 @@
1
+ psutil>=5.8.0
@@ -0,0 +1 @@
1
+ graflag_runner
@@ -0,0 +1,47 @@
1
"""
Logging utilities for GraFlag methods.

Provides consistent logging functions across all methods.
"""

import logging as _logging
import os

# Configure logging once at module import.  The logger is named after the
# running method so every record identifies its source.
_method_name = os.environ.get("METHOD_NAME", "unknown_method")

_LOG_FORMAT = "%(asctime)s - %(name)s - %(levelname)s - %(message)s"
try:
    # ``force=True`` replaces any handlers configured earlier in the process.
    _logging.basicConfig(
        level=_logging.INFO,
        format=_LOG_FORMAT,
        force=True
    )
except ValueError:
    # Fix: ``force`` only exists on Python >= 3.8, but the package declares
    # python_requires >= 3.7, where basicConfig raises ValueError for unknown
    # kwargs and module import would crash.  Emulate ``force`` by clearing
    # the root logger's handlers first.
    _root = _logging.getLogger()
    for _handler in list(_root.handlers):
        _root.removeHandler(_handler)
    _logging.basicConfig(level=_logging.INFO, format=_LOG_FORMAT)

_logger = _logging.getLogger(_method_name)
18
+
19
+
20
def debug(msg, *args, **kwargs):
    """Emit a DEBUG-level record through the shared module logger."""
    return _logger.debug(msg, *args, **kwargs)


def info(msg, *args, **kwargs):
    """Emit an INFO-level record through the shared module logger."""
    return _logger.info(msg, *args, **kwargs)


def warning(msg, *args, **kwargs):
    """Emit a WARNING-level record through the shared module logger."""
    return _logger.warning(msg, *args, **kwargs)


def error(msg, *args, **kwargs):
    """Emit an ERROR-level record through the shared module logger."""
    return _logger.error(msg, *args, **kwargs)


def critical(msg, *args, **kwargs):
    """Emit a CRITICAL-level record through the shared module logger."""
    return _logger.critical(msg, *args, **kwargs)


def exception(msg, *args, **kwargs):
    """Emit an ERROR-level record including the active exception's traceback."""
    return _logger.exception(msg, *args, **kwargs)
@@ -0,0 +1,157 @@
1
+ """Resource monitoring for method execution."""
2
+
3
+ import os
4
+ import time
5
+ import psutil
6
+ import subprocess
7
+ from typing import Optional, Dict
8
+ from pathlib import Path
9
+ import logging
10
+
11
+ logger = logging.getLogger(__name__)
12
+
13
+ # Import ResultWriter for spot() functionality
14
+ from .results import ResultWriter
15
+
16
+
17
class ResourceMonitor:
    """Monitor CPU, memory, and GPU usage during method execution.

    Peaks are tracked for the monitored process tree; periodic samples are
    also appended to ``resources.csv`` via ResultWriter.spot().
    """

    def __init__(self, pid: Optional[int] = None):
        """
        Initialize resource monitor.

        Args:
            pid: Process ID to monitor (None = current process)
        """
        self.pid = pid or os.getpid()
        self.monitoring = False     # flag polled by the monitoring loop
        self.peak_memory_mb = 0     # highest RSS observed so far (MB)
        self.peak_gpu_mb = 0        # highest GPU usage observed so far (MB)
        self.has_gpu = self._check_gpu()

        # System totals (for context in the startup log lines)
        self.total_memory_mb = psutil.virtual_memory().total / (1024 * 1024)
        self.total_gpu_mb = self._get_total_gpu_memory() if self.has_gpu else 0

        # ResultWriter used by the loop to spot() periodic resource samples
        self.result_writer = ResultWriter()

    def _check_gpu(self) -> bool:
        """
        Check whether a usable NVIDIA GPU is present.

        Returns:
            True only if ``nvidia-smi`` exists AND exits with status 0.

        Fix: the previous implementation returned True whenever the binary
        could be launched -- even when it failed (e.g. no driver loaded),
        because ``subprocess.run`` without ``check=True`` never raises on a
        non-zero exit -- and it did not catch PermissionError.
        """
        try:
            result = subprocess.run(
                ["nvidia-smi"],
                stdout=subprocess.DEVNULL,
                stderr=subprocess.DEVNULL,
                timeout=2
            )
            # Non-zero exit means the tool ran but no GPU is usable.
            return result.returncode == 0
        except (OSError, subprocess.TimeoutExpired):
            # OSError covers both FileNotFoundError and PermissionError.
            return False

    def _get_total_gpu_memory(self) -> float:
        """Get total GPU memory in MB (first GPU only)."""
        if not self.has_gpu:
            return 0
        try:
            result = subprocess.run(
                ["nvidia-smi", "--query-gpu=memory.total", "--format=csv,noheader,nounits"],
                capture_output=True,
                text=True,
                timeout=2
            )
            # Only the first GPU's total is reported.
            return float(result.stdout.strip().split('\n')[0])
        except Exception:
            return 0

    def _get_process_memory(self) -> float:
        """Get current process memory usage in MB (including children)."""
        try:
            process = psutil.Process(self.pid)
            # RSS of the main process...
            memory = process.memory_info().rss / (1024 * 1024)
            # ...plus RSS of every (recursive) child that is still alive.
            for child in process.children(recursive=True):
                try:
                    memory += child.memory_info().rss / (1024 * 1024)
                except (psutil.NoSuchProcess, psutil.AccessDenied):
                    continue
            return memory
        except (psutil.NoSuchProcess, psutil.AccessDenied):
            return 0

    def _get_gpu_memory(self) -> float:
        """Get GPU memory usage in MB (all processes in container)."""
        if not self.has_gpu:
            return 0
        try:
            # In a container, we're the only workload, so track total GPU usage.
            # This is more reliable than trying to track specific PIDs since
            # PyTorch/CUDA may spawn processes that aren't direct children.
            result = subprocess.run(
                ["nvidia-smi", "--query-gpu=memory.used", "--format=csv,noheader,nounits"],
                capture_output=True,
                text=True,
                timeout=2
            )

            if result.returncode == 0 and result.stdout.strip():
                return float(result.stdout.strip().split('\n')[0])

            return 0.0
        except Exception as e:
            logger.debug(f"Failed to get GPU memory: {e}")
            return 0

    def start_monitoring(self, interval: float = 1.0):
        """
        Start the monitoring loop.  Blocks until stop_monitoring() is called,
        so run it in a background thread.

        Args:
            interval: Monitoring interval in seconds
        """
        self.monitoring = True

        logger.info(f"[INFO] Resource monitoring started (PID: {self.pid})")
        logger.info(f"   Total memory: {self.total_memory_mb:.0f}MB")
        if self.has_gpu:
            logger.info(f"   Total GPU memory: {self.total_gpu_mb:.0f}MB")
        else:
            logger.info("   GPU: Not available")

        while self.monitoring:
            current_memory = self._get_process_memory()
            current_gpu = self._get_gpu_memory()

            # Update peaks
            self.peak_memory_mb = max(self.peak_memory_mb, current_memory)
            self.peak_gpu_mb = max(self.peak_gpu_mb, current_gpu)

            # Append one sample row to resources.csv via spot()
            if self.result_writer:
                self.result_writer.spot(
                    "resources",
                    memory_mb=round(current_memory, 2),
                    gpu_mb=round(current_gpu, 2)
                )

            time.sleep(interval)

    def stop_monitoring(self):
        """Stop background monitoring (the loop exits after its current sleep)."""
        self.monitoring = False

    def get_summary(self) -> Dict[str, Optional[float]]:
        """
        Get resource usage summary.

        Returns:
            Dictionary with peak/total memory in MB.  GPU entries are None
            when no GPU is available (hence Optional in the annotation).
        """
        return {
            "peak_memory_mb": round(self.peak_memory_mb, 2),
            "peak_gpu_mb": round(self.peak_gpu_mb, 2) if self.has_gpu else None,
            "total_memory_mb": round(self.total_memory_mb, 2),
            "total_gpu_mb": round(self.total_gpu_mb, 2) if self.has_gpu else None,
        }
@@ -0,0 +1,255 @@
1
+ """Result management and standardization."""
2
+
3
+ import json
4
+ import csv
5
+ import os
6
+ import time
7
+ from pathlib import Path
8
+ from typing import List, Dict, Any, Optional, Union, Iterator
9
+ from collections import OrderedDict
10
+ import logging
11
+
12
+ from .streaming import StreamableArray, stream_write_json
13
+
14
+ logger = logging.getLogger(__name__)
15
+
16
+
17
+ class ResultWriter:
18
+ """
19
+ Simple API for methods to save standardized results.
20
+
21
+ Usage in method code:
22
+ from graflag_runner import ResultWriter
23
+
24
+ writer = ResultWriter()
25
+ writer.save_scores(
26
+ result_type="TEMPORAL_NODE_SCORES",
27
+ scores=[[0.1, 0.2], [0.3, 0.4]],
28
+ timestamps=[0, 1],
29
+ node_ids=[0, 1]
30
+ )
31
+ writer.add_metadata(method_name="TADDY", dataset="uci")
32
+ writer.finalize()
33
+ """
34
+
35
+ VALID_RESULT_TYPES = {
36
+ "NODE_ANOMALY_SCORES",
37
+ "EDGE_ANOMALY_SCORES",
38
+ "GRAPH_ANOMALY_SCORES",
39
+ "TEMPORAL_NODE_ANOMALY_SCORES",
40
+ "TEMPORAL_EDGE_ANOMALY_SCORES",
41
+ "TEMPORAL_GRAPH_ANOMALY_SCORES",
42
+ "NODE_STREAM_ANOMALY_SCORES",
43
+ "EDGE_STREAM_ANOMALY_SCORES",
44
+ "GRAPH_STREAM_ANOMALY_SCORES",
45
+ }
46
+
47
+ def __init__(self):
48
+ """
49
+ Initialize result writer.
50
+
51
+ Args:
52
+ output_dir: Directory to save results.json
53
+ """
54
+ self.output_dir = Path(os.environ.get("EXP"))
55
+ self.output_dir.mkdir(parents=True, exist_ok=True)
56
+
57
+ self.results = {
58
+ "result_type": None,
59
+ "scores": None,
60
+ "metadata": {}
61
+ }
62
+
63
+ # Schema tracking for spot() method
64
+ self._spot_schemas: Dict[str, OrderedDict] = {}
65
+
66
+ def save_scores(
67
+ self,
68
+ result_type: str,
69
+ scores: Union[List, StreamableArray, Iterator],
70
+ **kwargs
71
+ ):
72
+ """
73
+ Save anomaly scores with specified result type.
74
+
75
+ Supports both regular lists and streaming for large datasets:
76
+ - Regular list/array: scores = [[...], [...], ...]
77
+ - Generator: scores = StreamableArray(generate_rows())
78
+ - Raw iterator: Will be wrapped in StreamableArray automatically
79
+
80
+ Args:
81
+ result_type: One of VALID_RESULT_TYPES
82
+ scores: Anomaly scores (list, StreamableArray, or generator)
83
+ Can be a generator for memory-efficient handling of large arrays
84
+ **kwargs: Additional fields (timestamps, node_ids, edges, etc.)
85
+ """
86
+ if result_type not in self.VALID_RESULT_TYPES:
87
+ raise ValueError(
88
+ f"Invalid result_type: {result_type}. "
89
+ f"Must be one of {self.VALID_RESULT_TYPES}"
90
+ )
91
+
92
+ self.results["result_type"] = result_type
93
+
94
+ # Wrap raw generators/iterators in StreamableArray
95
+ if hasattr(scores, '__iter__') and hasattr(scores, '__next__'):
96
+ if not isinstance(scores, StreamableArray):
97
+ scores = StreamableArray(scores)
98
+ logger.info("[INFO] Wrapped generator in StreamableArray for streaming")
99
+
100
+ self.results["scores"] = scores
101
+
102
+ # Add optional fields
103
+ for key, value in kwargs.items():
104
+ self.results[key] = value
105
+
106
+ if isinstance(scores, StreamableArray):
107
+ logger.info(f"[OK] Streamable scores registered: {result_type}")
108
+ else:
109
+ logger.info(f"[OK] Scores saved: {result_type}")
110
+
111
+ def add_metadata(self, **kwargs):
112
+ """
113
+ Add metadata fields.
114
+
115
+ Args:
116
+ **kwargs: Metadata key-value pairs (method_name, dataset, etc.)
117
+ """
118
+ self.results["metadata"].update(kwargs)
119
+
120
+ def add_resource_metrics(
121
+ self,
122
+ exec_time_ms: float,
123
+ peak_memory_mb: float,
124
+ peak_gpu_mb: Optional[float] = None
125
+ ):
126
+ """
127
+ Add resource consumption metrics.
128
+
129
+ Args:
130
+ exec_time_ms: Execution time in milliseconds
131
+ peak_memory_mb: Peak memory usage in MB
132
+ peak_gpu_mb: Peak GPU memory in MB (optional)
133
+ """
134
+ self.results["metadata"]["exec_time_ms"] = round(exec_time_ms, 2)
135
+ self.results["metadata"]["peak_memory_mb"] = round(peak_memory_mb, 2)
136
+ if peak_gpu_mb is not None:
137
+ self.results["metadata"]["peak_gpu_mb"] = round(peak_gpu_mb, 2)
138
+
139
+ def finalize(self) -> Path:
140
+ """
141
+ Write results to results.json file.
142
+
143
+ Uses streaming for large score arrays to avoid memory issues.
144
+ Regular lists are written normally, StreamableArray objects are
145
+ written row-by-row without loading the entire array into memory.
146
+
147
+ Returns:
148
+ Path to results.json
149
+ """
150
+ output_file = self.output_dir / "results.json"
151
+
152
+ # Validation
153
+ if self.results["result_type"] is None:
154
+ raise ValueError("No scores saved. Call save_scores() first.")
155
+
156
+ # Check if we need streaming
157
+ has_streamable = isinstance(self.results.get("scores"), StreamableArray)
158
+
159
+ if has_streamable:
160
+ logger.info("[INFO] Writing results with streaming (large data)...")
161
+ stream_write_json(self.results, output_file)
162
+ else:
163
+ # Regular JSON dump for small data
164
+ logger.info("[INFO] Writing results (standard)...")
165
+ with open(output_file, 'w') as f:
166
+ json.dump(self.results, f, indent=2)
167
+
168
+ logger.info(f"[OK] Results written to: {output_file}")
169
+ return output_file
170
+
171
+ def spot(self, metric_key: str, **metrics):
172
+ """
173
+ Track real-time metrics to a CSV file with schema validation.
174
+
175
+ This method is used for monitoring progress during training/execution:
176
+ - Creates a CSV file named "{metric_key}.csv" in the output directory
177
+ - First column is always "timestamp" (Unix timestamp)
178
+ - Subsequent columns are the metric keys provided in **metrics
179
+ - Schema is locked after first call - subsequent calls must have same keys
180
+ - Automatically appends new rows on each call
181
+
182
+ Args:
183
+ metric_key: Identifier for the metric group (e.g., "training", "validation", "resources")
184
+ Used as the CSV filename: "{metric_key}.csv"
185
+ **metrics: Metric key-value pairs to record (e.g., loss=0.5, auc=0.85)
186
+
187
+ Raises:
188
+ ValueError: If schema changes after first call (different metric keys)
189
+
190
+ Examples:
191
+ # Track training metrics
192
+ writer.spot("training", epoch=1, loss=0.5, auc=0.85)
193
+ writer.spot("training", epoch=2, loss=0.3, auc=0.90) # Must have same keys
194
+
195
+ # Track resource usage
196
+ writer.spot("resources", memory_mb=512.5, gpu_mb=2048.0)
197
+
198
+ # Track validation metrics separately
199
+ writer.spot("validation", epoch=1, val_loss=0.6, val_auc=0.82)
200
+ """
201
+ if not metrics:
202
+ raise ValueError("At least one metric must be provided to spot()")
203
+
204
+ # Get CSV file path
205
+ csv_file = self.output_dir / f"{metric_key}.csv"
206
+
207
+ # Get current schema (ordered dict to preserve column order)
208
+ current_schema = OrderedDict(sorted(metrics.items()))
209
+
210
+ # Check if this is the first call for this metric_key
211
+ if metric_key not in self._spot_schemas:
212
+ # First call - establish schema
213
+ self._spot_schemas[metric_key] = current_schema
214
+
215
+ # Create CSV file with header
216
+ with open(csv_file, 'w', newline='') as f:
217
+ writer = csv.writer(f)
218
+ header = ['timestamp'] + list(current_schema.keys())
219
+ writer.writerow(header)
220
+
221
+ logger.debug(f"[INFO] Created spot metric file: {csv_file}")
222
+ logger.debug(f" Schema: {list(current_schema.keys())}")
223
+ else:
224
+ # Validate schema matches
225
+ expected_schema = self._spot_schemas[metric_key]
226
+ if set(current_schema.keys()) != set(expected_schema.keys()):
227
+ raise ValueError(
228
+ f"Schema mismatch for metric '{metric_key}'.\n"
229
+ f"Expected keys: {list(expected_schema.keys())}\n"
230
+ f"Provided keys: {list(current_schema.keys())}\n"
231
+ f"All spot() calls for the same metric_key must have identical metric keys."
232
+ )
233
+
234
+ # Append row to CSV
235
+ timestamp = time.time()
236
+ with open(csv_file, 'a', newline='') as f:
237
+ writer = csv.writer(f)
238
+ # Use the established schema order
239
+ schema = self._spot_schemas[metric_key]
240
+ row = [timestamp] + [metrics[key] for key in schema.keys()]
241
+ writer.writerow(row)
242
+
243
+ @staticmethod
244
+ def load_results(results_file: str) -> Dict[str, Any]:
245
+ """
246
+ Load results from JSON file.
247
+
248
+ Args:
249
+ results_file: Path to results.json
250
+
251
+ Returns:
252
+ Results dictionary
253
+ """
254
+ with open(results_file, 'r') as f:
255
+ return json.load(f)
@@ -0,0 +1,326 @@
1
+ """Main method runner with resource monitoring."""
2
+
3
+ import os
4
+ import sys
5
+ import json
6
+ import time
7
+ import subprocess
8
+ import threading
9
+ from datetime import datetime, timezone
10
+ from pathlib import Path
11
+ from typing import Optional, Dict, Any
12
+ import logging
13
+
14
+ from .monitor import ResourceMonitor
15
+ from .results import ResultWriter
16
+ from .subprocess_utils import run_with_realtime_output, save_output_to_file
17
+
18
# Configure process-wide logging for the runner.
logging.basicConfig(
    format="%(asctime)s - %(levelname)s - %(message)s",
    level=logging.INFO,
)
logger = logging.getLogger(__name__)
23
+
24
+
25
class MethodRunner:
    """
    Wrapper for executing graph anomaly detection methods.

    Features:
    - Automatic resource monitoring (CPU, memory, GPU)
    - Execution timing
    - Result standardization
    - Error handling

    Usage in Dockerfile:
        CMD ["python", "-m", "graflag_runner.runner"]

    Environment variables required:
    - DATA: Input dataset path
    - EXP: Experiment output path
    - METHOD_NAME: Method name
    - COMMAND: Command to execute (e.g., "python main.py --dataset uci")
    """

    def __init__(
        self,
        data_dir: str,
        exp_dir: str,
        method_name: str,
        command: str,
        monitor_interval: float = 1.0,
        pass_env_args: bool = False,
        **kwargs
    ):
        """
        Initialize method runner.

        Args:
            data_dir: Input dataset directory
            exp_dir: Experiment output directory
            method_name: Name of the method
            command: Command to execute
            monitor_interval: Resource monitoring interval in seconds (default: 1.0)
            pass_env_args: Whether to extract env vars starting with _ and pass as CLI args (default: False)
            **kwargs: Additional configuration
        """
        self.data_dir = Path(data_dir)
        self.exp_dir = Path(exp_dir)
        self.method_name = method_name
        self.command = command
        self.monitor_interval = monitor_interval
        self.pass_env_args = pass_env_args
        self.config = kwargs

        # Extract environment variables starting with _ if requested
        if self.pass_env_args:
            self.command = self._build_command_with_env_args()

        # Create experiment directory
        self.exp_dir.mkdir(parents=True, exist_ok=True)

        # Initialize resource monitor (uses spot() method now)
        self.monitor = ResourceMonitor()

        logger.info("=" * 60)
        logger.info(f"GraFlag Runner - {self.method_name}")
        logger.info("=" * 60)
        logger.info(f"[INFO] Data: {self.data_dir}")
        logger.info(f"[INFO] Output: {self.exp_dir}")
        logger.info(f"[INFO] Command: {self.command}")
        logger.info(f"[INFO] Monitor interval: {self.monitor_interval}s")
        logger.info(f"[INFO] Pass env args: {self.pass_env_args}")
        logger.info("")

    def _build_command_with_env_args(self) -> str:
        """
        Extract environment variables starting with _ and append them as CLI arguments.

        Example:
            _BATCH_SIZE=128 -> --batch_size 128
            _LEARNING_RATE=0.001 -> --learning_rate 0.001

        Returns:
            Command string with appended arguments
        """
        import shlex  # local import: only needed for this opt-in feature

        env_args = []

        for key, value in os.environ.items():
            # Fix: require at least one character after the underscore.  Most
            # shells export "_" (path of the last command run), which used to
            # produce a bogus "-- <value>" argument with an empty flag name.
            if key.startswith("_") and len(key) > 1:
                # Remove leading underscore and convert to lowercase
                arg_name = key[1:].lower()
                # Quote the value so spaces/metacharacters survive the
                # shell=True execution in run().
                env_args.append(f"--{arg_name} {shlex.quote(value)}")

        if env_args:
            args_str = " ".join(env_args)
            logger.info(f"[INFO] Extracted env args: {args_str}")
            return f"{self.command} {args_str}"

        return self.command

    def _save_status(self, status: str, exec_time_ms: Optional[float] = None,
                     resources: Optional[dict] = None, exit_code: Optional[int] = None,
                     error: Optional[str] = None):
        """Save execution status to status.json in the experiment directory.

        Best-effort: a write failure is logged, never raised, so status
        reporting cannot mask the real outcome of the run.
        """
        status_data = {
            "status": status,
            "timestamp": datetime.now(timezone.utc).isoformat(),
            "method_name": self.method_name,
        }
        if exec_time_ms is not None:
            status_data["exec_time_ms"] = round(exec_time_ms, 2)
        if resources is not None:
            status_data["resources"] = resources
        if exit_code is not None:
            status_data["exit_code"] = exit_code
        if error is not None:
            status_data["error"] = str(error)

        status_file = self.exp_dir / "status.json"
        try:
            with open(status_file, 'w') as f:
                json.dump(status_data, f, indent=2)
        except Exception as e:
            logger.warning(f"Failed to write status.json: {e}")

    def run(self) -> Dict[str, Any]:
        """
        Execute method with monitoring.

        Returns:
            Dictionary with execution summary

        Raises:
            RuntimeError: If the method process exits non-zero.
        """
        # Save initial running status
        self._save_status("running")

        # Start monitoring in background thread
        monitor_thread = threading.Thread(
            target=self.monitor.start_monitoring,
            args=(self.monitor_interval,),
            daemon=True
        )
        monitor_thread.start()

        # Record start time
        start_time = time.time()

        try:
            logger.info("[INFO] Starting method execution...")

            # Execute command with real-time output using utility function
            return_code, captured_output = run_with_realtime_output(
                command=self.command,
                shell=True,
                cwd=os.getcwd()
            )

            # Record end time
            end_time = time.time()
            exec_time_ms = (end_time - start_time) * 1000

            # Stop monitoring
            self.monitor.stop_monitoring()
            monitor_thread.join(timeout=2)

            # Get resource summary
            resources = self.monitor.get_summary()

            # Log results
            logger.info("")
            logger.info("[INFO] Execution Summary:")
            logger.info(f"   [INFO] Execution time: {exec_time_ms:.2f}ms")
            logger.info(f"   [INFO] Peak memory: {resources['peak_memory_mb']:.2f}MB")
            if resources['peak_gpu_mb'] is not None:
                logger.info(f"   [INFO] Peak GPU memory: {resources['peak_gpu_mb']:.2f}MB")
            logger.info("")

            # Save captured output to file using utility function
            output_file = self.exp_dir / "method_output.txt"
            save_output_to_file(
                output_lines=captured_output,
                output_file=str(output_file),
                header="=== METHOD OUTPUT ===\n"
            )
            logger.info(f"[INFO] Full output saved to: {output_file}")

            if return_code == 0:
                logger.info("[OK] Method execution completed successfully")
                self._save_status("completed", exec_time_ms, resources, exit_code=0)
            else:
                logger.error(f"[FAIL] Method execution failed with exit code {return_code}")
                logger.error(f"[INFO] Check {output_file} for details")
                self._save_status("failed", exec_time_ms, resources, exit_code=return_code)
                raise RuntimeError(f"Method execution failed with exit code {return_code}")

            return {
                "success": True,
                "exec_time_ms": exec_time_ms,
                "resources": resources,
                "output_file": str(output_file)
            }

        except Exception as e:
            # Stop monitoring on error and let the daemon thread wind down.
            self.monitor.stop_monitoring()
            monitor_thread.join(timeout=2)
            # Save failed status (only if not already saved by return_code check)
            status_file = self.exp_dir / "status.json"
            try:
                existing = json.loads(status_file.read_text())
                if existing.get("status") == "running":
                    end_time = time.time()
                    self._save_status("failed", (end_time - start_time) * 1000, error=str(e))
            except Exception:
                self._save_status("failed", error=str(e))
            logger.error(f"[FAIL] Execution error: {e}")
            raise

    @classmethod
    def from_env(cls, pass_env_args: bool = False):
        """
        Create runner from environment variables.

        Args:
            pass_env_args: Whether to pass _* env vars as CLI args (default: False)

        Environment variables:
        - DATA: Input dataset path
        - EXP: Experiment output path
        - METHOD_NAME: Method name
        - COMMAND: Command to execute
        - MONITOR_INTERVAL: Resource monitoring interval in seconds (optional, default: 1.0)
        - SUPPORTED_DATASETS: Comma-separated list of compatible dataset patterns (optional)

        Raises:
            ValueError: If a required variable is missing or the dataset is
                not compatible with the method.
        """
        data_dir = os.environ.get("DATA")
        exp_dir = os.environ.get("EXP")
        method_name = os.environ.get("METHOD_NAME", "Unknown")
        command = os.environ.get("COMMAND")
        monitor_interval = float(os.environ.get("MONITOR_INTERVAL", "1.0"))
        supported_datasets = os.environ.get("SUPPORTED_DATASETS", "")

        if not all([data_dir, exp_dir, command]):
            raise ValueError(
                "Missing required environment variables: DATA, EXP, COMMAND"
            )

        # Validate dataset compatibility if SUPPORTED_DATASETS is specified
        if supported_datasets:
            dataset_name = os.path.basename(data_dir.rstrip('/'))
            patterns = [p.strip() for p in supported_datasets.split(',') if p.strip()]

            is_compatible = False
            for pattern in patterns:
                # Support wildcard patterns (e.g., "generaldyg_*", "btc_*")
                if pattern.endswith('*'):
                    prefix = pattern[:-1]
                    if dataset_name.startswith(prefix):
                        is_compatible = True
                        break
                elif pattern == dataset_name:
                    is_compatible = True
                    break

            if not is_compatible:
                logger.error(f"[FAIL] Dataset '{dataset_name}' is not compatible with method '{method_name}'")
                logger.error(f"   Supported datasets: {', '.join(patterns)}")
                raise ValueError(
                    f"Dataset '{dataset_name}' is not compatible with method '{method_name}'. "
                    f"Supported datasets: {', '.join(patterns)}"
                )

        return cls(
            data_dir=data_dir,
            exp_dir=exp_dir,
            method_name=method_name,
            command=command,
            monitor_interval=monitor_interval,
            pass_env_args=pass_env_args
        )
298
+
299
+
300
def main():
    """CLI entry point for running as a module.

    Builds a MethodRunner from environment variables, executes it, and exits
    with status 0 on success or 1 on any failure.
    """
    import argparse

    parser = argparse.ArgumentParser(description="GraFlag Method Runner")
    parser.add_argument(
        "--pass-env-args",
        action="store_true",
        help="Extract environment variables starting with _ and pass as CLI arguments"
    )

    args = parser.parse_args()

    try:
        runner = MethodRunner.from_env(pass_env_args=args.pass_env_args)
        # run() already persists its summary to status.json, so the return
        # value is intentionally discarded here (previously bound to an
        # unused local).
        runner.run()

        logger.info("[OK] Runner completed successfully")
        sys.exit(0)

    except Exception as e:
        logger.error(f"[FAIL] Runner failed: {e}")
        sys.exit(1)


if __name__ == "__main__":
    main()
@@ -0,0 +1,4 @@
1
+ [egg_info]
2
+ tag_build =
3
+ tag_date = 0
4
+
@@ -0,0 +1,21 @@
1
"""Setup script for graflag_runner package."""

from setuptools import setup

setup(
    name="graflag_runner",
    version="1.0.0",
    description="Framework for executing graph anomaly detection methods with resource monitoring",
    author="GraFlag Team",
    packages=["graflag_runner"],
    # Sources live in the repository root rather than a graflag_runner/
    # subdirectory, hence the explicit mapping.
    package_dir={"graflag_runner": "."},
    install_requires=[
        "psutil>=5.8.0",
    ],
    # NOTE(review): logging_utils calls logging.basicConfig(force=True),
    # which only exists on Python >= 3.8 -- confirm the 3.7 floor is real.
    python_requires=">=3.7",
    entry_points={
        # Installs a ``graflag-run`` console command -> runner.main()
        "console_scripts": [
            "graflag-run=graflag_runner.runner:main",
        ],
    },
)
@@ -0,0 +1,160 @@
1
+ """Streaming utilities for handling large result data."""
2
+
3
+ import json
4
+ from typing import Iterator, Any, Union, List
5
+ from pathlib import Path
6
+ import logging
7
+
8
+ logger = logging.getLogger(__name__)
9
+
10
+
11
class StreamableArray:
    """
    Wrapper for large arrays that can be generated on-demand.

    This allows methods to avoid loading entire large matrices into memory
    by generating rows/values lazily as needed during JSON serialization.

    Usage:
        # Instead of building a huge list:
        # scores = [[...], [...], ...]  # 10GB in memory

        # Use a generator:
        def generate_rows():
            for i in range(1000000):
                yield compute_row(i)

        scores = StreamableArray(generate_rows())
    """

    def __init__(self, generator: Iterator):
        """
        Initialize with a generator, iterator, or any iterable.

        Args:
            generator: Iterable that yields array elements (rows, values, etc.)
                A generator is consumed once; a reiterable (e.g. list) can be
                iterated repeatedly.
        """
        self.generator = generator

    def __iter__(self):
        """Make this object iterable."""
        # Generalization: ``iter()`` makes plain iterables (e.g. lists) work
        # too.  For a generator it returns the generator itself, preserving
        # the original single-pass behavior; previously a list argument made
        # ``for`` raise TypeError because the list itself was returned.
        return iter(self.generator)
42
+
43
+
44
def stream_write_json(data: dict, output_path: Path, streamable_keys: List[str] = None):
    """
    Write JSON to file with support for streaming large arrays.

    This function writes JSON incrementally, streaming any StreamableArray objects
    row-by-row instead of loading them entirely into memory. Regular data is
    serialized normally.

    Args:
        data: Dictionary to serialize to JSON
        output_path: Path to output JSON file
        streamable_keys: Keys in data that contain StreamableArray objects
            (auto-detected if None)

    Example:
        data = {
            "result_type": "TEMPORAL_EDGE_ANOMALY_SCORES",
            "scores": StreamableArray(generate_scores()),  # Large!
            "timestamps": [0, 1, 2, ...],  # Regular list
            "metadata": {...}
        }
        stream_write_json(data, Path("results.json"))
    """
    if streamable_keys is None:
        # Auto-detect StreamableArray objects
        streamable_keys = [
            key for key, value in data.items()
            if isinstance(value, StreamableArray)
        ]

    # NOTE(review): streamable_keys only feeds this debug line; the per-value
    # isinstance check inside the loop is what actually selects streaming.
    logger.debug(f"Stream writing JSON with streamable keys: {streamable_keys}")

    with open(output_path, 'w') as f:
        # The object is written by hand so streamable fields never have to be
        # materialized; everything else goes through json.dumps as usual.
        f.write('{\n')

        keys = list(data.keys())
        for idx, key in enumerate(keys):
            value = data[key]
            is_last = (idx == len(keys) - 1)

            # Write key at one level (two spaces) of indentation
            f.write(f'  {json.dumps(key)}: ')

            # Handle streamable arrays
            if isinstance(value, StreamableArray):
                # Stream element-by-element: constant memory regardless of size
                _stream_write_array(f, value, indent=2)
            else:
                # Regular JSON serialization with proper indentation
                serialized = json.dumps(value, indent=2)
                # Indent all lines after the first so nested structures line
                # up with the two-space top-level indent used above
                lines = serialized.split('\n')
                f.write(lines[0])
                for line in lines[1:]:
                    f.write('\n  ' + line)

            # Comma between fields (none after the last, to keep valid JSON)
            if not is_last:
                f.write(',')
            f.write('\n')

        f.write('}\n')

    logger.info(f"[OK] Streamed JSON written to: {output_path}")
107
+
108
+
109
def _stream_write_array(f, streamable: StreamableArray, indent: int = 0):
    """
    Internal function to stream write an array element by element.

    Args:
        f: File handle to write to
        streamable: StreamableArray to serialize
        indent: Indentation level (number of spaces)
    """
    indent_str = ' ' * indent
    f.write('[\n')

    first = True
    row_count = 0

    for element in streamable:
        # Commas are written BEFORE each element (except the first) so the
        # array never ends with a trailing comma, which is invalid JSON.
        if not first:
            f.write(',\n')

        # Write element with indentation (one level deeper than the bracket)
        f.write(f'{indent_str}  ')
        json.dump(element, f)

        first = False
        row_count += 1

        # Progress logging for very large arrays (every 1000 rows)
        if row_count % 1000 == 0:
            logger.debug(f"  ... streamed {row_count} rows")

    # Closing bracket on its own line, aligned with the opening one
    f.write(f'\n{indent_str}]')

    if row_count >= 1000:
        logger.info(f"  Streamed total: {row_count} rows")
143
+
144
+
145
def is_streamable(obj: Any) -> bool:
    """
    Check if an object is streamable (generator/iterator).

    Args:
        obj: Object to check

    Returns:
        True if object is a StreamableArray, or a generator/iterator
        (excluding strings and common containers)
    """
    if isinstance(obj, StreamableArray):
        return True
    if isinstance(obj, (str, bytes, list, tuple, dict)):
        return False
    return hasattr(obj, '__iter__') and hasattr(obj, '__next__')
@@ -0,0 +1,166 @@
1
+ """Subprocess utilities for real-time output streaming."""
2
+
3
+ import os
4
+ import sys
5
+ import subprocess
6
+ from typing import List, Tuple, Optional, Dict
7
+
8
+
9
def run_with_realtime_output(
    command: str,
    shell: bool = True,
    cwd: Optional[str] = None,
    env: Optional[Dict[str, str]] = None,
    stdin_passthrough: bool = True
) -> Tuple[int, List[str]]:
    """
    Run a command with real-time output streaming while capturing all output.

    This function streams subprocess output (stdout and stderr, merged) in
    real-time to stdout while also capturing it for later use. Optionally
    forwards stdin from parent to subprocess, enabling interactive processes
    and piped input.

    Args:
        command: Command to execute (string if shell=True, list if shell=False)
        shell: Whether to execute through shell (default: True)
        cwd: Working directory for the command (default: current directory)
        env: Environment variables dict (default: copy of current environment).
            The mapping is copied before use; the caller's dict is never mutated.
        stdin_passthrough: Forward stdin from parent to subprocess (default: True)
            Set to False to detach stdin (subprocess reads from os.devnull)

    Returns:
        Tuple of (return_code, captured_output_lines)
        - return_code: Exit code of the process
        - captured_output_lines: List of output lines (includes newlines)

    Example:
        >>> # Basic usage
        >>> return_code, output = run_with_realtime_output("python train.py")
        >>> if return_code == 0:
        >>>     with open("log.txt", "w") as f:
        >>>         f.writelines(output)

        >>> # With piped input
        >>> return_code, output = run_with_realtime_output(
        >>>     "python process.py",
        >>>     stdin_passthrough=True  # Allows: echo "data" | python wrapper.py
        >>> )
    """
    # PYTHONUNBUFFERED makes Python children flush output line-by-line, which
    # real-time streaming depends on. Always work on a copy so a
    # caller-supplied env dict is never mutated, and set the flag for
    # caller-supplied environments too (previously only the default env got it).
    run_env = os.environ.copy() if env is None else dict(env)
    run_env['PYTHONUNBUFFERED'] = '1'

    # None inherits the parent's stdin (allows piped input & interactive use);
    # DEVNULL detaches it entirely.
    stdin_config = None if stdin_passthrough else subprocess.DEVNULL

    process = subprocess.Popen(
        command,
        shell=shell,
        stdin=stdin_config,
        stdout=subprocess.PIPE,
        stderr=subprocess.STDOUT,  # Merge stderr into stdout
        text=True,
        bufsize=1,  # Line buffered
        env=run_env,
        cwd=cwd
    )

    # Echo each line to our stdout as it arrives while keeping a copy for the
    # caller. The context manager closes the pipe and reaps the child even if
    # echoing raises, so no zombie process or leaked file descriptor remains.
    captured_output: List[str] = []
    with process:
        for line in process.stdout:
            print(line, end='', flush=True)
            captured_output.append(line)

    return process.returncode, captured_output
92
+
93
+
94
def run_command_list(
    commands: List[str],
    cwd: Optional[str] = None,
    env: Optional[Dict[str, str]] = None,
    stop_on_error: bool = True,
    stdin_passthrough: bool = False
) -> List[Tuple[str, int, List[str]]]:
    """
    Execute a sequence of commands, streaming each one's output live.

    Args:
        commands: List of commands to execute, in order
        cwd: Working directory applied to every command
        env: Environment variables dict passed to every command
        stop_on_error: If True, stop executing after first failure
        stdin_passthrough: Forward stdin to commands (default: False for batch jobs)

    Returns:
        List of tuples: [(command, return_code, output_lines), ...]

    Example:
        >>> results = run_command_list([
        >>>     "python prepare_data.py",
        >>>     "python train_model.py"
        >>> ])
        >>> for cmd, code, output in results:
        >>>     if code != 0:
        >>>         print(f"Failed: {cmd}")
    """
    separator = '=' * 60
    outcomes: List[Tuple[str, int, List[str]]] = []

    for cmd in commands:
        # Banner makes each command's section easy to spot in a long log.
        print(f"\n{separator}")
        print(f"Running: {cmd}")
        print(f"{separator}\n")

        exit_code, lines = run_with_realtime_output(
            command=cmd,
            cwd=cwd,
            env=env,
            stdin_passthrough=stdin_passthrough
        )
        outcomes.append((cmd, exit_code, lines))

        if stop_on_error and exit_code != 0:
            print(f"\n[ERROR] Command failed with exit code {exit_code}")
            break

    return outcomes
144
+
145
+
146
def save_output_to_file(
    output_lines: List[str],
    output_file: str,
    header: str = "=== OUTPUT ===\n"
) -> None:
    """
    Persist captured output lines to a file.

    Args:
        output_lines: List of output lines to save
        output_file: Path to output file
        header: Optional header to prepend to the file; an empty/falsy
            header is omitted entirely

    Example:
        >>> return_code, output = run_with_realtime_output("python train.py")
        >>> save_output_to_file(output, "training.log", "=== TRAINING LOG ===\n")
    """
    # Join once and write in a single call rather than line-by-line.
    body = ''.join(output_lines)
    with open(output_file, 'w') as sink:
        if header:
            sink.write(header)
        sink.write(body)
File without changes