ado-git-repo-insights 1.2.1__py3-none-any.whl → 2.7.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ado_git_repo_insights/__init__.py +3 -3
- ado_git_repo_insights/cli.py +703 -354
- ado_git_repo_insights/config.py +186 -186
- ado_git_repo_insights/extractor/__init__.py +1 -1
- ado_git_repo_insights/extractor/ado_client.py +452 -246
- ado_git_repo_insights/extractor/pr_extractor.py +239 -239
- ado_git_repo_insights/ml/__init__.py +13 -0
- ado_git_repo_insights/ml/date_utils.py +70 -0
- ado_git_repo_insights/ml/forecaster.py +288 -0
- ado_git_repo_insights/ml/insights.py +497 -0
- ado_git_repo_insights/persistence/__init__.py +1 -1
- ado_git_repo_insights/persistence/database.py +193 -193
- ado_git_repo_insights/persistence/models.py +207 -145
- ado_git_repo_insights/persistence/repository.py +662 -376
- ado_git_repo_insights/transform/__init__.py +1 -1
- ado_git_repo_insights/transform/aggregators.py +950 -0
- ado_git_repo_insights/transform/csv_generator.py +132 -132
- ado_git_repo_insights/utils/__init__.py +1 -1
- ado_git_repo_insights/utils/datetime_utils.py +101 -101
- ado_git_repo_insights/utils/logging_config.py +172 -172
- ado_git_repo_insights/utils/run_summary.py +207 -206
- {ado_git_repo_insights-1.2.1.dist-info → ado_git_repo_insights-2.7.4.dist-info}/METADATA +56 -15
- ado_git_repo_insights-2.7.4.dist-info/RECORD +27 -0
- {ado_git_repo_insights-1.2.1.dist-info → ado_git_repo_insights-2.7.4.dist-info}/licenses/LICENSE +21 -21
- ado_git_repo_insights-1.2.1.dist-info/RECORD +0 -22
- {ado_git_repo_insights-1.2.1.dist-info → ado_git_repo_insights-2.7.4.dist-info}/WHEEL +0 -0
- {ado_git_repo_insights-1.2.1.dist-info → ado_git_repo_insights-2.7.4.dist-info}/entry_points.txt +0 -0
- {ado_git_repo_insights-1.2.1.dist-info → ado_git_repo_insights-2.7.4.dist-info}/top_level.txt +0 -0
|
@@ -1,172 +1,172 @@
|
|
|
1
|
-
"""Logging configuration with selective secret redaction.
|
|
2
|
-
|
|
3
|
-
Provides console and JSONL logging formats with precise redaction rules.
|
|
4
|
-
"""
|
|
5
|
-
|
|
6
|
-
from __future__ import annotations
|
|
7
|
-
|
|
8
|
-
import json
|
|
9
|
-
import logging
|
|
10
|
-
import os
|
|
11
|
-
import re
|
|
12
|
-
from dataclasses import dataclass, field
|
|
13
|
-
from pathlib import Path
|
|
14
|
-
from typing import Any
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
@dataclass
class RedactionConfig:
    """Configuration for selective secret redaction.

    Secrets are recognised in two ways: by the key they are stored under
    (``key_denylist``) and by the shape of the value itself
    (``value_patterns``).
    """

    # Known secret value patterns (regex). Every match is replaced by the
    # redaction marker.
    value_patterns: list[str] = field(
        default_factory=lambda: [
            r"[A-Za-z0-9]{52}",  # Azure DevOps PAT format (52 chars)
            r"Bearer\s+[A-Za-z0-9\-._~+/]+=*",  # Bearer tokens
        ]
    )

    # Explicit key deny-list (exact matches, case-insensitive).
    key_denylist: set[str] = field(
        default_factory=lambda: {
            "pat",
            "personal_access_token",
            "auth_header",
            "authorization",
            "webhook_url",
            "secret",
            "password",
        }
    )

    def should_redact_key(self, key: str) -> bool:
        """Check if a key should be redacted based on the deny-list.

        The comparison ignores case on both sides, so a caller-supplied
        deny-list entry such as ``"API_KEY"`` matches ``"api_key"``.

        Args:
            key: Key to look up. Non-string keys never match.

        Returns:
            True when the key is on the deny-list, otherwise False.
        """
        if not isinstance(key, str):
            return False
        lowered = key.lower()
        # Fast path for the (lower-case) default entries.
        if lowered in self.key_denylist:
            return True
        # Slow path: tolerate deny-list entries that contain upper-case letters.
        return any(lowered == entry.lower() for entry in self.key_denylist)

    def redact_value(self, value: str) -> str:
        """Redact known secret patterns in a value.

        Args:
            value: Text to scan.

        Returns:
            ``value`` with every match of every pattern in ``value_patterns``
            replaced by ``***REDACTED***``. Patterns are applied in order.
        """
        result = value
        for pattern in self.value_patterns:
            result = re.sub(pattern, "***REDACTED***", result)
        return result
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
class RedactingFormatter(logging.Formatter):
    """Formatter that redacts sensitive information from rendered log lines."""

    def __init__(self, fmt: str | None = None, datefmt: str | None = None) -> None:
        """Create the formatter with the default redaction rules.

        Args:
            fmt: Log format string, passed through to ``logging.Formatter``.
            datefmt: Date format string, passed through to ``logging.Formatter``.
        """
        super().__init__(fmt, datefmt)
        self.redaction_config = RedactionConfig()

    def format(self, record: logging.LogRecord) -> str:
        """Render the record, then scrub secrets from the finished text.

        Redaction is applied to the fully formatted line instead of to
        ``record.msg`` and ``record.args`` separately, because:

        * the record is left untouched, so other handlers that receive the
          same record still see the original message and arguments;
        * ``record.args`` may be a mapping (``"%(user)s"`` style); rebuilding
          it as a tuple would break interpolation;
        * secrets that only become visible after interpolation
          (``"Bearer %s"``) or that sit in exception/stack text are covered.

        Args:
            record: The log record to render.

        Returns:
            The formatted log line with secret patterns replaced.
        """
        rendered = super().format(record)
        return self.redaction_config.redact_value(rendered)
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
class JsonlHandler(logging.Handler):
    """Handler that writes structured JSONL log entries with redaction.

    Each record becomes one JSON object per line with the keys
    ``timestamp``, ``level``, ``logger`` and ``message``. When the record
    carries a dict in its ``extra`` attribute, a redacted copy is added
    under ``context``.
    """

    def __init__(self, log_file: Path) -> None:
        """Create the handler and make sure the target directory exists.

        Args:
            log_file: Path of the JSONL file that entries are appended to.
        """
        super().__init__()
        self.log_file = log_file
        self.redaction_config = RedactionConfig()

        # Set a basic formatter for timestamp formatting
        self.setFormatter(logging.Formatter())

        # Ensure parent directory exists
        self.log_file.parent.mkdir(parents=True, exist_ok=True)

    def emit(self, record: logging.LogRecord) -> None:
        """Append one redacted JSON line for the record.

        Any failure is routed to ``handleError`` so that logging never
        raises into the caller.

        Args:
            record: The log record to write.
        """
        try:
            # Redact the interpolated message before it is written.
            message = self.redaction_config.redact_value(record.getMessage())

            log_entry: dict[str, Any] = {
                "timestamp": self.formatter.formatTime(record)
                if self.formatter
                else "",
                "level": record.levelname,
                "logger": record.name,
                "message": message,
            }

            # Add extra fields if present (context dict)
            context = getattr(record, "extra", None)
            if isinstance(context, dict):
                log_entry["context"] = self._redact_dict(context)

            # Values json cannot encode natively are stringified (and
            # redacted) so the entry is written instead of being dropped.
            line = json.dumps(log_entry, default=self._stringify)

            with self.log_file.open("a", encoding="utf-8") as f:
                f.write(line + "\n")

        except Exception:
            self.handleError(record)

    def _redact_dict(self, data: dict[str, Any]) -> dict[str, Any]:
        """Recursively redact sensitive keys/values in a dictionary.

        Args:
            data: Mapping to copy.

        Returns:
            A new dict. Values under deny-listed keys are replaced by the
            redaction marker; all other values are redacted by pattern,
            including those nested in dicts, lists and tuples.
        """
        result: dict[str, Any] = {}
        for key, value in data.items():
            # Only string keys can be on the deny-list; other key types are
            # never passed to should_redact_key.
            if isinstance(key, str) and self.redaction_config.should_redact_key(key):
                result[key] = "***REDACTED***"
            else:
                result[key] = self._redact_item(value)
        return result

    def _redact_item(self, value: Any) -> Any:
        """Redact a single value, descending into dicts, lists and tuples."""
        if isinstance(value, str):
            return self.redaction_config.redact_value(value)
        if isinstance(value, dict):
            return self._redact_dict(value)
        if isinstance(value, (list, tuple)):
            # Tuples become lists; JSON has no separate tuple type.
            return [self._redact_item(item) for item in value]
        return value

    def _stringify(self, value: object) -> str:
        """``json.dumps`` fallback: redacted ``str()`` of an unencodable value."""
        return self.redaction_config.redact_value(str(value))
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
@dataclass
class LoggingConfig:
    """Configuration for logging setup.

    Path-like fields accept either ``Path`` objects or plain strings;
    strings are converted to ``Path`` on construction so that code using
    the ``/`` operator or ``Path`` methods on them keeps working.
    """

    format: str = "console"  # "console" or "jsonl"
    # Directory used to build the default JSONL log path when log_file is unset.
    artifacts_dir: Path = field(default_factory=lambda: Path("run_artifacts"))
    # Explicit JSONL log file; None means "derive it from artifacts_dir".
    log_file: Path | None = None

    def __post_init__(self) -> None:
        """Normalise path-like fields to ``Path`` instances."""
        self.artifacts_dir = Path(self.artifacts_dir)
        if self.log_file is not None:
            self.log_file = Path(self.log_file)
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
def setup_logging(config: LoggingConfig) -> None:
    """Configure root logging based on format selection.

    The new handler is built before the existing configuration is touched,
    so an invalid format or a handler that cannot be created leaves the
    current logging setup intact.

    Args:
        config: Logging configuration. For the ``"jsonl"`` format with no
            ``log_file`` set, ``config.log_file`` is filled in with
            ``<artifacts_dir>/run_<pid>.log.jsonl``.

    Raises:
        ValueError: If ``config.format`` is neither ``"console"`` nor
            ``"jsonl"``.
    """
    new_handler: logging.Handler

    if config.format == "console":
        # Console handler with redaction
        new_handler = logging.StreamHandler()
        new_handler.setFormatter(
            RedactingFormatter("%(asctime)s - %(levelname)s - %(message)s")
        )

    elif config.format == "jsonl":
        # JSONL file handler with redaction
        if config.log_file is None:
            config.log_file = config.artifacts_dir / f"run_{os.getpid()}.log.jsonl"
        new_handler = JsonlHandler(config.log_file)

    else:
        raise ValueError(f"Invalid log format: {config.format}")

    root_logger = logging.getLogger()
    root_logger.setLevel(logging.INFO)

    # Remove existing handlers and close them so any resources they hold
    # (for example open log files) are released.
    for stale in list(root_logger.handlers):
        root_logger.removeHandler(stale)
        stale.close()

    root_logger.addHandler(new_handler)
|
|
1
|
+
"""Logging configuration with selective secret redaction.
|
|
2
|
+
|
|
3
|
+
Provides console and JSONL logging formats with precise redaction rules.
|
|
4
|
+
"""
|
|
5
|
+
|
|
6
|
+
from __future__ import annotations
|
|
7
|
+
|
|
8
|
+
import json
|
|
9
|
+
import logging
|
|
10
|
+
import os
|
|
11
|
+
import re
|
|
12
|
+
from dataclasses import dataclass, field
|
|
13
|
+
from pathlib import Path
|
|
14
|
+
from typing import Any
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
@dataclass
class RedactionConfig:
    """Configuration for selective secret redaction.

    Secrets are recognised in two ways: by the key they are stored under
    (``key_denylist``) and by the shape of the value itself
    (``value_patterns``).
    """

    # Known secret value patterns (regex). Every match is replaced by the
    # redaction marker.
    value_patterns: list[str] = field(
        default_factory=lambda: [
            r"[A-Za-z0-9]{52}",  # Azure DevOps PAT format (52 chars)
            r"Bearer\s+[A-Za-z0-9\-._~+/]+=*",  # Bearer tokens
        ]
    )

    # Explicit key deny-list (exact matches, case-insensitive).
    key_denylist: set[str] = field(
        default_factory=lambda: {
            "pat",
            "personal_access_token",
            "auth_header",
            "authorization",
            "webhook_url",
            "secret",
            "password",
        }
    )

    def should_redact_key(self, key: str) -> bool:
        """Check if a key should be redacted based on the deny-list.

        The comparison ignores case on both sides, so a caller-supplied
        deny-list entry such as ``"API_KEY"`` matches ``"api_key"``.

        Args:
            key: Key to look up. Non-string keys never match.

        Returns:
            True when the key is on the deny-list, otherwise False.
        """
        if not isinstance(key, str):
            return False
        lowered = key.lower()
        # Fast path for the (lower-case) default entries.
        if lowered in self.key_denylist:
            return True
        # Slow path: tolerate deny-list entries that contain upper-case letters.
        return any(lowered == entry.lower() for entry in self.key_denylist)

    def redact_value(self, value: str) -> str:
        """Redact known secret patterns in a value.

        Args:
            value: Text to scan.

        Returns:
            ``value`` with every match of every pattern in ``value_patterns``
            replaced by ``***REDACTED***``. Patterns are applied in order.
        """
        result = value
        for pattern in self.value_patterns:
            result = re.sub(pattern, "***REDACTED***", result)
        return result
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
class RedactingFormatter(logging.Formatter):
    """Formatter that redacts sensitive information from rendered log lines."""

    def __init__(self, fmt: str | None = None, datefmt: str | None = None) -> None:
        """Create the formatter with the default redaction rules.

        Args:
            fmt: Log format string, passed through to ``logging.Formatter``.
            datefmt: Date format string, passed through to ``logging.Formatter``.
        """
        super().__init__(fmt, datefmt)
        self.redaction_config = RedactionConfig()

    def format(self, record: logging.LogRecord) -> str:
        """Render the record, then scrub secrets from the finished text.

        Redaction is applied to the fully formatted line instead of to
        ``record.msg`` and ``record.args`` separately, because:

        * the record is left untouched, so other handlers that receive the
          same record still see the original message and arguments;
        * ``record.args`` may be a mapping (``"%(user)s"`` style); rebuilding
          it as a tuple would break interpolation;
        * secrets that only become visible after interpolation
          (``"Bearer %s"``) or that sit in exception/stack text are covered.

        Args:
            record: The log record to render.

        Returns:
            The formatted log line with secret patterns replaced.
        """
        rendered = super().format(record)
        return self.redaction_config.redact_value(rendered)
|
|
76
|
+
|
|
77
|
+
|
|
78
|
+
class JsonlHandler(logging.Handler):
    """Handler that writes structured JSONL log entries with redaction.

    Each record becomes one JSON object per line with the keys
    ``timestamp``, ``level``, ``logger`` and ``message``. When the record
    carries a dict in its ``extra`` attribute, a redacted copy is added
    under ``context``.
    """

    def __init__(self, log_file: Path) -> None:
        """Create the handler and make sure the target directory exists.

        Args:
            log_file: Path of the JSONL file that entries are appended to.
        """
        super().__init__()
        self.log_file = log_file
        self.redaction_config = RedactionConfig()

        # Set a basic formatter for timestamp formatting
        self.setFormatter(logging.Formatter())

        # Ensure parent directory exists
        self.log_file.parent.mkdir(parents=True, exist_ok=True)

    def emit(self, record: logging.LogRecord) -> None:
        """Append one redacted JSON line for the record.

        Any failure is routed to ``handleError`` so that logging never
        raises into the caller.

        Args:
            record: The log record to write.
        """
        try:
            # Redact the interpolated message before it is written.
            message = self.redaction_config.redact_value(record.getMessage())

            log_entry: dict[str, Any] = {
                "timestamp": self.formatter.formatTime(record)
                if self.formatter
                else "",
                "level": record.levelname,
                "logger": record.name,
                "message": message,
            }

            # Add extra fields if present (context dict)
            context = getattr(record, "extra", None)
            if isinstance(context, dict):
                log_entry["context"] = self._redact_dict(context)

            # Values json cannot encode natively are stringified (and
            # redacted) so the entry is written instead of being dropped.
            line = json.dumps(log_entry, default=self._stringify)

            with self.log_file.open("a", encoding="utf-8") as f:
                f.write(line + "\n")

        except Exception:
            self.handleError(record)

    def _redact_dict(self, data: dict[str, Any]) -> dict[str, Any]:
        """Recursively redact sensitive keys/values in a dictionary.

        Args:
            data: Mapping to copy.

        Returns:
            A new dict. Values under deny-listed keys are replaced by the
            redaction marker; all other values are redacted by pattern,
            including those nested in dicts, lists and tuples.
        """
        result: dict[str, Any] = {}
        for key, value in data.items():
            # Only string keys can be on the deny-list; other key types are
            # never passed to should_redact_key.
            if isinstance(key, str) and self.redaction_config.should_redact_key(key):
                result[key] = "***REDACTED***"
            else:
                result[key] = self._redact_item(value)
        return result

    def _redact_item(self, value: Any) -> Any:
        """Redact a single value, descending into dicts, lists and tuples."""
        if isinstance(value, str):
            return self.redaction_config.redact_value(value)
        if isinstance(value, dict):
            return self._redact_dict(value)
        if isinstance(value, (list, tuple)):
            # Tuples become lists; JSON has no separate tuple type.
            return [self._redact_item(item) for item in value]
        return value

    def _stringify(self, value: object) -> str:
        """``json.dumps`` fallback: redacted ``str()`` of an unencodable value."""
        return self.redaction_config.redact_value(str(value))
|
|
130
|
+
|
|
131
|
+
|
|
132
|
+
@dataclass
class LoggingConfig:
    """Configuration for logging setup.

    Path-like fields accept either ``Path`` objects or plain strings;
    strings are converted to ``Path`` on construction so that code using
    the ``/`` operator or ``Path`` methods on them keeps working.
    """

    format: str = "console"  # "console" or "jsonl"
    # Directory used to build the default JSONL log path when log_file is unset.
    artifacts_dir: Path = field(default_factory=lambda: Path("run_artifacts"))
    # Explicit JSONL log file; None means "derive it from artifacts_dir".
    log_file: Path | None = None

    def __post_init__(self) -> None:
        """Normalise path-like fields to ``Path`` instances."""
        self.artifacts_dir = Path(self.artifacts_dir)
        if self.log_file is not None:
            self.log_file = Path(self.log_file)
|
|
139
|
+
|
|
140
|
+
|
|
141
|
+
def setup_logging(config: LoggingConfig) -> None:
    """Configure root logging based on format selection.

    The new handler is built before the existing configuration is touched,
    so an invalid format or a handler that cannot be created leaves the
    current logging setup intact.

    Args:
        config: Logging configuration. For the ``"jsonl"`` format with no
            ``log_file`` set, ``config.log_file`` is filled in with
            ``<artifacts_dir>/run_<pid>.log.jsonl``.

    Raises:
        ValueError: If ``config.format`` is neither ``"console"`` nor
            ``"jsonl"``.
    """
    new_handler: logging.Handler

    if config.format == "console":
        # Console handler with redaction
        new_handler = logging.StreamHandler()
        new_handler.setFormatter(
            RedactingFormatter("%(asctime)s - %(levelname)s - %(message)s")
        )

    elif config.format == "jsonl":
        # JSONL file handler with redaction
        if config.log_file is None:
            config.log_file = config.artifacts_dir / f"run_{os.getpid()}.log.jsonl"
        new_handler = JsonlHandler(config.log_file)

    else:
        raise ValueError(f"Invalid log format: {config.format}")

    root_logger = logging.getLogger()
    root_logger.setLevel(logging.INFO)

    # Remove existing handlers and close them so any resources they hold
    # (for example open log files) are released.
    for stale in list(root_logger.handlers):
        root_logger.removeHandler(stale)
        stale.close()

    root_logger.addHandler(new_handler)
|