ado-git-repo-insights 1.2.1__py3-none-any.whl → 2.7.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (28)
  1. ado_git_repo_insights/__init__.py +3 -3
  2. ado_git_repo_insights/cli.py +703 -354
  3. ado_git_repo_insights/config.py +186 -186
  4. ado_git_repo_insights/extractor/__init__.py +1 -1
  5. ado_git_repo_insights/extractor/ado_client.py +452 -246
  6. ado_git_repo_insights/extractor/pr_extractor.py +239 -239
  7. ado_git_repo_insights/ml/__init__.py +13 -0
  8. ado_git_repo_insights/ml/date_utils.py +70 -0
  9. ado_git_repo_insights/ml/forecaster.py +288 -0
  10. ado_git_repo_insights/ml/insights.py +497 -0
  11. ado_git_repo_insights/persistence/__init__.py +1 -1
  12. ado_git_repo_insights/persistence/database.py +193 -193
  13. ado_git_repo_insights/persistence/models.py +207 -145
  14. ado_git_repo_insights/persistence/repository.py +662 -376
  15. ado_git_repo_insights/transform/__init__.py +1 -1
  16. ado_git_repo_insights/transform/aggregators.py +950 -0
  17. ado_git_repo_insights/transform/csv_generator.py +132 -132
  18. ado_git_repo_insights/utils/__init__.py +1 -1
  19. ado_git_repo_insights/utils/datetime_utils.py +101 -101
  20. ado_git_repo_insights/utils/logging_config.py +172 -172
  21. ado_git_repo_insights/utils/run_summary.py +207 -206
  22. {ado_git_repo_insights-1.2.1.dist-info → ado_git_repo_insights-2.7.4.dist-info}/METADATA +56 -15
  23. ado_git_repo_insights-2.7.4.dist-info/RECORD +27 -0
  24. {ado_git_repo_insights-1.2.1.dist-info → ado_git_repo_insights-2.7.4.dist-info}/licenses/LICENSE +21 -21
  25. ado_git_repo_insights-1.2.1.dist-info/RECORD +0 -22
  26. {ado_git_repo_insights-1.2.1.dist-info → ado_git_repo_insights-2.7.4.dist-info}/WHEEL +0 -0
  27. {ado_git_repo_insights-1.2.1.dist-info → ado_git_repo_insights-2.7.4.dist-info}/entry_points.txt +0 -0
  28. {ado_git_repo_insights-1.2.1.dist-info → ado_git_repo_insights-2.7.4.dist-info}/top_level.txt +0 -0
@@ -1,172 +1,172 @@
1
- """Logging configuration with selective secret redaction.
2
-
3
- Provides console and JSONL logging formats with precise redaction rules.
4
- """
5
-
6
- from __future__ import annotations
7
-
8
- import json
9
- import logging
10
- import os
11
- import re
12
- from dataclasses import dataclass, field
13
- from pathlib import Path
14
- from typing import Any
15
-
16
-
17
- @dataclass
18
- class RedactionConfig:
19
- """Configuration for selective secret redaction."""
20
-
21
- # Known secret value patterns (regex)
22
- value_patterns: list[str] = field(
23
- default_factory=lambda: [
24
- r"[A-Za-z0-9]{52}", # Azure DevOps PAT format (52 chars)
25
- r"Bearer\s+[A-Za-z0-9\-._~+/]+=*", # Bearer tokens
26
- ]
27
- )
28
-
29
- # Explicit key deny-list (exact matches, case-insensitive)
30
- key_denylist: set[str] = field(
31
- default_factory=lambda: {
32
- "pat",
33
- "personal_access_token",
34
- "auth_header",
35
- "authorization",
36
- "webhook_url",
37
- "secret",
38
- "password",
39
- }
40
- )
41
-
42
- def should_redact_key(self, key: str) -> bool:
43
- """Check if a key should be redacted based on deny-list."""
44
- return key.lower() in self.key_denylist
45
-
46
- def redact_value(self, value: str) -> str:
47
- """Redact known secret patterns in a value."""
48
- result = value
49
- for pattern in self.value_patterns:
50
- result = re.sub(pattern, "***REDACTED***", result)
51
- return result
52
-
53
-
54
- class RedactingFormatter(logging.Formatter):
55
- """Formatter that redacts sensitive information."""
56
-
57
- def __init__(self, fmt: str | None = None, datefmt: str | None = None) -> None:
58
- super().__init__(fmt, datefmt)
59
- self.redaction_config = RedactionConfig()
60
-
61
- def format(self, record: logging.LogRecord) -> str:
62
- # Redact message
63
- if isinstance(record.msg, str):
64
- record.msg = self.redaction_config.redact_value(record.msg)
65
-
66
- # Redact args
67
- if record.args:
68
- record.args = tuple(
69
- self.redaction_config.redact_value(str(arg))
70
- if isinstance(arg, str)
71
- else arg
72
- for arg in record.args
73
- )
74
-
75
- return super().format(record)
76
-
77
-
78
- class JsonlHandler(logging.Handler):
79
- """Handler that writes structured JSONL log entries with redaction."""
80
-
81
- def __init__(self, log_file: Path) -> None:
82
- super().__init__()
83
- self.log_file = log_file
84
- self.redaction_config = RedactionConfig()
85
-
86
- # Set a basic formatter for timestamp formatting
87
- self.setFormatter(logging.Formatter())
88
-
89
- # Ensure parent directory exists
90
- self.log_file.parent.mkdir(parents=True, exist_ok=True)
91
-
92
- def emit(self, record: logging.LogRecord) -> None:
93
- try:
94
- # P1 Fix: Redact the message before writing to JSONL
95
- message = record.getMessage()
96
- redacted_message = self.redaction_config.redact_value(message)
97
-
98
- log_entry: dict[str, Any] = {
99
- "timestamp": self.formatter.formatTime(record)
100
- if self.formatter
101
- else "",
102
- "level": record.levelname,
103
- "logger": record.name,
104
- "message": redacted_message,
105
- }
106
-
107
- # Add extra fields if present (context dict)
108
- if hasattr(record, "extra") and isinstance(record.extra, dict):
109
- log_entry["context"] = self._redact_dict(record.extra)
110
-
111
- with self.log_file.open("a", encoding="utf-8") as f:
112
- f.write(json.dumps(log_entry) + "\n")
113
-
114
- except Exception:
115
- self.handleError(record)
116
-
117
- def _redact_dict(self, data: dict[str, Any]) -> dict[str, Any]:
118
- """Recursively redact sensitive keys/values in a dictionary."""
119
- result: dict[str, Any] = {}
120
- for key, value in data.items():
121
- if self.redaction_config.should_redact_key(key):
122
- result[key] = "***REDACTED***"
123
- elif isinstance(value, str):
124
- result[key] = self.redaction_config.redact_value(value)
125
- elif isinstance(value, dict):
126
- result[key] = self._redact_dict(value) # Recursive call
127
- else:
128
- result[key] = value
129
- return result
130
-
131
-
132
- @dataclass
133
- class LoggingConfig:
134
- """Configuration for logging setup."""
135
-
136
- format: str = "console" # "console" or "jsonl"
137
- artifacts_dir: Path = field(default_factory=lambda: Path("run_artifacts"))
138
- log_file: Path | None = None
139
-
140
-
141
- def setup_logging(config: LoggingConfig) -> None:
142
- """Configure logging based on format selection.
143
-
144
- Args:
145
- config: Logging configuration.
146
- """
147
- # Get root logger
148
- root_logger = logging.getLogger()
149
- root_logger.setLevel(logging.INFO)
150
-
151
- # Remove existing handlers
152
- root_logger.handlers.clear()
153
-
154
- if config.format == "console":
155
- # Console handler with redaction
156
- handler = logging.StreamHandler()
157
- formatter = RedactingFormatter(
158
- "%(asctime)s - %(levelname)s - %(message)s",
159
- )
160
- handler.setFormatter(formatter)
161
- root_logger.addHandler(handler)
162
-
163
- elif config.format == "jsonl":
164
- # JSONL file handler with redaction
165
- if config.log_file is None:
166
- config.log_file = config.artifacts_dir / f"run_{os.getpid()}.log.jsonl"
167
-
168
- jsonl_handler: logging.Handler = JsonlHandler(config.log_file)
169
- root_logger.addHandler(jsonl_handler)
170
-
171
- else:
172
- raise ValueError(f"Invalid log format: {config.format}")
1
+ """Logging configuration with selective secret redaction.
2
+
3
+ Provides console and JSONL logging formats with precise redaction rules.
4
+ """
5
+
6
+ from __future__ import annotations
7
+
8
+ import json
9
+ import logging
10
+ import os
11
+ import re
12
+ from dataclasses import dataclass, field
13
+ from pathlib import Path
14
+ from typing import Any
15
+
16
+
17
@dataclass
class RedactionConfig:
    """Rules that decide which log keys and value substrings are masked.

    Both collections are created per instance through ``default_factory``,
    so extending them on one instance does not affect any other instance.
    """

    # Regular expressions; every match found inside a value is replaced by
    # the ``***REDACTED***`` marker.
    value_patterns: list[str] = field(
        default_factory=lambda: [
            # Azure DevOps PAT-like runs of 52 or more alphanumerics. The
            # upper bound is deliberately open: with an exact ``{52}`` a
            # longer token was cut after its first 52 characters and the
            # remaining tail was written to the log in clear text.
            r"[A-Za-z0-9]{52,}",
            r"Bearer\s+[A-Za-z0-9\-._~+/]+=*",  # Bearer tokens
        ]
    )

    # Keys whose values are always masked. Entries must be lowercase because
    # the candidate key is lower-cased before the exact-match lookup.
    key_denylist: set[str] = field(
        default_factory=lambda: {
            "pat",
            "personal_access_token",
            "auth_header",
            "authorization",
            "webhook_url",
            "secret",
            "password",
        }
    )

    def should_redact_key(self, key: str) -> bool:
        """Return True when *key* (case-insensitively) is on the deny-list.

        Non-string keys can never be on the deny-list, so they yield False
        instead of raising ``AttributeError`` on ``.lower()``.
        """
        return isinstance(key, str) and key.lower() in self.key_denylist

    def redact_value(self, value: str) -> str:
        """Return *value* with every match of every pattern masked.

        Patterns are applied in list order, each one to the output of the
        previous one.
        """
        result = value
        for pattern in self.value_patterns:
            result = re.sub(pattern, "***REDACTED***", result)
        return result
52
+
53
+
54
class RedactingFormatter(logging.Formatter):
    """Formatter that masks secrets in the fully rendered log line."""

    def __init__(self, fmt: str | None = None, datefmt: str | None = None) -> None:
        """Create the formatter with a default ``RedactionConfig``.

        Args:
            fmt: Format string passed to ``logging.Formatter``.
            datefmt: Date format string passed to ``logging.Formatter``.
        """
        super().__init__(fmt, datefmt)
        self.redaction_config = RedactionConfig()

    def format(self, record: logging.LogRecord) -> str:
        """Render *record* with the base formatter, then redact the result.

        Redacting the rendered text instead of rewriting ``record.msg`` and
        ``record.args`` has three effects:

        * mapping-style arguments (``logger.info("%(a)s", {"a": 1})``) keep
          working; rebuilding ``args`` as a tuple turned the mapping into a
          tuple of its keys and broke the interpolation;
        * ``msg`` and ``args`` are left untouched for any other handler that
          receives the same record;
        * secrets that only appear once a non-string argument or exception
          text is rendered are masked as well.
        """
        return self.redaction_config.redact_value(super().format(record))
76
+
77
+
78
class JsonlHandler(logging.Handler):
    """Handler that appends one redacted JSON object per record to a file."""

    def __init__(self, log_file: Path) -> None:
        """Prepare the handler and make sure the target directory exists.

        Args:
            log_file: File that receives one JSON line per log record.
        """
        super().__init__()
        self.log_file = log_file
        self.redaction_config = RedactionConfig()

        # A plain Formatter is attached only so emit() can call formatTime().
        self.setFormatter(logging.Formatter())

        # Ensure parent directory exists
        self.log_file.parent.mkdir(parents=True, exist_ok=True)

    def emit(self, record: logging.LogRecord) -> None:
        """Write *record* as a single JSON line with secrets masked.

        The entry holds ``timestamp``, ``level``, ``logger`` and ``message``.
        When the record carries a dict attribute named ``extra`` it is added,
        redacted, under ``context``. Any failure is routed to
        ``handleError`` rather than raised.
        """
        try:
            entry: dict[str, Any] = {
                "timestamp": self.formatter.formatTime(record)
                if self.formatter
                else "",
                "level": record.levelname,
                "logger": record.name,
                "message": self.redaction_config.redact_value(record.getMessage()),
            }

            context = getattr(record, "extra", None)
            if isinstance(context, dict):
                entry["context"] = self._redact_dict(context)

            # default= keeps the entry when a context value is not JSON
            # serializable (for example a Path); previously json.dumps raised
            # and the whole entry was dropped.
            line = json.dumps(entry, default=self._stringify)

            with self.log_file.open("a", encoding="utf-8") as f:
                f.write(line + "\n")

        except Exception:
            self.handleError(record)

    def _stringify(self, obj: Any) -> str:
        """Render a non-JSON value as text and redact that text."""
        return self.redaction_config.redact_value(str(obj))

    def _redact_any(self, value: Any) -> Any:
        """Redact strings and descend into dicts, lists and tuples."""
        if isinstance(value, str):
            return self.redaction_config.redact_value(value)
        if isinstance(value, dict):
            return self._redact_dict(value)
        if isinstance(value, list):
            return [self._redact_any(item) for item in value]
        if isinstance(value, tuple):
            return tuple(self._redact_any(item) for item in value)
        return value

    def _redact_dict(self, data: dict[str, Any]) -> dict[str, Any]:
        """Return a redacted copy of *data*; the input is not modified.

        A value whose key is on the deny-list is replaced entirely. Every
        other value is redacted recursively, including strings nested inside
        lists and tuples. Non-string keys skip the deny-list check instead of
        raising.
        """
        result: dict[str, Any] = {}
        for key, value in data.items():
            if isinstance(key, str) and self.redaction_config.should_redact_key(key):
                result[key] = "***REDACTED***"
            else:
                result[key] = self._redact_any(value)
        return result
130
+
131
+
132
@dataclass
class LoggingConfig:
    """Configuration for logging setup.

    ``artifacts_dir`` and ``log_file`` may be given as plain strings; they
    are converted to ``Path`` on construction so later path arithmetic
    (``artifacts_dir / name``, ``log_file.parent``) cannot fail on a ``str``.
    """

    # Output format selector: "console" or "jsonl".
    format: str = "console"
    # Directory used to build a default JSONL file name when log_file is unset.
    artifacts_dir: Path = field(default_factory=lambda: Path("run_artifacts"))
    # Explicit JSONL target; None means no explicit file was chosen.
    log_file: Path | None = None

    def __post_init__(self) -> None:
        """Normalize path-like inputs to ``Path`` objects."""
        self.artifacts_dir = Path(self.artifacts_dir)
        if self.log_file is not None:
            self.log_file = Path(self.log_file)
139
+
140
+
141
def setup_logging(config: LoggingConfig) -> None:
    """Configure the root logger for console or JSONL output.

    The root logger is set to INFO and its existing handlers are replaced by
    a single new handler. For the "jsonl" format with no ``log_file`` set, a
    per-process default path under ``artifacts_dir`` is chosen and written
    back to ``config.log_file``.

    Args:
        config: Logging configuration.

    Raises:
        ValueError: If ``config.format`` is neither "console" nor "jsonl".
            The check runs before anything is changed, so a bad value leaves
            the current logging setup intact.
    """
    if config.format not in ("console", "jsonl"):
        raise ValueError(f"Invalid log format: {config.format}")

    # Build the replacement handler first: if this step fails (for example
    # the log directory cannot be created) the existing handlers stay in place.
    new_handler: logging.Handler
    if config.format == "console":
        # Console handler with redaction
        new_handler = logging.StreamHandler()
        new_handler.setFormatter(
            RedactingFormatter("%(asctime)s - %(levelname)s - %(message)s")
        )
    else:
        # JSONL file handler with redaction
        if config.log_file is None:
            config.log_file = config.artifacts_dir / f"run_{os.getpid()}.log.jsonl"
        new_handler = JsonlHandler(config.log_file)

    root_logger = logging.getLogger()
    root_logger.setLevel(logging.INFO)

    # Detach and close the old handlers so any files they hold are released,
    # instead of only dropping the references.
    for old_handler in root_logger.handlers[:]:
        root_logger.removeHandler(old_handler)
        old_handler.close()

    root_logger.addHandler(new_handler)