git-hot 0.6__py3-none-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,1870 @@
1
+ #!/usr/bin/env python3
2
+ #
3
+ # Copyright 1996-2026 Diomidis Spinellis
4
+ #
5
+ # Licensed under the Apache License, Version 2.0 (the "License");
6
+ # you may not use this file except in compliance with the License.
7
+ # You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing, software
12
+ # distributed under the License is distributed on an "AS IS" BASIS,
13
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
+ # See the License for the specific language governing permissions and
15
+ # limitations under the License.
16
+ #
17
+ #
18
+ # Parse the output of
19
+ # git log -M -m --pretty=tformat:'commit %H %ct' --topo-order --reverse -U0
20
+ # to track the lifetime of individual lines
21
+ #
22
+
23
+ import argparse
24
+ import builtins
25
+ import datetime
26
+ import json
27
+ import os
28
+ import re
29
+ import shlex
30
+ import shutil
31
+ import statistics
32
+ import subprocess
33
+ import sys
34
+ from typing import Dict, Iterator
35
+
# Version string of this script.
VERSION = "0.1"
# Control character that temporarily stands in for an escaped double quote
# (\"), so that the remaining double quotes can be treated as delimiters.
ESCAPED_QUOTE = "\001"
38
+
class Color:
    """Translate quartile ranks into ANSI 256-color escape sequences.

    Coloring is on when ``args.color`` is "always", off when it is "never",
    and otherwise follows whether standard output is a terminal.
    """

    # Palette indices for the three lower quartiles.
    _QUARTILE_PALETTE = {
        1: 33,  # Bottom 25th percentile: light blue; cold; few
        2: 10,  # 25th-50th percentile: green; some
        3: 11,  # 50th to 75th percentile: yellow; many
    }
    # Top 25th percentile: bright red; red-hot; tons
    _TOP_QUARTILE = 9

    def __init__(self, args):
        mode = getattr(args, "color", None)
        if mode == "always":
            enabled = True
        elif mode == "never":
            enabled = False
        else:
            enabled = sys.stdout.isatty()
        self.use_color = enabled

    @staticmethod
    def _ansi_fg(n: int) -> str:
        """Return the ANSI escape that selects foreground color n."""
        return f"\033[38;5;{n}m"

    def reset(self) -> str:
        """Return the escape that clears any color, or "" when colors are off."""
        return "\033[0m" if self.use_color else ""

    def get(self, quartile: int) -> str:
        """Return the coloring escape for the given quartile.

        Returns "" when colors are off, the reset sequence for a quartile
        of zero or less, and the top-quartile color for anything above 3.
        """
        if not self.use_color:
            return ""
        if quartile <= 0:
            return self.reset()
        palette_index = self._QUARTILE_PALETTE.get(quartile, self._TOP_QUARTILE)
        return self._ansi_fg(palette_index)

    def color(self, quartile: int) -> str:
        """Alias of get()."""
        return self.get(quartile)

    def wrap(self, text: str, quartile: int) -> str:
        """Return text enclosed in the quartile's color and a reset.

        The text is returned unchanged when no coloring prefix applies.
        """
        prefix = self.color(quartile)
        return f"{prefix}{text}{self.reset()}" if prefix else text
84
+
85
+
class ProcessingError(Exception):
    """Fatal processing error; meant to be reported without an exception trace."""
88
+
class FileDetails:
    """Per-file state kept while the diff stream is being processed."""

    def __init__(self, path, lines=None, binary=False, change_lifetimes=None):
        self.path = path
        # Sequences are copied so the instance owns its own lists.
        self.lines = [] if lines is None else list(lines)
        self.binary = binary
        self.change_lifetimes = (
            [] if change_lifetimes is None else list(change_lifetimes)
        )

    def copy(self, path=None, change_lifetimes=None):
        """Return a copy whose lines are themselves copied.

        A non-None path or change_lifetimes replaces this instance's value
        in the returned object.
        """
        new_path = path if path is not None else self.path
        copied_lines = [line.copy() for line in self.lines]
        if change_lifetimes is None:
            change_lifetimes = self.change_lifetimes
        return FileDetails(new_path, copied_lines, self.binary, change_lifetimes)
105
+
106
+
class LineDetails:
    """Content, lifetime metadata, and composition metrics of one line."""

    # Composition metrics, in the order in which __str__ reports them.
    _METRICS = (
        "length",
        "startspace",
        "string",
        "comment",
        "comma",
        "bracket",
        "access",
        "assignment",
        "scope",
        "array",
        "logical",
    )

    def __init__(
        self,
        content="",
        birth_timestamp=None,
        birth_hash=None,
        content_history=None,
        churn_count=0,
        change_lifetimes=None,
        delta=None,
        length=0,
        startspace=0,
        string=0,
        comment=0,
        comma=0,
        bracket=0,
        access=0,
        assignment=0,
        scope=0,
        array=0,
        logical=0,
    ):
        # Content and lifetime metadata
        self.content = content
        self.birth_timestamp = birth_timestamp
        self.birth_hash = birth_hash
        # The history starts with the current content unless one is supplied.
        if content_history is None:
            self.content_history = [content]
        else:
            self.content_history = list(content_history)
        self.churn_count = churn_count
        if change_lifetimes is None:
            self.change_lifetimes = []
        else:
            self.change_lifetimes = list(change_lifetimes)
        self.delta = delta

        # Composition metrics
        self.length = length
        self.startspace = startspace
        self.string = string
        self.comment = comment
        self.comma = comma
        self.bracket = bracket
        self.access = access
        self.assignment = assignment
        self.scope = scope
        self.array = array
        self.logical = logical

    def __str__(self):
        """Return the composition metrics separated by single spaces."""
        return " ".join(str(getattr(self, name)) for name in self._METRICS)

    def copy(self):
        """Return a new LineDetails with the same values and its own lists."""
        metrics = {name: getattr(self, name) for name in self._METRICS}
        return LineDetails(
            content=self.content,
            birth_timestamp=self.birth_timestamp,
            birth_hash=self.birth_hash,
            content_history=self.content_history,
            churn_count=self.churn_count,
            change_lifetimes=self.change_lifetimes,
            delta=self.delta,
            **metrics,
        )

    def render_record(self, args):
        """Return the space-separated output record for this line.

        The record holds the birth timestamp, the delta when one is set, and
        then either "L" plus the composition metrics (args.line_details) or
        the line content without trailing newlines (args.tokens).
        """
        fields = [str(self.birth_timestamp)]
        if self.delta is not None:
            fields.append(str(self.delta))
        if args.line_details:
            fields += ["L", str(self)]
        elif args.tokens:
            fields.append(self.content.rstrip("\n"))
        return " ".join(fields)

    def render_deleted(self, args, death_timestamp):
        """Return the record followed by the line's death timestamp."""
        return self.render_record(args) + f" {death_timestamp}"

    def render_alive(self, args):
        """Return the record of a surviving line.

        " alive NA" is appended unless args.compressed is set.
        """
        suffix = "" if args.compressed else " alive NA"
        return self.render_record(args) + suffix
196
+
class InputReader:
    """Sequential line source backed by files, standard input, or an iterator.

    line_number counts the lines handed out so far.
    """

    def __init__(self):
        self.line_number = 0
        self.paths = None
        self.line_iterator = None
        self._index = 0  # Position in paths of the next file to open
        self._current = None  # Stream currently being read
        self._close_current = False  # Whether close() must close _current

    @classmethod
    def from_paths(cls, paths):
        """Create a reader over the files named in paths.

        With an empty paths sequence the lines come from standard input.
        """
        reader = cls()
        reader.paths, reader.line_iterator = paths, None
        return reader

    @classmethod
    def from_iterator(cls, iterator):
        """Create a reader that takes its lines from iterator."""
        reader = cls()
        reader.paths, reader.line_iterator = None, iterator
        return reader

    def close(self):
        """Close the current stream if this reader opened it, then forget it."""
        stream = self._current
        if stream is not None and self._close_current:
            stream.close()
        self._current = None
        self._close_current = False

    def _open_next(self):
        """Open the next file in paths; return False when none is left."""
        self.close()
        if self._index >= len(self.paths):
            return False
        next_path = self.paths[self._index]
        self._index += 1
        self._current = open(
            next_path,
            "r",
            encoding="utf-8",
            errors="surrogateescape",
            newline="",
        )
        self._close_current = True
        return True

    def _read_raw_from_paths(self):
        """Return the next line from the files in paths, or None at the end."""
        if self._current is None:
            if not self.paths:
                # No files were named: read standard input, which is not
                # ours to close.
                self._current = utf8_stdin()
                self._close_current = False
            elif not self._open_next():
                return None
        while True:
            line = self._current.readline()
            if line != "":
                self.line_number += 1
                return line
            # The current stream is exhausted; move to the next file, if any.
            if not self.paths or not self._open_next():
                return None

    def _read_raw_from_iterator(self):
        """Return the next line of line_iterator, or None at the end.

        An empty string obtained from the iterator also counts as the end.
        """
        line = next(self.line_iterator, "")
        if line == "":
            return None
        self.line_number += 1
        return line

    def read_raw(self):
        """Return the next line including its trailing newline, or None."""
        if self.paths is None:
            return self._read_raw_from_iterator()
        return self._read_raw_from_paths()

    def read_chomp(self):
        """Return the next line without its trailing newline, or None."""
        line = self.read_raw()
        if line is None:
            return None
        return chomp(line)
276
+
277
+
def chomp(line):
    """Return line without its final newline character, if it has one."""
    if line.endswith("\n"):
        return line[:-1]
    return line
280
+
281
+
def utf8_stdin():
    """Return standard input as a UTF-8 text stream.

    Undecodable bytes are kept through the surrogateescape error handler and
    newline translation is turned off.  sys.stdin is reconfigured in place
    when it supports that; otherwise its file descriptor is wrapped in a new
    stream that leaves the descriptor open when closed.
    """
    settings = {
        "encoding": "utf-8",
        "errors": "surrogateescape",
        "newline": "",
    }
    if not hasattr(sys.stdin, "reconfigure"):
        return open(sys.stdin.fileno(), "r", closefd=False, **settings)
    sys.stdin.reconfigure(**settings)
    return sys.stdin
294
+
295
+
def debug_option(options, opt):
    """Return True when opt occurs in the options string.

    options may be None (nothing specified), which yields False.
    """
    return False if options is None else bool(re.search(re.escape(opt), options))
302
+
303
+
def range_parse(diff_range):
    """Return a diff range as a zero-based [start, end) interval.

    The range is a sign followed by "start,count" or just "start" at the end
    of diff_range.  A count of zero yields (0, 0); a bare start covers one
    line.  Raises ProcessingError when no range is found.
    """
    with_count = re.search(r"[+-](\d+),(\d+)$", diff_range)
    if with_count:
        start, count = (int(group) for group in with_count.groups())
        return (start - 1, start + count - 1) if count != 0 else (0, 0)

    single = re.search(r"[+-](\d+)$", diff_range)
    if not single:
        raise ProcessingError("Expecting a diff range")
    start = int(single.group(1))
    return (start - 1, start)
318
+
319
+
def output_source_code(name, source_only=False):
    """Return True if details about the file called name are to be output.

    That is always the case unless source_only is set (the -s option), in
    which case the name must carry a recognized source code suffix.
    """
    if source_only:
        # Keep tokenize.pl:tokenize, lifetime.pl:output_source_code,
        # repo-metrics-report.sh, analyze-moves.sh in sync.
        source_suffix = re.search(
            r"\.(C|c|cc|cpp|cs|cxx|go|hh|hpp|h\+\+|c\+\+|h|H|hxx|"
            r"java|((php[3457s]?)|pht|php-s)|py|rs)$",
            name,
        )
        return source_suffix is not None
    return True
335
+
336
+
def hide_escaped_quotes(text):
    """Return text with escaped quotes (\\") replaced by ESCAPED_QUOTE.

    Only a backslash-quote pair that follows a character other than a
    backslash is replaced, so that the real quotes can serve as delimiters.
    """
    replacement = r"\1" + ESCAPED_QUOTE
    return re.sub(r'([^\\])\\"', replacement, text)
340
+
341
+
def unquote_unescape(name):
    """Return a file name with its quoting and escapes removed.

    A name without any double quote is returned unchanged.  Otherwise the
    escaped quotes are hidden, the remaining quotes are dropped, and the
    result is passed through unescape().
    """
    if '"' in name:
        hidden = hide_escaped_quotes(name)
        return unescape(hidden.replace('"', ""))
    return name
349
+
350
+
def unescape(name):
    """Return the file name with escapes and hidden escaped quotes resolved.

    ESCAPED_QUOTE placeholders become double quotes.  The escapes \\t, \\n,
    \\" and \\\\ become the character they denote, and each run of \\ooo
    octal escapes is decoded as UTF-8 bytes (undecodable bytes are kept
    through the surrogateescape error handler).

    All escapes are decoded in a single left-to-right pass.  Decoding them
    with successive whole-string replacements misreads an escaped backslash
    that is followed by "t", "n", a quote or octal digits: for example
    backslash-backslash-t would yield a backslash and a tab rather than a
    backslash and the letter t.
    """
    simple_escapes = {"t": "\t", "n": "\n", '"': '"', "\\": "\\"}

    def decode(match):
        token = match.group(0)
        escaped = token[1]
        if escaped in simple_escapes:
            return simple_escapes[escaped]
        # A run of octal escapes holds the bytes of one or more characters.
        data = bytes(
            int(digits, 8) for digits in re.findall(r"\\([0-7]{3})", token)
        )
        return data.decode("utf-8", errors="surrogateescape")

    name = name.replace(ESCAPED_QUOTE, '"')
    return re.sub(r'\\[tn"\\]|(?:\\[0-7]{3})+', decode, name)
366
+
367
+
def count_pattern(pattern, text):
    """Return the number of non-overlapping matches of pattern in text."""
    return sum(1 for _ in re.finditer(pattern, text))
370
+
371
+
def line_details(line):
    """Return a LineDetails describing the composition of line.

    The length is that of the unmodified line.  Strings and then the first
    comment are counted and removed before the remaining metrics are
    gathered, so operators inside them are not counted.
    """
    remaining = line

    # Count and remove strings: double-quoted ones first, then single-quoted
    remaining, double_quoted = re.subn(r'"[^"]*"', "", remaining)
    remaining, single_quoted = re.subn(r"'[^']*'", "", remaining)

    # Remove the first kind of comment that is present
    has_comment = 0
    for marker in (r"/\*.*", r"#.*", r"//.*"):
        remaining, removed = re.subn(marker, "", remaining, count=1)
        if removed:
            has_comment = 1
            break

    # Spaces (and expanded tabs) at the beginning of the line
    remaining = remaining.expandtabs(8)
    leading_spaces = len(remaining) - len(remaining.lstrip(" "))

    # Not counted: * can be pointer dereference or multiplication; "if".
    # Structure member access is combined with access.
    return LineDetails(
        content=line,
        length=len(line),
        startspace=leading_spaces,
        string=double_quoted + single_quoted,
        comment=has_comment,
        comma=count_pattern(r",", remaining),
        bracket=count_pattern(r"\(", remaining),
        access=count_pattern(r"\.[^0-9]|->", remaining),
        assignment=count_pattern(r"[^<>!~=]=[^=]|<<=|>>=", remaining),
        scope=count_pattern(r"\{|(:\s*$)", remaining),
        array=count_pattern(r"\[", remaining),
        logical=count_pattern(
            r"==|[^>]>=|[^<]<=|!=|[^<]<[^<]|[^>\-]>[^>]|\!|\|\||\&\&|\bor\b|\band\b|\bnot\b|\bis\b",
            remaining,
        ),
    )
434
+
435
+
def print_stderr_line(text):
    """Print text as a line on standard error, surviving encoding errors.

    If the text cannot be encoded for standard error, it is encoded with
    backslash escapes for the offending characters and written through the
    stream's byte buffer, or through the stream itself when it has none.
    """
    try:
        print(text, file=sys.stderr)
    except UnicodeEncodeError:
        codec = sys.stderr.encoding or "utf-8"
        payload = f"{text}\n".encode(codec, errors="backslashreplace")
        if not hasattr(sys.stderr, "buffer"):
            sys.stderr.write(payload.decode(codec, errors="replace"))
        else:
            sys.stderr.buffer.write(payload)
            sys.stderr.buffer.flush()
447
+
448
+
def round_days(seconds):
    """Return seconds as a whole number of days.

    Half a day is added before the fraction is truncated.
    """
    days_plus_half = seconds / 86400.0 + 0.5
    return int(days_plus_half)


def days(seconds):
    """Return seconds as a whole number of days; alias of round_days()."""
    return round_days(seconds)
455
+
456
+
def isodate(epoch_seconds):
    """Return the UTC calendar date of epoch_seconds as YYYY-MM-DD.

    The conversion uses a timezone-aware datetime, because
    datetime.utcfromtimestamp() is deprecated as of Python 3.12.
    """
    moment = datetime.datetime.fromtimestamp(
        epoch_seconds, tz=datetime.timezone.utc
    )
    return moment.strftime("%Y-%m-%d")
459
+
460
+
def utf8_surrogateescape_text():
    """Return the subprocess keyword arguments for text-mode I/O.

    The arguments select text mode with UTF-8 decoding and the
    surrogateescape error handler, matching the repo's I/O policy.
    """
    return dict(text=True, encoding="utf-8", errors="surrogateescape")
468
+
469
+
def require_flat_values(values):
    """Return values if it is a flat list or tuple.

    Raises TypeError when values is not a list or tuple, or when its first
    element is itself a list or tuple.
    """
    sequence_types = (list, tuple)
    if isinstance(values, sequence_types):
        # Values are assumed to be homogeneous, so sampling the first is enough.
        if values and isinstance(values[0], sequence_types):
            raise TypeError("nested sequences require explicit aggregation")
        return values
    raise TypeError("aggregate functions require a flat sequence")


def median(values):
    """Return the median of a flat sequence; 0 when it is empty.

    For an even number of values the average of the two middle ones is
    truncated to an integer.
    """
    ordered = sorted(require_flat_values(values))
    half, is_odd = divmod(len(ordered), 2)
    if not ordered:
        return 0
    if is_odd:
        return ordered[half]
    return int((ordered[half - 1] + ordered[half]) / 2.0)


def mean(values):
    """Return the mean of a flat sequence truncated to an integer; 0 if empty."""
    flat = require_flat_values(values)
    return int(sum(flat) / len(flat)) if flat else 0


def max_value(values):
    """Return the largest value of a flat sequence; 0 when it is empty."""
    flat = require_flat_values(values)
    return builtins.max(flat) if flat else 0


def min_value(values):
    """Return the smallest value of a flat sequence; 0 when it is empty."""
    flat = require_flat_values(values)
    return builtins.min(flat) if flat else 0


def quartile_rank(value, population):
    """Return the quartile (1 to 4) of the population in which value falls.

    The cut points are the population's inclusive quartiles; a value equal
    to a cut point belongs to the lower quartile.  A population with fewer
    than two values always gives 1.  Raises TypeError for a list or tuple
    value and for a population that is not a flat sequence.
    """
    if isinstance(value, (list, tuple)):
        raise TypeError("quartile rank requires a scalar value")
    flat = require_flat_values(population)
    if len(flat) < 2:
        return 1
    cut_points = statistics.quantiles(flat, n=4, method="inclusive")
    for rank, cut_point in enumerate(cut_points, start=1):
        if value <= cut_point:
            return rank
    return 4
528
+
529
+
def evaluate_format(fmt, context, description):
    """Return fmt rendered as an f-string with context as its local names.

    description names the kind of format in the error message.  Raises
    ProcessingError, without the original exception chained for display,
    when the format cannot be evaluated.
    """
    # NOTE(review): fmt is evaluated as Python code.  Emptying __builtins__
    # is not a security boundary, so fmt must come from a trusted source.
    source = f"f{fmt!r}"
    restricted_globals = {"__builtins__": {}}
    try:
        return eval(source, restricted_globals, context)
    except Exception as exc:
        raise ProcessingError(
            f"Invalid {description} format string {fmt!r}: {exc}"
        ) from None
537
+
538
+
class LineFormatter:
    """Render one reconstructed line through a user-supplied format string.

    The format is evaluated as an f-string.  When it does not call color()
    itself, and coloring is on, the rendered line is colored by the quartile
    of the metric named by color_domain ("age", "lifetime", otherwise churn).
    """

    def __init__(self, fmt, color, color_domain="churn"):
        self.fmt = fmt
        self.color = color
        self.color_domain = color_domain
        # True when the format string takes care of coloring by itself
        self.explicit_color = "color(" in fmt
        self.current_timestamp = 0
        # Populations bound later through bind_file() and bind_repo()
        for population in (
            "file_change_lifetimes",
            "file_line_churns",
            "file_line_ages",
            "file_line_change_lifetimes",
            "repo_line_churns",
            "repo_line_ages",
            "repo_line_change_lifetimes",
        ):
            setattr(self, population, [])

    def bind_repo(self, repo_line_churns, repo_line_ages, repo_line_change_lifetimes):
        """Bind the repository-wide populations offered to format strings."""
        self.repo_line_churns = repo_line_churns
        self.repo_line_ages = repo_line_ages
        self.repo_line_change_lifetimes = repo_line_change_lifetimes

    def bind_file(
        self,
        details,
        current_timestamp,
        repo_line_churns,
        repo_line_ages,
        repo_line_change_lifetimes,
    ):
        """Bind the populations of the file in details and of the repository.

        Line ages are measured from each line's birth to current_timestamp.
        """
        file_lines = details.lines
        self.current_timestamp = current_timestamp
        self.file_change_lifetimes = details.change_lifetimes
        self.file_line_churns = [entry.churn_count for entry in file_lines]
        self.file_line_ages = [
            current_timestamp - entry.birth_timestamp for entry in file_lines
        ]
        self.file_line_change_lifetimes = [
            list(entry.change_lifetimes) for entry in file_lines
        ]
        self.bind_repo(repo_line_churns, repo_line_ages, repo_line_change_lifetimes)

    def default_quartile(self, line, age):
        """Return the line's quartile within the current file's population."""
        domain = self.color_domain
        if domain == "age":
            return quartile_rank(age, self.file_line_ages)
        if domain == "lifetime":
            line_median = median(line.change_lifetimes)
            file_medians = [
                median(lifetimes) for lifetimes in self.file_line_change_lifetimes
            ]
            return quartile_rank(line_median, file_medians)
        return quartile_rank(line.churn_count, self.file_line_churns)

    def format(self, line):
        """Return the formatted line, colored as applicable, plus a newline."""
        born = line.birth_timestamp
        age = 0 if born is None else int(self.current_timestamp - born)

        # Values of the line, then populations, then helper functions
        context = {
            "churn": line.churn_count,
            "age": age,
            "hash": line.birth_hash,
            "change_lifetimes": line.change_lifetimes,
            "lifetime_median": median(self.file_change_lifetimes),
            "lifetime_mean": mean(self.file_change_lifetimes),
            "birthtime": born,
            "line": line.content.rstrip("\n"),
        }
        context.update(
            file_line_churns=self.file_line_churns,
            file_line_ages=self.file_line_ages,
            file_line_change_lifetimes=self.file_line_change_lifetimes,
            repo_line_churns=self.repo_line_churns,
            repo_line_ages=self.repo_line_ages,
            repo_line_change_lifetimes=self.repo_line_change_lifetimes,
        )
        context.update(
            {
                "days": days,
                "isodate": isodate,
                "max": max_value,
                "min": min_value,
                "median": median,
                "mean": mean,
                "quartile_rank": quartile_rank,
                "color": self.color.color,
                "color_reset": self.color.reset,
                "list": list,
                "map": map,
            }
        )

        rendered = evaluate_format(self.fmt, context, "line output")
        if self.color.use_color:
            if self.explicit_color:
                # The format colored parts by itself; just end the coloring.
                rendered += self.color.reset()
            else:
                rendered = self.color.wrap(rendered, self.default_quartile(line, age))
        return rendered + "\n"
626
+
627
+
class FileFormatter:
    """Render a file's metrics through a user-supplied format string.

    The format is evaluated as an f-string.  When it does not call color()
    itself, and coloring is on, the rendered text is colored by the file's
    quartile among the repository's files for the metric named by
    color_domain ("age", "lifetime", otherwise churn).
    """

    def __init__(self, fmt, color, color_domain="churn"):
        self.fmt = fmt
        self.color = color
        self.color_domain = color_domain
        # True when the format string takes care of coloring by itself
        self.explicit_color = "color(" in fmt
        self.repo_line_churns = []
        self.repo_line_ages = []
        self.repo_line_change_lifetimes = []

    def bind_repo(self, repo_line_churns, repo_line_ages, repo_line_change_lifetimes):
        """Bind the repository-wide populations offered to format strings."""
        self.repo_line_churns = repo_line_churns
        self.repo_line_ages = repo_line_ages
        self.repo_line_change_lifetimes = repo_line_change_lifetimes

    def default_quartile(self, churns, change_lifetimes, ages):
        """Return the file's quartile among the repository's files."""

        def median_of_medians(per_line_lifetimes):
            return median([median(lifetimes) for lifetimes in per_line_lifetimes])

        domain = self.color_domain
        if domain == "age":
            file_value = median(ages)
            repo_values = [median(file_ages) for file_ages in self.repo_line_ages]
        elif domain == "lifetime":
            file_value = median_of_medians(change_lifetimes)
            repo_values = [
                median_of_medians(file_change_lifetimes)
                for file_change_lifetimes in self.repo_line_change_lifetimes
            ]
        else:
            file_value = max_value(churns)
            repo_values = [
                max_value(file_churns) for file_churns in self.repo_line_churns
            ]
        return quartile_rank(file_value, repo_values)

    def format(self, path, churns, change_lifetimes, ages):
        """Return the formatted metrics of the file at path, colored as applicable."""
        context = {
            "path": path,
            # Each per-file population is offered under several names.
            **dict.fromkeys(("churn", "line_churns", "file_line_churns"), churns),
            **dict.fromkeys(
                (
                    "change_lifetime",
                    "changed_lifetime",
                    "line_change_lifetimes",
                    "file_line_change_lifetimes",
                ),
                change_lifetimes,
            ),
            **dict.fromkeys(("line_age", "line_ages", "file_line_ages"), ages),
            "repo_line_churns": self.repo_line_churns,
            "repo_line_ages": self.repo_line_ages,
            "repo_line_change_lifetimes": self.repo_line_change_lifetimes,
            "max": max_value,
            "min": min_value,
            "median": median,
            "mean": mean,
            "days": days,
            "quartile_rank": quartile_rank,
            "color": self.color.color,
            "color_reset": self.color.reset,
            "list": list,
            "map": map,
        }
        rendered = evaluate_format(self.fmt, context, "file output")
        if self.color.use_color:
            if self.explicit_color:
                # The format colored parts by itself; just end the coloring.
                rendered += self.color.reset()
            else:
                rendered = self.color.wrap(
                    rendered,
                    self.default_quartile(churns, change_lifetimes, ages),
                )
        return rendered
700
+
def get_paged_output(use_color=False):
    """Return an output stream and the pager process behind it, if any.

    When standard output is not a terminal the result is (sys.stdout, None).
    Otherwise the pager reported by `git var GIT_PAGER` is started through
    the shell and the result is (its standard input, the Popen object).
    With use_color set and a pager named "less", " -R" is appended to the
    LESS environment variable so that less passes color escapes through.

    An empty pager command, which cannot be run and used to fail with an
    IndexError while looking for its executable, and the conventional
    "cat" (no paging) both yield (sys.stdout, None).

    Raises subprocess.CalledProcessError if `git var` fails.
    """
    if not sys.stdout.isatty():
        return sys.stdout, None

    pager = subprocess.check_output(
        ["git", "var", "GIT_PAGER"],
        **utf8_surrogateescape_text(),
    ).strip()

    # Nothing to run, or paging switched off: write directly.
    if not pager or pager == "cat":
        return sys.stdout, None

    env = os.environ.copy()

    if use_color:
        # Ensure less(1) will pass-through color escapes
        pager_cmd = shlex.split(pager)
        if pager_cmd and os.path.basename(pager_cmd[0]) == "less":
            env["LESS"] = env.get("LESS", "") + " -R"

    p = subprocess.Popen(
        pager,
        stdin=subprocess.PIPE,
        env=env,
        shell=True,
        **utf8_surrogateescape_text(),
    )

    return p.stdin, p
733
+
734
+
735
+ class Processor:
def __init__(self, args):
    """Set up input, output, formatters, debug hooks, and parsing state.

    args holds the parsed command line.  An input_files attribute marks
    the lifetime.py CLI, which reads the output of difflog.sh from those
    files; without it the git-hot CLI is assumed, which invokes Git
    commands to obtain its input.
    """
    self.args = args
    self.git_hot_cli = not hasattr(args, "input_files")
    self.pager_proc = None
    self.git_hot_total_commits = None
    self.git_hot_completed_commits = 0
    self.git_hot_progress_active = False

    if self.git_hot_cli:
        # git-hot CLI: Invoke Git commands to obtain input.
        # With path specified report churn for that file.
        # Otherwise report metrics for all files and, optionally,
        # report in the specified directory churn for all files.
        self.reader = InputReader.from_iterator(
            self.stream_git_history(args.path))
        self.args.file_metrics = args.path is None
        self.args.growth_file = None
        # Options of the lifetime.py CLI that git-hot does not offer
        for disabled in (
            "compressed",
            "source_only",
            "line_details",
            "tokens",
            "delta",
            "json_metrics",
            "end_hash",
        ):
            setattr(self.args, disabled, False)
    else:
        # lifetime.py CLI: Read the output of difflog.sh.
        self.reader = InputReader.from_paths(args.input_files)

    self.color = Color(args)
    if self.git_hot_cli:
        self.out, self.pager_proc = get_paged_output(self.color.use_color)
    else:
        self.out = sys.stderr if args.redirect_output else sys.stdout

    # An explicit output format overrides the default of either formatter.
    line_format = self.args.output_format
    if not line_format:
        if self.args.churn_dir or self.selected_file_details_mode():
            line_format = "{churn:>{5}d} {line}"
        else:
            line_format = "{line}"
    color_domain = getattr(self.args, "color_domain", "churn")
    self.line_formatter = LineFormatter(line_format, self.color, color_domain)

    file_format = self.args.output_format or (
        "{max(churn):5d} {days(median(changed_lifetime)):5d} "
        "{days(median(line_age)):5d} {path}"
    )
    self.file_formatter = FileFormatter(file_format, self.color, color_domain)

    self.growth_file = None

    self.loc = 0
    self.prev_loc = 0

    # For each debug option letter (-D) set a debug_NAME flag and a
    # debug_print_NAME function that prints only when the flag is set.
    for name, letter in (
        ("reconstruction", "R"),  # Reconstruct the repository contents from its log
        ("splice", "S"),  # Show results of splicing operations
        ("commit_header", "H"),  # Show each commit SHA, timestamp header
        ("diff_header", "D"),  # Show diff headers
        ("diff_extended", "E"),  # Show diff extended headers
        ("range_header", "@"),  # Show range headers
        ("commit_changes", "C"),  # Show commit set changes
        ("push_cc", "P"),  # Show push to change set operations
        ("loc", "L"),  # Show LoC change processing
        ("git", "g"),  # Show Git invocations
    ):
        enabled = self.debug_option(letter)
        setattr(self, f"debug_{name}", enabled)
        setattr(self, f"debug_print_{name}", self.debug_printer(enabled))

    # Old and new changed files
    self.old = None
    self.new = None
    # One of inplace, copy, rename, del
    self.op = None

    # Details of current commit
    self.commit = None
    self.hash = None
    self.timestamp = None

    # File line timestamps (or contents when debugging through reconstruction)
    self.flt = {}

    # Commit changes. To preserve the isolation between changes performed
    # during a commit, all changes are recorded here and then atomically
    # committed at the end.
    # Each record has:
    # op {set, del}
    # path
    # lines
    self.cc = []

    # Records of deleted lines
    # Output at the end of a commit in order to report
    # commit size, if needed
    self.delete_records = []

    # Number of lines added to new file
    self.added_lines = 0
    # Number of lines removed from old and new file
    self.removed_lines = 0
    # Reference to copy of the old and new file contents
    self.oref = None
    self.nref = None
    self.oref_change_lifetimes = None
    self.nref_change_lifetimes = None
    self.current_line = None
860
+
def debug_option(self, opt):
    """Return whether opt appears among the debug options in args."""
    requested = self.args.debug_options
    return debug_option(requested, opt)
863
+
def debug_printer(self, enabled):
    """Return print_out when enabled, otherwise the do-nothing printer."""
    if enabled:
        return self.print_out
    return self.noop_print_out
866
+
def print_out(self, text, end="\n"):
    """Print text, followed by end, on the output stream self.out."""
    print(text, file=self.out, end=end)
869
+
def noop_print_out(self, text, end="\n"):
    """Accept the arguments of print_out and print nothing."""
    return None
872
+
def report_progress(self):
    """Report on standard error that another commit is being processed.

    Nothing is reported with args.quiet or when reconstruction debugging
    is on.  The git-hot CLI, once the total number of commits is known,
    counts the commit and rewrites a percentage line in place; otherwise
    the commit's hash and timestamp are printed on a line of their own.
    """
    if self.args.quiet or self.debug_reconstruction:
        return
    if not (self.git_hot_cli and self.git_hot_total_commits):
        print(f"commit {self.hash} {self.timestamp}", file=sys.stderr)
        return

    self.git_hot_completed_commits += 1
    completed = self.git_hot_completed_commits
    total = self.git_hot_total_commits
    percent = int((completed * 100) / total)
    # The leading carriage return makes each report overwrite the last one.
    print(
        f"\rProcessing commits: {percent:3d}% ({completed}/{total})",
        end="",
        file=sys.stderr,
        flush=True,
    )
    self.git_hot_progress_active = True
887
+
def report_progress_done(self):
    """Complete the in-place progress line, if one has been started."""
    if not self.git_hot_progress_active:
        return
    print(", done.", file=sys.stderr, flush=True)
    self.git_hot_progress_active = False
893
+
def checked_command_output(self, args):
    """Run the command args and return its standard output as text.

    The command line is shown when Git invocation debugging is on.
    Raises ProcessingError on a non-zero exit status, carrying the
    command's standard error or, if that is empty, the command line.
    """
    command_line = " ".join(args)
    self.debug_print_git(f"Run: {command_line}")
    completed = subprocess.run(
        args,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        check=False,
        **utf8_surrogateescape_text(),
    )
    if completed.returncode == 0:
        return completed.stdout
    raise ProcessingError(
        completed.stderr.rstrip() or f"Command failed: {command_line}"
    )
907
+
def run(self):
    """Process all input and produce the requested output.

    The input is consumed by a state machine that moves between the
    commit, diff, and range states until a handler reports EOF.  The
    output is then, in order of precedence: JSON metrics, reconstructed
    contents, details of the selected file, file metrics, or the lines
    still alive.  Empty input produces no output.  The input, the growth
    file, the progress line, and the pager are wound up in all cases.
    """
    try:
        growth_path = self.args.growth_file
        if growth_path:
            self.growth_file = open(
                growth_path,
                "w",
                encoding="utf-8",
                errors="surrogateescape",
                newline="",
            )

        self.current_line = self.reader.read_chomp()
        if self.current_line is None:
            return

        state = "commit"
        while state != "EOF":
            if state == "commit":
                state = self.process_commit_state()
            elif state == "diff":
                state = self.process_diff_state()
            elif state == "range":
                state = self.process_range_state()
            else:
                self.bail_out(f"Invalid state {state}")

        self.process_last_commit()
        if self.json_metrics_mode():
            self.dump_json_metrics()
        elif self.debug_reconstruction or self.args.churn_dir:
            self.reconstruct()
        elif not self.dump_selected_file_details():
            # No selected file details were output: fall back to the
            # file metrics or to the lines that are still alive.
            if self.args.file_metrics:
                self.dump_file_metrics()
            else:
                self.dump_alive()
    finally:
        self.reader.close()
        if self.growth_file is not None:
            self.growth_file.close()
        self.report_progress_done()
        if self.pager_proc:
            # Closing its input lets the pager see the end of the output.
            self.out.close()
            self.pager_proc.wait()
955
+
def file_commits(self, file: str) -> Dict[str, str]:
    """Return a dictionary from commit SHAs to the corresponding file name.

    The commits are those `git log --follow` lists for file; the name
    associated with a commit can differ from file.
    """
    command = [
        "git",
        "log",
        "-C", "-C", "-M", "-M",
        "--name-only",
        "--pretty=format:%H",
        "--follow",
        "--",
        file,
    ]
    entries = [
        entry.strip()
        for entry in self.checked_command_output(command).splitlines()
    ]
    # The output is read as records of three lines: the SHA, the file
    # name, and a third line that is ignored.
    return dict(zip(entries[0::3], entries[1::3]))
980
+
981
def stream_git_history(self, file: str=None) -> Iterator[str]:
    """
    Yields lines from `git show` / `git diff` for commits touching `file`,
    in topo order (daglp assumed installed), taking into account renames.

    When `file` is None, every commit on the path reported by daglp is
    streamed.  The first commit is rendered with `git show` (which emits
    its own "commit <sha> <timestamp>" header); each later commit is
    rendered as a `git diff` against the previously streamed commit,
    preceded by a manually constructed header and a blank line.

    Raises ProcessingError if daglp or a git command fails, or if daglp
    produces a line without both a SHA and a timestamp.
    """

    # Obtain map for this file's commits to the corresponding file name.
    # The file name may differ due to renames.
    sha_to_file = self.file_commits(file) if file else {}

    # Create the longest path through all the repo's commits.
    # git-log | daglp
    log_output = self.checked_command_output(
        ["git", "log", "--topo-order", "--pretty=format:%H %at %P"]
    )
    self.debug_print_git("Run: daglp")
    daglp = subprocess.run(
        ["daglp"],
        input=log_output,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        check=False,
        **utf8_surrogateescape_text(),
    )
    if daglp.returncode != 0:
        raise ProcessingError(daglp.stderr.rstrip() or "daglp failed")

    commit_path = []
    for line in daglp.stdout.splitlines():
        parts = line.split()
        if not parts:
            # Tolerate blank lines instead of failing with IndexError.
            continue
        if len(parts) < 2:
            raise ProcessingError(f"Unexpected daglp output: {line}")
        sha, ts = parts[0], parts[1]
        file_name = sha_to_file.get(sha) if file else None
        if file and not file_name:
            # This commit did not touch the tracked file.
            continue
        commit_path.append((sha, ts, file_name))
    self.git_hot_total_commits = len(commit_path)

    prev_sha = None
    prev_file_name = None

    for sha, ts, file_name in commit_path:

        # Get the diff for this commit
        if prev_sha is None:
            # --- first commit ---
            args = [
                "git",
                "show",
                "--pretty=tformat:commit %H %at",
                "--topo-order",
                "--reverse",
                "-U0",
                sha,
                "--",
            ]
            if file:
                # Append the path as ONE argument; `args += file_name`
                # would add each character of the name separately.
                args.append(file_name)
        else:
            # No --pretty commit header here, so construct it manually.
            yield f"commit {sha} {ts}\n"
            yield "\n"

            # --- diff with prev_sha ---
            args = [
                "git",
                "-c", "diff.renameLimit=30000",
                "diff",
                "-m", "-M", "-C", "-U0",
                f"{prev_sha}..{sha}",
                "--",
            ]
            if file:
                # Both names are needed so a rename between the two
                # commits is visible to git diff.
                args += [file_name, prev_file_name]

        # The context manager closes the pipe and reaps the child even
        # when the consumer abandons this generator midway.
        with subprocess.Popen(
            args,
            stdout=subprocess.PIPE,
            **utf8_surrogateescape_text(),
        ) as diff:
            self.debug_print_git(f"Run: {' '.join(args)}")

            # --- stream output ---
            for out_line in diff.stdout:
                self.debug_print_git(f"Line {self.reader.line_number}: {out_line}", end="")
                yield out_line

        if diff.returncode != 0:
            # Same failure policy as checked_command_output; stderr was
            # not captured, so only the command line can be reported.
            raise ProcessingError(f"Command failed: {' '.join(args)}")

        prev_sha = sha
        if file:
            prev_file_name = file_name
1072
+
1073
def process_commit_state(self):
    """Handle a "commit <hash> <timestamp>" line and pick the next state.

    Finalizes the previous commit (if any), records the new commit's
    fields, reports progress, and then inspects the following line(s):
    an optional blank separator followed by a "diff " or "commit " line.
    Returns "diff", "commit" or "EOF"; bails out on anything else.
    """
    if self.hash is not None:
        self.process_last_commit()

    fields = self.current_line.split()
    if len(fields) < 3 or fields[0] != "commit":
        self.bail_out("Expecting commit")
    self.commit, self.hash, self.timestamp = fields[:3]

    header = f"commit {self.hash} {self.timestamp}"
    if self.args.compressed:
        self.print_out(header)
    else:
        self.debug_print_commit_header(header)
    self.report_progress()

    # Separator
    line = self.reader.read_raw()
    if line is None:
        return "EOF"

    if not re.match(r"^$", line):
        # No blank separator: only another commit may follow directly.
        self.current_line = chomp(line)
        if line.startswith("commit "):
            # This happens on an empty commit
            return "commit"
        self.bail_out("Expecting an empty line or commit")
        return "EOF"

    line = self.reader.read_raw()
    if line is None:
        return "EOF"
    self.current_line = chomp(line)
    if line.startswith("diff "):
        return "diff"
    if line.startswith("commit "):
        # This happens on an empty commit with git diff
        return "commit"
    self.bail_out("Expecting diff, commit, or EOF")
    return "EOF"
1110
+
1111
def process_diff_state(self):
    """Parse a ``diff --git`` header and its extended header lines.

    Reads the header held in ``self.current_line``, stores the old and
    new paths in ``self.old`` / ``self.new``, and snapshots the tracked
    lines of those files into ``self.oref`` / ``self.nref``.  It then
    consumes extended header lines (rename, copy, new/deleted file,
    binary marker), queueing the corresponding change records in
    ``self.cc``.

    Returns the next parser state: "range" once the ``---`` / ``+++``
    pair has been consumed, "commit" or "diff" when such a line is met
    first, or "EOF" when the input ends.
    """
    # Diff header
    line = hide_escaped_quotes(self.current_line)
    # Try the unquoted and quoted path forms in turn; the last pattern is
    # a greedy fallback for whatever the earlier ones did not match.
    match = (
        re.match(r"^diff --git a/([^ ]*) b/(.*)", line)
        or re.match(r'^diff --git "a/((?:[^"\\]|\\.)*)" "b/((?:[^"\\]|\\.)*)"', line)
        or re.match(r'^diff --git a/([^ ]*) "b/((?:[^"\\]|\\.)*)"', line)
        or re.match(r'^diff --git "a/((?:[^"\\]|\\.)*)" b/(.*)', line)
        or re.match(r"^diff --git a/(.*) b/(.*)", line)
    )
    if not match:
        self.bail_out("Expecting a diff command")
    self.old = match.group(1)
    self.new = match.group(2)
    if '"' in line:
        self.old = unescape(self.old)
        self.new = unescape(self.new)

    self.debug_print_diff_header(self.current_line)
    self.debug_print_diff_header(f"old=[{self.old}] new=[{self.new}]")

    # Work on copies so the committed state in self.flt is only changed
    # through the change records in self.cc.
    old_file = self.flt.get(self.old)
    new_file = self.flt.get(self.new)
    self.oref = [line.copy() for line in old_file.lines] if old_file is not None else []
    self.oref_change_lifetimes = list(old_file.change_lifetimes) if old_file is not None else []
    if self.old == self.new:
        # In-place change: both references share the same list object,
        # which process_range_state and push_to_cc test with `is`.
        self.nref = self.oref
        self.nref_change_lifetimes = self.oref_change_lifetimes
    elif new_file is not None:
        self.nref = [line.copy() for line in new_file.lines]
        self.nref_change_lifetimes = list(new_file.change_lifetimes)
    else:
        self.nref = []
        self.nref_change_lifetimes = []

    state = "EOF"
    # Read the "extended header lines" to handle copies and renames
    from_path = None
    self.op = "inplace"
    while True:
        raw = self.reader.read_raw()
        if raw is None:
            return state
        self.debug_print_diff_extended("diff extended header: " + raw, end="")
        line = chomp(raw)
        if line.startswith("--- "):
            # Start of a file difference
            # --- a/main.c

            # +++ b/main.c
            self.reader.read_raw()

            # Range
            self.current_line = self.reader.read_chomp()
            state = "range"
            # Reset the per-file running totals used to compute offsets
            # in process_range_state.
            self.added_lines = 0
            self.removed_lines = 0
            return state
        match = re.match(r"^(copy|rename) from (.*)", line)
        if match:
            # Remember the source; the matching "... to" line uses it.
            from_path = unquote_unescape(match.group(2))
            continue
        match = re.match(r"^rename to (.*)", line)
        if match:
            to_path = unquote_unescape(match.group(1))
            self.op = "rename"
            if from_path is None:
                self.bail_out("Missing rename from")
            source = self.flt.get(from_path, FileDetails(from_path))
            # A rename is recorded as delete-source plus set-destination.
            self.cc.append({"op": "del", "path": from_path})
            self.cc.append(
                {
                    "op": "set",
                    "path": to_path,
                    "lines": [line.copy() for line in source.lines],
                    "binary": source.binary,
                    "change_lifetimes": list(source.change_lifetimes),
                }
            )
            old_details = self.flt.get(self.old, FileDetails(self.old))
            self.oref = [line.copy() for line in old_details.lines]
            self.oref_change_lifetimes = list(
                old_details.change_lifetimes
            )
            # Share one list so later hunks edit the renamed file in place.
            self.nref = self.oref
            self.nref_change_lifetimes = self.oref_change_lifetimes
            continue
        match = re.match(r"^copy to (.*)", line)
        if match:
            to_path = unquote_unescape(match.group(1))
            self.op = "copy"
            if from_path is None:
                self.bail_out("Missing copy from")
            source = self.flt.get(from_path, FileDetails(from_path))
            # The copy starts with the source's lines but an empty
            # change-lifetime history.
            self.cc.append(
                {
                    "op": "set",
                    "path": to_path,
                    "lines": [line.copy() for line in source.lines],
                    "binary": source.binary,
                    "change_lifetimes": [],
                }
            )
            if self.args.growth_file and self.output_source_code(to_path):
                # Copied lines count toward the project's LOC.
                self.loc += len(source.lines)
            old_details = self.flt.get(self.old, FileDetails(self.old))
            self.nref = [line.copy() for line in old_details.lines]
            self.nref_change_lifetimes = []
            continue
        if line.startswith("commit "):
            self.current_line = line
            return "commit"
        if line.startswith("diff --git "):
            self.current_line = line
            return "diff"
        if line.startswith("new file mode "):
            # Register the new file with no lines yet.
            self.cc.append(
                {
                    "op": "set",
                    "path": self.old,
                    "lines": [],
                    "binary": False,
                    "change_lifetimes": [],
                }
            )
            continue
        if line.startswith("deleted file mode "):
            self.op = "del"
            self.cc.append({"op": "del", "path": self.old})
            # Print death times of deleted file's lines
            if (
                not self.debug_reconstruction
                and not self.args.churn_dir
                and not self.json_metrics_mode()
                and self.output_source_code(self.old)
            ):
                for line_record in self.flt.get(self.old, FileDetails(self.old)).lines:
                    if self.args.compressed:
                        self.print_out(line_record.render_record(self.args))
                    else:
                        # Deferred: process_last_commit prints these.
                        self.delete_records.append(
                            line_record.render_deleted(self.args, self.timestamp)
                        )
            continue
        # NOTE(review): `[^ ]*` cannot match a path containing a space, so
        # such files appear not to be flagged as binary here -- confirm.
        if re.match(r"^Binary files ([^ ]*) and ([^ ]*) differ", line):
            current = self.flt.get(self.old)
            if current is None:
                current = FileDetails(self.old)
                self.flt[self.old] = current
            current.binary = True
            # Only a new diff, a new commit, or EOF is accepted after the
            # binary marker.
            raw = self.reader.read_raw()
            if raw is None:
                return "EOF"
            if raw.startswith("commit "):
                self.current_line = chomp(raw)
                return "commit"
            if raw.startswith("diff --git "):
                self.current_line = chomp(raw)
                return "diff"
            self.current_line = chomp(raw)
            self.bail_out("Expected diff, commit, or EOF")
        # Any other extended header line is skipped.
    return state
1273
+
1274
def process_range_state(self):
    """Apply one ``@@ -old +new @@`` hunk to the working line lists.

    Parses the hunk header held in ``self.current_line``, consumes the
    removed ("-") lines and then the added ("+") lines, and splices the
    change into ``self.oref`` / ``self.nref``.  Removed lines are either
    verified (reconstruction debugging), printed (compressed output), or
    queued in ``self.delete_records``.  When a hunk replaces N lines with
    N lines, each new line inherits the churn history of the line it
    replaces.

    Returns the next parser state: "range", "diff", "commit" or "EOF".
    """
    # Ranges within files
    self.debug_print_range_header(self.current_line)
    fields = self.current_line.split()
    if len(fields) < 3:
        self.bail_out("Expecting a diff range")
    at1, old_range, new_range = fields[0], fields[1], fields[2]
    at2 = fields[3] if len(fields) > 3 else None
    if at1 != "@@" or at2 != "@@":
        self.bail_out("Expecting a diff range")
    try:
        # range_parse presumably yields a half-open (start, end) pair;
        # both are used below as range() bounds and slice indices.
        old_start, old_end = range_parse(old_range)
        new_start, new_end = range_parse(new_range)
    except ProcessingError:
        self.bail_out("Expecting a diff range")

    line = self.reader.read_raw()
    # Earlier hunks of this file have already shifted line positions.
    new_offset = self.added_lines - self.removed_lines
    if self.oref is self.nref:
        old_offset = new_offset
    else:
        old_offset = -self.removed_lines
    old_file = self.flt.get(self.old)
    binary = old_file.binary if old_file is not None else False
    output = self.output_source_code(self.old)
    deleted_lines = []
    for i in range(old_start, old_end):
        if binary:
            # Skip the content of files flagged as binary.
            line = self.reader.read_raw()
            continue
        if line is None or not line.startswith("-"):
            self.current_line = chomp(line) if line is not None else None
            self.bail_out("Expecting a removed line")
        if output:
            self.loc -= 1
        pos = i + old_offset
        if 0 <= pos < len(self.oref):
            deleted_line = self.oref[pos]
            if self.debug_reconstruction:
                # Verify that the -removed line matches the previous +recorded one.
                if deleted_line.content != line[1:]:
                    self.bail_out(f"Expecting at({i} + {old_offset}) {deleted_line.content}")
            elif output and not self.json_metrics_mode():
                if self.args.compressed:
                    self.print_out(deleted_line.render_record(self.args))
                else:
                    self.delete_records.append(
                        deleted_line.render_deleted(self.args, self.timestamp)
                    )
            # Keep a copy so a same-size replacement can inherit its churn.
            deleted_lines.append(deleted_line.copy())
            self.oref_change_lifetimes.append(
                int(self.timestamp) - deleted_line.birth_timestamp
            )
        else:
            warning = (
                f"Warning: {self.hash} line {self.reader.line_number} "
                f"unencountered line {self.old}:{i + 1}"
            )
            print(
                warning,
                file=sys.stderr,
            )
        line = self.reader.read_raw()
    remove_len = old_end - old_start
    self.debug_print_splice(f"before oref={len(self.oref) - 1} ns={old_start} len={remove_len}")
    if not binary and remove_len != 0:
        del self.oref[old_start + old_offset : old_start + old_offset + remove_len]
        if self.oref is not self.nref:
            del self.nref[old_start + new_offset : old_start + new_offset + remove_len]
    self.debug_print_splice(f"after oref={len(self.oref) - 1}")
    # Skip git's "\ No newline at end of file" marker after the removals.
    if line is not None and line.startswith("\\"):
        line = self.reader.read_raw()
    add = []
    line_count = 0
    equal_length_change = old_end - old_start == new_end - new_start
    for i in range(new_start, new_end):
        if line is None or not line.startswith("+"):
            self.current_line = chomp(line) if line is not None else None
            self.bail_out("Expecting an added line")
        if equal_length_change and line_count < len(deleted_lines):
            # Treat the added line as a modification of the removed line
            # at the same position within the hunk.
            prior_line = deleted_lines[line_count]
            churn_count = prior_line.churn_count + 1
            change_lifetimes = list(prior_line.change_lifetimes)
            change_lifetimes.append(int(self.timestamp) - prior_line.birth_timestamp)
            if self.json_metrics_mode():
                content_history = prior_line.content_history + [line[1:]]
            else:
                content_history = None
        else:
            churn_count = 0
            change_lifetimes = []
            content_history = None
        new_line = LineDetails(
            content=line[1:],
            birth_timestamp=int(self.timestamp),
            birth_hash=self.hash,
            content_history=content_history,
            churn_count=churn_count,
            change_lifetimes=change_lifetimes,
        )
        if self.args.line_details:
            counts = line_details(line[1:])
            new_line.length = counts.length
            new_line.startspace = counts.startspace
            new_line.string = counts.string
            new_line.comment = counts.comment
            new_line.comma = counts.comma
            new_line.bracket = counts.bracket
            new_line.access = counts.access
            new_line.assignment = counts.assignment
            new_line.scope = counts.scope
            new_line.array = counts.array
            new_line.logical = counts.logical
        add.append(new_line)
        if not binary and output:
            self.loc += 1
        line_count += 1
        line = self.reader.read_raw()
    add_len = new_end - new_start
    self.debug_print_splice(f"before nref={len(self.nref) - 1} ns={new_start} len={add_len}")
    if not binary and add_len > 0:
        self.nref[new_start:new_start] = add
    self.added_lines += add_len
    self.removed_lines += remove_len
    self.debug_print_splice(f"after nref={len(self.nref) - 1}")
    # Skip git's "\ No newline at end of file" marker after the additions.
    if line is not None and line.startswith("\\"):
        line = self.reader.read_raw()
    if line is None:
        self.push_to_cc()
        return "EOF"
    if line.startswith("@@ "):
        # Another hunk of the same file: keep accumulating.
        self.current_line = chomp(line)
        return "range"
    if line.startswith("diff --git "):
        self.current_line = chomp(line)
        self.push_to_cc()
        return "diff"
    if line.startswith("commit "):
        self.current_line = chomp(line)
        self.push_to_cc()
        return "commit"
    self.current_line = chomp(line)
    self.bail_out("Expected diff, @@, commit, or EOF")
    return "EOF"
1418
+
1419
+ # Write the commit's effect on the project's LOC value
1420
def process_last_commit(self):
    """Finalize the commit being parsed.

    Prints the queued deleted-line records (unless a file, selected-file
    or JSON report was requested), applies the commit's change records,
    and appends the commit's timestamp and LOC total to the growth file
    when one is open.  Does nothing before the first commit.
    """
    if self.hash is None:
        return

    delta = self.loc - self.prev_loc
    self.debug_print_loc(f"prev_loc={self.prev_loc} loc={self.loc} delta={delta}")

    # Print records of deleted lines
    eol = f" {delta}\n" if self.args.delta else "\n"
    suppressed = (
        self.args.file_metrics
        or self.selected_file_details_mode()
        or self.json_metrics_mode()
    )
    if not suppressed:
        for record in self.delete_records:
            print(record, end=eol, file=self.out)
    self.delete_records = []

    self.commit_changes()
    if self.growth_file is not None:
        print(f"{self.timestamp} {self.loc}", file=self.growth_file)
    self.prev_loc = self.loc
1442
+
1443
+ # Reconstruct the state of the Git tree based on the log
1444
def reconstruct(self):
    """Write every tracked file under the reconstruction directory.

    The target is the directory given by the churn_dir option, or
    "RECONSTRUCTION" when none was given.  Any existing tree at that
    location is removed first.  The "/dev/null" pseudo-path and entries
    without details are skipped.
    """
    target_root = self.args.churn_dir or "RECONSTRUCTION"
    shutil.rmtree(target_root, ignore_errors=True)
    for path, details in self.flt.items():
        if path == "/dev/null" or details is None:
            continue
        # Tracked paths use "/" separators; rebuild them natively.
        destination = os.path.join(target_root, *path.split("/"))
        parent = os.path.dirname(destination)
        if parent:
            os.makedirs(parent, exist_ok=True)
        with open(
            destination,
            "w",
            encoding="utf-8",
            errors="surrogateescape",
            newline="",
        ) as out:
            self.write_reconstructed_lines(out, details)
1464
+
1465
def write_reconstructed_lines(self, out, details):
    """Write the formatted lines of one tracked file to `out`.

    The line formatter is first bound to the file and to the
    repository-wide churn, age, and change-lifetime populations, computed
    relative to the last commit's timestamp (0 if no commit was seen).
    """
    now = 0 if self.timestamp is None else int(self.timestamp)
    churns, ages, lifetimes = self.repo_line_populations(now)
    formatter = self.line_formatter
    formatter.bind_file(details, now, churns, ages, lifetimes)
    for tracked_line in details.lines:
        out.write(self.line_formatter.format(tracked_line))
1481
+
1482
def dump_selected_file_details(self):
    """Write the reconstructed tracked files to the output stream.

    Only acts in selected-file-details mode.  Returns True when the
    report was written, False when that mode is not active.
    """
    if not self.selected_file_details_mode():
        return False
    for path, details in self.flt.items():
        if details is not None and path != "/dev/null":
            self.write_reconstructed_lines(self.out, details)
    return True
1491
+
1492
def dump_alive(self):
    """Print the record of every line that is still alive.

    In compressed mode an "END" marker precedes the records; otherwise
    each record is suffixed with " alive NA".  Files rejected by the
    source-code filter are skipped.
    """
    compressed = self.args.compressed
    if compressed:
        self.print_out("END")
    eol = "\n" if compressed else " alive NA\n"

    # For each file
    for path, details in self.flt.items():
        if path == "/dev/null" or details is None:
            continue
        if not self.output_source_code(path):
            continue
        for alive_line in details.lines:
            print(alive_line.render_record(self.args), end=eol, file=self.out)
1510
+
1511
def dump_file_metrics(self):
    """Print one formatted metrics line per tracked file, sorted by path.

    For each file that passes the source-code filter, the per-line churn
    counts, the file's change lifetimes, and the per-line ages (relative
    to the last commit's timestamp) are handed to the file formatter.
    The "/dev/null" pseudo-path and entries without details are skipped.
    """
    # Fall back to 0 when no commit was seen, as dump_json_metrics and
    # write_reconstructed_lines do, instead of failing on int(None).
    current_timestamp = int(self.timestamp) if self.timestamp is not None else 0
    (
        repo_line_churns,
        repo_line_ages,
        repo_line_change_lifetimes,
    ) = self.repo_line_populations(current_timestamp)
    self.file_formatter.bind_repo(repo_line_churns, repo_line_ages, repo_line_change_lifetimes)
    for path in sorted(self.flt):
        if path == "/dev/null":
            continue
        details = self.flt[path]
        if details is None:
            continue
        if not self.output_source_code(path):
            continue
        churns = [line.churn_count for line in details.lines]
        change_lifetimes = list(details.change_lifetimes)
        ages = [current_timestamp - line.birth_timestamp for line in details.lines]
        print(
            self.file_formatter.format(path, churns, change_lifetimes, ages),
            file=self.out,
        )
1534
+
1535
def dump_json_metrics(self):
    """Write all collected file and line metrics as one JSON document.

    The document holds the last commit's hash and timestamp (0 if no
    commit was seen) and a path-sorted list of per-file metrics for the
    files that pass the source-code filter.  A newline follows it.
    """
    now = 0 if self.timestamp is None else int(self.timestamp)
    files = []
    for path in sorted(self.flt):
        details = self.flt[path] if path != "/dev/null" else None
        if details is None:
            continue
        if self.output_source_code(path):
            files.append(self.json_file_metrics(path, details, now))
    document = {
        "commit": self.hash,
        "timestamp": now,
        "files": files,
    }
    json.dump(document, self.out, ensure_ascii=False, indent=2)
    print(file=self.out)
1559
+
1560
def json_file_metrics(self, path, details, current_timestamp):
    """Build the JSON-serializable metrics of one tracked file.

    Lines are numbered from 1 in the order they appear in the file.
    """
    line_metrics = []
    for number, tracked_line in enumerate(details.lines, start=1):
        line_metrics.append(
            self.json_line_metrics(number, tracked_line, current_timestamp)
        )
    metrics = {
        "path": path,
        "binary": details.binary,
        "change_lifetimes": list(details.change_lifetimes),
    }
    metrics["lines"] = line_metrics
    return metrics
1571
+
1572
def json_line_metrics(self, line_number, line, current_timestamp):
    """Build the JSON-serializable metrics of one tracked line.

    The age is the difference between `current_timestamp` and the line's
    birth timestamp; the remaining fields are copied from the line.
    """
    metrics = {
        "line_number": line_number,
        "content": line.content,
        "contents": list(line.content_history),
        "birth_timestamp": line.birth_timestamp,
        "birth_hash": line.birth_hash,
        "age": current_timestamp - line.birth_timestamp,
        "churn": line.churn_count,
        "change_lifetimes": list(line.change_lifetimes),
    }
    # These keys are named exactly like the line attributes they mirror.
    mirrored = (
        "delta",
        "length",
        "startspace",
        "string",
        "comment",
        "comma",
        "bracket",
        "access",
        "assignment",
        "scope",
        "array",
        "logical",
    )
    for attribute in mirrored:
        metrics[attribute] = getattr(line, attribute)
    return metrics
1596
+
1597
def repo_line_populations(self, current_timestamp):
    """Collect per-file lists of line churns, ages, and change lifetimes.

    Returns three parallel lists with one entry per tracked file, each
    entry holding one value per line.  The "/dev/null" pseudo-path and
    entries without details are left out.
    """
    tracked = [
        details
        for path, details in self.flt.items()
        if path != "/dev/null" and details is not None
    ]
    churns = [[entry.churn_count for entry in details.lines] for details in tracked]
    ages = [
        [current_timestamp - entry.birth_timestamp for entry in details.lines]
        for details in tracked
    ]
    lifetimes = [
        [list(entry.change_lifetimes) for entry in details.lines]
        for details in tracked
    ]
    return churns, ages, lifetimes
1612
+
1613
def bail_out(self, expect):
    """Abort parsing with a ProcessingError describing the current position.

    The message names the current commit, the reader's line number, the
    offending line ("EOF" when there is none), and what was expected.
    """
    context = "EOF" if self.current_line is None else self.current_line
    location = f"commit {self.hash} {self.timestamp}; line {self.reader.line_number}: "
    problem = f"unexpected {context} ({expect})"
    raise ProcessingError(location + problem)
1621
+
1622
+ # Commit the commit changes recorded in @cc
1623
def commit_changes(self):
    """Apply the change records queued in self.cc to the file table.

    A "set" record replaces the file's details, and a "del" record
    removes the file.  Afterwards the queue is emptied.  If the commit
    just applied is the one named by the end_hash option, the tree is
    reconstructed and the program exits with status 0.
    """
    for rec in self.cc:
        op = rec["op"]
        path = rec["path"]
        self.debug_print_commit_changes(f"Change ({rec['op']}) {rec['path']}")
        if op == "del":
            self.flt.pop(path, None)
        elif op == "set":
            lines = rec["lines"]
            # Mark lines coming from commits with the commit's size
            if self.args.delta:
                delta = self.loc - self.prev_loc
                for line in lines:
                    line.delta = delta
            # Lifetimes already recorded for the path are kept when the
            # record does not carry its own.
            existing_lifetimes = self.flt.get(path, FileDetails(path)).change_lifetimes
            self.flt[path] = FileDetails(
                path,
                lines,
                rec.get("binary", False),
                rec.get("change_lifetimes", existing_lifetimes),
            )
        else:
            self.bail_out(f"Unknown change record {rec['op']}")
    self.cc = []

    # Check if the user has specified to stop at this commit.
    end_hash = self.args.end_hash
    if end_hash is not None and end_hash == self.hash:
        self.reconstruct()
        raise SystemExit(0)
1653
+
1654
+ # Push the old and new references to the change set
1655
def push_to_cc(self):
    """Queue the working line lists of the current diff as "set" records.

    Nothing is queued for a deleted file.  The old path gets its own
    record only when it has a separate line list and the operation is
    not a copy; the new path always gets one.
    """
    self.debug_print_push_cc(f"op={self.op} {self.old} {self.new}")
    if self.op == "del":
        return

    if self.old in self.flt:
        old_binary = self.flt.get(self.old).binary
    else:
        old_binary = False
    if self.new in self.flt:
        new_binary = self.flt.get(self.new).binary
    else:
        # An untracked destination inherits the source's binary flag.
        new_binary = old_binary

    separate_lists = self.oref is not self.nref
    if separate_lists and self.op != "copy":
        old_record = {
            "op": "set",
            "path": self.old,
            "lines": self.oref,
            "binary": old_binary,
            "change_lifetimes": self.oref_change_lifetimes,
        }
        self.cc.append(old_record)
    new_record = {
        "op": "set",
        "path": self.new,
        "lines": self.nref,
        "binary": new_binary,
        "change_lifetimes": self.nref_change_lifetimes,
    }
    self.cc.append(new_record)
1680
+
1681
def output_source_code(self, name):
    """Apply the module-level output_source_code filter to `name`.

    The source_only option is passed through as the second argument.
    """
    decision = output_source_code(name, self.args.source_only)
    return decision
1683
+
1684
def selected_file_details_mode(self):
    """Tell whether details of a single selected path are to be reported.

    True when a path option is present and set, and no churn directory
    was requested.
    """
    selected_path = getattr(self.args, "path", None)
    if selected_path is None:
        return False
    return not self.args.churn_dir
1686
+
1687
def json_metrics_mode(self):
    """Return the json_metrics option, or False when it is not defined."""
    options = self.args
    return getattr(options, "json_metrics", False)
1689
+
1690
def lifetime_argument_parser():
    """Return a CLI parser for the original script used for research"""
    parser = argparse.ArgumentParser(
        prog="lifetime",
        description="Explore line lifetime and churn"
    )

    def flag(help_text, dest):
        """Return the settings of a boolean switch."""
        return {"dest": dest, "action": "store_true", "help": help_text}

    # Options in the order they are listed by --help.
    options = [
        (
            ("-c",),
            flag(
                "Output in a compressed format: line death times can be obtained "
                "from commit markers and alive lines appear after a line marked END.",
                "compressed",
            ),
        ),
        (
            ("-C",),
            {
                "dest": "churn_dir",
                "metavar": "dir",
                "help": "Reconstruct source files with lines preceded by churn count",
            },
        ),
        (("-d",), flag("Report the LoC delta", "delta")),
        (
            ("-e",),
            {
                "dest": "end_hash",
                "metavar": "SHA",
                "help": "End processing after the specified commit hash",
            },
        ),
        (("-E",), flag("Redirect output to stderr", "redirect_output")),
        (("-f",), flag("List current files with churn and age metrics", "file_metrics")),
        (
            ("-g",),
            {
                "dest": "growth_file",
                "metavar": "file",
                "help": "Create a file with total LoC for each commit",
            },
        ),
        (("-j",), flag("Output collected file and line metrics as JSON", "json_metrics")),
        (("-l",), flag("Output number of token types contained in each line", "line_details")),
        (("-s",), flag("Report only source code files", "source_only")),
        (("-t",), flag("Show tokens with lifetime", "tokens")),
        (
            ("--format",),
            {
                "dest": "output_format",
                "default": None,
                "help": "Format output using a Python f-string",
            },
        ),
        (("input_files",), {"nargs": "*"}),
    ]
    for names, settings in options:
        parser.add_argument(*names, **settings)
    return parser
1763
+
1764
+
1765
def git_hot_argument_parser():
    """Return a CLI parser for the git-hot Git extension."""

    parser = argparse.ArgumentParser(
        prog="git-hot",
        description="Report code lifetime and churn.",
        usage="%(prog)s [-h] [-d dir] [-q] [ref] [[--] path]",
        add_help=True,
    )

    # Options in the order they are listed by --help.
    options = [
        (
            ("-d", "--dir"),
            {
                "metavar": "dir",
                "dest": "churn_dir",
                "help": "Reconstruct source files with lines preceded by churn count",
            },
        ),
        (
            ("--format",),
            {
                "dest": "output_format",
                "default": None,
                "help": "Format file output using a Python f-string",
            },
        ),
        (
            ("ref",),
            {"nargs": "?", "default": None, "help": "Git reference"},
        ),
        (
            ("path",),
            {"nargs": "?", "help": "Report line details for the specified file"},
        ),
    ]
    for names, settings in options:
        parser.add_argument(*names, **settings)
    return parser
1802
+
1803
def parse_main_args(argv=None, prog=None):
    """Parse command-line arguments for the selected CLI variant.

    argv: argument list without the program name; defaults to
        sys.argv[1:].
    prog: invocation name used to choose the CLI; defaults to
        sys.argv[0].

    Returns the parsed argparse namespace.  Invalid usage ends in
    parser.error, which exits the program.
    """

    # The program offers two different CLIs. Choose based on invocation name.
    # Only the final path component is examined, so that a directory that
    # happens to be called "git-hot" does not select the git-hot CLI.
    prog = prog or sys.argv[0]
    if "git-hot" in os.path.basename(prog):
        parser = git_hot_argument_parser()
        git_hot = True
    else:
        parser = lifetime_argument_parser()
        git_hot = False

    # Options shared by both CLIs.
    parser.add_argument("-q", "--quiet", dest="quiet", action="store_true",
                        help="Quiet progress output")
    parser.add_argument("-D", "--debug", dest="debug_options", metavar="opts",
                        help="Debug as specified by the letters in opts")
    parser.add_argument(
        "--color",
        choices=["always", "never"],
        default=None,
        help="Control colored output",
    )

    parser.add_argument(
        "--color-domain",
        choices=["churn", "age", "lifetime"],
        default="churn",
        help="Color lines by churn, age, or lifetime",
    )

    # Custom argument parsing for the Git "[ref] [[--] path]" convention
    argv = sys.argv[1:] if argv is None else list(argv)

    if git_hot and "--" in argv:
        idx = argv.index("--")
        pre = argv[:idx]
        post = argv[idx + 1:]

        if len(post) > 1:
            parser.error("at most one path allowed after --")

        args = parser.parse_args(pre)

        # Two positionals before "--" would already have filled in path.
        if getattr(args, "path", None) is not None:
            parser.error("path specified before --")

        args.path = post[0] if post else None
        return args

    return parser.parse_args(argv)
1854
+
1855
+
1856
def main(argv=None):
    """Run the program and return its process exit status.

    argv: argument list without the program name; None means use the
        process arguments.

    Returns 0 on success, 1 on a ProcessingError, and otherwise the
    status carried by a SystemExit raised during parsing or processing.
    """
    try:
        args = parse_main_args(argv)
        processor = Processor(args)
        processor.run()
        return 0
    except SystemExit as exc:
        code = exc.code
        if code is None:
            # A bare exit denotes success; int(None) would raise TypeError.
            return 0
        try:
            return int(code)
        except (TypeError, ValueError):
            # A non-numeric code is an error message: report it and fail,
            # as the interpreter does for sys.exit("message").
            print_stderr_line(str(code))
            return 1
    except ProcessingError as exc:
        print_stderr_line(f"Error: {exc}")
        return 1
1867
+
1868
+
1869
# Script entry point: the status returned by main() becomes the exit code.
if __name__ == "__main__":
    raise SystemExit(main())