better-git-of-theseus 0.4.2__py3-none-any.whl → 0.5.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {better_git_of_theseus-0.4.2.dist-info → better_git_of_theseus-0.5.0.dist-info}/METADATA +4 -2
- {better_git_of_theseus-0.4.2.dist-info → better_git_of_theseus-0.5.0.dist-info}/RECORD +9 -9
- git_of_theseus/app.py +88 -8
- git_of_theseus/cmd.py +2 -3
- git_of_theseus/plotly_plots.py +71 -111
- {better_git_of_theseus-0.4.2.dist-info → better_git_of_theseus-0.5.0.dist-info}/WHEEL +0 -0
- {better_git_of_theseus-0.4.2.dist-info → better_git_of_theseus-0.5.0.dist-info}/entry_points.txt +0 -0
- {better_git_of_theseus-0.4.2.dist-info → better_git_of_theseus-0.5.0.dist-info}/licenses/LICENSE +0 -0
- {better_git_of_theseus-0.4.2.dist-info → better_git_of_theseus-0.5.0.dist-info}/top_level.txt +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: better-git-of-theseus
|
|
3
|
-
Version: 0.4.2
|
|
3
|
+
Version: 0.5.0
|
|
4
4
|
Summary: Plot stats on Git repositories with interactive Plotly charts
|
|
5
5
|
Home-page: https://github.com/onewesong/better-git-of-theseus
|
|
6
6
|
Author: Erik Bernhardsson
|
|
@@ -16,6 +16,7 @@ Requires-Dist: plotly
|
|
|
16
16
|
Requires-Dist: streamlit
|
|
17
17
|
Requires-Dist: python-dateutil
|
|
18
18
|
Requires-Dist: scipy
|
|
19
|
+
Requires-Dist: matplotlib
|
|
19
20
|
Dynamic: author
|
|
20
21
|
Dynamic: author-email
|
|
21
22
|
Dynamic: description
|
|
@@ -41,7 +42,8 @@ Dynamic: summary
|
|
|
41
42
|
|
|
42
43
|
**Better Git of Theseus** is a modern refactor of the original [git-of-theseus](https://github.com/erikbern/git-of-theseus). It provides a fully interactive Web Dashboard powered by **Streamlit** and **Plotly**, making it easier than ever to visualize how your code evolves over time.
|
|
43
44
|
|
|
44
|
-

|
|
46
|
+

|
|
45
47
|
|
|
46
48
|
## Key Enhancements
|
|
47
49
|
|
|
@@ -1,15 +1,15 @@
|
|
|
1
|
-
better_git_of_theseus-0.4.2.dist-info/licenses/LICENSE,sha256=yNNDAWUe1WLKnuUcRp9X95C-yP2lfGl69m97Ftw-DUw,11345
|
|
1
|
+
better_git_of_theseus-0.5.0.dist-info/licenses/LICENSE,sha256=yNNDAWUe1WLKnuUcRp9X95C-yP2lfGl69m97Ftw-DUw,11345
|
|
2
2
|
git_of_theseus/__init__.py,sha256=LeG5tCOgvZMmKOjmO_HRg54sWF2K3-lTBf8H_vHMFio,273
|
|
3
3
|
git_of_theseus/analyze.py,sha256=78E1G2FdSS9VZd0jKSnO5gpXwzNCjtzkSAxSzadYM3A,21547
|
|
4
|
-
git_of_theseus/app.py,sha256=
|
|
5
|
-
git_of_theseus/cmd.py,sha256=
|
|
4
|
+
git_of_theseus/app.py,sha256=6GuBWC3WaN0VkwHAmKM_6ZNalf7NcXgXYq3ZP6XMDvY,8463
|
|
5
|
+
git_of_theseus/cmd.py,sha256=kvi3sgC0ICj7PvXuzCQsijU5swS3JkfAtIT5yZRNbFo,550
|
|
6
6
|
git_of_theseus/line_plot.py,sha256=LegoVy0VEFT4sM5fYCES-I_2H9UaerCopDI3J2dyHeU,3117
|
|
7
|
-
git_of_theseus/plotly_plots.py,sha256=
|
|
7
|
+
git_of_theseus/plotly_plots.py,sha256=Ru-Xwj8c4XdDJfzjDUqU0CxddmLq76LTtX5r0VqSYvs,7267
|
|
8
8
|
git_of_theseus/stack_plot.py,sha256=q4-YlW3PyiwbIBFeHBA3dsdR1I_XKUQD74hAuSfhIR4,3150
|
|
9
9
|
git_of_theseus/survival_plot.py,sha256=NEITAa0pMD9uJVsPd7JA71ucavnG1RxgC-F6Jk-K5bE,4868
|
|
10
10
|
git_of_theseus/utils.py,sha256=Xw2udch9ixSgFInGhIC4_RJ_9IB3E8MmV1dmznavCWc,1026
|
|
11
|
-
better_git_of_theseus-0.
|
|
12
|
-
better_git_of_theseus-0.4.2.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
|
|
13
|
-
better_git_of_theseus-0.4.2.dist-info/entry_points.txt,sha256=dEBL6oCDAozY13Y_qxS_6-qkyCA7R2TpjoLH6QJR72g,66
|
|
14
|
-
better_git_of_theseus-0.4.2.dist-info/top_level.txt,sha256=2kpp8WgiBzqVLxua_mBS00Nj4cUORaRbJi121THJ_0o,15
|
|
15
|
-
better_git_of_theseus-0.4.2.dist-info/RECORD,,
|
|
11
|
+
better_git_of_theseus-0.5.0.dist-info/METADATA,sha256=I4B84JL2IDJ-ICFiK3Su4uxkcqumW-h4PAWQdl0h1BI,3602
|
|
12
|
+
better_git_of_theseus-0.5.0.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
|
|
13
|
+
better_git_of_theseus-0.5.0.dist-info/entry_points.txt,sha256=dEBL6oCDAozY13Y_qxS_6-qkyCA7R2TpjoLH6QJR72g,66
|
|
14
|
+
better_git_of_theseus-0.5.0.dist-info/top_level.txt,sha256=2kpp8WgiBzqVLxua_mBS00Nj4cUORaRbJi121THJ_0o,15
|
|
15
|
+
better_git_of_theseus-0.5.0.dist-info/RECORD,,
|
git_of_theseus/app.py
CHANGED
|
@@ -4,13 +4,26 @@ import tempfile
|
|
|
4
4
|
import shutil
|
|
5
5
|
try:
|
|
6
6
|
from git_of_theseus.analyze import analyze
|
|
7
|
-
from git_of_theseus.plotly_plots import plotly_stack_plot, plotly_line_plot, plotly_survival_plot
|
|
7
|
+
from git_of_theseus.plotly_plots import plotly_stack_plot, plotly_line_plot, plotly_survival_plot, plotly_bar_plot
|
|
8
8
|
except ImportError:
|
|
9
9
|
from analyze import analyze
|
|
10
|
-
from plotly_plots import plotly_stack_plot, plotly_line_plot, plotly_survival_plot
|
|
10
|
+
from plotly_plots import plotly_stack_plot, plotly_line_plot, plotly_survival_plot, plotly_bar_plot
|
|
11
11
|
|
|
12
12
|
st.set_page_config(page_title="Git of Theseus Dash", layout="wide")
|
|
13
13
|
|
|
14
|
+
# GitHub Link in Sidebar
|
|
15
|
+
st.sidebar.markdown(
|
|
16
|
+
"""
|
|
17
|
+
<div style="display: flex; align-items: center; margin-bottom: 20px;">
|
|
18
|
+
<img src="https://github.githubassets.com/images/modules/logos_page/GitHub-Mark.png" width="30" style="margin-right: 10px;">
|
|
19
|
+
<a href="https://github.com/onewesong/better-git-of-theseus" target="_blank" style="text-decoration: none; color: inherit; font-weight: bold;">
|
|
20
|
+
better-git-of-theseus
|
|
21
|
+
</a>
|
|
22
|
+
</div>
|
|
23
|
+
""",
|
|
24
|
+
unsafe_allow_html=True
|
|
25
|
+
)
|
|
26
|
+
|
|
14
27
|
st.title("📊 Git of Theseus - Repository Analysis")
|
|
15
28
|
|
|
16
29
|
import sys
|
|
@@ -18,12 +31,49 @@ import sys
|
|
|
18
31
|
# Sidebar Configuration
|
|
19
32
|
st.sidebar.header("Configuration")
|
|
20
33
|
|
|
34
|
+
with st.sidebar.expander("📖 How to use", expanded=False):
|
|
35
|
+
st.markdown("""
|
|
36
|
+
**Better Git of Theseus** is a tool to analyze the evolution of Git repositories.
|
|
37
|
+
|
|
38
|
+
### Plots Explained:
|
|
39
|
+
- **Stack Plot**: Shows code growth over time, broken down by cohort (when code was added).
|
|
40
|
+
- **Line Plot**: Shows trends across different dimensions (Author, Extension, etc.).
|
|
41
|
+
- **Distribution**: Shows the **current** distribution (Who contributed most, which file types are dominant).
|
|
42
|
+
- **Survival Plot**: Estimates how long a line of code typically lasts before being modified or deleted.
|
|
43
|
+
|
|
44
|
+
### Tips:
|
|
45
|
+
- **Cohort Format**: `%Y` (Yearly) and `%Y-%m` (Monthly) are recommended.
|
|
46
|
+
- **Mailmap**: Use a `.mailmap` file in the repo root to resolve duplicate author names.
|
|
47
|
+
""")
|
|
48
|
+
|
|
21
49
|
default_repo = "."
|
|
22
50
|
if len(sys.argv) > 1:
|
|
23
51
|
default_repo = sys.argv[1]
|
|
24
52
|
|
|
25
|
-
repo_path =
|
|
26
|
-
|
|
53
|
+
repo_path = default_repo
|
|
54
|
+
# Path display removed as per user request
|
|
55
|
+
|
|
56
|
+
# Fetch branches for the selectbox
|
|
57
|
+
try:
|
|
58
|
+
import git
|
|
59
|
+
repo = git.Repo(repo_path)
|
|
60
|
+
# Get local branches
|
|
61
|
+
branches = [h.name for h in repo.heads]
|
|
62
|
+
|
|
63
|
+
# Try to determine the best default branch (active one, or master/main)
|
|
64
|
+
try:
|
|
65
|
+
current_active = repo.active_branch.name
|
|
66
|
+
except:
|
|
67
|
+
current_active = "master"
|
|
68
|
+
|
|
69
|
+
if current_active in branches:
|
|
70
|
+
branches.remove(current_active)
|
|
71
|
+
|
|
72
|
+
options = [current_active] + sorted(branches)
|
|
73
|
+
branch = st.sidebar.selectbox("Branch", options=options)
|
|
74
|
+
except Exception as e:
|
|
75
|
+
# Fallback if git repo access fails
|
|
76
|
+
branch = st.sidebar.text_input("Branch", value="master")
|
|
27
77
|
|
|
28
78
|
with st.sidebar.expander("Analysis Parameters"):
|
|
29
79
|
cohortfm = st.text_input(
|
|
@@ -35,9 +85,23 @@ with st.sidebar.expander("Analysis Parameters"):
|
|
|
35
85
|
"- `%Y-W%W`: Week (e.g., 2023-W01)\n"
|
|
36
86
|
"- `%Y-%m-%d`: Day"
|
|
37
87
|
)
|
|
38
|
-
interval = st.number_input(
|
|
39
|
-
|
|
40
|
-
|
|
88
|
+
interval = st.number_input(
|
|
89
|
+
"Analysis Interval (seconds)",
|
|
90
|
+
value=7 * 24 * 60 * 60,
|
|
91
|
+
help="The time step between data points. Default is 604800s (7 days). Larger values are faster; smaller values result in smoother curves."
|
|
92
|
+
)
|
|
93
|
+
st.caption(f"Current resolution: {interval / 86400:.1f} days")
|
|
94
|
+
|
|
95
|
+
procs = st.number_input(
|
|
96
|
+
"Parallel Processes",
|
|
97
|
+
value=2,
|
|
98
|
+
min_value=1,
|
|
99
|
+
help="Number of concurrent processes. Increase to speed up analysis on multi-core CPUs, but note it increases RAM usage."
|
|
100
|
+
)
|
|
101
|
+
ignore = st.text_area(
|
|
102
|
+
"Ignore Patterns",
|
|
103
|
+
help="Glob patterns to ignore (comma separated), e.g.: 'tests/**, *.md'"
|
|
104
|
+
).split(",")
|
|
41
105
|
ignore = [i.strip() for i in ignore if i.strip()]
|
|
42
106
|
|
|
43
107
|
@st.cache_data(show_spinner=False)
|
|
@@ -71,7 +135,7 @@ if st.sidebar.button("🚀 Run Analysis") or (len(sys.argv) > 1 and st.session_s
|
|
|
71
135
|
# Main View
|
|
72
136
|
if st.session_state.analysis_results:
|
|
73
137
|
results = st.session_state.analysis_results
|
|
74
|
-
tab1, tab2, tab3 = st.tabs(["Stack Plot", "Line Plot", "Survival Plot"])
|
|
138
|
+
tab1, tab2, tab3, tab4 = st.tabs(["Stack Plot", "Line Plot", "Distribution", "Survival Plot"])
|
|
75
139
|
|
|
76
140
|
with tab1:
|
|
77
141
|
st.header("Stack Plot")
|
|
@@ -115,6 +179,22 @@ if st.session_state.analysis_results:
|
|
|
115
179
|
st.warning(f"Data for {data_source_label_line} not found.")
|
|
116
180
|
|
|
117
181
|
with tab3:
|
|
182
|
+
st.header("Latest Distribution")
|
|
183
|
+
col1, col2 = st.columns([1, 3])
|
|
184
|
+
with col1:
|
|
185
|
+
data_source_label_bar = st.selectbox("Data Source", list(source_map.keys()), key="bar_source")
|
|
186
|
+
data_key_bar = source_map[data_source_label_bar]
|
|
187
|
+
max_n_bar = st.slider("Max Series", 5, 100, 30, key="bar_max_n")
|
|
188
|
+
with col2:
|
|
189
|
+
project_name = os.path.basename(os.path.abspath(repo_path))
|
|
190
|
+
data_bar = results.get(data_key_bar)
|
|
191
|
+
if data_bar:
|
|
192
|
+
fig = plotly_bar_plot(data_bar, max_n=max_n_bar, title=f"{project_name} - {data_source_label_bar}")
|
|
193
|
+
st.plotly_chart(fig, width="stretch")
|
|
194
|
+
else:
|
|
195
|
+
st.warning(f"Data for {data_source_label_bar} not found.")
|
|
196
|
+
|
|
197
|
+
with tab4:
|
|
118
198
|
st.header("Survival Plot")
|
|
119
199
|
col1, col2 = st.columns([1, 3])
|
|
120
200
|
with col1:
|
git_of_theseus/cmd.py
CHANGED
|
@@ -7,9 +7,8 @@ def main():
|
|
|
7
7
|
cmd_dir = os.path.dirname(os.path.abspath(__file__))
|
|
8
8
|
app_path = os.path.join(cmd_dir, "app.py")
|
|
9
9
|
|
|
10
|
-
#
|
|
11
|
-
repo_path =
|
|
12
|
-
repo_path = os.path.abspath(repo_path)
|
|
10
|
+
# Always use the current working directory
|
|
11
|
+
repo_path = os.path.abspath(os.getcwd())
|
|
13
12
|
|
|
14
13
|
# Run streamlit
|
|
15
14
|
# We pass the repo_path as an argument to the streamlit script
|
git_of_theseus/plotly_plots.py
CHANGED
|
@@ -7,12 +7,17 @@ import math
|
|
|
7
7
|
import os
|
|
8
8
|
from .utils import generate_n_colors
|
|
9
9
|
|
|
10
|
+
# Harmonious, professional color palette (Modern & Muted)
|
|
11
|
+
# Inspired by Tableau 20 and modern UI systems
|
|
12
|
+
PREMIUM_PALETTE = [
|
|
13
|
+
"#4E79A7", "#A0CBE8", "#F28E2B", "#FFBE7D", "#59A14F",
|
|
14
|
+
"#8CD17D", "#B6992D", "#F1CE63", "#499894", "#86BCB6",
|
|
15
|
+
"#E15759", "#FF9D9A", "#79706E", "#BAB0AC", "#D37295",
|
|
16
|
+
"#FABFD2", "#B07AA1", "#D4A1D2", "#9D7660", "#D7B5A6"
|
|
17
|
+
]
|
|
18
|
+
|
|
10
19
|
def _process_stack_line_data(data, max_n=20, normalize=False):
|
|
11
|
-
# Handle dict or file path
|
|
12
|
-
# If it's a file path, load it? But app.py passes dict now.
|
|
13
|
-
# Let's assume dict for now as per app.py refactor.
|
|
14
20
|
if not isinstance(data, dict):
|
|
15
|
-
# Fallback if needed, though app.py sends dict
|
|
16
21
|
import json
|
|
17
22
|
data = json.load(open(data))
|
|
18
23
|
|
|
@@ -20,34 +25,16 @@ def _process_stack_line_data(data, max_n=20, normalize=False):
|
|
|
20
25
|
labels = data["labels"]
|
|
21
26
|
ts = [dateutil.parser.parse(t) for t in data["ts"]]
|
|
22
27
|
|
|
23
|
-
# Sort and filter top N
|
|
24
28
|
if y.shape[0] > max_n:
|
|
25
|
-
# Sort by max value in the series
|
|
26
29
|
js = sorted(range(len(labels)), key=lambda j: max(y[j]), reverse=True)
|
|
27
|
-
|
|
28
|
-
# Calculate other sum
|
|
29
30
|
other_indices = js[max_n:]
|
|
30
31
|
if other_indices:
|
|
31
32
|
other_sum = np.sum([y[j] for j in other_indices], axis=0)
|
|
32
|
-
|
|
33
|
-
# Top N indices
|
|
34
33
|
top_js = sorted(js[:max_n], key=lambda j: labels[j])
|
|
35
|
-
|
|
36
34
|
y = np.array([y[j] for j in top_js] + [other_sum])
|
|
37
35
|
labels = [labels[j] for j in top_js] + ["other"]
|
|
38
|
-
else:
|
|
39
|
-
# Should hopefully not happen if shape[0] > max_n
|
|
40
|
-
pass
|
|
41
|
-
else:
|
|
42
|
-
# Sort alphabetically for consistency
|
|
43
|
-
js = range(len(labels))
|
|
44
|
-
# strictly speaking existing code didn't sort if <= max_n?
|
|
45
|
-
# "labels = data['labels']" in existing code.
|
|
46
|
-
pass
|
|
47
|
-
|
|
48
|
-
y_sums = np.sum(y, axis=0)
|
|
49
36
|
|
|
50
|
-
|
|
37
|
+
y_sums = np.sum(y, axis=0)
|
|
51
38
|
y_sums[y_sums == 0] = 1.0
|
|
52
39
|
|
|
53
40
|
if normalize:
|
|
@@ -57,43 +44,39 @@ def _process_stack_line_data(data, max_n=20, normalize=False):
|
|
|
57
44
|
|
|
58
45
|
def plotly_stack_plot(data, max_n=20, normalize=False, title=None):
|
|
59
46
|
ts, y, labels = _process_stack_line_data(data, max_n, normalize)
|
|
60
|
-
|
|
61
47
|
fig = go.Figure()
|
|
62
48
|
|
|
63
|
-
# Use a nice color palette
|
|
64
|
-
colors = px.colors.qualitative.Plotly
|
|
65
|
-
if len(labels) > len(colors):
|
|
66
|
-
colors = px.colors.qualitative.Dark24 # More colors if needed
|
|
67
|
-
|
|
68
49
|
for i, label in enumerate(labels):
|
|
69
|
-
color =
|
|
50
|
+
color = PREMIUM_PALETTE[i % len(PREMIUM_PALETTE)]
|
|
70
51
|
fig.add_trace(go.Scatter(
|
|
71
52
|
x=ts,
|
|
72
53
|
y=y[i],
|
|
73
54
|
mode='lines',
|
|
74
55
|
name=label,
|
|
75
|
-
stackgroup='one',
|
|
76
|
-
line=dict(width=0.5, color=
|
|
77
|
-
fillcolor=color
|
|
56
|
+
stackgroup='one',
|
|
57
|
+
line=dict(width=0.5, color='rgba(255,255,255,0.3)'),
|
|
58
|
+
fillcolor=color,
|
|
59
|
+
hoverinfo='x+y+name'
|
|
78
60
|
))
|
|
79
61
|
|
|
80
62
|
fig.update_layout(
|
|
81
|
-
title=dict(text=title, x=0.5) if title else None,
|
|
63
|
+
title=dict(text=title, x=0.5, font=dict(size=20)) if title else None,
|
|
82
64
|
yaxis=dict(
|
|
83
|
-
title="Share of
|
|
84
|
-
range=[0, 100] if normalize else None
|
|
65
|
+
title="Share of LoC (%)" if normalize else "Lines of Code",
|
|
66
|
+
range=[0, 100.1] if normalize else None,
|
|
67
|
+
gridcolor='rgba(128,128,128,0.2)'
|
|
85
68
|
),
|
|
86
|
-
xaxis=dict(title="Date"),
|
|
69
|
+
xaxis=dict(title="Date", gridcolor='rgba(128,128,128,0.2)'),
|
|
87
70
|
hovermode="x unified",
|
|
88
|
-
margin=dict(l=20, r=20, t=
|
|
71
|
+
margin=dict(l=20, r=20, t=60, b=20),
|
|
72
|
+
legend=dict(orientation="h", yanchor="bottom", y=1.02, xanchor="right", x=1),
|
|
73
|
+
plot_bgcolor='rgba(0,0,0,0)',
|
|
74
|
+
paper_bgcolor='rgba(0,0,0,0)'
|
|
89
75
|
)
|
|
90
|
-
|
|
91
|
-
|
|
92
76
|
return fig
|
|
93
77
|
|
|
94
78
|
def plotly_line_plot(data, max_n=20, normalize=False, title=None):
|
|
95
79
|
ts, y, labels = _process_stack_line_data(data, max_n, normalize)
|
|
96
|
-
|
|
97
80
|
fig = go.Figure()
|
|
98
81
|
|
|
99
82
|
for i, label in enumerate(labels):
|
|
@@ -102,40 +85,30 @@ def plotly_line_plot(data, max_n=20, normalize=False, title=None):
|
|
|
102
85
|
y=y[i],
|
|
103
86
|
mode='lines',
|
|
104
87
|
name=label,
|
|
105
|
-
line=dict(width=2)
|
|
88
|
+
line=dict(width=2.5, color=PREMIUM_PALETTE[i % len(PREMIUM_PALETTE)])
|
|
106
89
|
))
|
|
107
90
|
|
|
108
91
|
fig.update_layout(
|
|
109
|
-
title=dict(text=title, x=0.5) if title else None,
|
|
92
|
+
title=dict(text=title, x=0.5, font=dict(size=20)) if title else None,
|
|
110
93
|
yaxis=dict(
|
|
111
|
-
title="Share of
|
|
112
|
-
range=[0, 100] if normalize else None
|
|
94
|
+
title="Share of LoC (%)" if normalize else "Lines of Code",
|
|
95
|
+
range=[0, 100.1] if normalize else None,
|
|
96
|
+
gridcolor='rgba(128,128,128,0.2)'
|
|
113
97
|
),
|
|
114
|
-
xaxis=dict(title="Date"),
|
|
98
|
+
xaxis=dict(title="Date", gridcolor='rgba(128,128,128,0.2)'),
|
|
115
99
|
hovermode="x unified",
|
|
116
|
-
margin=dict(l=20, r=20, t=
|
|
100
|
+
margin=dict(l=20, r=20, t=60, b=20),
|
|
101
|
+
legend=dict(orientation="h", yanchor="bottom", y=1.02, xanchor="right", x=1),
|
|
102
|
+
plot_bgcolor='rgba(0,0,0,0)',
|
|
103
|
+
paper_bgcolor='rgba(0,0,0,0)'
|
|
117
104
|
)
|
|
118
|
-
|
|
119
|
-
|
|
120
105
|
return fig
|
|
121
106
|
|
|
122
107
|
def plotly_survival_plot(commit_history, exp_fit=False, years=5, title=None):
|
|
123
|
-
# Logic copied from survival_plot.py
|
|
124
|
-
# commit_history is {sha: [[ts, count], ...]}
|
|
125
|
-
|
|
126
108
|
deltas = collections.defaultdict(lambda: np.zeros(2))
|
|
127
109
|
total_n = 0
|
|
128
110
|
YEAR = 365.25 * 24 * 60 * 60
|
|
129
111
|
|
|
130
|
-
# Process history
|
|
131
|
-
# Input might be a list of histories if we support multiple inputs,
|
|
132
|
-
# but based on app.py we pass a single result["survival"] dict.
|
|
133
|
-
# However, existing survival_plot took a LIST of filenames.
|
|
134
|
-
# Let's support the single dict passed from app.py.
|
|
135
|
-
|
|
136
|
-
# The logic in survival_plot.py iterates over input_fns, loads them, and computes `all_deltas`.
|
|
137
|
-
# Here we assume `commit_history` IS the content of one such file (the dict).
|
|
138
|
-
|
|
139
112
|
for commit, history in commit_history.items():
|
|
140
113
|
t0, orig_count = history[0]
|
|
141
114
|
total_n += orig_count
|
|
@@ -145,99 +118,86 @@ def plotly_survival_plot(commit_history, exp_fit=False, years=5, title=None):
|
|
|
145
118
|
last_count = count
|
|
146
119
|
deltas[history[-1][0] - t0] += (-last_count, -orig_count)
|
|
147
120
|
|
|
148
|
-
# Calculate curve
|
|
149
121
|
P = 1.0
|
|
150
|
-
xs = []
|
|
151
|
-
ys = []
|
|
152
|
-
|
|
153
|
-
# Sort deltas by time
|
|
122
|
+
xs, ys = [], []
|
|
154
123
|
sorted_times = sorted(deltas.keys())
|
|
155
124
|
|
|
156
|
-
total_k = total_n # unused?
|
|
157
|
-
|
|
158
125
|
for t in sorted_times:
|
|
159
126
|
delta_k, delta_n = deltas[t]
|
|
160
127
|
xs.append(t / YEAR)
|
|
161
128
|
ys.append(100.0 * P)
|
|
162
|
-
|
|
163
129
|
if total_n > 0:
|
|
164
130
|
P *= 1 + delta_k / total_n
|
|
165
|
-
|
|
166
|
-
# total_k += delta_k
|
|
167
131
|
total_n += delta_n
|
|
168
|
-
|
|
169
|
-
if P < 0.05:
|
|
170
|
-
break
|
|
132
|
+
if P < 0.05: break
|
|
171
133
|
|
|
172
134
|
fig = go.Figure()
|
|
173
|
-
|
|
174
|
-
# Main survival curve
|
|
175
135
|
fig.add_trace(go.Scatter(
|
|
176
136
|
x=xs, y=ys,
|
|
177
137
|
mode='lines',
|
|
178
138
|
name='Survival Rate',
|
|
179
|
-
line=dict(color=
|
|
139
|
+
line=dict(color=PREMIUM_PALETTE[0], width=3)
|
|
180
140
|
))
|
|
181
141
|
|
|
182
|
-
# Exponential fit
|
|
183
142
|
if exp_fit:
|
|
184
143
|
try:
|
|
185
144
|
import scipy.optimize
|
|
186
|
-
|
|
187
|
-
# Define loss function for fit
|
|
188
145
|
def fit(k):
|
|
189
|
-
loss = 0.0
|
|
190
|
-
|
|
191
|
-
# Need to iterate again or reuse data?
|
|
192
|
-
# The original code re-iterates.
|
|
193
|
-
|
|
194
|
-
# Simplified for single dataset:
|
|
195
|
-
curr_total_n = 0
|
|
196
|
-
for _, history in commit_history.items():
|
|
197
|
-
curr_total_n += history[0][1]
|
|
198
|
-
|
|
199
|
-
P_fit = 1.0
|
|
200
|
-
curr_total_n_fit = curr_total_n
|
|
201
|
-
|
|
146
|
+
loss, curr_total_n = 0.0, sum(h[0][1] for h in commit_history.values())
|
|
147
|
+
P_fit, curr_total_n_fit = 1.0, curr_total_n
|
|
202
148
|
for t in sorted_times:
|
|
203
149
|
delta_k, delta_n = deltas[t]
|
|
204
150
|
pred = curr_total_n_fit * math.exp(-k * t / YEAR)
|
|
205
151
|
loss += (curr_total_n_fit * P_fit - pred) ** 2
|
|
206
|
-
if curr_total_n_fit > 0:
|
|
207
|
-
P_fit *= 1 + delta_k / curr_total_n_fit
|
|
152
|
+
if curr_total_n_fit > 0: P_fit *= 1 + delta_k / curr_total_n_fit
|
|
208
153
|
curr_total_n_fit += delta_n
|
|
209
154
|
return loss
|
|
210
|
-
|
|
211
155
|
k_opt = scipy.optimize.fmin(fit, 0.5, maxiter=50, disp=False)[0]
|
|
212
|
-
|
|
213
156
|
ts_fit = np.linspace(0, years, 100)
|
|
214
157
|
ys_fit = [100.0 * math.exp(-k_opt * t) for t in ts_fit]
|
|
215
|
-
|
|
216
158
|
half_life = math.log(2) / k_opt
|
|
217
|
-
|
|
218
159
|
fig.add_trace(go.Scatter(
|
|
219
160
|
x=ts_fit, y=ys_fit,
|
|
220
161
|
mode='lines',
|
|
221
162
|
name=f"Exp. Fit (Half-life: {half_life:.2f} yrs)",
|
|
222
|
-
line=dict(color=
|
|
163
|
+
line=dict(color=PREMIUM_PALETTE[10], dash='dash', width=2)
|
|
223
164
|
))
|
|
224
|
-
|
|
225
|
-
except ImportError:
|
|
226
|
-
pass # Or warn user
|
|
165
|
+
except ImportError: pass
|
|
227
166
|
|
|
228
167
|
fig.update_layout(
|
|
229
168
|
title=dict(text=title, x=0.5) if title else None,
|
|
230
|
-
yaxis=dict(
|
|
231
|
-
|
|
232
|
-
range=[0, 100]
|
|
233
|
-
),
|
|
234
|
-
xaxis=dict(
|
|
235
|
-
title="Years",
|
|
236
|
-
range=[0, years]
|
|
237
|
-
),
|
|
169
|
+
yaxis=dict(title="Lines still present (%)", range=[0, 105], gridcolor='rgba(128,128,128,0.2)'),
|
|
170
|
+
xaxis=dict(title="Years", range=[0, years], gridcolor='rgba(128,128,128,0.2)'),
|
|
238
171
|
hovermode="x unified",
|
|
239
172
|
margin=dict(l=20, r=20, t=50, b=20),
|
|
173
|
+
plot_bgcolor='rgba(0,0,0,0)',
|
|
174
|
+
paper_bgcolor='rgba(0,0,0,0)'
|
|
240
175
|
)
|
|
176
|
+
return fig
|
|
241
177
|
|
|
178
|
+
def plotly_bar_plot(data, max_n=20, title=None):
|
|
179
|
+
_, y, labels = _process_stack_line_data(data, max_n, normalize=False)
|
|
180
|
+
latest_values = [row[-1] for row in y]
|
|
181
|
+
indices = sorted(range(len(labels)), key=lambda i: latest_values[i], reverse=True)
|
|
182
|
+
sorted_labels = [labels[i] for i in indices]
|
|
183
|
+
sorted_values = [latest_values[i] for i in indices]
|
|
184
|
+
|
|
185
|
+
fig = go.Figure(go.Bar(
|
|
186
|
+
x=sorted_labels,
|
|
187
|
+
y=sorted_values,
|
|
188
|
+
marker=dict(
|
|
189
|
+
color=sorted_values,
|
|
190
|
+
colorscale=[[i/(len(PREMIUM_PALETTE)-1), c] for i, c in enumerate(PREMIUM_PALETTE)],
|
|
191
|
+
showscale=False
|
|
192
|
+
)
|
|
193
|
+
))
|
|
242
194
|
|
|
195
|
+
fig.update_layout(
|
|
196
|
+
title=dict(text=f"{title} (Latest)" if title else "Latest Distribution", x=0.5),
|
|
197
|
+
yaxis=dict(title="Lines of Code", gridcolor='rgba(128,128,128,0.2)'),
|
|
198
|
+
xaxis=dict(title=""),
|
|
199
|
+
margin=dict(l=20, r=20, t=50, b=100),
|
|
200
|
+
plot_bgcolor='rgba(0,0,0,0)',
|
|
201
|
+
paper_bgcolor='rgba(0,0,0,0)'
|
|
202
|
+
)
|
|
243
203
|
return fig
|
|
File without changes
|
{better_git_of_theseus-0.4.2.dist-info → better_git_of_theseus-0.5.0.dist-info}/entry_points.txt
RENAMED
|
File without changes
|
{better_git_of_theseus-0.4.2.dist-info → better_git_of_theseus-0.5.0.dist-info}/licenses/LICENSE
RENAMED
|
File without changes
|
{better_git_of_theseus-0.4.2.dist-info → better_git_of_theseus-0.5.0.dist-info}/top_level.txt
RENAMED
|
File without changes
|