git-pkgs 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/CHANGELOG.md +5 -0
- data/CODE_OF_CONDUCT.md +10 -0
- data/LICENSE +661 -0
- data/README.md +279 -0
- data/Rakefile +8 -0
- data/benchmark_bulk.rb +167 -0
- data/benchmark_db.rb +138 -0
- data/benchmark_detailed.rb +151 -0
- data/benchmark_full.rb +131 -0
- data/docs/schema.md +129 -0
- data/exe/git-pkgs +6 -0
- data/lib/git/pkgs/analyzer.rb +270 -0
- data/lib/git/pkgs/cli.rb +73 -0
- data/lib/git/pkgs/commands/blame.rb +142 -0
- data/lib/git/pkgs/commands/branch.rb +337 -0
- data/lib/git/pkgs/commands/diff.rb +131 -0
- data/lib/git/pkgs/commands/history.rb +127 -0
- data/lib/git/pkgs/commands/hooks.rb +131 -0
- data/lib/git/pkgs/commands/info.rb +109 -0
- data/lib/git/pkgs/commands/init.rb +267 -0
- data/lib/git/pkgs/commands/list.rb +159 -0
- data/lib/git/pkgs/commands/outdated.rb +122 -0
- data/lib/git/pkgs/commands/search.rb +152 -0
- data/lib/git/pkgs/commands/stats.rb +157 -0
- data/lib/git/pkgs/commands/tree.rb +124 -0
- data/lib/git/pkgs/commands/update.rb +147 -0
- data/lib/git/pkgs/commands/why.rb +82 -0
- data/lib/git/pkgs/database.rb +143 -0
- data/lib/git/pkgs/models/branch.rb +18 -0
- data/lib/git/pkgs/models/branch_commit.rb +14 -0
- data/lib/git/pkgs/models/commit.rb +29 -0
- data/lib/git/pkgs/models/dependency_change.rb +21 -0
- data/lib/git/pkgs/models/dependency_snapshot.rb +27 -0
- data/lib/git/pkgs/models/manifest.rb +21 -0
- data/lib/git/pkgs/repository.rb +125 -0
- data/lib/git/pkgs/version.rb +7 -0
- data/lib/git/pkgs.rb +37 -0
- metadata +138 -0
data/README.md
ADDED
|
@@ -0,0 +1,279 @@
|
|
|
1
|
+
# git-pkgs
|
|
2
|
+
|
|
3
|
+
A git subcommand for tracking package dependencies across git history. It analyzes your repository to show when dependencies were added, modified, or removed, who made those changes, and why.
|
|
4
|
+
|
|
5
|
+
## Why this exists
|
|
6
|
+
|
|
7
|
+
Your lockfile shows what dependencies you have. It doesn't show how you got here. `git log Gemfile.lock` is useless noise.
|
|
8
|
+
|
|
9
|
+
git-pkgs indexes your dependency history into a queryable database. You can ask: When did we add this? Who added it? What changed between these two releases? Has anyone touched this in the last year?
|
|
10
|
+
|
|
11
|
+
It works across ecosystems. Gemfile, package.json, Dockerfile, GitHub Actions workflows - one unified history instead of separate tools per ecosystem.
|
|
12
|
+
|
|
13
|
+
## Installation
|
|
14
|
+
|
|
15
|
+
```bash
|
|
16
|
+
gem install git-pkgs
|
|
17
|
+
```
|
|
18
|
+
|
|
19
|
+
## Quick start
|
|
20
|
+
|
|
21
|
+
```bash
|
|
22
|
+
cd your-repo
|
|
23
|
+
git pkgs init # analyze history (one-time, ~300 commits/sec)
|
|
24
|
+
git pkgs stats # see overview
|
|
25
|
+
git pkgs blame # who added each dependency
|
|
26
|
+
git pkgs history rails # track a package over time
|
|
27
|
+
```
|
|
28
|
+
|
|
29
|
+
## Commands
|
|
30
|
+
|
|
31
|
+
### Initialize the database
|
|
32
|
+
|
|
33
|
+
```bash
|
|
34
|
+
git pkgs init
|
|
35
|
+
```
|
|
36
|
+
|
|
37
|
+
Walks through git history and builds a SQLite database of dependency changes, stored in `.git/pkgs.sqlite3`.
|
|
38
|
+
|
|
39
|
+
Options:
|
|
40
|
+
- `--branch=NAME` - analyze a specific branch (default: the repository's default branch)
|
|
41
|
+
- `--since=SHA` - start analysis from a specific commit
|
|
42
|
+
- `--force` - rebuild the database from scratch
|
|
43
|
+
- `--hooks` - install git hooks for auto-updating
|
|
44
|
+
|
|
45
|
+
Example output:
|
|
46
|
+
```
|
|
47
|
+
Analyzing branch: main
|
|
48
|
+
Processing commit 5191/5191...
|
|
49
|
+
Done!
|
|
50
|
+
Analyzed 5191 commits
|
|
51
|
+
Found 2531 commits with dependency changes
|
|
52
|
+
Stored 28239 snapshots (every 20 changes)
|
|
53
|
+
Blob cache: 3141 unique blobs, 2349 had cache hits
|
|
54
|
+
```
|
|
55
|
+
|
|
56
|
+
### Database info
|
|
57
|
+
|
|
58
|
+
```bash
|
|
59
|
+
git pkgs info
|
|
60
|
+
```
|
|
61
|
+
|
|
62
|
+
Shows database size and row counts:
|
|
63
|
+
|
|
64
|
+
```
|
|
65
|
+
Database Info
|
|
66
|
+
========================================
|
|
67
|
+
|
|
68
|
+
Location: /path/to/repo/.git/pkgs.sqlite3
|
|
69
|
+
Size: 8.3 MB
|
|
70
|
+
|
|
71
|
+
Row Counts
|
|
72
|
+
----------------------------------------
|
|
73
|
+
Branches 1
|
|
74
|
+
Commits 3988
|
|
75
|
+
Branch-Commits 3988
|
|
76
|
+
Manifests 9
|
|
77
|
+
Dependency Changes 4732
|
|
78
|
+
Dependency Snapshots 28239
|
|
79
|
+
----------------------------------
|
|
80
|
+
Total 40957
|
|
81
|
+
|
|
82
|
+
Snapshot Coverage
|
|
83
|
+
----------------------------------------
|
|
84
|
+
Commits with dependency changes: 2531
|
|
85
|
+
Commits with snapshots: 127
|
|
86
|
+
Coverage: 5.0% (1 snapshot per ~20 changes)
|
|
87
|
+
```
|
|
88
|
+
|
|
89
|
+
### List dependencies
|
|
90
|
+
|
|
91
|
+
```bash
|
|
92
|
+
git pkgs list
|
|
93
|
+
git pkgs list --commit=abc123
|
|
94
|
+
git pkgs list --ecosystem=rubygems
|
|
95
|
+
```
|
|
96
|
+
|
|
97
|
+
Example output:
|
|
98
|
+
```
|
|
99
|
+
Gemfile (rubygems):
|
|
100
|
+
bootsnap >= 0 [runtime]
|
|
101
|
+
bootstrap = 4.6.2 [runtime]
|
|
102
|
+
bugsnag >= 0 [runtime]
|
|
103
|
+
rails = 8.0.1 [runtime]
|
|
104
|
+
sidekiq >= 0 [runtime]
|
|
105
|
+
...
|
|
106
|
+
```
|
|
107
|
+
|
|
108
|
+
### View package history
|
|
109
|
+
|
|
110
|
+
```bash
|
|
111
|
+
git pkgs history rails
|
|
112
|
+
```
|
|
113
|
+
|
|
114
|
+
Shows when the package was added, updated, and removed:
|
|
115
|
+
|
|
116
|
+
```
|
|
117
|
+
History for rails:
|
|
118
|
+
|
|
119
|
+
2016-12-16 Added = 5.0.0.1
|
|
120
|
+
Commit: e323669 Hello World
|
|
121
|
+
Author: Andrew Nesbitt <andrew@example.com>
|
|
122
|
+
Manifest: Gemfile
|
|
123
|
+
|
|
124
|
+
2016-12-21 Updated = 5.0.0.1 -> = 5.0.1
|
|
125
|
+
Commit: 0c70eee Update rails to 5.0.1
|
|
126
|
+
Author: Andrew Nesbitt <andrew@example.com>
|
|
127
|
+
Manifest: Gemfile
|
|
128
|
+
|
|
129
|
+
2024-11-21 Updated = 7.2.2 -> = 8.0.0
|
|
130
|
+
Commit: 86a07f4 Upgrade to Rails 8
|
|
131
|
+
Author: Andrew Nesbitt <andrew@example.com>
|
|
132
|
+
Manifest: Gemfile
|
|
133
|
+
```
|
|
134
|
+
|
|
135
|
+
### Blame
|
|
136
|
+
|
|
137
|
+
Show who added each current dependency:
|
|
138
|
+
|
|
139
|
+
```bash
|
|
140
|
+
git pkgs blame
|
|
141
|
+
git pkgs blame --ecosystem=rubygems
|
|
142
|
+
```
|
|
143
|
+
|
|
144
|
+
Example output:
|
|
145
|
+
```
|
|
146
|
+
Gemfile (rubygems):
|
|
147
|
+
bootsnap Andrew Nesbitt 2018-04-10 7da4369
|
|
148
|
+
bootstrap Andrew Nesbitt 2018-08-02 0b39dc0
|
|
149
|
+
bugsnag Andrew Nesbitt 2016-12-23 a87f1bf
|
|
150
|
+
factory_bot Lewis Buckley 2017-12-25 f6cceb0
|
|
151
|
+
faraday Andrew Nesbitt 2021-11-25 98de229
|
|
152
|
+
jwt Andrew Nesbitt 2018-09-10 a39f0ea
|
|
153
|
+
octokit Andrew Nesbitt 2016-12-16 e323669
|
|
154
|
+
omniauth-rails_csrf_protection dependabot[bot] 2021-11-02 02474ab
|
|
155
|
+
rails Andrew Nesbitt 2016-12-16 e323669
|
|
156
|
+
sidekiq Mark Tareshawty 2018-02-19 29a1c70
|
|
157
|
+
```
|
|
158
|
+
|
|
159
|
+
### Show statistics
|
|
160
|
+
|
|
161
|
+
```bash
|
|
162
|
+
git pkgs stats
|
|
163
|
+
```
|
|
164
|
+
|
|
165
|
+
Example output:
|
|
166
|
+
```
|
|
167
|
+
Dependency Statistics
|
|
168
|
+
========================================
|
|
169
|
+
|
|
170
|
+
Branch: main
|
|
171
|
+
Commits analyzed: 3988
|
|
172
|
+
Commits with changes: 2531
|
|
173
|
+
|
|
174
|
+
Current Dependencies
|
|
175
|
+
--------------------
|
|
176
|
+
Total: 250
|
|
177
|
+
rubygems: 232
|
|
178
|
+
actions: 14
|
|
179
|
+
docker: 4
|
|
180
|
+
|
|
181
|
+
Dependency Changes
|
|
182
|
+
--------------------
|
|
183
|
+
Total changes: 4732
|
|
184
|
+
added: 391
|
|
185
|
+
modified: 4200
|
|
186
|
+
removed: 141
|
|
187
|
+
|
|
188
|
+
Most Changed Dependencies
|
|
189
|
+
-------------------------
|
|
190
|
+
rails (rubygems): 135 changes
|
|
191
|
+
pagy (rubygems): 116 changes
|
|
192
|
+
nokogiri (rubygems): 85 changes
|
|
193
|
+
puma (rubygems): 73 changes
|
|
194
|
+
|
|
195
|
+
Manifest Files
|
|
196
|
+
--------------
|
|
197
|
+
Gemfile (rubygems): 294 changes
|
|
198
|
+
Gemfile.lock (rubygems): 4269 changes
|
|
199
|
+
.github/workflows/ci.yml (actions): 36 changes
|
|
200
|
+
```
|
|
201
|
+
|
|
202
|
+
### Explain why a dependency exists
|
|
203
|
+
|
|
204
|
+
```bash
|
|
205
|
+
git pkgs why rails
|
|
206
|
+
```
|
|
207
|
+
|
|
208
|
+
Shows the commit that added the dependency, along with its author and message.
|
|
209
|
+
|
|
210
|
+
### Dependency tree
|
|
211
|
+
|
|
212
|
+
```bash
|
|
213
|
+
git pkgs tree
|
|
214
|
+
git pkgs tree --ecosystem=rubygems
|
|
215
|
+
```
|
|
216
|
+
|
|
217
|
+
Shows dependencies grouped by type (runtime, development, etc.).
|
|
218
|
+
|
|
219
|
+
### Diff between commits
|
|
220
|
+
|
|
221
|
+
```bash
|
|
222
|
+
git pkgs diff --from=abc123 --to=def456
|
|
223
|
+
git pkgs diff --from=HEAD~10
|
|
224
|
+
```
|
|
225
|
+
|
|
226
|
+
Shows added, removed, and modified packages with version info.
|
|
227
|
+
|
|
228
|
+
### Keep database updated
|
|
229
|
+
|
|
230
|
+
```bash
|
|
231
|
+
git pkgs update
|
|
232
|
+
```
|
|
233
|
+
|
|
234
|
+
Or install git hooks to update automatically after commits and merges:
|
|
235
|
+
|
|
236
|
+
```bash
|
|
237
|
+
git pkgs hooks --install
|
|
238
|
+
```
|
|
239
|
+
|
|
240
|
+
## Performance
|
|
241
|
+
|
|
242
|
+
Benchmarked on a MacBook Pro analyzing [octobox](https://github.com/octobox/octobox) (5191 commits, 8 years of history): init takes about 18 seconds at roughly 300 commits/sec, producing an 8.3 MB database. About half the commits (2531) had dependency changes.
|
|
243
|
+
|
|
244
|
+
Optimizations:
|
|
245
|
+
- Bulk inserts with transaction batching (100 commits per transaction)
|
|
246
|
+
- Blob SHA caching (75% cache hit rate for repeated manifest content)
|
|
247
|
+
- Deferred index creation during bulk load
|
|
248
|
+
- Sparse snapshots (every 20 dependency-changing commits) for storage efficiency
|
|
249
|
+
- SQLite WAL mode for write performance
|
|
250
|
+
|
|
251
|
+
## Supported ecosystems
|
|
252
|
+
|
|
253
|
+
git-pkgs uses [ecosystems-bibliothecary](https://github.com/ecosyste-ms/bibliothecary) for parsing, supporting:
|
|
254
|
+
|
|
255
|
+
Actions, Anaconda, BentoML, Bower, Cargo, CocoaPods, Cog, CPAN, CRAN, CycloneDX, Docker, Dub, DVC, Elm, Go, Haxelib, Homebrew, Julia, Maven, Meteor, MLflow, npm, NuGet, Ollama, Packagist, Pub, PyPI, RubyGems, Shards, SPDX, Vcpkg
|
|
256
|
+
|
|
257
|
+
## How it works
|
|
258
|
+
|
|
259
|
+
git-pkgs walks your git history, extracts dependency files at each commit, and diffs them to detect changes. Results are stored in a SQLite database for fast querying.
|
|
260
|
+
|
|
261
|
+
The database schema stores:
|
|
262
|
+
- Commits, each flagged by whether it changed dependencies
|
|
263
|
+
- Dependency changes (added/modified/removed) with before/after versions
|
|
264
|
+
- Periodic snapshots of full dependency state for efficient point-in-time queries
|
|
265
|
+
|
|
266
|
+
See [docs/schema.md](docs/schema.md) for full schema documentation.
|
|
267
|
+
|
|
268
|
+
## Development
|
|
269
|
+
|
|
270
|
+
```bash
|
|
271
|
+
git clone https://github.com/andrew/git-pkgs
|
|
272
|
+
cd git-pkgs
|
|
273
|
+
bin/setup
|
|
274
|
+
rake test
|
|
275
|
+
```
|
|
276
|
+
|
|
277
|
+
## License
|
|
278
|
+
|
|
279
|
+
AGPL-3.0
|
data/Rakefile
ADDED
data/benchmark_bulk.rb
ADDED
|
@@ -0,0 +1,167 @@
|
|
|
1
|
+
#!/usr/bin/env ruby
|
|
2
|
+
# frozen_string_literal: true
|
|
3
|
+
|
|
4
|
+
require "bundler/setup"
|
|
5
|
+
require "git/pkgs"
|
|
6
|
+
require "benchmark"
|
|
7
|
+
|
|
8
|
+
repo_path = ARGV[0] || "/Users/andrew/code/octobox"
|
|
9
|
+
sample_size = (ARGV[1] || 500).to_i
|
|
10
|
+
|
|
11
|
+
# In-memory database tuned for write speed: WAL is not available for in-memory SQLite, so journal_mode=MEMORY is used as the closest equivalent
|
|
12
|
+
Git::Pkgs::Database.connect_memory
|
|
13
|
+
ActiveRecord::Base.connection.execute("PRAGMA synchronous = OFF")
|
|
14
|
+
ActiveRecord::Base.connection.execute("PRAGMA journal_mode = MEMORY")
|
|
15
|
+
|
|
16
|
+
repo = Git::Pkgs::Repository.new(repo_path)
|
|
17
|
+
analyzer = Git::Pkgs::Analyzer.new(repo)
|
|
18
|
+
|
|
19
|
+
walker = repo.walk(repo.default_branch)
|
|
20
|
+
commits = walker.take(sample_size)
|
|
21
|
+
|
|
22
|
+
puts "Bulk insert benchmark: #{commits.size} commits"
|
|
23
|
+
puts "=" * 60
|
|
24
|
+
|
|
25
|
+
# Pre-collect all data
|
|
26
|
+
all_commits = []
|
|
27
|
+
all_branch_commits = []
|
|
28
|
+
all_changes = []
|
|
29
|
+
all_snapshots = []
|
|
30
|
+
|
|
31
|
+
snapshot = {}
|
|
32
|
+
branch = Git::Pkgs::Models::Branch.find_or_create("main")
|
|
33
|
+
position = 0
|
|
34
|
+
manifests_cache = {}
|
|
35
|
+
|
|
36
|
+
now = Time.now
|
|
37
|
+
|
|
38
|
+
collect_time = Benchmark.realtime do
|
|
39
|
+
commits.each do |rugged_commit|
|
|
40
|
+
next if repo.merge_commit?(rugged_commit)
|
|
41
|
+
position += 1
|
|
42
|
+
|
|
43
|
+
result = analyzer.analyze_commit(rugged_commit, snapshot)
|
|
44
|
+
|
|
45
|
+
all_commits << {
|
|
46
|
+
sha: rugged_commit.oid,
|
|
47
|
+
message: rugged_commit.message,
|
|
48
|
+
author_name: rugged_commit.author[:name],
|
|
49
|
+
author_email: rugged_commit.author[:email],
|
|
50
|
+
committed_at: rugged_commit.time,
|
|
51
|
+
has_dependency_changes: result && result[:changes].any?,
|
|
52
|
+
created_at: now,
|
|
53
|
+
updated_at: now
|
|
54
|
+
}
|
|
55
|
+
|
|
56
|
+
all_branch_commits << {
|
|
57
|
+
branch_id: branch.id,
|
|
58
|
+
commit_position: position, # placeholder, need to resolve after commit insert
|
|
59
|
+
commit_sha: rugged_commit.oid
|
|
60
|
+
}
|
|
61
|
+
|
|
62
|
+
next unless result && result[:changes].any?
|
|
63
|
+
|
|
64
|
+
result[:changes].each do |change|
|
|
65
|
+
manifest_key = change[:manifest_path]
|
|
66
|
+
unless manifests_cache[manifest_key]
|
|
67
|
+
manifests_cache[manifest_key] = Git::Pkgs::Models::Manifest.find_or_create(
|
|
68
|
+
path: change[:manifest_path],
|
|
69
|
+
platform: change[:platform],
|
|
70
|
+
kind: change[:kind]
|
|
71
|
+
)
|
|
72
|
+
end
|
|
73
|
+
|
|
74
|
+
all_changes << {
|
|
75
|
+
commit_sha: rugged_commit.oid,
|
|
76
|
+
manifest_path: manifest_key,
|
|
77
|
+
name: change[:name],
|
|
78
|
+
platform: change[:platform],
|
|
79
|
+
change_type: change[:change_type],
|
|
80
|
+
requirement: change[:requirement],
|
|
81
|
+
previous_requirement: change[:previous_requirement],
|
|
82
|
+
dependency_type: change[:dependency_type],
|
|
83
|
+
created_at: now,
|
|
84
|
+
updated_at: now
|
|
85
|
+
}
|
|
86
|
+
end
|
|
87
|
+
|
|
88
|
+
snapshot = result[:snapshot]
|
|
89
|
+
|
|
90
|
+
snapshot.each do |(manifest_path, name), dep_info|
|
|
91
|
+
all_snapshots << {
|
|
92
|
+
commit_sha: rugged_commit.oid,
|
|
93
|
+
manifest_path: manifest_path,
|
|
94
|
+
name: name,
|
|
95
|
+
platform: dep_info[:platform],
|
|
96
|
+
requirement: dep_info[:requirement],
|
|
97
|
+
dependency_type: dep_info[:dependency_type],
|
|
98
|
+
created_at: now,
|
|
99
|
+
updated_at: now
|
|
100
|
+
}
|
|
101
|
+
end
|
|
102
|
+
end
|
|
103
|
+
end
|
|
104
|
+
|
|
105
|
+
puts "Collection time: #{collect_time.round(3)}s"
|
|
106
|
+
puts "Data collected:"
|
|
107
|
+
puts " Commits: #{all_commits.size}"
|
|
108
|
+
puts " Changes: #{all_changes.size}"
|
|
109
|
+
puts " Snapshots: #{all_snapshots.size}"
|
|
110
|
+
|
|
111
|
+
# Bulk insert
|
|
112
|
+
insert_time = Benchmark.realtime do
|
|
113
|
+
# Insert commits
|
|
114
|
+
Git::Pkgs::Models::Commit.insert_all(all_commits) if all_commits.any?
|
|
115
|
+
|
|
116
|
+
# Build lookup maps: commit SHA -> ID and manifest path -> ID
|
|
117
|
+
commit_ids = Git::Pkgs::Models::Commit.where(sha: all_commits.map { |c| c[:sha] }).pluck(:sha, :id).to_h
|
|
118
|
+
manifest_ids = Git::Pkgs::Models::Manifest.pluck(:path, :id).to_h
|
|
119
|
+
|
|
120
|
+
# Insert branch_commits with resolved IDs
|
|
121
|
+
branch_commit_records = all_branch_commits.map do |bc|
|
|
122
|
+
{
|
|
123
|
+
branch_id: bc[:branch_id],
|
|
124
|
+
commit_id: commit_ids[bc[:commit_sha]],
|
|
125
|
+
position: bc[:commit_position]
|
|
126
|
+
}
|
|
127
|
+
end
|
|
128
|
+
Git::Pkgs::Models::BranchCommit.insert_all(branch_commit_records) if branch_commit_records.any?
|
|
129
|
+
|
|
130
|
+
# Insert changes with resolved IDs
|
|
131
|
+
change_records = all_changes.map do |c|
|
|
132
|
+
{
|
|
133
|
+
commit_id: commit_ids[c[:commit_sha]],
|
|
134
|
+
manifest_id: manifest_ids[c[:manifest_path]],
|
|
135
|
+
name: c[:name],
|
|
136
|
+
platform: c[:platform],
|
|
137
|
+
change_type: c[:change_type],
|
|
138
|
+
requirement: c[:requirement],
|
|
139
|
+
previous_requirement: c[:previous_requirement],
|
|
140
|
+
dependency_type: c[:dependency_type],
|
|
141
|
+
created_at: c[:created_at],
|
|
142
|
+
updated_at: c[:updated_at]
|
|
143
|
+
}
|
|
144
|
+
end
|
|
145
|
+
Git::Pkgs::Models::DependencyChange.insert_all(change_records) if change_records.any?
|
|
146
|
+
|
|
147
|
+
# Insert snapshots with resolved IDs
|
|
148
|
+
snapshot_records = all_snapshots.map do |s|
|
|
149
|
+
{
|
|
150
|
+
commit_id: commit_ids[s[:commit_sha]],
|
|
151
|
+
manifest_id: manifest_ids[s[:manifest_path]],
|
|
152
|
+
name: s[:name],
|
|
153
|
+
platform: s[:platform],
|
|
154
|
+
requirement: s[:requirement],
|
|
155
|
+
dependency_type: s[:dependency_type],
|
|
156
|
+
created_at: s[:created_at],
|
|
157
|
+
updated_at: s[:updated_at]
|
|
158
|
+
}
|
|
159
|
+
end
|
|
160
|
+
Git::Pkgs::Models::DependencySnapshot.insert_all(snapshot_records) if snapshot_records.any?
|
|
161
|
+
end
|
|
162
|
+
|
|
163
|
+
puts "Insert time: #{insert_time.round(3)}s"
|
|
164
|
+
|
|
165
|
+
total = collect_time + insert_time
|
|
166
|
+
puts "\nTotal: #{total.round(3)}s"
|
|
167
|
+
puts "Throughput: #{(all_commits.size / total).round(1)} commits/sec"
|
data/benchmark_db.rb
ADDED
|
@@ -0,0 +1,138 @@
|
|
|
1
|
+
#!/usr/bin/env ruby
|
|
2
|
+
# frozen_string_literal: true
|
|
3
|
+
|
|
4
|
+
require "bundler/setup"
|
|
5
|
+
require "git/pkgs"
|
|
6
|
+
require "benchmark"
|
|
7
|
+
|
|
8
|
+
repo_path = ARGV[0] || "/Users/andrew/code/octobox"
|
|
9
|
+
sample_size = (ARGV[1] || 200).to_i
|
|
10
|
+
|
|
11
|
+
Git::Pkgs::Database.connect_memory
|
|
12
|
+
|
|
13
|
+
repo = Git::Pkgs::Repository.new(repo_path)
|
|
14
|
+
analyzer = Git::Pkgs::Analyzer.new(repo)
|
|
15
|
+
|
|
16
|
+
walker = repo.walk(repo.default_branch)
|
|
17
|
+
commits = walker.take(sample_size)
|
|
18
|
+
|
|
19
|
+
puts "DB operation breakdown: #{commits.size} commits"
|
|
20
|
+
puts "=" * 60
|
|
21
|
+
|
|
22
|
+
timings = {
|
|
23
|
+
commit_create: 0.0,
|
|
24
|
+
branch_commit_create: 0.0,
|
|
25
|
+
commit_update: 0.0,
|
|
26
|
+
manifest_find_create: 0.0,
|
|
27
|
+
change_create: 0.0,
|
|
28
|
+
snapshot_create: 0.0
|
|
29
|
+
}
|
|
30
|
+
|
|
31
|
+
counts = {
|
|
32
|
+
commits: 0,
|
|
33
|
+
branch_commits: 0,
|
|
34
|
+
changes: 0,
|
|
35
|
+
snapshots: 0
|
|
36
|
+
}
|
|
37
|
+
|
|
38
|
+
snapshot = {}
|
|
39
|
+
branch = Git::Pkgs::Models::Branch.find_or_create("main")
|
|
40
|
+
position = 0
|
|
41
|
+
|
|
42
|
+
commits.each do |rugged_commit|
|
|
43
|
+
next if repo.merge_commit?(rugged_commit)
|
|
44
|
+
position += 1
|
|
45
|
+
|
|
46
|
+
result = analyzer.analyze_commit(rugged_commit, snapshot)
|
|
47
|
+
|
|
48
|
+
commit = nil
|
|
49
|
+
timings[:commit_create] += Benchmark.realtime do
|
|
50
|
+
commit = Git::Pkgs::Models::Commit.find_or_create_from_rugged(rugged_commit)
|
|
51
|
+
end
|
|
52
|
+
counts[:commits] += 1
|
|
53
|
+
|
|
54
|
+
timings[:branch_commit_create] += Benchmark.realtime do
|
|
55
|
+
Git::Pkgs::Models::BranchCommit.find_or_create_by(
|
|
56
|
+
branch: branch,
|
|
57
|
+
commit: commit,
|
|
58
|
+
position: position
|
|
59
|
+
)
|
|
60
|
+
end
|
|
61
|
+
counts[:branch_commits] += 1
|
|
62
|
+
|
|
63
|
+
next unless result && result[:changes].any?
|
|
64
|
+
|
|
65
|
+
timings[:commit_update] += Benchmark.realtime do
|
|
66
|
+
commit.update(has_dependency_changes: true)
|
|
67
|
+
end
|
|
68
|
+
|
|
69
|
+
result[:changes].each do |change|
|
|
70
|
+
manifest = nil
|
|
71
|
+
timings[:manifest_find_create] += Benchmark.realtime do
|
|
72
|
+
manifest = Git::Pkgs::Models::Manifest.find_or_create(
|
|
73
|
+
path: change[:manifest_path],
|
|
74
|
+
platform: change[:platform],
|
|
75
|
+
kind: change[:kind]
|
|
76
|
+
)
|
|
77
|
+
end
|
|
78
|
+
|
|
79
|
+
timings[:change_create] += Benchmark.realtime do
|
|
80
|
+
Git::Pkgs::Models::DependencyChange.create!(
|
|
81
|
+
commit: commit,
|
|
82
|
+
manifest: manifest,
|
|
83
|
+
name: change[:name],
|
|
84
|
+
platform: change[:platform],
|
|
85
|
+
change_type: change[:change_type],
|
|
86
|
+
requirement: change[:requirement],
|
|
87
|
+
previous_requirement: change[:previous_requirement],
|
|
88
|
+
dependency_type: change[:dependency_type]
|
|
89
|
+
)
|
|
90
|
+
end
|
|
91
|
+
counts[:changes] += 1
|
|
92
|
+
end
|
|
93
|
+
|
|
94
|
+
snapshot = result[:snapshot]
|
|
95
|
+
|
|
96
|
+
snapshot.each do |(manifest_path, name), dep_info|
|
|
97
|
+
timings[:snapshot_create] += Benchmark.realtime do
|
|
98
|
+
manifest = Git::Pkgs::Models::Manifest.find_by(path: manifest_path)
|
|
99
|
+
Git::Pkgs::Models::DependencySnapshot.find_or_create_by(
|
|
100
|
+
commit: commit,
|
|
101
|
+
manifest: manifest,
|
|
102
|
+
name: name
|
|
103
|
+
) do |s|
|
|
104
|
+
s.platform = dep_info[:platform]
|
|
105
|
+
s.requirement = dep_info[:requirement]
|
|
106
|
+
s.dependency_type = dep_info[:dependency_type]
|
|
107
|
+
end
|
|
108
|
+
end
|
|
109
|
+
counts[:snapshots] += 1
|
|
110
|
+
end
|
|
111
|
+
end
|
|
112
|
+
|
|
113
|
+
total = timings.values.sum
|
|
114
|
+
|
|
115
|
+
puts "\nDB operation breakdown:"
|
|
116
|
+
puts "-" * 60
|
|
117
|
+
timings.each do |op, time|
|
|
118
|
+
pct = total > 0 ? (time / total * 100).round(1) : 0
|
|
119
|
+
puts " #{op.to_s.ljust(22)} #{time.round(3).to_s.rjust(8)}s (#{pct}%)"
|
|
120
|
+
end
|
|
121
|
+
puts "-" * 60
|
|
122
|
+
puts " #{'Total'.ljust(22)} #{total.round(3).to_s.rjust(8)}s"
|
|
123
|
+
|
|
124
|
+
puts "\nRecord counts:"
|
|
125
|
+
puts " Commits: #{counts[:commits]}"
|
|
126
|
+
puts " BranchCommits: #{counts[:branch_commits]}"
|
|
127
|
+
puts " Changes: #{counts[:changes]}"
|
|
128
|
+
puts " Snapshots: #{counts[:snapshots]}"
|
|
129
|
+
|
|
130
|
+
puts "\nPer-operation averages:"
|
|
131
|
+
puts " commit_create: #{(timings[:commit_create] / counts[:commits] * 1000).round(3)}ms"
|
|
132
|
+
puts " branch_commit_create: #{(timings[:branch_commit_create] / counts[:branch_commits] * 1000).round(3)}ms"
|
|
133
|
+
if counts[:changes] > 0
|
|
134
|
+
puts " change_create: #{(timings[:change_create] / counts[:changes] * 1000).round(3)}ms"
|
|
135
|
+
end
|
|
136
|
+
if counts[:snapshots] > 0
|
|
137
|
+
puts " snapshot_create: #{(timings[:snapshot_create] / counts[:snapshots] * 1000).round(3)}ms"
|
|
138
|
+
end
|