git-pkgs 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/CHANGELOG.md +5 -0
- data/CODE_OF_CONDUCT.md +10 -0
- data/LICENSE +661 -0
- data/README.md +279 -0
- data/Rakefile +8 -0
- data/benchmark_bulk.rb +167 -0
- data/benchmark_db.rb +138 -0
- data/benchmark_detailed.rb +151 -0
- data/benchmark_full.rb +131 -0
- data/docs/schema.md +129 -0
- data/exe/git-pkgs +6 -0
- data/lib/git/pkgs/analyzer.rb +270 -0
- data/lib/git/pkgs/cli.rb +73 -0
- data/lib/git/pkgs/commands/blame.rb +142 -0
- data/lib/git/pkgs/commands/branch.rb +337 -0
- data/lib/git/pkgs/commands/diff.rb +131 -0
- data/lib/git/pkgs/commands/history.rb +127 -0
- data/lib/git/pkgs/commands/hooks.rb +131 -0
- data/lib/git/pkgs/commands/info.rb +109 -0
- data/lib/git/pkgs/commands/init.rb +267 -0
- data/lib/git/pkgs/commands/list.rb +159 -0
- data/lib/git/pkgs/commands/outdated.rb +122 -0
- data/lib/git/pkgs/commands/search.rb +152 -0
- data/lib/git/pkgs/commands/stats.rb +157 -0
- data/lib/git/pkgs/commands/tree.rb +124 -0
- data/lib/git/pkgs/commands/update.rb +147 -0
- data/lib/git/pkgs/commands/why.rb +82 -0
- data/lib/git/pkgs/database.rb +143 -0
- data/lib/git/pkgs/models/branch.rb +18 -0
- data/lib/git/pkgs/models/branch_commit.rb +14 -0
- data/lib/git/pkgs/models/commit.rb +29 -0
- data/lib/git/pkgs/models/dependency_change.rb +21 -0
- data/lib/git/pkgs/models/dependency_snapshot.rb +27 -0
- data/lib/git/pkgs/models/manifest.rb +21 -0
- data/lib/git/pkgs/repository.rb +125 -0
- data/lib/git/pkgs/version.rb +7 -0
- data/lib/git/pkgs.rb +37 -0
- metadata +138 -0
|
@@ -0,0 +1,151 @@
|
|
|
1
|
+
#!/usr/bin/env ruby
|
|
2
|
+
# frozen_string_literal: true
|
|
3
|
+
|
|
4
|
+
require "bundler/setup"
|
|
5
|
+
require "git/pkgs"
|
|
6
|
+
require "benchmark"
|
|
7
|
+
|
|
8
|
+
# Detailed benchmark of the manifest-detection phases.
#
# Usage: ruby <this script> [REPO_PATH] [SAMPLE_SIZE]
#   REPO_PATH   - repository to analyse; defaults to the current directory
#   SAMPLE_SIZE - number of commits taken from the default branch (default 500)
repo_path = ARGV[0] || Dir.pwd
sample_size = (ARGV[1] || 500).to_i

repo = Git::Pkgs::Repository.new(repo_path)
analyzer = Git::Pkgs::Analyzer.new(repo)

walker = repo.walk(repo.default_branch)
commits = walker.take(sample_size)

puts "Benchmarking #{commits.size} commits from #{repo_path}"
puts "=" * 60

# Accumulated wall-clock seconds per phase. :walk_iteration and :db_operations
# are never timed by this script; they are kept so the report layout is
# unchanged.
timings = {
  walk_iteration: 0.0,
  blob_paths: 0.0,
  regex_check: 0.0,
  identify_manifests: 0.0,
  parse_manifests: 0.0,
  db_operations: 0.0
}

counts = {
  total: 0,
  merge_commits: 0,
  regex_passed: 0,
  identify_passed: 0,
  has_changes: 0,
  paths_by_commit: []
}

platform_times = Hash.new(0.0)
platform_counts = Hash.new(0)

# Percentage of +part+ in +whole+; 0 when +whole+ is zero so that an empty
# sample does not print NaN.
percent = ->(part, whole) { whole.positive? ? (part.to_f / whole * 100).round(1) : 0 }

# Average milliseconds per item; 0.0 when there were no items.
average_ms = ->(seconds, items) { items.positive? ? (seconds / items * 1000).round(3) : 0.0 }

commits.each do |rugged_commit|
  counts[:total] += 1

  if repo.merge_commit?(rugged_commit)
    counts[:merge_commits] += 1
    next
  end

  # Phase 1: Extract diff/file paths
  blob_paths = nil
  timings[:blob_paths] += Benchmark.realtime do
    blob_paths = repo.blob_paths(rugged_commit)
  end

  all_paths = blob_paths.map { |entry| entry[:path] }
  counts[:paths_by_commit] << all_paths.size

  # Phase 2: Quick regex check
  regex_match = nil
  timings[:regex_check] += Benchmark.realtime do
    regex_match = analyzer.might_have_manifests?(all_paths)
  end
  next unless regex_match

  counts[:regex_passed] += 1

  # Phase 3: Bibliothecary identify_manifests, once per change status
  paths_with = lambda do |status|
    blob_paths.select { |entry| entry[:status] == status }.map { |entry| entry[:path] }
  end
  added_paths = paths_with.call(:added)
  modified_paths = paths_with.call(:modified)
  removed_paths = paths_with.call(:deleted)

  added_manifests = modified_manifests = removed_manifests = nil
  timings[:identify_manifests] += Benchmark.realtime do
    added_manifests = Bibliothecary.identify_manifests(added_paths)
    modified_manifests = Bibliothecary.identify_manifests(modified_paths)
    removed_manifests = Bibliothecary.identify_manifests(removed_paths)
  end

  all_manifests = added_manifests + modified_manifests + removed_manifests
  next if all_manifests.empty?

  counts[:identify_passed] += 1

  # Phase 4: Parse manifests (with platform tracking)
  timings[:parse_manifests] += Benchmark.realtime do
    all_manifests.each do |manifest_path|
      start = Process.clock_gettime(Process::CLOCK_MONOTONIC)

      blob_oid = repo.blob_oid_at_commit(rugged_commit, manifest_path)
      next unless blob_oid

      content = repo.blob_content(blob_oid)
      next unless content

      result = Bibliothecary.analyse_file(manifest_path, content).first
      next unless result

      # Only successfully parsed files are attributed to a platform.
      platform_counts[result[:platform]] += 1
      platform_times[result[:platform]] += Process.clock_gettime(Process::CLOCK_MONOTONIC) - start
    end
  end

  # NOTE(review): this counts every commit that reached the parse phase, not
  # only commits whose dependencies actually changed.
  counts[:has_changes] += 1
end

total_time = timings.values.sum
non_merge = counts[:total] - counts[:merge_commits]

puts "\nTiming breakdown:"
puts "-" * 60
timings.each do |phase, time|
  puts "  #{phase.to_s.ljust(20)} #{time.round(3).to_s.rjust(8)}s  (#{percent.call(time, total_time)}%)"
end
puts "-" * 60
puts "  #{'Total'.ljust(20)} #{total_time.round(3).to_s.rjust(8)}s"

puts "\nCommit counts:"
puts "-" * 60
puts "  Total commits:      #{counts[:total]}"
puts "  Merge commits:      #{counts[:merge_commits]} (skipped)"
puts "  Regex passed:       #{counts[:regex_passed]} (#{percent.call(counts[:regex_passed], non_merge)}%)"
puts "  Identify passed:    #{counts[:identify_passed]}"
puts "  Has actual changes: #{counts[:has_changes]}"

if counts[:paths_by_commit].any?
  avg_paths = counts[:paths_by_commit].sum.to_f / counts[:paths_by_commit].size
  max_paths = counts[:paths_by_commit].max
  puts "\nPaths per commit:"
  puts "  Average: #{avg_paths.round(1)}"
  puts "  Max:     #{max_paths}"
end

if platform_times.any?
  puts "\nTime by platform:"
  puts "-" * 60
  platform_times.sort_by { |_, seconds| -seconds }.each do |platform, time|
    # Every platform with a recorded time also has a count of at least 1.
    count = platform_counts[platform]
    avg = (time / count * 1000).round(2)
    puts "  #{platform.ljust(20)} #{time.round(3).to_s.rjust(8)}s  (#{count} files, #{avg}ms avg)"
  end
end

puts "\nPer-commit averages:"
puts "  blob_paths:         #{average_ms.call(timings[:blob_paths], non_merge)}ms"
puts "  regex_check:        #{average_ms.call(timings[:regex_check], non_merge)}ms"
if counts[:regex_passed] > 0
  puts "  identify_manifests: #{average_ms.call(timings[:identify_manifests], counts[:regex_passed])}ms (when regex passes)"
end

# Guard against a zero total so an empty run reports 0.0 instead of NaN.
commits_per_sec = total_time.positive? ? counts[:total] / total_time : 0.0
puts "\nThroughput: #{commits_per_sec.round(1)} commits/sec"
|
data/benchmark_full.rb
ADDED
|
@@ -0,0 +1,131 @@
|
|
|
1
|
+
#!/usr/bin/env ruby
|
|
2
|
+
# frozen_string_literal: true
|
|
3
|
+
|
|
4
|
+
require "bundler/setup"
|
|
5
|
+
require "git/pkgs"
|
|
6
|
+
require "benchmark"
|
|
7
|
+
|
|
8
|
+
# Full pipeline benchmark: git diff extraction, manifest filtering, parsing
# and database writes, timed separately.
#
# Usage: ruby <this script> [REPO_PATH] [SAMPLE_SIZE]
#   REPO_PATH   - repository to analyse; defaults to the current directory
#   SAMPLE_SIZE - number of commits taken from the default branch (default 500)
repo_path = ARGV[0] || Dir.pwd
sample_size = (ARGV[1] || 500).to_i

# Setup in-memory database for fair comparison
Git::Pkgs::Database.connect_memory

repo = Git::Pkgs::Repository.new(repo_path)
analyzer = Git::Pkgs::Analyzer.new(repo)

default_branch = repo.default_branch
walker = repo.walk(default_branch)
commits = walker.take(sample_size)

puts "Full pipeline benchmark: #{commits.size} commits"
puts "=" * 60

# Accumulated wall-clock seconds per phase.
timings = {
  git_diff: 0.0,
  filtering: 0.0,
  parsing: 0.0,
  db_writes: 0.0
}

snapshot = {}
# Record commits under the branch that is actually walked rather than a
# hard-coded "main".
branch = Git::Pkgs::Models::Branch.find_or_create(default_branch)
position = 0

commits.each do |rugged_commit|
  next if repo.merge_commit?(rugged_commit)

  position += 1

  # Git diff extraction
  blob_paths = nil
  timings[:git_diff] += Benchmark.realtime do
    blob_paths = repo.blob_paths(rugged_commit)
  end

  all_paths = blob_paths.map { |entry| entry[:path] }

  # Filtering (regex + identify_manifests)
  touches_manifests = nil
  timings[:filtering] += Benchmark.realtime do
    if analyzer.might_have_manifests?(all_paths)
      paths_with = lambda do |status|
        blob_paths.select { |entry| entry[:status] == status }.map { |entry| entry[:path] }
      end

      added_manifests = Bibliothecary.identify_manifests(paths_with.call(:added))
      modified_manifests = Bibliothecary.identify_manifests(paths_with.call(:modified))
      removed_manifests = Bibliothecary.identify_manifests(paths_with.call(:deleted))

      touches_manifests = (added_manifests + modified_manifests + removed_manifests).any?
    end
  end

  # Full analysis with parsing
  analysis_result = nil
  if touches_manifests
    timings[:parsing] += Benchmark.realtime do
      analysis_result = analyzer.analyze_commit(rugged_commit, snapshot)
    end
  end

  # Database writes
  timings[:db_writes] += Benchmark.realtime do
    commit = Git::Pkgs::Models::Commit.find_or_create_from_rugged(rugged_commit)
    Git::Pkgs::Models::BranchCommit.find_or_create_by(
      branch: branch,
      commit: commit,
      position: position
    )

    next unless analysis_result && analysis_result[:changes].any?

    commit.update(has_dependency_changes: true)

    # The analyzer reports the package manager under :ecosystem (it has no
    # :platform key), so read that key here.
    # NOTE(review): the model attribute is assumed to be `platform`, as in the
    # schema document; confirm against the models.
    analysis_result[:changes].each do |change|
      manifest = Git::Pkgs::Models::Manifest.find_or_create(
        path: change[:manifest_path],
        platform: change[:ecosystem],
        kind: change[:kind]
      )

      Git::Pkgs::Models::DependencyChange.create!(
        commit: commit,
        manifest: manifest,
        name: change[:name],
        platform: change[:ecosystem],
        change_type: change[:change_type],
        requirement: change[:requirement],
        previous_requirement: change[:previous_requirement],
        dependency_type: change[:dependency_type]
      )
    end

    snapshot = analysis_result[:snapshot]

    snapshot.each do |(manifest_path, name), dep_info|
      manifest = Git::Pkgs::Models::Manifest.find_by(path: manifest_path)
      Git::Pkgs::Models::DependencySnapshot.find_or_create_by(
        commit: commit,
        manifest: manifest,
        name: name
      ) do |s|
        s.platform = dep_info[:ecosystem]
        s.requirement = dep_info[:requirement]
        s.dependency_type = dep_info[:dependency_type]
      end
    end
  end
end

total = timings.values.sum

puts "\nFull pipeline breakdown:"
puts "-" * 60
timings.each do |phase, time|
  pct = total > 0 ? (time / total * 100).round(1) : 0
  puts "  #{phase.to_s.ljust(15)} #{time.round(3).to_s.rjust(8)}s  (#{pct}%)"
end
puts "-" * 60
puts "  #{'Total'.ljust(15)} #{total.round(3).to_s.rjust(8)}s"

# Guard against a zero total so an empty run reports 0.0 instead of NaN.
throughput = total > 0 ? position / total : 0.0
puts "\nThroughput: #{throughput.round(1)} commits/sec"
puts "Cache stats: #{analyzer.cache_stats}"
|
data/docs/schema.md
ADDED
|
@@ -0,0 +1,129 @@
|
|
|
1
|
+
# Database Schema
|
|
2
|
+
|
|
3
|
+
git-pkgs stores dependency history in a SQLite database at `.git/pkgs.sqlite3`.
|
|
4
|
+
|
|
5
|
+
## Tables
|
|
6
|
+
|
|
7
|
+
### branches
|
|
8
|
+
|
|
9
|
+
Tracks which branches have been analyzed.
|
|
10
|
+
|
|
11
|
+
| Column | Type | Description |
|
|
12
|
+
|--------|------|-------------|
|
|
13
|
+
| id | integer | Primary key |
|
|
14
|
+
| name | string | Branch name (e.g., "main", "develop") |
|
|
15
|
+
| last_analyzed_sha | string | SHA of last commit analyzed for incremental updates |
|
|
16
|
+
| created_at | datetime | |
|
|
17
|
+
| updated_at | datetime | |
|
|
18
|
+
|
|
19
|
+
Indexes: `name` (unique)
|
|
20
|
+
|
|
21
|
+
### commits
|
|
22
|
+
|
|
23
|
+
Stores commit metadata for commits that have been analyzed.
|
|
24
|
+
|
|
25
|
+
| Column | Type | Description |
|
|
26
|
+
|--------|------|-------------|
|
|
27
|
+
| id | integer | Primary key |
|
|
28
|
+
| sha | string | Full commit SHA |
|
|
29
|
+
| message | text | Commit message |
|
|
30
|
+
| author_name | string | Author name |
|
|
31
|
+
| author_email | string | Author email |
|
|
32
|
+
| committed_at | datetime | Commit timestamp |
|
|
33
|
+
| has_dependency_changes | boolean | True if this commit modified dependencies |
|
|
34
|
+
| created_at | datetime | |
|
|
35
|
+
| updated_at | datetime | |
|
|
36
|
+
|
|
37
|
+
Indexes: `sha` (unique)
|
|
38
|
+
|
|
39
|
+
### branch_commits
|
|
40
|
+
|
|
41
|
+
Join table linking commits to branches. A commit can belong to multiple branches.
|
|
42
|
+
|
|
43
|
+
| Column | Type | Description |
|
|
44
|
+
|--------|------|-------------|
|
|
45
|
+
| id | integer | Primary key |
|
|
46
|
+
| branch_id | integer | Foreign key to branches |
|
|
47
|
+
| commit_id | integer | Foreign key to commits |
|
|
48
|
+
| position | integer | Order of commit in branch history |
|
|
49
|
+
|
|
50
|
+
Indexes: `(branch_id, commit_id)` (unique)
|
|
51
|
+
|
|
52
|
+
### manifests
|
|
53
|
+
|
|
54
|
+
Stores manifest file metadata.
|
|
55
|
+
|
|
56
|
+
| Column | Type | Description |
|
|
57
|
+
|--------|------|-------------|
|
|
58
|
+
| id | integer | Primary key |
|
|
59
|
+
| path | string | File path (e.g., "Gemfile", "package.json") |
|
|
60
|
+
| platform | string | Package manager (e.g., "rubygems", "npm") |
|
|
61
|
+
| kind | string | Manifest type (e.g., "manifest", "lockfile") |
|
|
62
|
+
| created_at | datetime | |
|
|
63
|
+
| updated_at | datetime | |
|
|
64
|
+
|
|
65
|
+
Indexes: `path`
|
|
66
|
+
|
|
67
|
+
### dependency_changes
|
|
68
|
+
|
|
69
|
+
Records each dependency addition, modification, or removal.
|
|
70
|
+
|
|
71
|
+
| Column | Type | Description |
|
|
72
|
+
|--------|------|-------------|
|
|
73
|
+
| id | integer | Primary key |
|
|
74
|
+
| commit_id | integer | Foreign key to commits |
|
|
75
|
+
| manifest_id | integer | Foreign key to manifests |
|
|
76
|
+
| name | string | Package name |
|
|
77
|
+
| platform | string | Package manager |
|
|
78
|
+
| change_type | string | "added", "modified", or "removed" |
|
|
79
|
+
| requirement | string | Version constraint after change |
|
|
80
|
+
| previous_requirement | string | Version constraint before change (for modifications) |
|
|
81
|
+
| dependency_type | string | "runtime", "development", etc. |
|
|
82
|
+
| created_at | datetime | |
|
|
83
|
+
| updated_at | datetime | |
|
|
84
|
+
|
|
85
|
+
Indexes: `name`, `platform`, `(commit_id, name)`
|
|
86
|
+
|
|
87
|
+
### dependency_snapshots
|
|
88
|
+
|
|
89
|
+
Stores the complete dependency state at each commit that has changes. This lets "what dependencies existed at commit X" be answered with a single indexed query instead of replaying history.
|
|
90
|
+
|
|
91
|
+
| Column | Type | Description |
|
|
92
|
+
|--------|------|-------------|
|
|
93
|
+
| id | integer | Primary key |
|
|
94
|
+
| commit_id | integer | Foreign key to commits |
|
|
95
|
+
| manifest_id | integer | Foreign key to manifests |
|
|
96
|
+
| name | string | Package name |
|
|
97
|
+
| platform | string | Package manager |
|
|
98
|
+
| requirement | string | Version constraint |
|
|
99
|
+
| dependency_type | string | "runtime", "development", etc. |
|
|
100
|
+
| created_at | datetime | |
|
|
101
|
+
| updated_at | datetime | |
|
|
102
|
+
|
|
103
|
+
Indexes: `(commit_id, manifest_id, name)` (unique), `name`, `platform`
|
|
104
|
+
|
|
105
|
+
## Relationships
|
|
106
|
+
|
|
107
|
+
```
|
|
108
|
+
branches ──┬── branch_commits ──┬── commits
           │                    │
           │                    ├── dependency_changes ──── manifests
           │                    │
           │                    └── dependency_snapshots ── manifests
           │
           └── last_analyzed_sha (references commits.sha)
|
|
115
|
+
```
|
|
116
|
+
|
|
117
|
+
## Design Notes
|
|
118
|
+
|
|
119
|
+
**Why snapshots?**
|
|
120
|
+
|
|
121
|
+
Without snapshots, answering "what dependencies existed at commit X" requires replaying all changes from the beginning. With snapshots, it's a single query. The tradeoff is storage space, but SQLite handles this well.
|
|
122
|
+
|
|
123
|
+
**Why branch_commits?**
|
|
124
|
+
|
|
125
|
+
Git commits are branch-agnostic. The same commit can appear on multiple branches. This join table tracks which commits belong to which branches and their order, enabling branch-specific queries.
|
|
126
|
+
|
|
127
|
+
**Platform field duplication**
|
|
128
|
+
|
|
129
|
+
The platform appears in both `manifests` and `dependency_changes`/`dependency_snapshots`. This denormalization speeds up queries that filter by platform without requiring joins.
|
data/lib/git/pkgs/analyzer.rb
ADDED
|
@@ -0,0 +1,270 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "bibliothecary"
|
|
4
|
+
|
|
5
|
+
module Git
  module Pkgs
    # Detects dependency changes introduced by individual commits.
    #
    # For a non-merge commit the analyzer takes the paths the repository
    # reports as added, modified or deleted, asks Bibliothecary which of them
    # are manifests, parses the manifest blobs before/after the commit and
    # returns the per-dependency differences together with an updated
    # snapshot. Parsed blobs are memoised per (blob oid, path).
    class Analyzer
      attr_reader :repository

      # Common manifest file patterns for quick pre-filtering
      # This avoids calling Bibliothecary.identify_manifests for commits that clearly don't touch manifests
      QUICK_MANIFEST_PATTERNS = %w[
        Gemfile Gemfile.lock gems.rb gems.locked *.gemspec
        package.json package-lock.json yarn.lock npm-shrinkwrap.json pnpm-lock.yaml bun.lock npm-ls.json
        setup.py req*.txt req*.pip requirements/*.txt requirements/*.pip requirements.frozen
        Pipfile Pipfile.lock pyproject.toml poetry.lock uv.lock pylock.toml
        pip-resolved-dependencies.txt pip-dependency-graph.json
        pom.xml ivy.xml build.gradle build.gradle.kts gradle-dependencies-q.txt
        maven-resolved-dependencies.txt sbt-update-full.txt maven-dependency-tree.txt maven-dependency-tree.dot
        Cargo.toml Cargo.lock
        go.mod go.sum glide.yaml glide.lock Godeps Godeps/Godeps.json
        vendor/manifest vendor/vendor.json Gopkg.toml Gopkg.lock go-resolved-dependencies.json
        composer.json composer.lock
        Podfile Podfile.lock *.podspec *.podspec.json
        packages.config packages.lock.json Project.json Project.lock.json
        *.nuspec paket.lock *.csproj project.assets.json
        cyclonedx.xml cyclonedx.json *.cdx.xml *.cdx.json
        *.spdx *.spdx.json
        bower.json bentofile.yaml
        META.json META.yml
        environment.yml environment.yaml
        cog.yaml versions.json MLmodel DESCRIPTION
        pubspec.yaml pubspec.lock
        dub.json dub.sdl
        REQUIRE
        shard.yml shard.lock
        elm-package.json elm_dependencies.json elm-stuff/exact-dependencies.json
        haxelib.json
        action.yml action.yaml .github/workflows/*.yml .github/workflows/*.yaml
        Dockerfile docker-compose*.yml docker-compose*.yaml
        dvc.yaml vcpkg.json
        Brewfile Brewfile.lock.json
        Modelfile
      ].freeze

      # Union of the patterns above. Exact names must match a whole path
      # segment at the end of the path; glob patterns are translated loosely
      # ("*" becomes ".*") and left unanchored, so this regex can only produce
      # false positives, which identify_manifests filters out afterwards.
      QUICK_MANIFEST_REGEX = Regexp.union(
        QUICK_MANIFEST_PATTERNS.map do |pattern|
          if pattern.include?('*')
            Regexp.new(pattern.gsub('.', '\\.').gsub('*', '.*'))
          else
            /(?:^|\/)#{Regexp.escape(pattern)}$/
          end
        end
      ).freeze

      # @param repository [#merge_commit?, #blob_paths, #blob_oid_at_commit, #blob_content]
      #   object used to read commits and blobs
      def initialize(repository)
        @repository = repository
        # "oid:path" => { result: parsed manifest or nil, hits: Integer }
        @blob_cache = {}
      end

      # Quick check if any paths might be manifests (fast regex check)
      #
      # @param paths [Array<String>]
      # @return [Boolean]
      def might_have_manifests?(paths)
        paths.any? { |p| p.match?(QUICK_MANIFEST_REGEX) }
      end

      # Quick check if a commit touches any manifest files. Merge commits
      # always answer false.
      #
      # @return [Boolean]
      def has_manifest_changes?(rugged_commit)
        return false if repository.merge_commit?(rugged_commit)

        all_paths = repository.blob_paths(rugged_commit).map { |p| p[:path] }
        return false unless might_have_manifests?(all_paths)

        Bibliothecary.identify_manifests(all_paths).any?
      end

      # Computes the dependency changes a commit introduces.
      #
      # @param rugged_commit commit object; its first parent is used as the
      #   "before" state
      # @param previous_snapshot [Hash] dependency state before this commit,
      #   keyed by [manifest_path, dependency_name]; it is not mutated
      # @return [Hash, nil] +{ changes: [...], snapshot: {...} }+, or nil for
      #   merge commits and commits that touch no manifest
      def analyze_commit(rugged_commit, previous_snapshot = {})
        return nil if repository.merge_commit?(rugged_commit)

        blob_paths = repository.blob_paths(rugged_commit)

        added_paths = paths_with_status(blob_paths, :added)
        modified_paths = paths_with_status(blob_paths, :modified)
        removed_paths = paths_with_status(blob_paths, :deleted)

        return nil unless might_have_manifests?(added_paths + modified_paths + removed_paths)

        added_manifests = Bibliothecary.identify_manifests(added_paths)
        modified_manifests = Bibliothecary.identify_manifests(modified_paths)
        removed_manifests = Bibliothecary.identify_manifests(removed_paths)

        return nil if added_manifests.empty? && modified_manifests.empty? && removed_manifests.empty?

        changes = []
        new_snapshot = previous_snapshot.dup

        # Process added manifest files: every dependency is an addition.
        added_manifests.each do |manifest_path|
          parsed = parse_manifest_at_commit(rugged_commit, manifest_path)
          next unless parsed

          dependencies_of(parsed).each do |dep|
            changes << build_change(manifest_path, parsed, dep, "added")
            new_snapshot[[manifest_path, dep[:name]]] = snapshot_entry(parsed, dep)
          end
        end

        # Process modified manifest files: diff the dependency sets by name.
        modified_manifests.each do |manifest_path|
          before_result = parse_manifest_before_commit(rugged_commit, manifest_path)
          after_result = parse_manifest_at_commit(rugged_commit, manifest_path)

          next unless after_result

          before_deps = index_by_name(before_result)
          after_deps = index_by_name(after_result)

          (after_deps.keys - before_deps.keys).each do |name|
            dep = after_deps[name]
            changes << build_change(manifest_path, after_result, dep, "added")
            new_snapshot[[manifest_path, name]] = snapshot_entry(after_result, dep)
          end

          # before_result is non-nil here: a removed name implies before_deps
          # was built from a parsed "before" manifest.
          (before_deps.keys - after_deps.keys).each do |name|
            changes << build_change(manifest_path, before_result, before_deps[name], "removed")
            new_snapshot.delete([manifest_path, name])
          end

          (after_deps.keys & before_deps.keys).each do |name|
            before_dep = before_deps[name]
            after_dep = after_deps[name]

            unchanged = before_dep[:requirement] == after_dep[:requirement] &&
                        before_dep[:type] == after_dep[:type]
            next if unchanged

            changes << build_change(manifest_path, after_result, after_dep, "modified", previous: before_dep)
            new_snapshot[[manifest_path, name]] = snapshot_entry(after_result, after_dep)
          end
        end

        # Process removed manifest files: every dependency is a removal.
        removed_manifests.each do |manifest_path|
          parsed = parse_manifest_before_commit(rugged_commit, manifest_path)
          next unless parsed

          dependencies_of(parsed).each do |dep|
            changes << build_change(manifest_path, parsed, dep, "removed")
            new_snapshot.delete([manifest_path, dep[:name]])
          end
        end

        {
          changes: changes,
          snapshot: new_snapshot
        }
      end

      # Cache stats for debugging
      #
      # @return [Hash] number of cached blobs and how many of them were reused
      def cache_stats
        reused = @blob_cache.values.count { |entry| entry[:hits] > 0 }
        { cached_blobs: @blob_cache.size, blobs_with_hits: reused }
      end

      # Parses the manifest as it exists in the given commit.
      #
      # @return [Hash, nil] parsed manifest, or nil when the path has no blob
      #   in that commit or it cannot be read/parsed
      def parse_manifest_at_commit(rugged_commit, manifest_path)
        blob_oid = repository.blob_oid_at_commit(rugged_commit, manifest_path)
        return nil unless blob_oid

        parse_manifest_by_oid(blob_oid, manifest_path)
      end

      # Parses the manifest as it exists in the commit's first parent.
      #
      # @return [Hash, nil] nil for root commits or when the parent has no
      #   such blob
      def parse_manifest_before_commit(rugged_commit, manifest_path)
        return nil if rugged_commit.parents.empty?

        blob_oid = repository.blob_oid_at_commit(rugged_commit.parents[0], manifest_path)
        return nil unless blob_oid

        parse_manifest_by_oid(blob_oid, manifest_path)
      end

      # Parses a blob with Bibliothecary, memoising the first analysis result
      # per (oid, path). A missing blob content is not cached.
      #
      # @return [Hash, nil]
      def parse_manifest_by_oid(blob_oid, manifest_path)
        cache_key = "#{blob_oid}:#{manifest_path}"

        cached = @blob_cache[cache_key]
        if cached
          cached[:hits] += 1
          return cached[:result]
        end

        content = repository.blob_content(blob_oid)
        return nil unless content

        result = Bibliothecary.analyse_file(manifest_path, content).first
        @blob_cache[cache_key] = { result: result, hits: 0 }
        result
      end

      private

      # Paths of the blob entries that carry the given status.
      def paths_with_status(blob_paths, status)
        blob_paths.select { |p| p[:status] == status }.map { |p| p[:path] }
      end

      # Dependency list of a parsed manifest; empty when the manifest is nil
      # or carries no dependency list.
      def dependencies_of(parsed)
        (parsed && parsed[:dependencies]) || []
      end

      # Dependencies of a parsed manifest keyed by dependency name.
      def index_by_name(parsed)
        dependencies_of(parsed).map { |d| [d[:name], d] }.to_h
      end

      # Builds one change record. :previous_requirement is only present when
      # +previous+ (the dependency before the commit) is given.
      def build_change(manifest_path, parsed, dep, change_type, previous: nil)
        change = {
          manifest_path: manifest_path,
          ecosystem: parsed[:platform],
          kind: parsed[:kind],
          name: dep[:name],
          change_type: change_type,
          requirement: dep[:requirement]
        }
        change[:previous_requirement] = previous[:requirement] if previous
        change[:dependency_type] = dep[:type]
        change
      end

      # Builds the snapshot value stored for one dependency.
      def snapshot_entry(parsed, dep)
        {
          ecosystem: parsed[:platform],
          kind: parsed[:kind],
          requirement: dep[:requirement],
          dependency_type: dep[:type]
        }
      end
    end
  end
end
|