git-pkgs 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,151 @@
1
+ #!/usr/bin/env ruby
2
+ # frozen_string_literal: true
3
+
4
+ require "bundler/setup"
5
+ require "git/pkgs"
6
+ require "benchmark"
7
+
8
# Per-phase benchmark of the manifest detection pipeline.
#
# Usage: <script> [repo_path] [sample_size]
#
# repo_path   - repository to analyse; defaults to the current directory so the
#               script is usable on any machine.
# sample_size - maximum number of commits taken from the walker (default 500).
repo_path = ARGV[0] || Dir.pwd
sample_size = (ARGV[1] || 500).to_i

repo = Git::Pkgs::Repository.new(repo_path)
analyzer = Git::Pkgs::Analyzer.new(repo)

commits = repo.walk(repo.default_branch).take(sample_size)

puts "Benchmarking #{commits.size} commits from #{repo_path}"
puts "=" * 60

# Accumulated wall-clock seconds per phase.
# NOTE(review): :walk_iteration and :db_operations are never incremented by
# this script, so they always report 0.0s.
timings = {
  walk_iteration: 0.0,
  blob_paths: 0.0,
  regex_check: 0.0,
  identify_manifests: 0.0,
  parse_manifests: 0.0,
  db_operations: 0.0
}

counts = {
  total: 0,
  merge_commits: 0,
  regex_passed: 0,
  identify_passed: 0,
  has_changes: 0,
  paths_by_commit: []
}

platform_times = Hash.new(0.0)
platform_counts = Hash.new(0)

# Reporting helpers that fall back to 0.0 instead of producing NaN/Infinity
# when the sample is empty or consists solely of merge commits.
percent = ->(part, whole) { whole.positive? ? (part.to_f / whole * 100).round(1) : 0.0 }
per_item_ms = ->(seconds, items) { items.positive? ? (seconds / items * 1000).round(3) : 0.0 }

# Paths of the diff entries carrying the given status.
paths_with_status = lambda do |entries, status|
  entries.select { |entry| entry[:status] == status }.map { |entry| entry[:path] }
end

commits.each do |rugged_commit|
  counts[:total] += 1

  if repo.merge_commit?(rugged_commit)
    counts[:merge_commits] += 1
    next
  end

  # Phase 1: Extract diff/file paths
  blob_paths = nil
  timings[:blob_paths] += Benchmark.realtime do
    blob_paths = repo.blob_paths(rugged_commit)
  end

  all_paths = blob_paths.map { |entry| entry[:path] }
  counts[:paths_by_commit] << all_paths.size

  # Phase 2: Quick regex check
  regex_match = nil
  timings[:regex_check] += Benchmark.realtime do
    regex_match = analyzer.might_have_manifests?(all_paths)
  end

  next unless regex_match

  counts[:regex_passed] += 1

  # Phase 3: Bibliothecary identify_manifests
  added_paths = paths_with_status.call(blob_paths, :added)
  modified_paths = paths_with_status.call(blob_paths, :modified)
  removed_paths = paths_with_status.call(blob_paths, :deleted)

  all_manifests = []
  timings[:identify_manifests] += Benchmark.realtime do
    all_manifests = Bibliothecary.identify_manifests(added_paths) +
                    Bibliothecary.identify_manifests(modified_paths) +
                    Bibliothecary.identify_manifests(removed_paths)
  end

  next if all_manifests.empty?

  counts[:identify_passed] += 1

  # Phase 4: Parse manifests (with platform tracking)
  timings[:parse_manifests] += Benchmark.realtime do
    all_manifests.each do |manifest_path|
      started_at = Process.clock_gettime(Process::CLOCK_MONOTONIC)

      blob_oid = repo.blob_oid_at_commit(rugged_commit, manifest_path)
      next unless blob_oid

      content = repo.blob_content(blob_oid)
      next unless content

      result = Bibliothecary.analyse_file(manifest_path, content).first
      next unless result

      # Only successfully parsed files contribute to the per-platform figures.
      platform_counts[result[:platform]] += 1
      platform_times[result[:platform]] += Process.clock_gettime(Process::CLOCK_MONOTONIC) - started_at
    end
  end

  # NOTE(review): incremented for every commit that reaches this point, so the
  # figure always equals :identify_passed.
  counts[:has_changes] += 1
end

total_time = timings.values.sum
non_merge = counts[:total] - counts[:merge_commits]

puts "\nTiming breakdown:"
puts "-" * 60
timings.each do |phase, time|
  puts " #{phase.to_s.ljust(20)} #{time.round(3).to_s.rjust(8)}s (#{percent.call(time, total_time)}%)"
end
puts "-" * 60
puts " #{'Total'.ljust(20)} #{total_time.round(3).to_s.rjust(8)}s"

puts "\nCommit counts:"
puts "-" * 60
puts " Total commits: #{counts[:total]}"
puts " Merge commits: #{counts[:merge_commits]} (skipped)"
puts " Regex passed: #{counts[:regex_passed]} (#{percent.call(counts[:regex_passed], non_merge)}%)"
puts " Identify passed: #{counts[:identify_passed]}"
puts " Has actual changes: #{counts[:has_changes]}"

if counts[:paths_by_commit].any?
  avg_paths = counts[:paths_by_commit].sum.to_f / counts[:paths_by_commit].size
  max_paths = counts[:paths_by_commit].max
  puts "\nPaths per commit:"
  puts " Average: #{avg_paths.round(1)}"
  puts " Max: #{max_paths}"
end

if platform_times.any?
  puts "\nTime by platform:"
  puts "-" * 60
  platform_times.sort_by { |_, seconds| -seconds }.each do |platform, time|
    count = platform_counts[platform]
    avg = (time / count * 1000).round(2)
    puts " #{platform.to_s.ljust(20)} #{time.round(3).to_s.rjust(8)}s (#{count} files, #{avg}ms avg)"
  end
end

puts "\nPer-commit averages:"
puts " blob_paths: #{per_item_ms.call(timings[:blob_paths], non_merge)}ms"
puts " regex_check: #{per_item_ms.call(timings[:regex_check], non_merge)}ms"
if counts[:regex_passed] > 0
  puts " identify_manifests: #{per_item_ms.call(timings[:identify_manifests], counts[:regex_passed])}ms (when regex passes)"
end

commits_per_sec = total_time.positive? ? counts[:total] / total_time : 0.0
puts "\nThroughput: #{commits_per_sec.round(1)} commits/sec"
data/benchmark_full.rb ADDED
@@ -0,0 +1,131 @@
1
+ #!/usr/bin/env ruby
2
+ # frozen_string_literal: true
3
+
4
+ require "bundler/setup"
5
+ require "git/pkgs"
6
+ require "benchmark"
7
+
8
# End-to-end benchmark: git diff extraction, manifest filtering, parsing and
# database writes against an in-memory database.
#
# Usage: benchmark_full.rb [repo_path] [sample_size]
#
# repo_path   - repository to analyse; defaults to the current directory so the
#               script is usable on any machine.
# sample_size - maximum number of commits taken from the walker (default 500).
repo_path = ARGV[0] || Dir.pwd
sample_size = (ARGV[1] || 500).to_i

# Setup in-memory database for fair comparison
Git::Pkgs::Database.connect_memory

repo = Git::Pkgs::Repository.new(repo_path)
analyzer = Git::Pkgs::Analyzer.new(repo)

commits = repo.walk(repo.default_branch).take(sample_size)

puts "Full pipeline benchmark: #{commits.size} commits"
puts "=" * 60

# Accumulated wall-clock seconds per pipeline stage.
timings = {
  git_diff: 0.0,
  filtering: 0.0,
  parsing: 0.0,
  db_writes: 0.0
}

snapshot = {}
branch = Git::Pkgs::Models::Branch.find_or_create("main")
position = 0

commits.each do |rugged_commit|
  next if repo.merge_commit?(rugged_commit)

  position += 1

  # Git diff extraction
  blob_paths = nil
  timings[:git_diff] += Benchmark.realtime do
    blob_paths = repo.blob_paths(rugged_commit)
  end

  all_paths = blob_paths.map { |entry| entry[:path] }

  # Filtering (regex + identify_manifests). An explicit conditional is used
  # instead of `next`, which inside the timing block would only leave that
  # block rather than skip the commit.
  touches_manifests = false
  timings[:filtering] += Benchmark.realtime do
    if analyzer.might_have_manifests?(all_paths)
      manifests = %i[added modified deleted].flat_map do |status|
        paths = blob_paths.select { |entry| entry[:status] == status }.map { |entry| entry[:path] }
        Bibliothecary.identify_manifests(paths)
      end
      touches_manifests = manifests.any?
    end
  end

  # Full analysis with parsing
  analysis_result = nil
  if touches_manifests
    timings[:parsing] += Benchmark.realtime do
      analysis_result = analyzer.analyze_commit(rugged_commit, snapshot)
    end
  end

  # Database writes (performed for every non-merge commit)
  timings[:db_writes] += Benchmark.realtime do
    commit = Git::Pkgs::Models::Commit.find_or_create_from_rugged(rugged_commit)
    Git::Pkgs::Models::BranchCommit.find_or_create_by(
      branch: branch,
      commit: commit,
      position: position
    )

    if analysis_result && analysis_result[:changes].any?
      commit.update(has_dependency_changes: true)

      analysis_result[:changes].each do |change|
        # Analyzer#analyze_commit reports the package manager under
        # :ecosystem; it is stored in the models' platform attribute.
        manifest = Git::Pkgs::Models::Manifest.find_or_create(
          path: change[:manifest_path],
          platform: change[:ecosystem],
          kind: change[:kind]
        )

        Git::Pkgs::Models::DependencyChange.create!(
          commit: commit,
          manifest: manifest,
          name: change[:name],
          platform: change[:ecosystem],
          change_type: change[:change_type],
          requirement: change[:requirement],
          previous_requirement: change[:previous_requirement],
          dependency_type: change[:dependency_type]
        )
      end

      # Carry the updated dependency state forward to the next commit.
      snapshot = analysis_result[:snapshot]

      snapshot.each do |(manifest_path, name), dep_info|
        manifest = Git::Pkgs::Models::Manifest.find_by(path: manifest_path)
        Git::Pkgs::Models::DependencySnapshot.find_or_create_by(
          commit: commit,
          manifest: manifest,
          name: name
        ) do |s|
          s.platform = dep_info[:ecosystem]
          s.requirement = dep_info[:requirement]
          s.dependency_type = dep_info[:dependency_type]
        end
      end
    end
  end
end

total = timings.values.sum

puts "\nFull pipeline breakdown:"
puts "-" * 60
timings.each do |phase, time|
  pct = total > 0 ? (time / total * 100).round(1) : 0
  puts " #{phase.to_s.ljust(15)} #{time.round(3).to_s.rjust(8)}s (#{pct}%)"
end
puts "-" * 60
puts " #{'Total'.ljust(15)} #{total.round(3).to_s.rjust(8)}s"

# Guarded like the percentages above so an empty run prints 0.0, not NaN.
throughput = total > 0 ? (position / total).round(1) : 0.0
puts "\nThroughput: #{throughput} commits/sec"
puts "Cache stats: #{analyzer.cache_stats}"
data/docs/schema.md ADDED
@@ -0,0 +1,129 @@
1
+ # Database Schema
2
+
3
+ git-pkgs stores dependency history in a SQLite database at `.git/pkgs.sqlite3`.
4
+
5
+ ## Tables
6
+
7
+ ### branches
8
+
9
+ Tracks which branches have been analyzed.
10
+
11
+ | Column | Type | Description |
12
+ |--------|------|-------------|
13
+ | id | integer | Primary key |
14
+ | name | string | Branch name (e.g., "main", "develop") |
15
+ | last_analyzed_sha | string | SHA of last commit analyzed for incremental updates |
16
+ | created_at | datetime | |
17
+ | updated_at | datetime | |
18
+
19
+ Indexes: `name` (unique)
20
+
21
+ ### commits
22
+
23
+ Stores commit metadata for commits that have been analyzed.
24
+
25
+ | Column | Type | Description |
26
+ |--------|------|-------------|
27
+ | id | integer | Primary key |
28
+ | sha | string | Full commit SHA |
29
+ | message | text | Commit message |
30
+ | author_name | string | Author name |
31
+ | author_email | string | Author email |
32
+ | committed_at | datetime | Commit timestamp |
33
+ | has_dependency_changes | boolean | True if this commit modified dependencies |
34
+ | created_at | datetime | |
35
+ | updated_at | datetime | |
36
+
37
+ Indexes: `sha` (unique)
38
+
39
+ ### branch_commits
40
+
41
+ Join table linking commits to branches. A commit can belong to multiple branches.
42
+
43
+ | Column | Type | Description |
44
+ |--------|------|-------------|
45
+ | id | integer | Primary key |
46
+ | branch_id | integer | Foreign key to branches |
47
+ | commit_id | integer | Foreign key to commits |
48
+ | position | integer | Order of commit in branch history |
49
+
50
+ Indexes: `(branch_id, commit_id)` (unique)
51
+
52
+ ### manifests
53
+
54
+ Stores manifest file metadata.
55
+
56
+ | Column | Type | Description |
57
+ |--------|------|-------------|
58
+ | id | integer | Primary key |
59
+ | path | string | File path (e.g., "Gemfile", "package.json") |
60
+ | platform | string | Package manager (e.g., "rubygems", "npm") |
61
+ | kind | string | Manifest type (e.g., "manifest", "lockfile") |
62
+ | created_at | datetime | |
63
+ | updated_at | datetime | |
64
+
65
+ Indexes: `path`
66
+
67
+ ### dependency_changes
68
+
69
+ Records each dependency addition, modification, or removal.
70
+
71
+ | Column | Type | Description |
72
+ |--------|------|-------------|
73
+ | id | integer | Primary key |
74
+ | commit_id | integer | Foreign key to commits |
75
+ | manifest_id | integer | Foreign key to manifests |
76
+ | name | string | Package name |
77
+ | platform | string | Package manager |
78
+ | change_type | string | "added", "modified", or "removed" |
79
+ | requirement | string | Version constraint after change |
80
+ | previous_requirement | string | Version constraint before change (for modifications) |
81
+ | dependency_type | string | "runtime", "development", etc. |
82
+ | created_at | datetime | |
83
+ | updated_at | datetime | |
84
+
85
+ Indexes: `name`, `platform`, `(commit_id, name)`
86
+
87
+ ### dependency_snapshots
88
+
89
+ Stores the complete dependency state at each commit that has changes. Enables answering "what dependencies existed at commit X" with a single query, without replaying history.
90
+
91
+ | Column | Type | Description |
92
+ |--------|------|-------------|
93
+ | id | integer | Primary key |
94
+ | commit_id | integer | Foreign key to commits |
95
+ | manifest_id | integer | Foreign key to manifests |
96
+ | name | string | Package name |
97
+ | platform | string | Package manager |
98
+ | requirement | string | Version constraint |
99
+ | dependency_type | string | "runtime", "development", etc. |
100
+ | created_at | datetime | |
101
+ | updated_at | datetime | |
102
+
103
+ Indexes: `(commit_id, manifest_id, name)` (unique), `name`, `platform`
104
+
105
+ ## Relationships
106
+
107
+ ```
108
+ branches ──┬── branch_commits ──┬── commits
109
+            │                    │
110
+            │                    ├── dependency_changes ──── manifests
111
+            │                    │
112
+            │                    └── dependency_snapshots ── manifests
113
+            │
114
+            └── last_analyzed_sha (references commits.sha)
115
+ ```
116
+
117
+ ## Design Notes
118
+
119
+ **Why snapshots?**
120
+
121
+ Without snapshots, answering "what dependencies existed at commit X" requires replaying all changes from the beginning. With snapshots, it's a single query. The tradeoff is storage space, but SQLite handles this well.
122
+
123
+ **Why branch_commits?**
124
+
125
+ Git commits are branch-agnostic. The same commit can appear on multiple branches. This join table tracks which commits belong to which branches and their order, enabling branch-specific queries.
126
+
127
+ **Platform field duplication**
128
+
129
+ The platform appears in both `manifests` and `dependency_changes`/`dependency_snapshots`. This denormalization speeds up queries that filter by platform without requiring joins.
data/exe/git-pkgs ADDED
@@ -0,0 +1,6 @@
1
+ #!/usr/bin/env ruby
2
+ # frozen_string_literal: true
3
+
4
+ require "git/pkgs"
5
+ # Run the CLI with the arguments this executable was invoked with.
6
+ Git::Pkgs::CLI.run(ARGV)
@@ -0,0 +1,270 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "bibliothecary"
4
+
5
module Git
  module Pkgs
    # Works out which dependencies a commit added, modified or removed by
    # parsing the manifest files it touches with Bibliothecary.
    #
    # The repository collaborator must respond to +merge_commit?+,
    # +blob_paths+, +blob_oid_at_commit+ and +blob_content+. Parsed manifests
    # are memoised per blob OID and path for the lifetime of the analyzer.
    class Analyzer
      attr_reader :repository

      # Common manifest file patterns for quick pre-filtering.
      # This avoids calling Bibliothecary.identify_manifests for commits that
      # clearly don't touch manifests.
      QUICK_MANIFEST_PATTERNS = %w[
        Gemfile Gemfile.lock gems.rb gems.locked *.gemspec
        package.json package-lock.json yarn.lock npm-shrinkwrap.json pnpm-lock.yaml bun.lock npm-ls.json
        setup.py req*.txt req*.pip requirements/*.txt requirements/*.pip requirements.frozen
        Pipfile Pipfile.lock pyproject.toml poetry.lock uv.lock pylock.toml
        pip-resolved-dependencies.txt pip-dependency-graph.json
        pom.xml ivy.xml build.gradle build.gradle.kts gradle-dependencies-q.txt
        maven-resolved-dependencies.txt sbt-update-full.txt maven-dependency-tree.txt maven-dependency-tree.dot
        Cargo.toml Cargo.lock
        go.mod go.sum glide.yaml glide.lock Godeps Godeps/Godeps.json
        vendor/manifest vendor/vendor.json Gopkg.toml Gopkg.lock go-resolved-dependencies.json
        composer.json composer.lock
        Podfile Podfile.lock *.podspec *.podspec.json
        packages.config packages.lock.json Project.json Project.lock.json
        *.nuspec paket.lock *.csproj project.assets.json
        cyclonedx.xml cyclonedx.json *.cdx.xml *.cdx.json
        *.spdx *.spdx.json
        bower.json bentofile.yaml
        META.json META.yml
        environment.yml environment.yaml
        cog.yaml versions.json MLmodel DESCRIPTION
        pubspec.yaml pubspec.lock
        dub.json dub.sdl
        REQUIRE
        shard.yml shard.lock
        elm-package.json elm_dependencies.json elm-stuff/exact-dependencies.json
        haxelib.json
        action.yml action.yaml .github/workflows/*.yml .github/workflows/*.yaml
        Dockerfile docker-compose*.yml docker-compose*.yaml
        dvc.yaml vcpkg.json
        Brewfile Brewfile.lock.json
        Modelfile
      ].freeze

      # Union of all quick patterns. Entries containing "*" become unanchored
      # regexes ("." escaped, "*" turned into ".*"); plain names must match a
      # complete final path segment.
      QUICK_MANIFEST_REGEX = Regexp.union(
        QUICK_MANIFEST_PATTERNS.map do |pattern|
          if pattern.include?('*')
            Regexp.new(pattern.gsub('.', '\\.').gsub('*', '.*'))
          else
            /(?:^|\/)#{Regexp.escape(pattern)}$/
          end
        end
      ).freeze

      # @param repository [#merge_commit?, #blob_paths, #blob_oid_at_commit, #blob_content]
      def initialize(repository)
        @repository = repository
        @blob_cache = {}
      end

      # Quick check if any paths might be manifests (fast regex check).
      #
      # @param paths [Array<String>]
      # @return [Boolean]
      def might_have_manifests?(paths)
        paths.any? { |path| path.match?(QUICK_MANIFEST_REGEX) }
      end

      # Quick check if a commit touches any manifest files. Merge commits
      # always answer false.
      #
      # @return [Boolean]
      def has_manifest_changes?(rugged_commit)
        return false if repository.merge_commit?(rugged_commit)

        all_paths = repository.blob_paths(rugged_commit).map { |entry| entry[:path] }
        return false unless might_have_manifests?(all_paths)

        Bibliothecary.identify_manifests(all_paths).any?
      end

      # Computes the dependency changes made by a commit.
      #
      # @param rugged_commit commit object understood by the repository
      # @param previous_snapshot [Hash] dependency state before this commit,
      #   keyed by +[manifest_path, dependency_name]+; it is not mutated
      # @return [Hash, nil] +{ changes: Array<Hash>, snapshot: Hash }+, or nil
      #   for merge commits and commits that touch no manifest
      def analyze_commit(rugged_commit, previous_snapshot = {})
        return nil if repository.merge_commit?(rugged_commit)

        blob_paths = repository.blob_paths(rugged_commit)
        added_paths = paths_with_status(blob_paths, :added)
        modified_paths = paths_with_status(blob_paths, :modified)
        removed_paths = paths_with_status(blob_paths, :deleted)

        return nil unless might_have_manifests?(added_paths + modified_paths + removed_paths)

        added_manifests = Bibliothecary.identify_manifests(added_paths)
        modified_manifests = Bibliothecary.identify_manifests(modified_paths)
        removed_manifests = Bibliothecary.identify_manifests(removed_paths)

        return nil if [added_manifests, modified_manifests, removed_manifests].all?(&:empty?)

        changes = []
        new_snapshot = previous_snapshot.dup

        added_manifests.each { |path| record_added_manifest(rugged_commit, path, changes, new_snapshot) }
        modified_manifests.each { |path| record_modified_manifest(rugged_commit, path, changes, new_snapshot) }
        removed_manifests.each { |path| record_removed_manifest(rugged_commit, path, changes, new_snapshot) }

        { changes: changes, snapshot: new_snapshot }
      end

      # Cache stats for debugging.
      #
      # @return [Hash] number of cached parses and how many were reused
      def cache_stats
        reused = @blob_cache.values.count { |entry| entry[:hits] > 0 }
        { cached_blobs: @blob_cache.size, blobs_with_hits: reused }
      end

      # Parses the manifest as it exists in the given commit.
      #
      # @return [Hash, nil] nil when the path has no blob in that commit
      def parse_manifest_at_commit(rugged_commit, manifest_path)
        blob_oid = repository.blob_oid_at_commit(rugged_commit, manifest_path)
        return nil unless blob_oid

        parse_manifest_by_oid(blob_oid, manifest_path)
      end

      # Parses the manifest as it exists in the commit's first parent.
      #
      # @return [Hash, nil] nil for root commits or when the parent lacks the path
      def parse_manifest_before_commit(rugged_commit, manifest_path)
        return nil if rugged_commit.parents.empty?

        blob_oid = repository.blob_oid_at_commit(rugged_commit.parents[0], manifest_path)
        return nil unless blob_oid

        parse_manifest_by_oid(blob_oid, manifest_path)
      end

      # Parses a blob with Bibliothecary, memoising the first analysis result
      # per blob OID and path. Blobs whose content cannot be read are not cached.
      #
      # @return [Hash, nil]
      def parse_manifest_by_oid(blob_oid, manifest_path)
        cache_key = "#{blob_oid}:#{manifest_path}"

        cached = @blob_cache[cache_key]
        if cached
          cached[:hits] += 1
          return cached[:result]
        end

        content = repository.blob_content(blob_oid)
        return nil unless content

        result = Bibliothecary.analyse_file(manifest_path, content).first
        @blob_cache[cache_key] = { result: result, hits: 0 }
        result
      end

      private

      # Paths of the diff entries carrying the given status.
      def paths_with_status(blob_paths, status)
        blob_paths.select { |entry| entry[:status] == status }.map { |entry| entry[:path] }
      end

      # Dependencies of a parse result; empty when the result is nil or
      # carries no dependency list.
      def dependencies_of(parsed)
        (parsed && parsed[:dependencies]) || []
      end

      # Dependencies of a parse result indexed by name.
      def dependencies_by_name(parsed)
        dependencies_of(parsed).map { |dep| [dep[:name], dep] }.to_h
      end

      # Builds one change record; :previous_requirement is present only when
      # the dependency's earlier state is supplied.
      def build_change(manifest_path, parsed, dep, change_type, previous_dep = nil)
        change = {
          manifest_path: manifest_path,
          ecosystem: parsed[:platform],
          kind: parsed[:kind],
          name: dep[:name],
          change_type: change_type,
          requirement: dep[:requirement]
        }
        change[:previous_requirement] = previous_dep[:requirement] if previous_dep
        change[:dependency_type] = dep[:type]
        change
      end

      # Builds the snapshot value stored for one dependency.
      def snapshot_entry(parsed, dep)
        {
          ecosystem: parsed[:platform],
          kind: parsed[:kind],
          requirement: dep[:requirement],
          dependency_type: dep[:type]
        }
      end

      # Every dependency of a newly added manifest counts as "added".
      def record_added_manifest(rugged_commit, manifest_path, changes, snapshot)
        parsed = parse_manifest_at_commit(rugged_commit, manifest_path)
        return unless parsed

        dependencies_of(parsed).each do |dep|
          changes << build_change(manifest_path, parsed, dep, "added")
          snapshot[[manifest_path, dep[:name]]] = snapshot_entry(parsed, dep)
        end
      end

      # Diffs a modified manifest against its state in the first parent,
      # recording additions, then removals, then requirement/type changes.
      def record_modified_manifest(rugged_commit, manifest_path, changes, snapshot)
        before = parse_manifest_before_commit(rugged_commit, manifest_path)
        after = parse_manifest_at_commit(rugged_commit, manifest_path)
        # Without a parse result for the new state the snapshot is left alone.
        return unless after

        before_deps = dependencies_by_name(before)
        after_deps = dependencies_by_name(after)

        (after_deps.keys - before_deps.keys).each do |name|
          dep = after_deps[name]
          changes << build_change(manifest_path, after, dep, "added")
          snapshot[[manifest_path, name]] = snapshot_entry(after, dep)
        end

        (before_deps.keys - after_deps.keys).each do |name|
          changes << build_change(manifest_path, before, before_deps[name], "removed")
          snapshot.delete([manifest_path, name])
        end

        (after_deps.keys & before_deps.keys).each do |name|
          before_dep = before_deps[name]
          after_dep = after_deps[name]
          next if before_dep[:requirement] == after_dep[:requirement] && before_dep[:type] == after_dep[:type]

          changes << build_change(manifest_path, after, after_dep, "modified", before_dep)
          snapshot[[manifest_path, name]] = snapshot_entry(after, after_dep)
        end
      end

      # Every dependency of a deleted manifest counts as "removed".
      def record_removed_manifest(rugged_commit, manifest_path, changes, snapshot)
        parsed = parse_manifest_before_commit(rugged_commit, manifest_path)
        return unless parsed

        dependencies_of(parsed).each do |dep|
          changes << build_change(manifest_path, parsed, dep, "removed")
          snapshot.delete([manifest_path, dep[:name]])
        end
      end
    end
  end
end