git-pkgs 0.4.0 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/benchmark_bulk.rb DELETED
@@ -1,167 +0,0 @@
1
- #!/usr/bin/env ruby
2
- # frozen_string_literal: true
3
-
4
- require "bundler/setup"
5
- require "git/pkgs"
6
- require "benchmark"
7
-
8
- repo_path = ARGV[0] || "/Users/andrew/code/octobox"
9
- sample_size = (ARGV[1] || 500).to_i
10
-
11
- # In-memory with WAL mode equivalent (journal_mode=memory for in-memory DB)
12
- Git::Pkgs::Database.connect_memory
13
- ActiveRecord::Base.connection.execute("PRAGMA synchronous = OFF")
14
- ActiveRecord::Base.connection.execute("PRAGMA journal_mode = MEMORY")
15
-
16
- repo = Git::Pkgs::Repository.new(repo_path)
17
- analyzer = Git::Pkgs::Analyzer.new(repo)
18
-
19
- walker = repo.walk(repo.default_branch)
20
- commits = walker.take(sample_size)
21
-
22
- puts "Bulk insert benchmark: #{commits.size} commits"
23
- puts "=" * 60
24
-
25
- # Pre-collect all data
26
- all_commits = []
27
- all_branch_commits = []
28
- all_changes = []
29
- all_snapshots = []
30
-
31
- snapshot = {}
32
- branch = Git::Pkgs::Models::Branch.find_or_create("main")
33
- position = 0
34
- manifests_cache = {}
35
-
36
- now = Time.now
37
-
38
- collect_time = Benchmark.realtime do
39
- commits.each do |rugged_commit|
40
- next if repo.merge_commit?(rugged_commit)
41
- position += 1
42
-
43
- result = analyzer.analyze_commit(rugged_commit, snapshot)
44
-
45
- all_commits << {
46
- sha: rugged_commit.oid,
47
- message: rugged_commit.message,
48
- author_name: rugged_commit.author[:name],
49
- author_email: rugged_commit.author[:email],
50
- committed_at: rugged_commit.time,
51
- has_dependency_changes: result && result[:changes].any?,
52
- created_at: now,
53
- updated_at: now
54
- }
55
-
56
- all_branch_commits << {
57
- branch_id: branch.id,
58
- commit_position: position, # placeholder, need to resolve after commit insert
59
- commit_sha: rugged_commit.oid
60
- }
61
-
62
- next unless result && result[:changes].any?
63
-
64
- result[:changes].each do |change|
65
- manifest_key = change[:manifest_path]
66
- unless manifests_cache[manifest_key]
67
- manifests_cache[manifest_key] = Git::Pkgs::Models::Manifest.find_or_create(
68
- path: change[:manifest_path],
69
- platform: change[:platform],
70
- kind: change[:kind]
71
- )
72
- end
73
-
74
- all_changes << {
75
- commit_sha: rugged_commit.oid,
76
- manifest_path: manifest_key,
77
- name: change[:name],
78
- platform: change[:platform],
79
- change_type: change[:change_type],
80
- requirement: change[:requirement],
81
- previous_requirement: change[:previous_requirement],
82
- dependency_type: change[:dependency_type],
83
- created_at: now,
84
- updated_at: now
85
- }
86
- end
87
-
88
- snapshot = result[:snapshot]
89
-
90
- snapshot.each do |(manifest_path, name), dep_info|
91
- all_snapshots << {
92
- commit_sha: rugged_commit.oid,
93
- manifest_path: manifest_path,
94
- name: name,
95
- platform: dep_info[:platform],
96
- requirement: dep_info[:requirement],
97
- dependency_type: dep_info[:dependency_type],
98
- created_at: now,
99
- updated_at: now
100
- }
101
- end
102
- end
103
- end
104
-
105
- puts "Collection time: #{collect_time.round(3)}s"
106
- puts "Data collected:"
107
- puts " Commits: #{all_commits.size}"
108
- puts " Changes: #{all_changes.size}"
109
- puts " Snapshots: #{all_snapshots.size}"
110
-
111
- # Bulk insert
112
- insert_time = Benchmark.realtime do
113
- # Insert commits
114
- Git::Pkgs::Models::Commit.insert_all(all_commits) if all_commits.any?
115
-
116
- # Build SHA -> ID map
117
- commit_ids = Git::Pkgs::Models::Commit.where(sha: all_commits.map { |c| c[:sha] }).pluck(:sha, :id).to_h
118
- manifest_ids = Git::Pkgs::Models::Manifest.pluck(:path, :id).to_h
119
-
120
- # Insert branch_commits with resolved IDs
121
- branch_commit_records = all_branch_commits.map do |bc|
122
- {
123
- branch_id: bc[:branch_id],
124
- commit_id: commit_ids[bc[:commit_sha]],
125
- position: bc[:commit_position]
126
- }
127
- end
128
- Git::Pkgs::Models::BranchCommit.insert_all(branch_commit_records) if branch_commit_records.any?
129
-
130
- # Insert changes with resolved IDs
131
- change_records = all_changes.map do |c|
132
- {
133
- commit_id: commit_ids[c[:commit_sha]],
134
- manifest_id: manifest_ids[c[:manifest_path]],
135
- name: c[:name],
136
- platform: c[:platform],
137
- change_type: c[:change_type],
138
- requirement: c[:requirement],
139
- previous_requirement: c[:previous_requirement],
140
- dependency_type: c[:dependency_type],
141
- created_at: c[:created_at],
142
- updated_at: c[:updated_at]
143
- }
144
- end
145
- Git::Pkgs::Models::DependencyChange.insert_all(change_records) if change_records.any?
146
-
147
- # Insert snapshots with resolved IDs
148
- snapshot_records = all_snapshots.map do |s|
149
- {
150
- commit_id: commit_ids[s[:commit_sha]],
151
- manifest_id: manifest_ids[s[:manifest_path]],
152
- name: s[:name],
153
- platform: s[:platform],
154
- requirement: s[:requirement],
155
- dependency_type: s[:dependency_type],
156
- created_at: s[:created_at],
157
- updated_at: s[:updated_at]
158
- }
159
- end
160
- Git::Pkgs::Models::DependencySnapshot.insert_all(snapshot_records) if snapshot_records.any?
161
- end
162
-
163
- puts "Insert time: #{insert_time.round(3)}s"
164
-
165
- total = collect_time + insert_time
166
- puts "\nTotal: #{total.round(3)}s"
167
- puts "Throughput: #{(all_commits.size / total).round(1)} commits/sec"
data/benchmark_db.rb DELETED
@@ -1,138 +0,0 @@
1
- #!/usr/bin/env ruby
2
- # frozen_string_literal: true
3
-
4
- require "bundler/setup"
5
- require "git/pkgs"
6
- require "benchmark"
7
-
8
- repo_path = ARGV[0] || "/Users/andrew/code/octobox"
9
- sample_size = (ARGV[1] || 200).to_i
10
-
11
- Git::Pkgs::Database.connect_memory
12
-
13
- repo = Git::Pkgs::Repository.new(repo_path)
14
- analyzer = Git::Pkgs::Analyzer.new(repo)
15
-
16
- walker = repo.walk(repo.default_branch)
17
- commits = walker.take(sample_size)
18
-
19
- puts "DB operation breakdown: #{commits.size} commits"
20
- puts "=" * 60
21
-
22
- timings = {
23
- commit_create: 0.0,
24
- branch_commit_create: 0.0,
25
- commit_update: 0.0,
26
- manifest_find_create: 0.0,
27
- change_create: 0.0,
28
- snapshot_create: 0.0
29
- }
30
-
31
- counts = {
32
- commits: 0,
33
- branch_commits: 0,
34
- changes: 0,
35
- snapshots: 0
36
- }
37
-
38
- snapshot = {}
39
- branch = Git::Pkgs::Models::Branch.find_or_create("main")
40
- position = 0
41
-
42
- commits.each do |rugged_commit|
43
- next if repo.merge_commit?(rugged_commit)
44
- position += 1
45
-
46
- result = analyzer.analyze_commit(rugged_commit, snapshot)
47
-
48
- commit = nil
49
- timings[:commit_create] += Benchmark.realtime do
50
- commit = Git::Pkgs::Models::Commit.find_or_create_from_rugged(rugged_commit)
51
- end
52
- counts[:commits] += 1
53
-
54
- timings[:branch_commit_create] += Benchmark.realtime do
55
- Git::Pkgs::Models::BranchCommit.find_or_create_by(
56
- branch: branch,
57
- commit: commit,
58
- position: position
59
- )
60
- end
61
- counts[:branch_commits] += 1
62
-
63
- next unless result && result[:changes].any?
64
-
65
- timings[:commit_update] += Benchmark.realtime do
66
- commit.update(has_dependency_changes: true)
67
- end
68
-
69
- result[:changes].each do |change|
70
- manifest = nil
71
- timings[:manifest_find_create] += Benchmark.realtime do
72
- manifest = Git::Pkgs::Models::Manifest.find_or_create(
73
- path: change[:manifest_path],
74
- platform: change[:platform],
75
- kind: change[:kind]
76
- )
77
- end
78
-
79
- timings[:change_create] += Benchmark.realtime do
80
- Git::Pkgs::Models::DependencyChange.create!(
81
- commit: commit,
82
- manifest: manifest,
83
- name: change[:name],
84
- platform: change[:platform],
85
- change_type: change[:change_type],
86
- requirement: change[:requirement],
87
- previous_requirement: change[:previous_requirement],
88
- dependency_type: change[:dependency_type]
89
- )
90
- end
91
- counts[:changes] += 1
92
- end
93
-
94
- snapshot = result[:snapshot]
95
-
96
- snapshot.each do |(manifest_path, name), dep_info|
97
- timings[:snapshot_create] += Benchmark.realtime do
98
- manifest = Git::Pkgs::Models::Manifest.find_by(path: manifest_path)
99
- Git::Pkgs::Models::DependencySnapshot.find_or_create_by(
100
- commit: commit,
101
- manifest: manifest,
102
- name: name
103
- ) do |s|
104
- s.platform = dep_info[:platform]
105
- s.requirement = dep_info[:requirement]
106
- s.dependency_type = dep_info[:dependency_type]
107
- end
108
- end
109
- counts[:snapshots] += 1
110
- end
111
- end
112
-
113
- total = timings.values.sum
114
-
115
- puts "\nDB operation breakdown:"
116
- puts "-" * 60
117
- timings.each do |op, time|
118
- pct = total > 0 ? (time / total * 100).round(1) : 0
119
- puts " #{op.to_s.ljust(22)} #{time.round(3).to_s.rjust(8)}s (#{pct}%)"
120
- end
121
- puts "-" * 60
122
- puts " #{'Total'.ljust(22)} #{total.round(3).to_s.rjust(8)}s"
123
-
124
- puts "\nRecord counts:"
125
- puts " Commits: #{counts[:commits]}"
126
- puts " BranchCommits: #{counts[:branch_commits]}"
127
- puts " Changes: #{counts[:changes]}"
128
- puts " Snapshots: #{counts[:snapshots]}"
129
-
130
- puts "\nPer-operation averages:"
131
- puts " commit_create: #{(timings[:commit_create] / counts[:commits] * 1000).round(3)}ms"
132
- puts " branch_commit_create: #{(timings[:branch_commit_create] / counts[:branch_commits] * 1000).round(3)}ms"
133
- if counts[:changes] > 0
134
- puts " change_create: #{(timings[:change_create] / counts[:changes] * 1000).round(3)}ms"
135
- end
136
- if counts[:snapshots] > 0
137
- puts " snapshot_create: #{(timings[:snapshot_create] / counts[:snapshots] * 1000).round(3)}ms"
138
- end
@@ -1,151 +0,0 @@
1
- #!/usr/bin/env ruby
2
- # frozen_string_literal: true
3
-
4
- require "bundler/setup"
5
- require "git/pkgs"
6
- require "benchmark"
7
-
8
- repo_path = ARGV[0] || "/Users/andrew/code/octobox"
9
- sample_size = (ARGV[1] || 500).to_i
10
-
11
- repo = Git::Pkgs::Repository.new(repo_path)
12
- analyzer = Git::Pkgs::Analyzer.new(repo)
13
-
14
- walker = repo.walk(repo.default_branch)
15
- commits = walker.take(sample_size)
16
-
17
- puts "Benchmarking #{commits.size} commits from #{repo_path}"
18
- puts "=" * 60
19
-
20
- timings = {
21
- walk_iteration: 0.0,
22
- blob_paths: 0.0,
23
- regex_check: 0.0,
24
- identify_manifests: 0.0,
25
- parse_manifests: 0.0,
26
- db_operations: 0.0
27
- }
28
-
29
- counts = {
30
- total: 0,
31
- merge_commits: 0,
32
- regex_passed: 0,
33
- identify_passed: 0,
34
- has_changes: 0,
35
- paths_by_commit: []
36
- }
37
-
38
- platform_times = Hash.new(0.0)
39
- platform_counts = Hash.new(0)
40
-
41
- commits.each do |rugged_commit|
42
- counts[:total] += 1
43
-
44
- if repo.merge_commit?(rugged_commit)
45
- counts[:merge_commits] += 1
46
- next
47
- end
48
-
49
- # Phase 1: Extract diff/file paths
50
- blob_paths = nil
51
- timings[:blob_paths] += Benchmark.realtime do
52
- blob_paths = repo.blob_paths(rugged_commit)
53
- end
54
-
55
- all_paths = blob_paths.map { |p| p[:path] }
56
- counts[:paths_by_commit] << all_paths.size
57
-
58
- # Phase 2: Quick regex check
59
- regex_match = nil
60
- timings[:regex_check] += Benchmark.realtime do
61
- regex_match = analyzer.might_have_manifests?(all_paths)
62
- end
63
-
64
- next unless regex_match
65
- counts[:regex_passed] += 1
66
-
67
- # Phase 3: Bibliothecary identify_manifests
68
- added_paths = blob_paths.select { |p| p[:status] == :added }.map { |p| p[:path] }
69
- modified_paths = blob_paths.select { |p| p[:status] == :modified }.map { |p| p[:path] }
70
- removed_paths = blob_paths.select { |p| p[:status] == :deleted }.map { |p| p[:path] }
71
-
72
- added_manifests = modified_manifests = removed_manifests = nil
73
- timings[:identify_manifests] += Benchmark.realtime do
74
- added_manifests = Bibliothecary.identify_manifests(added_paths)
75
- modified_manifests = Bibliothecary.identify_manifests(modified_paths)
76
- removed_manifests = Bibliothecary.identify_manifests(removed_paths)
77
- end
78
-
79
- all_manifests = added_manifests + modified_manifests + removed_manifests
80
- next if all_manifests.empty?
81
- counts[:identify_passed] += 1
82
-
83
- # Phase 4: Parse manifests (with platform tracking)
84
- timings[:parse_manifests] += Benchmark.realtime do
85
- all_manifests.each do |manifest_path|
86
- start = Process.clock_gettime(Process::CLOCK_MONOTONIC)
87
-
88
- blob_oid = repo.blob_oid_at_commit(rugged_commit, manifest_path)
89
- if blob_oid
90
- content = repo.blob_content(blob_oid)
91
- if content
92
- result = Bibliothecary.analyse_file(manifest_path, content).first
93
- if result
94
- platform_counts[result[:platform]] += 1
95
- platform_times[result[:platform]] += Process.clock_gettime(Process::CLOCK_MONOTONIC) - start
96
- end
97
- end
98
- end
99
- end
100
- end
101
-
102
- counts[:has_changes] += 1
103
- end
104
-
105
- total_time = timings.values.sum
106
-
107
- puts "\nTiming breakdown:"
108
- puts "-" * 60
109
- timings.each do |phase, time|
110
- pct = (time / total_time * 100).round(1)
111
- puts " #{phase.to_s.ljust(20)} #{time.round(3).to_s.rjust(8)}s (#{pct}%)"
112
- end
113
- puts "-" * 60
114
- puts " #{'Total'.ljust(20)} #{total_time.round(3).to_s.rjust(8)}s"
115
-
116
- puts "\nCommit counts:"
117
- puts "-" * 60
118
- puts " Total commits: #{counts[:total]}"
119
- puts " Merge commits: #{counts[:merge_commits]} (skipped)"
120
- puts " Regex passed: #{counts[:regex_passed]} (#{(counts[:regex_passed].to_f / (counts[:total] - counts[:merge_commits]) * 100).round(1)}%)"
121
- puts " Identify passed: #{counts[:identify_passed]}"
122
- puts " Has actual changes: #{counts[:has_changes]}"
123
-
124
- if counts[:paths_by_commit].any?
125
- avg_paths = counts[:paths_by_commit].sum.to_f / counts[:paths_by_commit].size
126
- max_paths = counts[:paths_by_commit].max
127
- puts "\nPaths per commit:"
128
- puts " Average: #{avg_paths.round(1)}"
129
- puts " Max: #{max_paths}"
130
- end
131
-
132
- if platform_times.any?
133
- puts "\nTime by platform:"
134
- puts "-" * 60
135
- platform_times.sort_by { |_, v| -v }.each do |platform, time|
136
- count = platform_counts[platform]
137
- avg = (time / count * 1000).round(2)
138
- puts " #{platform.ljust(20)} #{time.round(3).to_s.rjust(8)}s (#{count} files, #{avg}ms avg)"
139
- end
140
- end
141
-
142
- puts "\nPer-commit averages:"
143
- non_merge = counts[:total] - counts[:merge_commits]
144
- puts " blob_paths: #{(timings[:blob_paths] / non_merge * 1000).round(3)}ms"
145
- puts " regex_check: #{(timings[:regex_check] / non_merge * 1000).round(3)}ms"
146
- if counts[:regex_passed] > 0
147
- puts " identify_manifests: #{(timings[:identify_manifests] / counts[:regex_passed] * 1000).round(3)}ms (when regex passes)"
148
- end
149
-
150
- commits_per_sec = counts[:total] / total_time
151
- puts "\nThroughput: #{commits_per_sec.round(1)} commits/sec"
data/benchmark_full.rb DELETED
@@ -1,131 +0,0 @@
1
- #!/usr/bin/env ruby
2
- # frozen_string_literal: true
3
-
4
- require "bundler/setup"
5
- require "git/pkgs"
6
- require "benchmark"
7
-
8
- repo_path = ARGV[0] || "/Users/andrew/code/octobox"
9
- sample_size = (ARGV[1] || 500).to_i
10
-
11
- # Setup in-memory database for fair comparison
12
- Git::Pkgs::Database.connect_memory
13
-
14
- repo = Git::Pkgs::Repository.new(repo_path)
15
- analyzer = Git::Pkgs::Analyzer.new(repo)
16
-
17
- walker = repo.walk(repo.default_branch)
18
- commits = walker.take(sample_size)
19
-
20
- puts "Full pipeline benchmark: #{commits.size} commits"
21
- puts "=" * 60
22
-
23
- timings = {
24
- git_diff: 0.0,
25
- filtering: 0.0,
26
- parsing: 0.0,
27
- db_writes: 0.0
28
- }
29
-
30
- snapshot = {}
31
- branch = Git::Pkgs::Models::Branch.find_or_create("main")
32
- position = 0
33
-
34
- commits.each do |rugged_commit|
35
- next if repo.merge_commit?(rugged_commit)
36
- position += 1
37
-
38
- # Git diff extraction
39
- blob_paths = nil
40
- timings[:git_diff] += Benchmark.realtime do
41
- blob_paths = repo.blob_paths(rugged_commit)
42
- end
43
-
44
- all_paths = blob_paths.map { |p| p[:path] }
45
-
46
- # Filtering (regex + identify_manifests)
47
- result = nil
48
- timings[:filtering] += Benchmark.realtime do
49
- next unless analyzer.might_have_manifests?(all_paths)
50
-
51
- added_paths = blob_paths.select { |p| p[:status] == :added }.map { |p| p[:path] }
52
- modified_paths = blob_paths.select { |p| p[:status] == :modified }.map { |p| p[:path] }
53
- removed_paths = blob_paths.select { |p| p[:status] == :deleted }.map { |p| p[:path] }
54
-
55
- added_manifests = Bibliothecary.identify_manifests(added_paths)
56
- modified_manifests = Bibliothecary.identify_manifests(modified_paths)
57
- removed_manifests = Bibliothecary.identify_manifests(removed_paths)
58
-
59
- result = (added_manifests + modified_manifests + removed_manifests).any?
60
- end
61
-
62
- # Full analysis with parsing
63
- analysis_result = nil
64
- if result
65
- timings[:parsing] += Benchmark.realtime do
66
- analysis_result = analyzer.analyze_commit(rugged_commit, snapshot)
67
- end
68
- end
69
-
70
- # Database writes
71
- timings[:db_writes] += Benchmark.realtime do
72
- commit = Git::Pkgs::Models::Commit.find_or_create_from_rugged(rugged_commit)
73
- Git::Pkgs::Models::BranchCommit.find_or_create_by(
74
- branch: branch,
75
- commit: commit,
76
- position: position
77
- )
78
-
79
- if analysis_result && analysis_result[:changes].any?
80
- commit.update(has_dependency_changes: true)
81
-
82
- analysis_result[:changes].each do |change|
83
- manifest = Git::Pkgs::Models::Manifest.find_or_create(
84
- path: change[:manifest_path],
85
- platform: change[:platform],
86
- kind: change[:kind]
87
- )
88
-
89
- Git::Pkgs::Models::DependencyChange.create!(
90
- commit: commit,
91
- manifest: manifest,
92
- name: change[:name],
93
- platform: change[:platform],
94
- change_type: change[:change_type],
95
- requirement: change[:requirement],
96
- previous_requirement: change[:previous_requirement],
97
- dependency_type: change[:dependency_type]
98
- )
99
- end
100
-
101
- snapshot = analysis_result[:snapshot]
102
-
103
- snapshot.each do |(manifest_path, name), dep_info|
104
- manifest = Git::Pkgs::Models::Manifest.find_by(path: manifest_path)
105
- Git::Pkgs::Models::DependencySnapshot.find_or_create_by(
106
- commit: commit,
107
- manifest: manifest,
108
- name: name
109
- ) do |s|
110
- s.platform = dep_info[:platform]
111
- s.requirement = dep_info[:requirement]
112
- s.dependency_type = dep_info[:dependency_type]
113
- end
114
- end
115
- end
116
- end
117
- end
118
-
119
- total = timings.values.sum
120
-
121
- puts "\nFull pipeline breakdown:"
122
- puts "-" * 60
123
- timings.each do |phase, time|
124
- pct = total > 0 ? (time / total * 100).round(1) : 0
125
- puts " #{phase.to_s.ljust(15)} #{time.round(3).to_s.rjust(8)}s (#{pct}%)"
126
- end
127
- puts "-" * 60
128
- puts " #{'Total'.ljust(15)} #{total.round(3).to_s.rjust(8)}s"
129
-
130
- puts "\nThroughput: #{(position / total).round(1)} commits/sec"
131
- puts "Cache stats: #{analyzer.cache_stats}"