repository_merger 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,151 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative 'branch_local_commit_map'
4
+
5
class RepositoryMerger
  # Replays the commits reachable from the given references into the monorepo,
  # taking commits from all references in ascending commit-time order.
  class CommitHistoryMerger
    attr_reader :configuration, :original_references, :commit_message_conversion, :progress_title
    attr_accessor :wants_to_abort

    # references                - references whose histories are imported
    # configuration             - provides the monorepo, repo commit map and logger
    # commit_message_conversion - optional callable receiving the original commit
    #                             and returning the message for the new commit
    # progress_title            - optional title passed to the progress tracker
    def initialize(references, configuration:, commit_message_conversion: nil, progress_title: nil)
      @progress_title = progress_title
      @commit_message_conversion = commit_message_conversion
      @configuration = configuration
      @original_references = references
    end

    # Processes every queued commit (stopping early once wants_to_abort is set)
    # and returns the last monorepo commit created for a mainline commit.
    # The branch-local map is always merged into the repo commit map, and the
    # latter is saved when it has a path, even if processing raised.
    def run
      sources = original_references.map { |ref| "#{ref.name} (#{ref.repo.name})" }
      logger.verbose("Importing commits from #{sources.join(', ')}", title: true)
      logger.start_tracking_progress_for(
        'commits',
        total: unprocessed_original_commit_queue.size,
        title: progress_title
      )

      while (original_commit = unprocessed_original_commit_queue.next)
        process_commit(original_commit)
        break if wants_to_abort
      end

      monorepo_head_commit
    ensure
      repo_commit_map.merge!(branch_local_commit_map)
      repo_commit_map.save if repo_commit_map.path
    end

    private

    attr_reader :monorepo_head_commit

    # Imports a single commit unless an equivalent monorepo commit already
    # exists, then records the mapping and advances the progress tracker.
    def process_commit(original_commit)
      logger.verbose " #{original_commit.commit_time} [#{original_commit.repo.name}] #{original_commit.message.each_line.first}"

      monorepo_commit = already_imported_monorepo_commit_for(original_commit)

      if monorepo_commit
        logger.verbose " Already imported as #{monorepo_commit.abbreviated_id}. Skipping."
      else
        monorepo_commit = import_commit_into_monorepo(original_commit)
        logger.verbose " Created commit #{monorepo_commit.abbreviated_id}."
      end

      branch_local_commit_map.register(monorepo_commit: monorepo_commit, original_commit: original_commit)

      # Only mainline commits move the head used as parent for later commits.
      @monorepo_head_commit = monorepo_commit if mainline?(original_commit)

      logger.increment_progress
    end

    # Returns a previously imported monorepo commit whose parents equal the
    # parents this import would use, or nil when there is none.
    def already_imported_monorepo_commit_for(original_commit)
      candidates = repo_commit_map.monorepo_commits_for(original_commit)
      candidates.find { |candidate| candidate.parents == parent_commits_in_monorepo_for(original_commit) }
    end

    # Creates the monorepo commit under a subdirectory named after the
    # original repository.
    def import_commit_into_monorepo(original_commit)
      monorepo.import_commit(
        original_commit,
        new_parents: parent_commits_in_monorepo_for(original_commit),
        subdirectory: original_commit.repo.name,
        message: commit_message_from(original_commit)
      )
    end

    # Root commits are attached to the current monorepo head (if any).
    # A mainline commit with a mainline parent also uses the current head;
    # every other parent is looked up in the branch-local map.
    def parent_commits_in_monorepo_for(original_commit)
      return [monorepo_head_commit].compact if original_commit.root?

      original_commit.parents.map do |original_parent_commit|
        both_on_mainline = mainline?(original_commit) && mainline?(original_parent_commit)
        both_on_mainline ? monorepo_head_commit : branch_local_commit_map.monorepo_commit_for(original_parent_commit)
      end
    end

    # Asks the reference belonging to the commit's repository.
    def mainline?(original_commit)
      original_references_by_repo[original_commit.repo].mainline?(original_commit)
    end

    def commit_message_from(original_commit)
      return original_commit.message unless commit_message_conversion

      commit_message_conversion.call(original_commit)
    end

    def unprocessed_original_commit_queue
      @unprocessed_original_commit_queue ||= OriginalCommitQueue.new(original_references)
    end

    def original_references_by_repo
      @original_references_by_repo ||= original_references.map { |reference| [reference.repo, reference] }.to_h
    end

    def monorepo
      configuration.monorepo
    end

    def repo_commit_map
      configuration.repo_commit_map
    end

    def branch_local_commit_map
      @branch_local_commit_map ||= BranchLocalCommitMap.new(monorepo: monorepo)
    end

    def logger
      configuration.logger
    end

    # Merges the per-reference commit lists into one stream ordered by the
    # commit time of each list's next commit.
    class OriginalCommitQueue
      attr_reader :references

      def initialize(references)
        @references = references
      end

      # Removes and returns the pending commit with the earliest commit time,
      # or nil once every list is exhausted.
      def next
        pending = commit_queues.reject(&:empty?)
        earliest = pending.min_by { |queue| queue.first.commit_time }
        earliest&.shift
      end

      # Number of commits not yet handed out.
      def size
        commit_queues.sum(&:size)
      end

      # Mutable copies of each reference's commit list (the lists themselves
      # are left untouched).
      def commit_queues
        @commit_queues ||= references.map { |reference| reference.topologically_ordered_commits_from_root.dup }
      end
    end
  end
end
@@ -0,0 +1,9 @@
1
+ # frozen_string_literal: true
2
+
3
class RepositoryMerger
  # Shared helper for commit maps keyed by original commit.
  module CommitMap
    # Builds the map key for an original commit: "<commit id>@<repo name>".
    def original_commit_key(commit)
      format('%s@%s', commit.id, commit.repo.name)
    end
  end
end
@@ -0,0 +1,36 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative 'logger'
4
+ require_relative 'mono_repository'
5
+ require_relative 'repository'
6
+ require_relative 'repository_commit_map'
7
+
8
class RepositoryMerger
  # Holds the user-supplied settings and lazily builds the objects derived
  # from them (repositories, commit map, logger). Each derived object is
  # created on first access and then reused.
  class Configuration
    attr_reader :original_repo_paths, :monorepo_path, :commit_map_file_path, :log_output, :verbose_logging

    def initialize(
      original_repo_paths:,
      monorepo_path:,
      commit_map_file_path: 'commit_map.json',
      log_output: $stdout,
      verbose_logging: false
    )
      @verbose_logging = verbose_logging
      @log_output = log_output
      @commit_map_file_path = commit_map_file_path
      @monorepo_path = monorepo_path
      @original_repo_paths = original_repo_paths
    end

    # One Repository per configured original path, in the configured order.
    def original_repos
      @original_repos ||= original_repo_paths.map { |repo_path| Repository.new(repo_path) }
    end

    def monorepo
      @monorepo ||= MonoRepository.new(monorepo_path)
    end

    # Commit map stored at commit_map_file_path and bound to the monorepo.
    def repo_commit_map
      @repo_commit_map ||= RepositoryCommitMap.new(path: commit_map_file_path, monorepo: monorepo)
    end

    def logger
      @logger ||= Logger.new(log_output, verbose: verbose_logging)
    end
  end
end
@@ -0,0 +1,49 @@
1
+ # frozen_string_literal: true
2
+
3
class RepositoryMerger
  # Finds GitHub issue / pull request references ("#123", "GH-123",
  # "user/repo#123") in text and rewrites repository-local ones into
  # absolute ones.
  module GitHubIssueReference
    RepositoryLocalReference = Struct.new(:issue_number, keyword_init: true)
    AbsoluteReference = Struct.new(:username, :repo_name, :issue_number, keyword_init: true)

    # https://github.com/isiahmeadows/github-limits
    USERNAME_PATTERN = /(?<username>[a-z\d](?:[a-z\d]|-(?=[a-z\d])){0,38})/i.freeze
    REPOSITORY_NAME_PATTERN = /(?<repo_name>[a-z0-9.\-_]{1,100})/i.freeze
    ISSUE_NUMBER_PATTERN = /(?<issue_number>\d{1,5})/.freeze # Technically max is 1073741824 but it won't exist in real life

    # https://docs.github.com/en/github/writing-on-github/working-with-advanced-formatting/autolinked-references-and-urls#issues-and-pull-requests
    REPO_LOCAL_REFERENCE_PATTERN = /(?<!\w)(?<repo_local_reference>(?:#|GH-)#{ISSUE_NUMBER_PATTERN})(?!\w)/i.freeze
    ABSOLUTE_REFERENCE_PATTERN = /(?<!\w)(?<absolute_reference>#{USERNAME_PATTERN}\/#{REPOSITORY_NAME_PATTERN}##{ISSUE_NUMBER_PATTERN})(?!\w)/.freeze
    REFERENCE_PATTERN = /(?:#{REPO_LOCAL_REFERENCE_PATTERN}|#{ABSOLUTE_REFERENCE_PATTERN})/.freeze

    # Returns every reference found in +message+, in order of appearance, as
    # RepositoryLocalReference / AbsoluteReference structs.
    def self.extract_references_from(message)
      references = []

      message.scan(REFERENCE_PATTERN) do
        reference = create_reference_from(Regexp.last_match)
        references << reference if reference
      end

      references
    end

    # Returns a copy of +message+ in which each repository-local reference is
    # replaced by "<username>/<repo_name>#<issue number>". Absolute references
    # are left as they are.
    def self.convert_repo_local_references_to_absolute_ones_in(message, username:, repo_name:)
      message.gsub(REPO_LOCAL_REFERENCE_PATTERN) do
        reference = create_reference_from(Regexp.last_match)
        raise "Unable to build a reference from #{Regexp.last_match[0].inspect}" unless reference

        "#{username}/#{repo_name}##{reference.issue_number}"
      end
    end

    # Builds the reference struct for a match of one of the patterns above, or
    # returns nil when the match carries neither kind of reference.
    def self.create_reference_from(match)
      if match[:repo_local_reference]
        RepositoryLocalReference.new(issue_number: issue_number_from(match))
      elsif match[:absolute_reference]
        AbsoluteReference.new(
          username: match[:username],
          repo_name: match[:repo_name],
          issue_number: issue_number_from(match)
        )
      end
    end

    # Parses the captured digits as a decimal number. The explicit base is
    # required: without it Integer() treats a leading zero as an octal prefix,
    # so "010" would become 8 and "08" would raise ArgumentError.
    def self.issue_number_from(match)
      Integer(match[:issue_number], 10)
    end

    class << self
      # Original (misspelled) name, kept so existing callers keep working.
      alias create_referece_from create_reference_from
    end
  end
end
@@ -0,0 +1,57 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'ruby-progressbar'
4
+
5
class RepositoryMerger
  # Emits log messages and progress through a ProgressBar bound to the
  # configured output.
  class Logger
    attr_reader :output

    # output  - stream handed to the progress bar
    # verbose - whether #verbose messages are emitted
    def initialize(output, verbose:)
      @output = output
      @verbose = verbose
    end

    # Always logs +message+; with title: true it is wrapped in "=" rules.
    def info(message, title: false)
      log(message, title: title)
    end

    # Logs +message+ only when verbose logging is enabled.
    def verbose(message, title: false)
      return unless verbose?

      log(message, title: title)
    end

    def verbose?
      @verbose
    end

    # Replaces the current progress bar with one counting +total+ items,
    # labelled with +plural_noun+ and, when given, prefixed by +title+.
    def start_tracking_progress_for(plural_noun, total:, title: nil)
      format = " %c/%C #{plural_noun} |%w>%i| %e "
      format = " %t#{format}" if title

      @progressbar = ProgressBar.create(
        format: format,
        output: output,
        title: title,
        total: total
      )
    end

    def increment_progress
      progressbar.increment
    end

    private

    def log(message, title:)
      message = "#{'=' * 10} #{message} #{'=' * 10}" if title

      progressbar.log(message)
    end

    # Bar used before #start_tracking_progress_for has been called. It must be
    # bound to the configured output as well; otherwise early messages would
    # go to the progress bar's default stream instead of +output+.
    def progressbar
      @progressbar ||= ProgressBar.create(output: output)
    end
  end
end
@@ -0,0 +1,109 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative 'repository'
4
+ require 'rugged'
5
+
6
class RepositoryMerger
  # The repository that receives the imported commits, tags and branches.
  class MonoRepository < Repository
    def initialize(_path)
      super
      configure
    end

    # Re-creates +original_commit+ in this repository with its contents placed
    # under +subdirectory+ and with +new_parents+ as parents. Returns the new
    # commit. +message+ overrides the original commit message when given.
    def import_commit(original_commit, new_parents:, subdirectory:, message: nil)
      checkout_contents_if_needed(new_parents.first) unless new_parents.empty?

      stage_contents_of(original_commit, subdirectory: subdirectory)

      parent_ids = new_parents.map(&:id)
      create_commit_with_metadata_of(original_commit, new_parent_ids: parent_ids, message: message)
    end

    # Creates a tag named +new_tag_name+ on +new_commit_id+, carrying over the
    # annotation of +original_tag+ when it has one, and returns the new tag.
    def import_tag(original_tag, new_commit_id:, new_tag_name:)
      # This is to suppress warning messages
      # `warning: Using the last argument as keyword parameters is deprecated`
      # from rugged gem until a fixed version is released.
      # https://github.com/libgit2/rugged/pull/840
      tag_arguments = [new_tag_name, new_commit_id]

      if original_tag.annotation
        rugged_repo.tags.create(*tag_arguments, **original_tag.annotation)
      else
        rugged_repo.tags.create(*tag_arguments)
      end

      tag_for(new_tag_name)
    end

    # Points +branch_name+ at +commit_id+, creating the branch if necessary.
    def create_or_update_branch(branch_name, commit_id:)
      return rugged_repo.branches.create(branch_name, commit_id) unless branch_for(branch_name)

      # `rugged_repo.branches.create` with master branch fails with error:
      # cannot force update branch 'master' as it is the current HEAD of the repository. (Rugged::ReferenceError)
      rugged_repo.references.update("refs/heads/#{branch_name}", commit_id)
    end

    private

    def configure
      rugged_repo.config['core.ignorecase'] = false
    end

    attr_accessor :current_checked_out_commit_id

    # Checks out the contents of +commit+ unless they are already the ones
    # recorded as checked out (or empty-commit debug mode is on).
    def checkout_contents_if_needed(commit)
      return if empty_commit_for_debug? || commit.id == current_checked_out_commit_id

      commit.checkout_contents
      @current_checked_out_commit_id = commit.id
    end

    # Extracts the files of +original_commit+ into +subdirectory+ and stages
    # them in the index.
    def stage_contents_of(original_commit, subdirectory:)
      return if empty_commit_for_debug?

      destination = File.join(path, subdirectory)
      original_commit.extract_contents_into(destination)

      # First, track (almost) all files (including removed files) with .gitignore rules
      rugged_repo.index.add_all(subdirectory)

      # Then specify all the file paths explicitly to track files
      # that match .gitignore but are tracked in the original commit.
      # We cannot use Rugged::Index#add_all with :force option
      # since it has a bug where it raises strange error when adding files in a ignored directory.
      # https://github.com/libgit2/libgit2/issues/4377
      original_commit.files.each do |relative_path|
        rugged_repo.index.add(File.join(subdirectory, relative_path))
      end

      rugged_repo.index.write
    end

    # Creates the commit from the current index (or an empty tree in debug
    # mode), copying author and committer from the original commit.
    def create_commit_with_metadata_of(original_commit, new_parent_ids:, message:)
      source = original_commit.rugged_commit

      commit_attributes = {
        message: message || source.message,
        committer: source.committer,
        author: source.author,
        tree: empty_commit_for_debug? ? empty_tree.oid : rugged_repo.index.write_tree,
        parents: new_parent_ids
      }
      new_commit_id = Rugged::Commit.create(rugged_repo, commit_attributes)

      # The working contents now correspond to the commit just created.
      @current_checked_out_commit_id = new_commit_id

      commit_for(new_commit_id)
    end

    # Truthy whenever the environment variable is set, whatever its value.
    def empty_commit_for_debug?
      ENV.fetch('REPO_MERGER_IMPORT_AS_EMPTY_COMMITS', nil)
    end

    def empty_tree
      @empty_tree ||= Rugged::Tree.empty(rugged_repo)
    end
  end
end
@@ -0,0 +1,38 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative 'commit'
4
+
5
+ require 'rugged'
6
+ require 'set'
7
+
8
class RepositoryMerger
  # Behaviour shared by objects that point at a commit. Including classes
  # provide #repo and override #target_commit.
  module Reference
    # Must be overridden to return the commit this reference points at.
    def target_commit
      raise NotImplementedError
    end

    # Frozen list of the commits reachable from the target commit, produced by
    # a walker sorted with SORT_TOPO | SORT_REVERSE. Computed once.
    def topologically_ordered_commits_from_root
      @topologically_ordered_commits_from_root ||= begin
        walker = Rugged::Walker.new(repo.rugged_repo)
        walker.sorting(Rugged::SORT_TOPO | Rugged::SORT_REVERSE)
        walker.push(target_commit.id)
        commits = walker.map { |rugged_commit| Commit.new(rugged_commit, repo) }
        commits.freeze
      end
    end

    # True when +commit+ lies on the first-parent chain of the target commit.
    def mainline?(commit)
      mainline_commit_ids.include?(commit.id)
    end

    # Frozen set of the ids found by following first parents from the target
    # commit until a commit without parents is reached. Computed once.
    def mainline_commit_ids
      @mainline_commit_ids ||= begin
        ids = Set.new
        cursor = target_commit

        while cursor
          ids << cursor.id
          cursor = cursor.parents.first
        end

        ids.freeze
      end
    end
  end
end
@@ -0,0 +1,65 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative 'branch'
4
+ require_relative 'commit'
5
+ require_relative 'tag'
6
+
7
+ require 'rugged'
8
+
9
class RepositoryMerger
  # A Git repository on disk, identified by its absolute path.
  class Repository
    attr_reader :path

    # path - location of the repository; stored in expanded (absolute) form.
    def initialize(path)
      @path = File.expand_path(path)
    end

    # The underlying Rugged repository, opened on first use.
    def rugged_repo
      @rugged_repo ||= Rugged::Repository.new(path)
    end

    # Two repositories are equal when their paths are equal. Objects that do
    # not expose #path (nil, strings, ...) are simply unequal instead of
    # raising NoMethodError.
    def ==(other)
      other.respond_to?(:path) && path == other.path
    end

    alias eql? ==

    # Consistent with #eql?, so repositories can be used as Hash keys.
    def hash
      path.hash
    end

    # Last component of the repository path.
    def name
      File.basename(path)
    end

    # Returns the Branch called +name+, or nil when there is none.
    def branch_for(name)
      rugged_branch = rugged_repo.branches[name]
      return nil unless rugged_branch

      Branch.new(rugged_branch, self)
    end

    def branches
      rugged_repo.branches.map do |rugged_branch|
        Branch.new(rugged_branch, self)
      end
    end

    # Returns the Tag called +name+, or nil when there is none.
    def tag_for(name)
      rugged_tag = rugged_repo.tags[name]
      return nil unless rugged_tag

      Tag.new(rugged_tag, self)
    end

    def tags
      rugged_repo.tags.map do |rugged_tag|
        Tag.new(rugged_tag, self)
      end
    end

    # Returns the Commit with +commit_id+, or nil when the looked-up object is
    # not a commit.
    def commit_for(commit_id)
      object = rugged_repo.lookup(commit_id)
      return nil unless object.is_a?(Rugged::Commit)

      Commit.new(object, self)
    end
  end
end
@@ -0,0 +1,52 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative 'commit_map'
4
+ require 'json'
5
+
6
class RepositoryMerger
  # Maps original commit keys to the ids of the monorepo commits created from
  # them. The map can be loaded from and saved to a JSON file.
  class RepositoryCommitMap
    include CommitMap

    CannotSaveWithoutPathError = Class.new(StandardError)

    attr_reader :path, :monorepo

    def initialize(monorepo:, path: nil)
      @monorepo = monorepo
      @path = path
    end

    # The underlying Hash. Loaded from the JSON file at +path+ on first access
    # when that file exists; otherwise it starts out empty.
    def map
      @map ||= begin
        stored = path && File.exist?(path)
        stored ? JSON.parse(File.read(path)) : {}
      end
    end

    # Adds every entry of +branch_local_commit_map+ to this map, skipping
    # monorepo commit ids already recorded for the same original commit.
    def merge!(branch_local_commit_map)
      branch_local_commit_map.map.each do |key, commit_id|
        known_ids = (map[key] ||= [])
        known_ids << commit_id unless known_ids.include?(commit_id)
      end
    end

    # Writes the map to +path+ as pretty-printed JSON.
    # Raises CannotSaveWithoutPathError when no path was given.
    def save
      raise CannotSaveWithoutPathError unless path

      File.write(path, JSON.pretty_generate(map))
    end

    # Monorepo commits recorded for +original_commit+, looked up in the monorepo.
    def monorepo_commits_for(original_commit)
      monorepo_commit_ids_for(original_commit).map { |commit_id| monorepo.commit_for(commit_id) }
    end

    # Recorded monorepo commit ids for +original_commit+ ([] when none).
    def monorepo_commit_ids_for(original_commit)
      recorded_ids = map[original_commit_key(original_commit)]
      recorded_ids || []
    end
  end
end
@@ -0,0 +1,40 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative 'reference'
4
+
5
class RepositoryMerger
  # A tag of a repository, wrapping the corresponding Rugged tag.
  Tag = Struct.new(:rugged_tag, :repo) do
    include Reference

    def name
      rugged_tag.name
    end

    # The commit the tag points at. For annotated tags the target id is taken
    # from the annotation, otherwise from the tag itself.
    def target_commit
      commit_id = rugged_tag.annotated? ? rugged_tag.annotation.target_id : rugged_tag.target_id
      repo.commit_for(commit_id)
    end

    # Tagger and message of an annotated tag as a Hash; nil for other tags.
    def annotation
      return nil unless rugged_tag.annotated?

      { tagger: rugged_tag.annotation.tagger, message: rugged_tag.annotation.message }
    end

    # A tag is identified by its name.
    def id
      name
    end

    def revision_id
      name
    end
  end
end
@@ -0,0 +1,70 @@
1
+ # frozen_string_literal: true
2
+
3
class RepositoryMerger
  # Imports tags of the original repositories into the monorepo under the
  # names produced by tag_name_conversion.
  class TagImporter
    attr_reader :original_tags, :configuration, :tag_name_conversion

    # tags                - tags to import
    # configuration       - provides the monorepo, repo commit map and logger
    # tag_name_conversion - callable receiving a tag and returning its new
    #                       name, or a falsy value to leave the tag out
    def initialize(tags, configuration:, tag_name_conversion:)
      @tag_name_conversion = tag_name_conversion
      @configuration = configuration
      @original_tags = tags
    end

    def run
      logger.verbose('Importing tags', title: true)
      logger.start_tracking_progress_for('tags', total: original_tags.size)

      original_tags.each { |original_tag| process_tag(original_tag) }
    end

    # Imports one tag unless it is excluded by the name conversion or a tag
    # with the new name already exists; always advances the progress tracker.
    def process_tag(original_tag)
      logger.verbose " [#{original_tag.repo.name}] #{original_tag.name}"

      new_tag_name = tag_name_conversion.call(original_tag)

      if !new_tag_name
        logger.verbose ' Not for import. Skipping.'
      elsif monorepo.tag_for(new_tag_name)
        logger.verbose " Already imported as #{new_tag_name.inspect}. Skipping."
      else
        imported_tag = import_tag_into_monorepo(original_tag, new_tag_name: new_tag_name)
        logger.verbose " Imported as #{new_tag_name.inspect}." if imported_tag
      end

      logger.increment_progress
    end

    # Creates the tag in the monorepo and returns it, or returns nil (after
    # logging) when the tag's target commit has no monorepo counterpart yet.
    def import_tag_into_monorepo(original_tag, new_tag_name:)
      new_commit_id = monorepo_commit_id_for(original_tag)

      if new_commit_id
        return monorepo.import_tag(original_tag, new_commit_id: new_commit_id, new_tag_name: new_tag_name)
      end

      commit_description = "#{original_tag.target_commit.message.chomp.inspect} (#{original_tag.target_commit.abbreviated_id}) in #{original_tag.repo.name}"
      logger.verbose " The target commit #{commit_description} is not yet imported. Skipping."
      nil
    end

    def monorepo_commit_id_for(original_tag)
      # TODO: Choosing the first one might be wrong
      recorded_ids = configuration.repo_commit_map.monorepo_commit_ids_for(original_tag.target_commit)
      recorded_ids.first
    end

    def monorepo
      configuration.monorepo
    end

    def logger
      configuration.logger
    end
  end
end
@@ -0,0 +1,13 @@
1
+ # frozen_string_literal: true
2
+
3
class RepositoryMerger
  # Version number of the gem, split into its components.
  module Version
    MAJOR = 0
    MINOR = 1
    PATCH = 0

    # The version as a dotted "MAJOR.MINOR.PATCH" string.
    def self.to_s
      "#{MAJOR}.#{MINOR}.#{PATCH}"
    end
  end
end