repository_merger 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,151 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative 'branch_local_commit_map'
4
+
5
class RepositoryMerger
  # Imports the commits reachable from a set of references (each belonging to
  # an original repository) into the monorepo. Commits are taken from the
  # per-reference queues in order of commit time, and every imported commit is
  # placed under a subdirectory named after its original repository.
  class CommitHistoryMerger
    attr_reader :configuration, :original_references, :commit_message_conversion, :progress_title

    # Set to a truthy value to make #run stop after the commit currently being processed.
    attr_accessor :wants_to_abort

    # @param references [Array] references to import; each must respond to
    #   `name`, `repo`, `mainline?` and `topologically_ordered_commits_from_root`
    # @param configuration [#monorepo, #repo_commit_map, #logger]
    # @param commit_message_conversion [#call, nil] called with the original
    #   commit; its result is used as the message of the imported commit
    # @param progress_title [String, nil] title passed to the progress tracker
    def initialize(references, configuration:, commit_message_conversion: nil, progress_title: nil)
      @original_references = references
      @configuration = configuration
      @commit_message_conversion = commit_message_conversion
      @progress_title = progress_title
    end

    # Processes every queued commit (or stops early when #wants_to_abort is set).
    #
    # @return the monorepo commit recorded for the last processed mainline
    #   commit, or nil when no mainline commit was processed
    def run
      logger.verbose("Importing commits from #{original_references.map { |ref| "#{ref.name} (#{ref.repo.name})" }.join(', ')}", title: true)
      logger.start_tracking_progress_for('commits', total: unprocessed_original_commit_queue.size, title: progress_title)

      while (original_commit = unprocessed_original_commit_queue.next)
        process_commit(original_commit)
        break if wants_to_abort
      end

      monorepo_head_commit
    ensure
      # Runs on normal completion, abort and exceptions alike, so the mappings
      # gathered so far are merged and (when a path is configured) saved.
      repo_commit_map.merge!(branch_local_commit_map)
      repo_commit_map.save if repo_commit_map.path
    end

    private

    attr_reader :monorepo_head_commit

    # Imports one original commit unless an equivalent monorepo commit already
    # exists, then registers the mapping and advances the progress tracker.
    def process_commit(original_commit)
      logger.verbose " #{original_commit.commit_time} [#{original_commit.repo.name}] #{original_commit.message.each_line.first}"

      if (monorepo_commit = already_imported_monorepo_commit_for(original_commit))
        logger.verbose " Already imported as #{monorepo_commit.abbreviated_id}. Skipping."
      else
        monorepo_commit = import_commit_into_monorepo(original_commit)
        logger.verbose " Created commit #{monorepo_commit.abbreviated_id}."
      end

      branch_local_commit_map.register(
        monorepo_commit: monorepo_commit,
        original_commit: original_commit
      )

      @monorepo_head_commit = monorepo_commit if mainline?(original_commit)

      logger.increment_progress
    end

    # Returns the previously imported monorepo commit for +original_commit+
    # whose parents equal the parents this import would use, or nil.
    def already_imported_monorepo_commit_for(original_commit)
      monorepo_commits = repo_commit_map.monorepo_commits_for(original_commit)
      # No candidates: skip the parent lookup entirely, exactly as before.
      return nil if monorepo_commits.empty?

      # The expected parents do not depend on the candidate, so resolve them
      # once instead of once per candidate.
      expected_parents = parent_commits_in_monorepo_for(original_commit)

      monorepo_commits.find do |monorepo_commit|
        monorepo_commit.parents == expected_parents
      end
    end

    def import_commit_into_monorepo(original_commit)
      parent_commits_in_monorepo = parent_commits_in_monorepo_for(original_commit)

      monorepo.import_commit(
        original_commit,
        new_parents: parent_commits_in_monorepo,
        subdirectory: original_commit.repo.name,
        message: commit_message_from(original_commit)
      )
    end

    # Resolves the monorepo parents for the commit about to be imported.
    # A root commit is attached to the current monorepo head (no parent when
    # there is none yet). A mainline commit with a mainline parent is also
    # attached to the current monorepo head; any other parent is looked up in
    # the branch-local commit map.
    def parent_commits_in_monorepo_for(original_commit)
      return [monorepo_head_commit].compact if original_commit.root?

      original_commit.parents.map do |original_parent_commit|
        if mainline?(original_commit) && mainline?(original_parent_commit)
          monorepo_head_commit
        else
          branch_local_commit_map.monorepo_commit_for(original_parent_commit)
        end
      end
    end

    # Asks the reference that belongs to the commit's repository whether the
    # commit is on its mainline.
    def mainline?(original_commit)
      original_reference = original_references_by_repo[original_commit.repo]
      original_reference.mainline?(original_commit)
    end

    def commit_message_from(original_commit)
      if commit_message_conversion
        commit_message_conversion.call(original_commit)
      else
        original_commit.message
      end
    end

    def unprocessed_original_commit_queue
      @unprocessed_original_commit_queue ||= OriginalCommitQueue.new(original_references)
    end

    def original_references_by_repo
      @original_references_by_repo ||= original_references.each_with_object({}) do |original_reference, hash|
        hash[original_reference.repo] = original_reference
      end
    end

    def monorepo
      configuration.monorepo
    end

    def repo_commit_map
      configuration.repo_commit_map
    end

    def branch_local_commit_map
      @branch_local_commit_map ||= BranchLocalCommitMap.new(monorepo: monorepo)
    end

    def logger
      configuration.logger
    end

    # Merges the commit lists of several references into a single stream:
    # each call to #next removes and returns the head commit with the earliest
    # commit time among all non-empty lists.
    class OriginalCommitQueue
      attr_reader :references

      def initialize(references)
        @references = references
      end

      # @return the next commit, or nil once every list is exhausted
      def next
        queue_having_earliest_commit = commit_queues.reject(&:empty?).min_by { |queue| queue.first.commit_time }
        queue_having_earliest_commit&.shift
      end

      # @return [Integer] number of commits not yet handed out
      def size
        commit_queues.sum(&:size)
      end

      # Works on copies so that shifting never touches the references' own lists.
      def commit_queues
        @commit_queues ||= references.map(&:topologically_ordered_commits_from_root).map(&:dup)
      end
    end
  end
end
@@ -0,0 +1,9 @@
1
+ # frozen_string_literal: true
2
+
3
class RepositoryMerger
  # Shared helper for the commit map classes.
  module CommitMap
    # Builds the string key under which an original commit is stored:
    # the commit id and the name of its repository joined by "@".
    def original_commit_key(commit)
      commit_id = commit.id
      repo_name = commit.repo.name
      "#{commit_id}@#{repo_name}"
    end
  end
end
@@ -0,0 +1,36 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative 'logger'
4
+ require_relative 'mono_repository'
5
+ require_relative 'repository'
6
+ require_relative 'repository_commit_map'
7
+
8
class RepositoryMerger
  # Carries the settings given by the caller and lazily builds (then caches)
  # the repository, commit map and logger objects derived from them.
  class Configuration
    attr_reader :original_repo_paths, :monorepo_path, :commit_map_file_path, :log_output, :verbose_logging

    def initialize(original_repo_paths:, monorepo_path:, commit_map_file_path: 'commit_map.json', log_output: $stdout, verbose_logging: false)
      @verbose_logging = verbose_logging
      @log_output = log_output
      @commit_map_file_path = commit_map_file_path
      @monorepo_path = monorepo_path
      @original_repo_paths = original_repo_paths
    end

    # One Repository per configured original path, built on first use.
    def original_repos
      @original_repos ||= original_repo_paths.map do |repo_path|
        Repository.new(repo_path)
      end
    end

    # The destination repository, built on first use.
    def monorepo
      @monorepo ||= MonoRepository.new(monorepo_path)
    end

    # The commit map bound to the configured file path and the monorepo.
    def repo_commit_map
      @repo_commit_map ||= RepositoryCommitMap.new(
        path: commit_map_file_path,
        monorepo: monorepo
      )
    end

    # The logger writing to the configured output.
    def logger
      @logger ||= Logger.new(log_output, verbose: verbose_logging)
    end
  end
end
@@ -0,0 +1,49 @@
1
+ # frozen_string_literal: true
2
+
3
class RepositoryMerger
  # Finds GitHub issue / pull request references ("#12", "GH-12",
  # "user/repo#12") in text and rewrites repository-local ones into the
  # absolute "user/repo#12" form.
  module GitHubIssueReference
    RepositoryLocalReference = Struct.new(:issue_number, keyword_init: true)
    AbsoluteReference = Struct.new(:username, :repo_name, :issue_number, keyword_init: true)

    # https://github.com/isiahmeadows/github-limits
    USERNAME_PATTERN = /(?<username>[a-z\d](?:[a-z\d]|-(?=[a-z\d])){0,38})/i.freeze
    REPOSITORY_NAME_PATTERN = /(?<repo_name>[a-z0-9.\-_]{1,100})/i.freeze
    # The maximum issue number is 1073741824 (10 digits). A shorter limit
    # silently ignores references in repositories with 100000+ issues.
    ISSUE_NUMBER_PATTERN = /(?<issue_number>\d{1,10})/.freeze

    # https://docs.github.com/en/github/writing-on-github/working-with-advanced-formatting/autolinked-references-and-urls#issues-and-pull-requests
    REPO_LOCAL_REFERENCE_PATTERN = /(?<!\w)(?<repo_local_reference>(?:#|GH-)#{ISSUE_NUMBER_PATTERN})(?!\w)/i.freeze
    ABSOLUTE_REFERENCE_PATTERN = /(?<!\w)(?<absolute_reference>#{USERNAME_PATTERN}\/#{REPOSITORY_NAME_PATTERN}##{ISSUE_NUMBER_PATTERN})(?!\w)/.freeze
    REFERENCE_PATTERN = /(?:#{REPO_LOCAL_REFERENCE_PATTERN}|#{ABSOLUTE_REFERENCE_PATTERN})/.freeze

    # Collects every reference found in +message+, in order of appearance.
    #
    # @param message [String]
    # @return [Array<RepositoryLocalReference, AbsoluteReference>]
    def self.extract_references_from(message)
      references = []

      message.scan(REFERENCE_PATTERN) do
        reference = create_reference_from(Regexp.last_match)
        references << reference if reference
      end

      references
    end

    # Returns a copy of +message+ in which every repository-local reference
    # is replaced by "username/repo_name#number".
    #
    # @param message [String]
    # @param username [String]
    # @param repo_name [String]
    # @return [String]
    # @raise [RuntimeError] if a match cannot be turned into a reference
    def self.convert_repo_local_references_to_absolute_ones_in(message, username:, repo_name:)
      message.gsub(REPO_LOCAL_REFERENCE_PATTERN) do
        match = Regexp.last_match
        reference = create_reference_from(match)
        raise "Cannot build an issue reference from #{match[0].inspect}" unless reference
        "#{username}/#{repo_name}##{reference.issue_number}"
      end
    end

    # Builds a reference struct from a match of one of the patterns above.
    #
    # @param match [MatchData]
    # @return [RepositoryLocalReference, AbsoluteReference, nil]
    def self.create_reference_from(match)
      if match[:repo_local_reference]
        RepositoryLocalReference.new(issue_number: issue_number_from(match))
      elsif match[:absolute_reference]
        AbsoluteReference.new(
          username: match[:username],
          repo_name: match[:repo_name],
          issue_number: issue_number_from(match)
        )
      end
    end

    # Misspelled original name, kept so existing callers continue to work.
    def self.create_referece_from(match)
      create_reference_from(match)
    end

    # Parses the captured digits strictly as base 10. Without the explicit
    # base, Kernel#Integer treats a leading "0" as an octal prefix, so "#010"
    # became 8 and "#08" raised ArgumentError.
    def self.issue_number_from(match)
      Integer(match[:issue_number], 10)
    end
    private_class_method :issue_number_from
  end
end
@@ -0,0 +1,57 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'ruby-progressbar'
4
+
5
class RepositoryMerger
  # Writes log lines and progress information through a ruby-progressbar
  # instance bound to the configured output.
  class Logger
    attr_reader :output

    # @param output the object handed to the progress bar as its output
    # @param verbose [Boolean] whether #verbose messages are emitted
    def initialize(output, verbose:)
      @output = output
      @verbose = verbose
    end

    # Always logs +message+. With +title+ the message is framed by "=" rules.
    def info(message, title: false)
      log(message, title: title)
    end

    # Logs +message+ only when verbose logging is enabled.
    def verbose(message, title: false)
      return unless verbose?
      log(message, title: title)
    end

    def verbose?
      @verbose
    end

    # Replaces the current progress bar with one counting +total+ items.
    #
    # @param plural_noun [String] label shown after the counter, e.g. "commits"
    # @param total [Integer]
    # @param title [String, nil] shown in front of the counter when given
    def start_tracking_progress_for(plural_noun, total:, title: nil)
      format = " %c/%C #{plural_noun} |%w>%i| %e "
      format = " %t#{format}" if title

      @progressbar = ProgressBar.create(
        format: format,
        output: output,
        title: title,
        total: total
      )
    end

    def increment_progress
      progressbar.increment
    end

    private

    def log(message, title:)
      message = "#{'=' * 10} #{message} #{'=' * 10}" if title

      progressbar.log(message)
    end

    # Fallback bar used when something is logged before progress tracking has
    # started. It is bound to the configured output so that such messages are
    # not sent to the library's default stream instead.
    def progressbar
      @progressbar ||= ProgressBar.create(output: output)
    end
  end
end
@@ -0,0 +1,109 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative 'repository'
4
+ require 'rugged'
5
+
6
class RepositoryMerger
  # The destination repository: receives imported commits, tags and branches.
  class MonoRepository < Repository
    def initialize(_path)
      super
      configure
    end

    # Recreates +original_commit+ in this repository under +subdirectory+.
    #
    # @param original_commit the commit to copy contents and metadata from
    # @param new_parents [Array] monorepo commits to use as parents
    # @param subdirectory [String] directory (relative to the repository root)
    #   that receives the commit's files
    # @param message [String, nil] replacement message; the original one is used when nil
    # @return the commit object looked up for the newly created commit id
    def import_commit(original_commit, new_parents:, subdirectory:, message: nil)
      unless new_parents.empty?
        checkout_contents_if_needed(new_parents.first)
      end

      stage_contents_of(original_commit, subdirectory: subdirectory)

      parent_ids = new_parents.map(&:id)
      create_commit_with_metadata_of(original_commit, new_parent_ids: parent_ids, message: message)
    end

    # Creates +new_tag_name+ pointing at +new_commit_id+, carrying over the
    # annotation of +original_tag+ when it has one, and returns the new tag.
    def import_tag(original_tag, new_commit_id:, new_tag_name:)
      create_arguments = [new_tag_name, new_commit_id]

      # The annotation is only splatted when present. This avoids the
      # `warning: Using the last argument as keyword parameters is deprecated`
      # message from the rugged gem until a fixed version is released.
      # https://github.com/libgit2/rugged/pull/840
      if original_tag.annotation
        rugged_repo.tags.create(*create_arguments, **original_tag.annotation)
      else
        rugged_repo.tags.create(*create_arguments)
      end

      tag_for(new_tag_name)
    end

    # Points +branch_name+ at +commit_id+, creating the branch when missing.
    def create_or_update_branch(branch_name, commit_id:)
      unless branch_for(branch_name)
        return rugged_repo.branches.create(branch_name, commit_id)
      end

      # An existing branch is moved through its reference, because
      # `rugged_repo.branches.create` with master branch fails with error:
      # cannot force update branch 'master' as it is the current HEAD of the repository. (Rugged::ReferenceError)
      rugged_repo.references.update("refs/heads/#{branch_name}", commit_id)
    end

    private

    def configure
      rugged_repo.config['core.ignorecase'] = false
    end

    attr_accessor :current_checked_out_commit_id

    # Checks out the contents of +commit+ unless they are already the ones
    # recorded as checked out (or empty-commit debug mode is on).
    def checkout_contents_if_needed(commit)
      return if empty_commit_for_debug?
      return if commit.id == current_checked_out_commit_id

      commit.checkout_contents
      self.current_checked_out_commit_id = commit.id
    end

    # Extracts the files of +original_commit+ into +subdirectory+ and stages them.
    def stage_contents_of(original_commit, subdirectory:)
      unless empty_commit_for_debug?
        destination = File.join(path, subdirectory)
        original_commit.extract_contents_into(destination)

        # Pass 1: track (almost) all files, removed ones included, honouring .gitignore rules.
        rugged_repo.index.add_all(subdirectory)

        # Pass 2: add every file of the original commit explicitly, so files
        # that match .gitignore but are tracked in the original commit are kept.
        # Rugged::Index#add_all with the :force option cannot be used here
        # because it raises a strange error when adding files in an ignored directory.
        # https://github.com/libgit2/libgit2/issues/4377
        original_commit.files.each do |relative_path|
          rugged_repo.index.add(File.join(subdirectory, relative_path))
        end

        rugged_repo.index.write
      end
    end

    # Creates a commit from the staged index (or an empty tree in debug mode)
    # reusing author and committer of +original_commit+.
    def create_commit_with_metadata_of(original_commit, new_parent_ids:, message:)
      source = original_commit.rugged_commit

      commit_attributes = {
        message: message || source.message,
        committer: source.committer,
        author: source.author,
        tree: empty_commit_for_debug? ? empty_tree.oid : rugged_repo.index.write_tree,
        parents: new_parent_ids
      }
      created_id = Rugged::Commit.create(rugged_repo, commit_attributes)

      # The working contents now correspond to the commit just created.
      self.current_checked_out_commit_id = created_id

      commit_for(created_id)
    end

    # Truthy whenever the REPO_MERGER_IMPORT_AS_EMPTY_COMMITS environment variable is set.
    def empty_commit_for_debug?
      ENV.fetch('REPO_MERGER_IMPORT_AS_EMPTY_COMMITS', nil)
    end

    def empty_tree
      @empty_tree ||= Rugged::Tree.empty(rugged_repo)
    end
  end
end
@@ -0,0 +1,38 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative 'commit'
4
+
5
+ require 'rugged'
6
+ require 'set'
7
+
8
class RepositoryMerger
  # Behaviour shared by objects that point at a commit. Includers must
  # provide `repo` and override #target_commit.
  module Reference
    # @raise [NotImplementedError] unless overridden by the includer
    def target_commit
      raise NotImplementedError
    end

    # All commits reachable from the target commit, walked with topological
    # sorting in reverse order. The frozen result is cached.
    def topologically_ordered_commits_from_root
      cached = @topologically_ordered_commits_from_root
      return cached if cached

      history = Rugged::Walker.new(repo.rugged_repo)
      history.sorting(Rugged::SORT_TOPO | Rugged::SORT_REVERSE)
      history.push(target_commit.id)
      wrapped = history.map { |raw_commit| Commit.new(raw_commit, repo) }

      @topologically_ordered_commits_from_root = wrapped.freeze
    end

    # Whether +commit+ lies on the first-parent chain of the target commit.
    def mainline?(commit)
      mainline_commit_ids.include?(commit.id)
    end

    # Ids of the target commit and of every commit reached by repeatedly
    # following the first parent. The frozen set is cached.
    def mainline_commit_ids
      return @mainline_commit_ids if @mainline_commit_ids

      ids = Set.new
      cursor = target_commit
      while cursor
        ids << cursor.id
        cursor = cursor.parents.first
      end

      @mainline_commit_ids = ids.freeze
    end
  end
end
@@ -0,0 +1,65 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative 'branch'
4
+ require_relative 'commit'
5
+ require_relative 'tag'
6
+
7
+ require 'rugged'
8
+
9
class RepositoryMerger
  # A Git repository on disk, identified by its expanded path.
  class Repository
    attr_reader :path

    # @param path [String] location of the repository; stored expanded
    def initialize(path)
      @path = File.expand_path(path)
    end

    # The underlying Rugged repository, opened on first use.
    def rugged_repo
      @rugged_repo ||= Rugged::Repository.new(path)
    end

    # Two repositories are equal when their paths are equal. Objects without
    # a `path` (nil, strings, ...) compare as not equal instead of raising
    # NoMethodError, which matters because instances are used as hash keys.
    def ==(other)
      other.respond_to?(:path) && path == other.path
    end

    alias eql? ==

    def hash
      path.hash
    end

    # @return [String] the last component of the path
    def name
      File.basename(path)
    end

    # @return [Branch, nil] the branch called +name+, or nil when there is none
    def branch_for(name)
      rugged_branch = rugged_repo.branches[name]
      return nil unless rugged_branch
      Branch.new(rugged_branch, self)
    end

    def branches
      rugged_repo.branches.map do |rugged_branch|
        Branch.new(rugged_branch, self)
      end
    end

    # @return [Tag, nil] the tag called +name+, or nil when there is none
    def tag_for(name)
      rugged_tag = rugged_repo.tags[name]
      return nil unless rugged_tag
      Tag.new(rugged_tag, self)
    end

    def tags
      rugged_repo.tags.map do |rugged_tag|
        Tag.new(rugged_tag, self)
      end
    end

    # @return [Commit, nil] nil when the looked-up object is not a commit
    def commit_for(commit_id)
      object = rugged_repo.lookup(commit_id)
      return nil unless object.is_a?(Rugged::Commit)
      Commit.new(object, self)
    end
  end
end
@@ -0,0 +1,52 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative 'commit_map'
4
+ require 'json'
5
+
6
class RepositoryMerger
  # Maps original commit keys to lists of monorepo commit ids, optionally
  # backed by a JSON file.
  class RepositoryCommitMap
    include CommitMap

    CannotSaveWithoutPathError = Class.new(StandardError)

    attr_reader :path, :monorepo

    # @param monorepo repository used to resolve stored commit ids
    # @param path [String, nil] JSON file to load from and save to
    def initialize(monorepo:, path: nil)
      @monorepo = monorepo
      @path = path
    end

    # The mapping hash. On first access it is parsed from the file at +path+
    # when that file exists; otherwise it starts out empty.
    def map
      @map ||= (path && File.exist?(path) ? JSON.parse(File.read(path)) : {})
    end

    # Adds every entry of +branch_local_commit_map+ to this map, skipping
    # monorepo commit ids that are already recorded for the same key.
    def merge!(branch_local_commit_map)
      branch_local_commit_map.map.each do |key, monorepo_commit_id|
        recorded_ids = (map[key] ||= [])
        recorded_ids << monorepo_commit_id unless recorded_ids.include?(monorepo_commit_id)
      end
    end

    # Writes the map to +path+ as pretty-printed JSON.
    #
    # @raise [CannotSaveWithoutPathError] when no path was given
    def save
      raise CannotSaveWithoutPathError unless path

      File.write(path, JSON.pretty_generate(map))
    end

    # The monorepo commits recorded for +original_commit+.
    def monorepo_commits_for(original_commit)
      monorepo_commit_ids_for(original_commit).map do |commit_id|
        monorepo.commit_for(commit_id)
      end
    end

    # The monorepo commit ids recorded for +original_commit+ (empty when none).
    def monorepo_commit_ids_for(original_commit)
      recorded_ids = map[original_commit_key(original_commit)]
      recorded_ids || []
    end
  end
end
@@ -0,0 +1,40 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative 'reference'
4
+
5
class RepositoryMerger
  # A tag of a repository, wrapping the corresponding Rugged tag.
  Tag = Struct.new(:rugged_tag, :repo) do
    include Reference

    def name
      rugged_tag.name
    end

    # The commit the tag points at. For an annotated tag the target of the
    # annotation is used; otherwise the tag's own target.
    def target_commit
      commit_id = rugged_tag.annotated? ? rugged_tag.annotation.target_id : rugged_tag.target_id

      repo.commit_for(commit_id)
    end

    # @return [Hash, nil] tagger and message of an annotated tag, nil otherwise
    def annotation
      return nil unless rugged_tag.annotated?

      {
        tagger: rugged_tag.annotation.tagger,
        message: rugged_tag.annotation.message
      }
    end

    # A tag is identified by its name.
    def id
      name
    end

    def revision_id
      name
    end
  end
end
@@ -0,0 +1,70 @@
1
+ # frozen_string_literal: true
2
+
3
class RepositoryMerger
  # Recreates tags of the original repositories in the monorepo, pointing
  # them at the already imported counterparts of their target commits.
  class TagImporter
    attr_reader :original_tags, :configuration, :tag_name_conversion

    # @param tags [Array] the original tags to import
    # @param configuration [#monorepo, #repo_commit_map, #logger]
    # @param tag_name_conversion [#call] called with the original tag; returns
    #   the tag name to use in the monorepo, or a falsy value to skip the tag
    def initialize(tags, configuration:, tag_name_conversion:)
      @tag_name_conversion = tag_name_conversion
      @configuration = configuration
      @original_tags = tags
    end

    def run
      logger.verbose('Importing tags', title: true)
      logger.start_tracking_progress_for('tags', total: original_tags.size)

      original_tags.each { |tag| process_tag(tag) }
    end

    # Handles one tag: skips it when it is not meant for import or already
    # exists in the monorepo, imports it otherwise, then advances progress.
    def process_tag(original_tag)
      logger.verbose " [#{original_tag.repo.name}] #{original_tag.name}"

      new_tag_name = tag_name_conversion.call(original_tag)

      if !new_tag_name
        logger.verbose ' Not for import. Skipping.'
      elsif monorepo.tag_for(new_tag_name)
        logger.verbose " Already imported as #{new_tag_name.inspect}. Skipping."
      elsif import_tag_into_monorepo(original_tag, new_tag_name: new_tag_name)
        logger.verbose " Imported as #{new_tag_name.inspect}."
      end

      logger.increment_progress
    end

    # Imports the tag when its target commit has a monorepo counterpart.
    #
    # @return the imported tag, or nil when the target commit is not yet imported
    def import_tag_into_monorepo(original_tag, new_tag_name:)
      monorepo_commit_id = monorepo_commit_id_for(original_tag)

      if monorepo_commit_id
        return monorepo.import_tag(
          original_tag,
          new_commit_id: monorepo_commit_id,
          new_tag_name: new_tag_name
        )
      end

      quoted_message = original_tag.target_commit.message.chomp.inspect
      short_id = original_tag.target_commit.abbreviated_id
      commit_description = "#{quoted_message} (#{short_id}) in #{original_tag.repo.name}"
      logger.verbose " The target commit #{commit_description} is not yet imported. Skipping."
      nil
    end

    def monorepo_commit_id_for(original_tag)
      # TODO: Choosing the first one might be wrong
      candidate_ids = configuration.repo_commit_map.monorepo_commit_ids_for(original_tag.target_commit)
      candidate_ids.first
    end

    def monorepo
      configuration.monorepo
    end

    def logger
      configuration.logger
    end
  end
end
@@ -0,0 +1,13 @@
1
+ # frozen_string_literal: true
2
+
3
class RepositoryMerger
  # Version number of the gem, split into its three components.
  module Version
    MAJOR = 0
    MINOR = 1
    PATCH = 0

    class << self
      # @return [String] the version in "MAJOR.MINOR.PATCH" form
      def to_s
        "#{MAJOR}.#{MINOR}.#{PATCH}"
      end
    end
  end
end