git-fastclone 0.0.0

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of git-fastclone might be problematic. Click here for more details.

checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 3feed5ccee58d01de0a1703664806f92309c3520
4
+ data.tar.gz: 9129d253e669fdbe2bd97d6ee11cfe832437b8a2
5
+ SHA512:
6
+ metadata.gz: ce5daa467bffaf2a6c4b7118f7cb17adea720f130842603a1adb4050beb3110df4f6c9df64811f0edba82fd4a9f77895f7050a89980c120aed8af149c401feac
7
+ data.tar.gz: ba417ddff24e96b332b445c6dec39fb62c1363d9f5e8ec471ba59bf3ba0c6df0099f0cac7d2773c197d20bb616d19f4d679f278f873f1c9860e8d014db222203
data/bin/git-fastclone ADDED
@@ -0,0 +1,5 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require 'git-fastclone'
4
+
5
+ GitFastClone.new.run
data/lib/execution.rb ADDED
@@ -0,0 +1,71 @@
1
+ require 'open3'
2
+
# Runs an external command through Open3.capture2 and returns its captured
# standard output, ending this process if the command does not succeed.
#
# Compared with calling Open3.capture2 directly:
# 1) Attempts to use the shell implicitly are blocked (see shell_safe).
# 2) Nonzero or abnormal exits terminate this process (see exit_on_status).
# 3) Echoing the command before execution is currently disabled; the line
#    that did so is kept below, commented out.
#
# cmd is the command spec (program followed by its arguments). Any trailing
# keyword options are collected into a hash and handed to Open3.capture2 as
# its final argument. If you're looking for more process/stream control read
# the spawn documentation, and pass options directly here.
def fail_on_error(*cmd, **opts)
  # puts "Running Command: \n#{debug_print_cmd_list([cmd])}\n"
  shell_safe(cmd)
  captured_stdout, process_status = Open3.capture2(*cmd, opts)
  exit_on_status(captured_stdout, process_status)
end
18
+
# Looks at a cmd list intended for spawn and refuses to continue when the
# command line was supplied as one single string.
#
# The env and options parts of a command spec are not Strings, so only String
# elements are counted. Exactly one String means the program and its
# arguments were not passed separately, which is the form spawn may hand to
# the shell.
#
# Returns nil when the command is acceptable. Otherwise prints an explanation
# to stdout and exits the process with status 1.
def shell_safe(cmd)
  # is_a? (rather than an exact class comparison) also counts String subclasses.
  return unless cmd.count { |element| element.is_a?(String) } == 1

  puts "You tried to use git-fastclone to call the shell implicitly. Please don't."
  puts 'Think of the children.'
  puts 'Think of shellshock.'
  puts "Please don't. Not ever."
  exit 1
end
32
+
# Takes a list of command argument lists, like you'd send to Open3.pipeline,
# and returns a string that would do the same thing when entered at the shell.
#
# This is a converter from our internal representation of commands to a
# subset of bash that can be executed directly:
# - every argument is wrapped in double quotes,
# - double quotes inside an argument are backslash-escaped,
# - commands are joined with a pipe.
#
# Non-String elements of a command (the env and options hashes accepted by
# spawn) are left out of the result; they have no place in the quoted command
# line and would otherwise raise because they do not respond to gsub.
def debug_print_cmd_list(cmd_list)
  quoted_cmds = cmd_list.map do |cmd|
    cmd.grep(String)
       .map { |arg| arg.gsub('"') { '\"' } } # Escape all double quotes in command arguments
       .join('" "')                          # Closing quote, space, opening quote between arguments
  end

  # The outer quotes open the first argument and close the last one.
  "\"#{quoted_cmds.join('" | "')}\""
end
50
+
# Passes output through when the process finished cleanly; otherwise ends
# this process.
#
# output - value handed back to the caller on success.
# status - process status object; exited?, exitstatus, inspect and to_i are
#          the only things asked of it.
#
# Returns output when the process exited with code 0.
# Exits with the child's own code when it exited nonzero, and with 1 when it
# did not exit normally, printing debugging info to stdout first.
def exit_on_status(output, status)
  finished_normally = status.exited?

  # Do nothing for proper statuses
  return output if finished_normally && status.exitstatus == 0

  # Abnormal termination: there is no exit code to relay, so dump what we know.
  unless finished_normally
    puts "This might be helpful:\nProcessStatus: #{status.inspect}\nRaw POSIX Status: #{status.to_i}\n"
    exit 1
  end

  # Normal termination with a nonzero code: report it and relay it.
  puts "Return code was #{status.exitstatus}"
  exit status.exitstatus
end
70
+
71
+
data/lib/git-fastclone.rb ADDED
@@ -0,0 +1,181 @@
1
+ require 'optparse'
2
+ require 'fileutils'
3
+ require_relative 'execution'
4
+
# Command-line driver that clones a git repository and, recursively, its
# submodules, passing a local mirror of each repository to git via
# --reference.
#
# Mirrors are kept under the directory named by the REFERENCE_REPO_DIR
# environment variable, or /var/tmp/git-fastclone/reference when it is unset.
# Next to each mirror a "<mirror dir>:submodules.txt" file records the
# submodule urls last seen for that repository, so their mirrors can be
# refreshed in the background the next time the parent is cloned.
class GitFastClone
  def initialize
    # Prefetch reference repos for submodules we've seen before
    # Keep our own reference accounting of module dependencies.
    @prefetch_submodules = true

    # Thread-level locking for reference repos (one Mutex per reference repo name)
    # TODO: Add flock-based locking if we want to do more than one build on a given slave
    @reference_mutex = Hash.new { |hash, key| hash[key] = Mutex.new }

    # Only update each reference repo once per run.
    # TODO: May want to update this if we're doing more than one build on a given slave.
    # Perhaps a last-updated-time and a timeout per reference repo.
    @reference_updated = Hash.new { |hash, key| hash[key] = false }
  end

  # Entry point: parses ARGV and performs the clone.
  #
  # Accepted arguments: -b/--branch BRANCH, then the positional URL and an
  # optional PATH. Prints the usage text to stderr and exits with status 1
  # when no URL is given.
  def run
    @reference_dir = ENV['REFERENCE_REPO_DIR'] || '/var/tmp/git-fastclone/reference'

    FileUtils.mkdir_p(@reference_dir)

    # One option --branch=<branch> We're not as brittle as clone. That branch can be a sha or tag
    # and we're still okay.
    @options = { branch: nil }
    parser = OptionParser.new do |opts|
      opts.banner = 'Usage: git-fastclone [-b BRANCH] URL [PATH]'
      opts.on('-b', '--branch BRANCH', 'Checkout this branch rather than the default') do |branch|
        @options[:branch] = branch
      end
      # TODO: add --verbose option that turns on and off printing of sub-commands
    end
    parser.parse!

    # Remaining two positional args are url and optional path
    url = ARGV[0]
    abort parser.help if url.nil?

    # Do a checkout with reference repositories for main and submodules
    clone(url, @options[:branch], checkout_dir(url, ARGV[1]))
  end

  # Derives the default checkout directory from the tail end of the url.
  #
  # The last path component is used, without a trailing "/" or ".git" suffix
  # and without the "user@host:" part of scp-style urls.
  #
  # Returns an absolute path inside the current working directory.
  # Raises ArgumentError when no directory name can be derived from url.
  def path_from_git_url(url)
    name = url[%r{([^/:]+?)(?:\.git)?/?\z}, 1]
    raise ArgumentError, "Cannot derive a checkout directory from #{url.inspect}" if name.nil?

    File.join(Dir.pwd, name)
  end

  # Checkout to src_dir. Update all submodules recursively. Use reference repos everywhere for
  # speed.
  #
  # url     - repository to clone.
  # rev     - branch, tag or sha to check out after cloning; nil keeps the default branch.
  # src_dir - destination directory of the clone.
  def clone(url, rev, src_dir)
    initial_time = Time.now

    with_git_mirror(url) do |mirror|
      fail_on_error('git', 'clone', '--reference', mirror, url, src_dir)
    end

    # Only checkout if we're changing branches to a non-default branch
    fail_on_error('git', 'checkout', rev, chdir: src_dir) unless rev.nil?

    update_submodules(src_dir, url)

    final_time = Time.now
    puts "Checkout of #{url} took #{final_time - initial_time}s"
  end

  # Update all submodules of the checkout at pwd recursively.
  # Use a reference repository for speed.
  # Use a separate thread for each submodule.
  #
  # pwd - directory of the checkout whose submodules are updated.
  # url - url that checkout was cloned from; the submodule urls found here are
  #       recorded against it (see update_submodule_reference).
  def update_submodules(pwd, url)
    # Skip if there's no submodules defined
    return unless File.exist?(File.join(pwd, '.gitmodules'))

    threads = []
    submodule_url_list = []

    # Init outputs all the info we need to run the update commands.
    # Parse its output directly to save time.
    fail_on_error('git', 'submodule', 'init', chdir: pwd).split("\n").each do |line|
      stripped = line.strip
      # Submodule path (not name) is in between single quotes '' at the end of the line
      path_match = stripped.match(/'([^']*)'$/)
      # URL is in between parentheses ()
      url_match = stripped.match(/\(([^)]*)\)/)
      # Ignore lines that do not describe a submodule instead of crashing on them.
      next unless path_match && url_match

      submodule_path = File.join(pwd, path_match[1])
      submodule_url = url_match[1]
      submodule_url_list << submodule_url

      # Each update happens on a separate thread for speed.
      threads << Thread.new do
        with_git_mirror(submodule_url) do |mirror|
          fail_on_error('git', 'submodule', 'update', '--reference', mirror, submodule_path, chdir: pwd)
        end
        # Recurse into the submodule directory
        update_submodules(submodule_path, submodule_url)
      end
    end

    update_submodule_reference(url, submodule_url_list)
    threads.each(&:join)
  end

  # Derive a directory name for the reference repo from the git url: the
  # scheme and any "user@" prefix are dropped, then "/" and ":" become "-".
  def reference_repo_name(url)
    url.gsub(%r{^.*://}, '').gsub(/^[^@]*@/, '').gsub('/', '-').gsub(':', '-')
  end

  # Directory of the local mirror for url, inside the reference directory set by run.
  def reference_repo_dir(url)
    File.join(@reference_dir, reference_repo_name(url))
  end

  # File listing the submodule urls recorded for url.
  def reference_repo_submodule_file(url)
    # reference_repo_name replaces every ':' so the ':' added here cannot
    # collide with a reference repo dir name.
    "#{reference_repo_dir(url)}:submodules.txt"
  end

  # Runs the given block while holding the mutex of url's reference repo.
  def with_reference_repo_lock(url)
    @reference_mutex[reference_repo_name(url)].synchronize do
      yield
    end
  end

  # Records submodule_url_list, one url per line, as the submodules of url.
  # Does nothing when the list is empty or submodule prefetching is off.
  def update_submodule_reference(url, submodule_url_list)
    return if submodule_url_list.empty? || !@prefetch_submodules

    with_reference_repo_lock(url) do
      # Write the dependency file using submodule list
      File.open(reference_repo_submodule_file(url), 'w') do |f|
        submodule_url_list.each do |submodule_url|
          f.write("#{submodule_url}\n")
        end
      end
    end
  end

  # Creates the mirror for url if it does not exist and updates it, at most
  # once per run. Also starts a background update for every submodule url
  # recorded for this repository.
  def update_reference_repo(url)
    repo_name = reference_repo_name(url)
    mirror = reference_repo_dir(url)

    with_reference_repo_lock(url) do
      submodule_file = reference_repo_submodule_file(url)
      if File.exist?(submodule_file) && @prefetch_submodules
        File.readlines(submodule_file).each do |line|
          # We don't join these threads explicitly
          Thread.new { update_reference_repo(line.strip) }
        end
      end

      unless @reference_updated[repo_name]
        fail_on_error('git', 'clone', '--mirror', url, mirror) unless Dir.exist?(mirror)
        fail_on_error('git', 'remote', 'update', chdir: mirror)
        @reference_updated[repo_name] = true
      end
    end
  end

  # Executes a block passing in the directory of an up-to-date local git mirror
  # for the given url. This will speed up most git commands that ask for data
  # over the network after the mirror is cloned initially.
  #
  # This command will create and bring the mirror up-to-date on-demand,
  # blocking any code passed in while the mirror is brought up-to-date
  #
  # In future we may need to synchronize with flock here if we run multiple builds
  # at once against the same reference repos. One build per slave at the moment means
  # we only need to synchronize our own threads in case a single submodule url is
  # included twice via multiple dependency paths
  def with_git_mirror(url)
    update_reference_repo(url)

    # May want to lock the reference repo for this, but don't need to for how we use this.
    yield reference_repo_dir(url)
  end

  private

  # Absolute checkout directory for a run: the PATH argument when given,
  # otherwise the directory derived from url.
  #
  # File.expand_path resolves relative paths against the current directory and
  # leaves absolute ones alone, so the working directory is never prepended twice.
  def checkout_dir(url, path)
    File.expand_path(path || path_from_git_url(url))
  end
end
metadata ADDED
@@ -0,0 +1,48 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: git-fastclone
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.0
5
+ platform: ruby
6
+ authors:
7
+ - Michael Tauraso
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2014-11-19 00:00:00.000000000 Z
12
+ dependencies: []
13
+ description: A git command that uses reference repositories and multithreading to
14
+ quickly and recursively clone repositories with many nested submodules
15
+ email: mtauraso@gmail.com
16
+ executables:
17
+ - git-fastclone
18
+ extensions: []
19
+ extra_rdoc_files: []
20
+ files:
21
+ - bin/git-fastclone
22
+ - lib/execution.rb
23
+ - lib/git-fastclone.rb
24
+ homepage: https://rubygems.org/gems/git-fastclone
25
+ licenses:
26
+ - MIT
27
+ metadata: {}
28
+ post_install_message:
29
+ rdoc_options: []
30
+ require_paths:
31
+ - lib
32
+ required_ruby_version: !ruby/object:Gem::Requirement
33
+ requirements:
34
+ - - ">="
35
+ - !ruby/object:Gem::Version
36
+ version: '0'
37
+ required_rubygems_version: !ruby/object:Gem::Requirement
38
+ requirements:
39
+ - - ">="
40
+ - !ruby/object:Gem::Version
41
+ version: '0'
42
+ requirements: []
43
+ rubyforge_project:
44
+ rubygems_version: 2.2.2
45
+ signing_key:
46
+ specification_version: 4
47
+ summary: git-clone --recursive on steroids!
48
+ test_files: []