git-fastclone 0.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of git-fastclone might be problematic. Click here for more details.
- checksums.yaml +7 -0
- data/bin/git-fastclone +5 -0
- data/lib/execution.rb +71 -0
- data/lib/git-fastclone.rb +181 -0
- metadata +48 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: 3feed5ccee58d01de0a1703664806f92309c3520
|
4
|
+
data.tar.gz: 9129d253e669fdbe2bd97d6ee11cfe832437b8a2
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: ce5daa467bffaf2a6c4b7118f7cb17adea720f130842603a1adb4050beb3110df4f6c9df64811f0edba82fd4a9f77895f7050a89980c120aed8af149c401feac
|
7
|
+
data.tar.gz: ba417ddff24e96b332b445c6dec39fb62c1363d9f5e8ec471ba59bf3ba0c6df0099f0cac7d2773c197d20bb616d19f4d679f278f873f1c9860e8d014db222203
|
data/bin/git-fastclone
ADDED
data/lib/execution.rb
ADDED
@@ -0,0 +1,71 @@
|
|
1
|
+
require 'open3'
|
2
|
+
|
3
|
+
# Runs an external command and returns what it wrote to standard output.
#
# This is a thin wrapper around Open3.capture2 with the following changes
# in behavior:
# 1) Attempts to use the shell implicitly are blocked (see shell_safe).
# 2) Nonzero return codes, or an abnormal termination of the child, result
#    in this process exiting (see exit_on_status).
#
# Only standard output is captured; this method does not redirect the
# child's standard error.
#
# cmd  - the command and its arguments, one string per argument, in the form
#        accepted by spawn.
# opts - spawn options (for example :chdir), passed through to Open3.
#
# If you're looking for more process/stream control read the spawn
# documentation, and pass options directly here.
#
# TODO: echo the command before running it (debug_print_cmd_list can render
# it) once there is a verbose switch to control that.
def fail_on_error(*cmd, **opts)
  shell_safe(cmd)
  output, status = Open3.capture2(*cmd, opts)
  exit_on_status(output, status)
end
|
18
|
+
|
19
|
+
# Look at a cmd list intended for spawn and determine whether spawn could
# call the shell implicitly. If so, print an explanation and exit with
# status 1; otherwise return nil.
#
# cmd - array of command parts as they would be handed to spawn. It may also
#       contain an env hash and/or an options hash.
def shell_safe(cmd)
  # env and opts in the command spec both aren't strings, so only the string
  # elements are counted. If you're only passing one string, spawn is going
  # to treat it as a command line and may launch a shell.
  return unless cmd.count { |element| element.is_a?(String) } == 1

  puts "You tried to use git-fastclone to call the shell implicitly. Please don't."
  puts "Think of the children."
  puts "Think of shellshock."
  puts "Please don't. Not ever."
  exit 1
end
|
32
|
+
|
33
|
+
def debug_print_cmd_list(cmd_list)
  # Take a list of command argument lists like you'd send to Open3.pipeline
  # and return a string that would do the same thing when entered at the
  # shell.
  #
  # This is a converter from our internal representation of commands to a
  # subset of bash that can be executed directly.
  #
  # Elements that are not strings (an env hash in front, an options hash at
  # the end) are left out of the rendering, so only the program name and its
  # arguments appear in the result.
  rendered = cmd_list.map do |cmd|
    cmd.grep(String).map do |arg|
      # Inside double quotes the shell still interprets \ " $ and `, so each
      # of them is protected with a backslash.
      arg.gsub(/[\\"$`]/) { |ch| "\\#{ch}" }
    end.join("\" \"") # Fully quote all command parts.
  end

  # Pipe commands to one another; the outer quotes open the first argument
  # and close the last one.
  "\"" + rendered.join("\" | \"") + "\""
end
|
50
|
+
|
51
|
+
# Turns the status of a finished child process into either a return value or
# the termination of this process.
#
# output - value handed back to the caller when the child succeeded.
# status - object describing how the child ended; it is asked for exited?,
#          exitstatus, inspect and to_i.
#
# Returns output when the child exited with code 0. When the child exited
# with another code, that code is printed and reused as our own exit code.
# When the child did not exit normally, debugging details are printed and we
# exit with 1.
def exit_on_status(output, status)
  # Abnormal termination: there is no exit code to report.
  unless status.exited?
    puts "This might be helpful:\nProcessStatus: #{status.inspect}\nRaw POSIX Status: #{status.to_i}\n"
    exit 1
  end

  code = status.exitstatus

  # Do nothing for proper statuses
  return output if code == 0

  puts "Return code was #{code}"
  exit code
end
|
70
|
+
|
71
|
+
|
@@ -0,0 +1,181 @@
|
|
1
|
+
require 'optparse'
|
2
|
+
require 'fileutils'
|
3
|
+
require_relative 'execution'
|
4
|
+
|
5
|
+
# Clones a git repository and, recursively, all of its submodules. Every
# repository involved is first mirrored into a local reference directory, and
# the actual clone / submodule update is then run with --reference pointing
# at that mirror. Submodules are processed on separate threads.
class GitFastClone
  def initialize
    # Prefetch reference repos for submodules we've seen before
    # Keep our own reference accounting of module dependencies.
    @prefetch_submodules = true

    # Guards the lazy creation of the per-repo mutexes below, so that two
    # threads asking for the same repo always get the same Mutex.
    @mutex_table_lock = Mutex.new

    # Thread-level locking for reference repos
    # TODO: Add flock-based locking if we want to do more than one build on a given slave
    @reference_mutex = Hash.new { |hash, key| hash[key] = Mutex.new }

    # Only update each reference repo once per run.
    # TODO: May want to update this if we're doing more than one build on a given slave.
    #       Perhaps a last-updated-time and a timeout per reference repo.
    @reference_updated = Hash.new { |hash, key| hash[key] = false }
  end

  # Command line entry point. Reads the reference directory from the
  # REFERENCE_REPO_DIR environment variable (with a default), parses ARGV
  # and clones the requested repository.
  #
  # ARGV: [-b BRANCH] <url> [<path>]
  def run
    @reference_dir = ENV['REFERENCE_REPO_DIR'] || "/var/tmp/git-fastclone/reference"

    FileUtils.mkdir_p(@reference_dir)

    # One option --branch=<branch> We're not as brittle as clone. That branch can be a sha or tag and we're still okay.
    @options = { :branch => nil }
    OptionParser.new do |opts|
      opts.on("-b", "--branch BRANCH", "Checkout this branch rather than the default") do |branch|
        @options[:branch] = branch
      end
      # TODO: add --verbose option that turns on and off printing of sub-commands
      # TODO: Add help text.
    end.parse!

    # Remaining two positional args are url and optional path
    url = ARGV[0]
    abort "Usage: git-fastclone [-b BRANCH] <url> [<path>]" if url.nil?
    path = ARGV[1] || path_from_git_url(url)

    # Do a checkout with reference repositories for main and submodules.
    # expand_path resolves a relative path against the current directory and
    # leaves an already absolute one (such as the result of
    # path_from_git_url) untouched.
    clone(url, @options[:branch], File.expand_path(path))
  end

  # Get the checkout path from the tail-end of the url: the last path
  # component, without a trailing ".git" and without trailing slashes,
  # placed inside the current directory.
  #
  # Raises ArgumentError when the url has no usable last component.
  def path_from_git_url(url)
    name = url.to_s[%r{([^/]*?)(?:\.git)?/*\z}, 1]
    if name.nil? || name.empty?
      raise ArgumentError, "Cannot derive a checkout directory from #{url.inspect}"
    end

    File.join(Dir.pwd, name)
  end

  # Checkout to src_dir. Update all submodules recursively. Use reference repos everywhere for speed.
  #
  # url     - repository to clone.
  # rev     - revision to check out after cloning, or nil to stay on the
  #           default branch.
  # src_dir - directory to clone into.
  def clone(url, rev, src_dir)
    initial_time = Time.now

    with_git_mirror(url) do |mirror|
      # "--" keeps git from reading a url that starts with "-" as an option.
      fail_on_error("git", "clone", "--reference", mirror, "--", url, src_dir)
    end

    # Only checkout if we're changing branches to a non-default branch
    fail_on_error("git", "checkout", rev, :chdir => src_dir) unless rev.nil?

    update_submodules(src_dir, url)

    final_time = Time.now
    puts "Checkout of #{url} took #{final_time - initial_time}s"
  end

  # Update all submodules of the checkout in pwd recursively.
  # Use a reference repository for speed.
  # Use a separate thread for each submodule.
  #
  # pwd - directory of the checkout whose submodules are updated.
  # url - url of that checkout; the submodule urls found are recorded
  #       under it for prefetching.
  def update_submodules(pwd, url)
    # Skip if there's no submodules defined
    return unless File.exist?(File.join(pwd, ".gitmodules"))

    # Update each submodule on a different thread.
    threads = []
    submodule_url_list = []

    # Init outputs all the info we need to run the update commands.
    # Parse its output directly to save time.
    fail_on_error("git", "submodule", "init", :chdir => pwd).split("\n").each do |line|
      stripped = line.strip
      # Submodule path (not name) is in between single quotes '' at the end of the line
      path_match = stripped.match(/'([^']*)'$/)
      # URL is in between parentheses ()
      url_match = stripped.match(/\(([^)]*)\)/)
      # Lines that carry no path/url pair (blank lines, other messages) are
      # not submodule registrations; ignore them.
      next unless path_match && url_match

      submodule_path = File.join(pwd, path_match[1])
      submodule_url = url_match[1]
      submodule_url_list << submodule_url

      # Each update happens on a separate thread for speed.
      threads << Thread.new do
        with_git_mirror(submodule_url) do |mirror|
          fail_on_error("git", "submodule", "update", "--reference", mirror, submodule_path, :chdir => pwd)
        end
        # Recurse into the submodule directory
        update_submodules(submodule_path, submodule_url)
      end
    end

    update_submodule_reference(url, submodule_url_list)
    threads.each(&:join)
  end

  # Derive a unique directory name from the git url: the scheme and any
  # "user@" prefix are dropped, and "/" and ":" are replaced with "-".
  def reference_repo_name(url)
    url.gsub(/^.*:\/\//, "").gsub(/^[^@]*@/, "").gsub("/", "-").gsub(":", "-")
  end

  # Directory of the local mirror for url, inside the reference directory.
  def reference_repo_dir(url)
    File.join(@reference_dir, reference_repo_name(url))
  end

  # File in which the submodule urls of url are recorded.
  def reference_repo_submodule_file(url)
    # ':' is never a valid char in a reference repo dir, so this
    # uniquely maps to a particular reference repo.
    "#{reference_repo_dir(url)}:submodules.txt"
  end

  # Runs the given block while holding the mutex that belongs to the
  # reference repo of url. Returns the value of the block.
  def with_reference_repo_lock(url)
    repo_lock = @mutex_table_lock.synchronize { @reference_mutex[reference_repo_name(url)] }
    repo_lock.synchronize { yield }
  end

  # Records submodule_url_list (one url per line) as the known submodules of
  # url, replacing any earlier record. Does nothing for an empty list or when
  # prefetching is turned off.
  def update_submodule_reference(url, submodule_url_list)
    return if submodule_url_list.empty? || !@prefetch_submodules

    with_reference_repo_lock(url) do
      # Write the dependency file using submodule list
      File.open(reference_repo_submodule_file(url), 'w') do |f|
        submodule_url_list.each { |submodule_url| f.write("#{submodule_url}\n") }
      end
    end
  end

  # Creates the local mirror of url if it does not exist yet and brings it
  # up to date. This happens at most once per url for the lifetime of this
  # object; later calls only wait for the repo lock and return.
  def update_reference_repo(url)
    repo_name = reference_repo_name(url)
    mirror = reference_repo_dir(url)

    with_reference_repo_lock(url) do
      unless @reference_updated[repo_name]
        # Start on the recorded submodules first so their mirrors update
        # while this one is being cloned/fetched.
        prefetch_recorded_submodules(url)

        # "--" keeps git from reading a url that starts with "-" as an option.
        fail_on_error("git", "clone", "--mirror", "--", url, mirror) unless Dir.exist?(mirror)
        fail_on_error("git", "remote", "update", :chdir => mirror)
        @reference_updated[repo_name] = true
      end
    end
  end

  # Executes a block passing in the directory of an up-to-date local git mirror
  # for the given url. This will speed up most git commands that ask for data
  # over the network after the mirror is cloned initially.
  #
  # This command will create and bring the mirror up-to-date on-demand,
  # blocking any code passed in while the mirror is brought up-to-date
  #
  # In future we may need to synchronize with flock here if we run multiple builds
  # at once against the same reference repos. One build per slave at the moment means
  # we only need to synchronize our own threads in case a single submodule url is
  # included twice via multiple dependency paths
  def with_git_mirror(url)
    update_reference_repo(url)

    # May want to lock the reference repo for this, but don't need to for how we use this.
    yield reference_repo_dir(url)
  end

  private

  # Starts one background thread per submodule url recorded for url, each
  # updating the reference repo of that submodule. Does nothing when
  # prefetching is off or nothing has been recorded.
  def prefetch_recorded_submodules(url)
    return unless @prefetch_submodules

    submodule_file = reference_repo_submodule_file(url)
    return unless File.exist?(submodule_file)

    File.readlines(submodule_file).each do |line|
      submodule_url = line.strip
      next if submodule_url.empty?

      # We don't join these threads explicitly
      Thread.new { update_reference_repo(submodule_url) }
    end
  end
end
|
metadata
ADDED
@@ -0,0 +1,48 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: git-fastclone
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.0.0
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Michael Tauraso
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
date: 2014-11-19 00:00:00.000000000 Z
|
12
|
+
dependencies: []
|
13
|
+
description: A git command that uses reference repositories and multithreading to
|
14
|
+
quickly and recursively clone repositories with many nested submodules
|
15
|
+
email: mtauraso@gmail.com
|
16
|
+
executables:
|
17
|
+
- git-fastclone
|
18
|
+
extensions: []
|
19
|
+
extra_rdoc_files: []
|
20
|
+
files:
|
21
|
+
- bin/git-fastclone
|
22
|
+
- lib/execution.rb
|
23
|
+
- lib/git-fastclone.rb
|
24
|
+
homepage: https://rubygems.org/gems/git-fastclone
|
25
|
+
licenses:
|
26
|
+
- MIT
|
27
|
+
metadata: {}
|
28
|
+
post_install_message:
|
29
|
+
rdoc_options: []
|
30
|
+
require_paths:
|
31
|
+
- lib
|
32
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
33
|
+
requirements:
|
34
|
+
- - ">="
|
35
|
+
- !ruby/object:Gem::Version
|
36
|
+
version: '0'
|
37
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
38
|
+
requirements:
|
39
|
+
- - ">="
|
40
|
+
- !ruby/object:Gem::Version
|
41
|
+
version: '0'
|
42
|
+
requirements: []
|
43
|
+
rubyforge_project:
|
44
|
+
rubygems_version: 2.2.2
|
45
|
+
signing_key:
|
46
|
+
specification_version: 4
|
47
|
+
summary: git-clone --recursive on steroids!
|
48
|
+
test_files: []
|