git-fastclone 0.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of git-fastclone might be problematic. Click here for more details.

checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 3feed5ccee58d01de0a1703664806f92309c3520
4
+ data.tar.gz: 9129d253e669fdbe2bd97d6ee11cfe832437b8a2
5
+ SHA512:
6
+ metadata.gz: ce5daa467bffaf2a6c4b7118f7cb17adea720f130842603a1adb4050beb3110df4f6c9df64811f0edba82fd4a9f77895f7050a89980c120aed8af149c401feac
7
+ data.tar.gz: ba417ddff24e96b332b445c6dec39fb62c1363d9f5e8ec471ba59bf3ba0c6df0099f0cac7d2773c197d20bb616d19f4d679f278f873f1c9860e8d014db222203
data/bin/git-fastclone ADDED
@@ -0,0 +1,5 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require 'git-fastclone'
4
+
5
+ GitFastClone.new.run
data/lib/execution.rb ADDED
@@ -0,0 +1,71 @@
1
+ require 'open3'
2
+
3
# Runs an external command through Open3.capture2 and hands back what the
# command wrote to stdout, ending this process if the command did not succeed.
#
# Differences from calling Open3.capture2 directly:
# 1) Commands that would make spawn use the shell implicitly are rejected
#    (see shell_safe).
# 2) A nonzero or abnormal exit of the command terminates this process
#    (see exit_on_status).
#
# cmd  - the command and its arguments, one string per argument.
# opts - spawn options (for example :chdir); they are handed to Open3.capture2
#        untouched, so read the spawn documentation for process/stream control.
#
# Returns the captured stdout of the command.
def fail_on_error(*cmd, **opts)
  # Echoing the command before running it is currently switched off:
  # puts "Running Command: \n#{debug_print_cmd_list([cmd])}\n"
  shell_safe(cmd)

  # capture2 yields [stdout, status]; both go straight to the status check.
  exit_on_status(*Open3.capture2(*cmd, opts))
end
18
+
19
# Inspects a command list intended for spawn and refuses to continue when
# spawn would invoke the shell implicitly.
#
# cmd - Array holding the command parts; it may also contain the env and opts
#       hashes of a spawn command spec.
#
# Returns nil when the command is acceptable. Otherwise prints an explanation
# to stdout and exits the process with status 1.
def shell_safe(cmd)
  # env and opts in the command spec both aren't strings, so only the string
  # parts are counted. If you're only passing one string, spawn is going to
  # launch a shell.
  return unless cmd.count { |element| element.is_a?(String) } == 1

  puts "You tried to use git-fastclone to call the shell implicitly. Please don't."
  puts "Think of the children."
  puts "Think of shellshock."
  puts "Please don't. Not ever."
  exit 1
end
32
+
33
# Take a list of command argument lists like you'd send to Open3.pipeline or
# fail_on_error_pipe and build a string that would do the same thing when
# entered at the shell.
#
# This is a converter from our internal representation of commands to a subset
# of bash that can be executed directly.
#
# cmd_list - Array of commands, each an Array of command parts. Parts that are
#            not strings (the env and opts hashes of a spawn command spec) are
#            left out of the result.
#
# Returns a String in which every argument is double-quoted and commands are
# joined with " | ".
def debug_print_cmd_list(cmd_list)
  rendered = cmd_list.map do |cmd|
    # env and opts are not part of the command line itself; calling gsub on
    # them would raise, so only the string parts are kept.
    words = cmd.select { |part| part.is_a?(String) }

    # Escape all double quotes in command arguments, then fully quote all
    # command parts. The outermost quotes are added once at the very end.
    words.map { |arg| arg.gsub("\"", "\\\"") }.join("\" \"")
  end

  # Pipe commands to one another.
  "\"" + rendered.join("\" | \"") + "\""
end
50
+
51
# Passes the output through when the process finished cleanly; otherwise ends
# this program.
#
# output - value handed back to the caller on success.
# status - process status object; exited?, exitstatus, inspect and to_i are
#          called on it.
#
# Returns output when the process exited with code 0.
# Exits with the process's own code when it exited nonzero, and with 1 when it
# did not exit normally.
def exit_on_status(output, status)
  if status.exited?
    code = status.exitstatus

    # Nothing to do for a proper status.
    return output if code == 0

    # Nonzero exit: report the code and propagate it.
    puts "Return code was #{code}"
    exit code
  end

  # Abnormal termination: print debugging info and explode.
  puts "This might be helpful:\nProcessStatus: #{status.inspect}\nRaw POSIX Status: #{status.to_i}\n"
  exit 1
end
70
+
71
+
data/lib/git-fastclone.rb ADDED
@@ -0,0 +1,181 @@
1
+ require 'optparse'
2
+ require 'fileutils'
3
+ require_relative 'execution'
4
+
5
# Command-line driver that clones a git repository and recursively updates its
# submodules, passing a local mirror of every repository to git via
# --reference so that less data has to come over the network.
#
# Mirrors live under the directory named by the REFERENCE_REPO_DIR environment
# variable (default /var/tmp/git-fastclone/reference). Next to each mirror a
# "<mirror>:submodules.txt" file records the submodule urls seen for that
# repository, so they can be prefetched on later runs.
#
# All git commands go through fail_on_error, which this class expects to be
# defined already.
class GitFastClone
  def initialize
    # Prefetch reference repos for submodules we've seen before
    # Keep our own reference accounting of module dependencies.
    @prefetch_submodules = true

    # Thread-level locking for reference repos
    # TODO: Add flock-based locking if we want to do more than one build on a given slave
    @reference_mutex = Hash.new { |hash, key| hash[key] = Mutex.new }

    # Only update each reference repo once per run.
    # TODO: May want to update this if we're doing more than one build on a given slave.
    #       Perhaps a last-updated-time and a timeout per reference repo.
    @reference_updated = Hash.new { |hash, key| hash[key] = false }
  end

  # Entry point: parses ARGV and performs the clone.
  #
  # Usage: git-fastclone [-b BRANCH] URL [PATH]
  #
  # Aborts with a usage message when no URL is given.
  def run
    @reference_dir = ENV['REFERENCE_REPO_DIR'] || "/var/tmp/git-fastclone/reference"

    FileUtils.mkdir_p(@reference_dir)

    # One option --branch=<branch> We're not as brittle as clone. That branch can be a sha or tag and we're still okay.
    @options = { branch: nil }
    OptionParser.new do |opts|
      opts.on("-b", "--branch BRANCH", "Checkout this branch rather than the default") do |branch|
        @options[:branch] = branch
      end
      # TODO: add --verbose option that turns on and off printing of sub-commands
      # TODO: Add help text.
    end.parse!

    puts ARGV

    # Remaining two positional args are url and optional path
    url = ARGV[0]
    abort "usage: git-fastclone [-b BRANCH] URL [PATH]" if url.nil?
    path = ARGV[1] || path_from_git_url(url)

    # path_from_git_url already returns an absolute path and PATH may be
    # absolute too, so resolve against the working directory instead of
    # blindly prepending it (which used to yield <pwd>/<pwd>/<name>).
    # Do a checkout with reference repositories for main and submodules
    clone(url, @options[:branch], File.expand_path(path))
  end

  # Derives the checkout path from the tail end of the url: the last path
  # segment, without a trailing "/" or ".git" suffix, placed under the current
  # working directory.
  #
  # Raises ArgumentError when the url has no usable last segment.
  def path_from_git_url(url)
    name = url[%r{([^/]*?)(?:\.git)?/?\z}, 1]
    if name.nil? || name.empty?
      raise ArgumentError, "Cannot derive a checkout directory from #{url.inspect}"
    end

    File.join(Dir.pwd, name)
  end

  # Checkout to SOURCE_DIR. Update all submodules recursively. Use reference repos everywhere for speed.
  #
  # url     - repository to clone.
  # rev     - branch, tag or sha to check out afterwards; nil keeps the default.
  # src_dir - directory to clone into.
  def clone(url, rev, src_dir)
    initial_time = Time.now

    with_git_mirror(url) do |mirror|
      fail_on_error("git", "clone", "--reference", mirror, url, src_dir)
    end

    # Only checkout if we're changing branches to a non-default branch
    fail_on_error("git", "checkout", rev, :chdir => src_dir) unless rev.nil?

    update_submodules(src_dir, url)

    final_time = Time.now
    puts "Checkout of #{url} took #{final_time - initial_time}s"
  end

  # Update all submodules in the given directory recursively
  # Use a reference repository for speed.
  # Use a separate thread for each submodule.
  #
  # pwd - working tree whose submodules are updated.
  # url - url of the repository checked out at pwd; the submodule urls found
  #       are recorded against it.
  def update_submodules(pwd, url)
    # Skip if there's no submodules defined
    return unless File.exist?(File.join(pwd, ".gitmodules"))

    # Update each submodule on a different thread.
    threads = []
    submodule_url_list = []

    # Init outputs all the info we need to run the update commands.
    # Parse its output directly to save time.
    fail_on_error("git", "submodule", "init", :chdir => pwd).split("\n").each do |line|
      stripped = line.strip
      # Submodule path (not name) is in between single quotes '' at the end of the line
      path_match = stripped.match(/'([^']*)'$/)
      # URL is in between parentheses ()
      url_match = stripped.match(/\(([^)]*)\)/)
      # Any other kind of line carries nothing to act on; skip it rather than
      # crash on a failed match.
      next unless path_match && url_match

      submodule_path = File.join(pwd, path_match[1])
      submodule_url = url_match[1]
      submodule_url_list << submodule_url

      # Each update happens on a separate thread for speed.
      threads << Thread.new do
        with_git_mirror(submodule_url) do |mirror|
          fail_on_error("git", "submodule", "update", "--reference", mirror, submodule_path, :chdir => pwd)
        end
        # Recurse into the submodule directory
        update_submodules(submodule_path, submodule_url)
      end
    end
    update_submodule_reference(url, submodule_url_list)
    threads.each(&:join)
  end

  # Derive a directory name from the git url: the scheme and any "user@"
  # prefix are removed, then "/" and ":" are replaced by "-".
  def reference_repo_name(url)
    url.gsub(/^.*:\/\//, "").gsub(/^[^@]*@/, "").gsub("/", "-").gsub(":", "-")
  end

  # Directory of the local mirror for url, inside the reference directory.
  def reference_repo_dir(url)
    File.join(@reference_dir, reference_repo_name(url))
  end

  # File that lists the submodule urls recorded for url.
  def reference_repo_submodule_file(url)
    # ':' is never a valid char in a reference repo dir, so this
    # uniquely maps to a particular reference repo.
    "#{reference_repo_dir(url)}:submodules.txt"
  end

  # Runs the given block while holding the mutex of url's reference repo.
  def with_reference_repo_lock(url)
    @reference_mutex[reference_repo_name(url)].synchronize do
      yield
    end
  end

  # Records submodule_url_list, one url per line, as the submodules of url.
  # Does nothing for an empty list or when prefetching is disabled.
  def update_submodule_reference(url, submodule_url_list)
    return if submodule_url_list.empty? || !@prefetch_submodules

    with_reference_repo_lock(url) do
      # Write the dependency file using submodule list
      File.open(reference_repo_submodule_file(url), 'w') do |f|
        submodule_url_list.each do |submodule_url|
          f.write("#{submodule_url}\n")
        end
      end
    end
  end

  # Creates the mirror for url if its directory is missing and brings it up to
  # date, at most once per run. Mirrors of the submodules recorded for url are
  # refreshed on background threads.
  def update_reference_repo(url)
    repo_name = reference_repo_name(url)
    mirror = reference_repo_dir(url)

    with_reference_repo_lock(url) do
      submodule_file = reference_repo_submodule_file(url)
      if File.exist?(submodule_file) && @prefetch_submodules
        File.readlines(submodule_file).each do |line|
          # We don't join these threads explicitly
          Thread.new { update_reference_repo(line.strip) }
        end
      end

      unless @reference_updated[repo_name]
        fail_on_error("git", "clone", "--mirror", url, mirror) unless Dir.exist?(mirror)
        fail_on_error("git", "remote", "update", :chdir => mirror)
        @reference_updated[repo_name] = true
      end
    end
  end

  # Executes a block passing in the directory of an up-to-date local git mirror
  # for the given url. This will speed up most git commands that ask for data
  # over the network after the mirror is cloned initially.
  #
  # This command will create and bring the mirror up-to-date on-demand,
  # blocking any code passed in while the mirror is brought up-to-date
  #
  # In future we may need to synchronize with flock here if we run multiple builds
  # at once against the same reference repos. One build per slave at the moment means
  # we only need to synchronize our own threads in case a single submodule url is
  # included twice via multiple dependency paths
  def with_git_mirror(url)
    update_reference_repo(url)

    # May want to lock the reference repo for this, but don't need to for how we use this.
    yield reference_repo_dir(url)
  end
end
metadata ADDED
@@ -0,0 +1,48 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: git-fastclone
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.0
5
+ platform: ruby
6
+ authors:
7
+ - Michael Tauraso
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2014-11-19 00:00:00.000000000 Z
12
+ dependencies: []
13
+ description: A git command that uses reference repositories and multithreading to
14
+ quickly and recursively clone repositories with many nested submodules
15
+ email: mtauraso@gmail.com
16
+ executables:
17
+ - git-fastclone
18
+ extensions: []
19
+ extra_rdoc_files: []
20
+ files:
21
+ - bin/git-fastclone
22
+ - lib/execution.rb
23
+ - lib/git-fastclone.rb
24
+ homepage: https://rubygems.org/gems/git-fastclone
25
+ licenses:
26
+ - MIT
27
+ metadata: {}
28
+ post_install_message:
29
+ rdoc_options: []
30
+ require_paths:
31
+ - lib
32
+ required_ruby_version: !ruby/object:Gem::Requirement
33
+ requirements:
34
+ - - ">="
35
+ - !ruby/object:Gem::Version
36
+ version: '0'
37
+ required_rubygems_version: !ruby/object:Gem::Requirement
38
+ requirements:
39
+ - - ">="
40
+ - !ruby/object:Gem::Version
41
+ version: '0'
42
+ requirements: []
43
+ rubyforge_project:
44
+ rubygems_version: 2.2.2
45
+ signing_key:
46
+ specification_version: 4
47
+ summary: git-clone --recursive on steroids!
48
+ test_files: []