git_ls 0.4.0 → 0.5.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.rubocop.yml +18 -0
- data/.spellr_wordlists/english.txt +3 -0
- data/.spellr_wordlists/ruby.txt +2 -0
- data/CHANGELOG.md +3 -0
- data/Gemfile +0 -3
- data/README.md +14 -8
- data/git_index.gemspec +2 -0
- data/lib/git_ls.rb +86 -80
- data/lib/git_ls/version.rb +1 -1
- metadata +31 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 7736e59f5f16469e453f3f085e1473cef343eb79e51e75cb94c013de62a311ca
|
4
|
+
data.tar.gz: c9715b5c0080aea1d487b349d825f113d1ecf7d75a85fe76fb2cd9648f3c16c0
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: ca243045d1d33fbc49532f35c63d8ea2bc924d119934a39407a9bc427e0e9de493997b6e93be6b3d5245c4d3df129b8bb1b696f0df6849bebdd4fc76febb718a
|
7
|
+
data.tar.gz: 8ebacdfaa8a7723fd87cf650ea72cf70140633b7140403bd0cbc0f320bc1bdfb28bfe8eeeb9fe950d697f2357b4352e5029ac9c749684bca097a2f05b02b4a1a
|
data/.rubocop.yml
CHANGED
@@ -118,6 +118,7 @@ Metrics/BlockLength:
|
|
118
118
|
- describe
|
119
119
|
- context
|
120
120
|
- shared_examples
|
121
|
+
- benchmark
|
121
122
|
|
122
123
|
Metrics/CyclomaticComplexity:
|
123
124
|
Enabled: false
|
@@ -224,3 +225,20 @@ Style/HashTransformValues:
|
|
224
225
|
|
225
226
|
Style/CommentedKeyword:
|
226
227
|
Enabled: false
|
228
|
+
|
229
|
+
Style/CaseLikeIf:
|
230
|
+
Enabled: false
|
231
|
+
|
232
|
+
Style/NumericPredicate:
|
233
|
+
Enabled: false
|
234
|
+
|
235
|
+
Style/Semicolon:
|
236
|
+
Exclude:
|
237
|
+
- bin/benchmark
|
238
|
+
|
239
|
+
Naming/VariableNumber:
|
240
|
+
Enabled: false
|
241
|
+
|
242
|
+
Lint/Void:
|
243
|
+
Exclude:
|
244
|
+
- bin/benchmark
|
data/.spellr_wordlists/ruby.txt
CHANGED
data/CHANGELOG.md
CHANGED
data/Gemfile
CHANGED
data/README.md
CHANGED
@@ -2,10 +2,9 @@
|
|
2
2
|
|
3
3
|
Parses the .git/index file like `git ls-files` does.
|
4
4
|
|
5
|
-
-
|
6
|
-
it can be faster than doing the system call to git
|
7
|
-
- still takes less than half a second for very large repos e.g. the linux repo
|
5
|
+
- faster than doing the system call to git
|
8
6
|
- doesn't require git to be installed
|
7
|
+
- tested against ruby 2.4 - 2.7 and jruby
|
9
8
|
|
10
9
|
## Installation
|
11
10
|
|
@@ -23,23 +22,30 @@ Or install it yourself as:
|
|
23
22
|
|
24
23
|
$ gem install git_ls
|
25
24
|
|
25
|
+
And require
|
26
|
+
```ruby
|
27
|
+
require 'git_ls'
|
28
|
+
```
|
29
|
+
|
26
30
|
## Usage
|
27
31
|
|
28
|
-
`GitLS.files`
|
32
|
+
`GitLS.files` reads the `.git/index` file to return an array of file paths, equivalent to `` `git ls-files`.split("\n") ``, but faster, and without requiring git to be installed.
|
33
|
+
|
29
34
|
`GitLS.files("path/to/repo")` if the repo is not $PWD.
|
30
35
|
|
36
|
+
Strictly speaking it's equivalent to `` `git -c core.quotepath=off ls-files -z`.split("\0") ``, handling file paths with spaces and non-ASCII characters, and returning file paths as UTF-8 strings.
|
37
|
+
|
31
38
|
## Development
|
32
39
|
|
33
|
-
Have a look in the bin dir for some useful tools.
|
34
|
-
To install this gem onto your local machine, run `bundle exec rake install`.
|
40
|
+
- Have a look in the bin dir for some useful tools.
|
41
|
+
- To install this gem onto your local machine, run `bundle exec rake install`.
|
42
|
+
- Run `rake` to run all tests & linters.
|
35
43
|
|
36
44
|
## Contributing
|
37
45
|
|
38
46
|
Bug reports and pull requests are welcome on GitHub at https://github.com/robotdana/git_ls.
|
39
47
|
If you're comfortable, please attach `.git/index` (and `.git/sharedindex.<sha>` if applicable) and the output of `git ls-files` where it doesn't match.
|
40
48
|
|
41
|
-
|
42
49
|
## License
|
43
50
|
|
44
51
|
The gem is available as open source under the terms of the [MIT License](https://opensource.org/licenses/MIT).
|
45
|
-
# git_ls
|
data/git_index.gemspec
CHANGED
@@ -27,10 +27,12 @@ Gem::Specification.new do |spec|
|
|
27
27
|
|
28
28
|
spec.require_paths = ['lib']
|
29
29
|
|
30
|
+
spec.add_development_dependency 'benchmark-ips'
|
30
31
|
spec.add_development_dependency 'bundler', '>= 2'
|
31
32
|
spec.add_development_dependency 'leftovers'
|
32
33
|
spec.add_development_dependency 'pry', '> 0'
|
33
34
|
spec.add_development_dependency 'rake', '>= 12.3.3'
|
35
|
+
spec.add_development_dependency 'rspec'
|
34
36
|
spec.add_development_dependency 'rubocop'
|
35
37
|
spec.add_development_dependency 'rubocop-performance'
|
36
38
|
spec.add_development_dependency 'rubocop-rspec'
|
data/lib/git_ls.rb
CHANGED
@@ -3,105 +3,100 @@
|
|
3
3
|
# Usage:
|
4
4
|
# GitLS.files -> Array of strings as files.
|
5
5
|
# This will be identical output to git ls-files
|
6
|
+
require 'stringio'
|
7
|
+
|
6
8
|
module GitLS # rubocop:disable Metrics/ModuleLength
|
7
9
|
class Error < StandardError; end
|
8
10
|
|
9
11
|
class << self
|
10
|
-
def files(path =
|
11
|
-
|
12
|
-
end
|
12
|
+
def files(path = nil)
|
13
|
+
path = path ? ::File.join(path, '.git/index') : '.git/index'
|
13
14
|
|
14
|
-
|
15
|
-
read(path, true)
|
15
|
+
read(path, false)
|
16
16
|
end
|
17
17
|
|
18
18
|
private
|
19
19
|
|
20
|
-
def read(path,
|
21
|
-
|
22
|
-
|
20
|
+
def read(path, _return_headers_only) # rubocop:disable Metrics/MethodLength, Metrics/AbcSize
|
21
|
+
begin
|
22
|
+
# reading the whole file into memory is faster than lots of ::File#read
|
23
|
+
# the biggest it's going to be is 10s of megabytes, well within ram.
|
24
|
+
file = ::StringIO.new(::File.read(path, mode: 'rb'))
|
25
|
+
rescue ::Errno::ENOENT => e
|
26
|
+
raise ::GitLS::Error, "Not a git directory: #{e.message}"
|
27
|
+
end
|
28
|
+
|
23
29
|
buf = ::String.new
|
24
30
|
# 4-byte signature:
|
25
31
|
# The signature is { 'D', 'I', 'R', 'C' } (stands for "dircache")
|
26
32
|
# 4-byte version number:
|
27
33
|
# The current supported versions are 2, 3 and 4.
|
28
34
|
# 32-bit number of index entries.
|
29
|
-
sig
|
30
|
-
raise ::GitLS::Error, ".git/index file not found at #{path}" unless sig == 'DIRC'
|
35
|
+
sig = file.read(4, buf)
|
36
|
+
raise ::GitLS::Error, ".git/index file not found at '#{path}'" unless sig == 'DIRC'
|
31
37
|
|
32
|
-
|
38
|
+
git_index_version = file.read(4, buf).unpack1('N')
|
39
|
+
entries = file.read(4, buf).unpack1('N')
|
33
40
|
|
34
|
-
files = ::Array.new(
|
35
|
-
case git_index_version
|
41
|
+
files = ::Array.new(entries)
|
42
|
+
files = case git_index_version
|
36
43
|
when 2 then files_2(files, file)
|
37
44
|
when 3 then files_3(files, file)
|
38
45
|
when 4 then files_4(files, file)
|
39
|
-
else raise ::GitLS::Error,
|
46
|
+
else raise ::GitLS::Error, "Unrecognized git index version '#{git_index_version}'"
|
40
47
|
end
|
41
48
|
|
42
|
-
|
43
|
-
files
|
44
|
-
rescue ::Errno::ENOENT => e
|
45
|
-
raise ::GitLS::Error, "Not a git directory: #{e.message}"
|
46
|
-
ensure
|
47
|
-
# :nocov:
|
48
|
-
# coverage tracking for branches in ensure blocks is weird
|
49
|
-
file&.close
|
50
|
-
# :nocov:
|
51
|
-
files
|
49
|
+
read_extensions(files, file, path, buf)
|
52
50
|
end
|
53
51
|
|
54
|
-
def
|
55
|
-
|
56
|
-
|
57
|
-
|
52
|
+
def read_extensions(files, file, path, buf) # rubocop:disable Metrics/MethodLength
|
53
|
+
extension = file.read(4, buf)
|
54
|
+
if extension == 'link'
|
55
|
+
read_link_extension(files, file, path, buf)
|
56
|
+
elsif extension.match?(/\A[A-Z]{4}\z/)
|
57
|
+
size = file.read(4, buf).unpack1('N')
|
58
|
+
file.seek(size, 1)
|
59
|
+
read_extensions(files, file, path, buf)
|
58
60
|
else
|
59
|
-
return if
|
61
|
+
return files if file.seek(16, 1) && file.eof?
|
60
62
|
|
61
|
-
raise ::GitLS::Error, "Unrecognized .git/index extension #{
|
63
|
+
raise ::GitLS::Error, "Unrecognized .git/index extension #{extension.inspect}"
|
62
64
|
end
|
63
65
|
end
|
64
66
|
|
65
|
-
def
|
66
|
-
|
67
|
-
file.pos += size
|
68
|
-
extensions(files, file, buf)
|
69
|
-
end
|
70
|
-
|
71
|
-
def link_extension(files, file, buf) # rubocop:disable Metrics/MethodLength, Metrics/AbcSize
|
72
|
-
file.pos += 4 # size = file.read(4, buf).unpack1('N')
|
67
|
+
def read_link_extension(files, file, path, buf) # rubocop:disable Metrics/MethodLength
|
68
|
+
file.seek(4, 1) # skip size
|
73
69
|
|
74
70
|
sha = file.read(20, buf)
|
75
71
|
|
76
|
-
|
77
|
-
|
78
|
-
files.replace files("#{::File.dirname(file.path)}/sharedindex.#{sha.unpack1('H*')}")
|
72
|
+
split_files = read("#{::File.dirname(path)}/sharedindex.#{sha.unpack1('H*')}", false)
|
79
73
|
|
80
74
|
ewah_each_value(file, buf) do |pos|
|
81
|
-
|
75
|
+
split_files[pos] = nil
|
82
76
|
end
|
83
77
|
|
84
78
|
ewah_each_value(file, buf) do |pos|
|
85
|
-
replacement_file =
|
79
|
+
replacement_file = files.shift
|
86
80
|
# the documentation *implies* that this *may* get a new filename
|
87
81
|
# i can't get it to happen though
|
88
82
|
# :nocov:
|
89
|
-
|
83
|
+
split_files[pos] = replacement_file unless replacement_file.empty?
|
90
84
|
# :nocov:
|
91
85
|
end
|
92
86
|
|
93
|
-
|
94
|
-
|
95
|
-
|
87
|
+
split_files.compact!
|
88
|
+
split_files.concat(files)
|
89
|
+
split_files.sort!
|
96
90
|
|
97
|
-
|
91
|
+
read_extensions(split_files, file, path, buf)
|
98
92
|
end
|
99
93
|
|
100
94
|
# format is defined here:
|
101
95
|
# https://git-scm.com/docs/bitmap-format#_appendix_a_serialization_format_for_an_ewah_bitmap
|
102
96
|
def ewah_each_value(file, buf) # rubocop:disable Metrics/MethodLength, Metrics/AbcSize
|
103
97
|
uncompressed_pos = 0
|
104
|
-
|
98
|
+
|
99
|
+
file.seek(4, 1) # skip 4 byte uncompressed_bits_count.
|
105
100
|
compressed_bytes = file.read(4, buf).unpack1('N') * 8
|
106
101
|
|
107
102
|
final_file_pos = file.pos + compressed_bytes
|
@@ -110,10 +105,10 @@ module GitLS # rubocop:disable Metrics/ModuleLength
|
|
110
105
|
run_length_word = file.read(8, buf).unpack1('Q>')
|
111
106
|
# 1st bit
|
112
107
|
run_bit = run_length_word & 1
|
113
|
-
# the next 32 bits, masked, multiplied by 64
|
114
|
-
run_length = ((run_length_word
|
108
|
+
# the next 32 bits, masked, multiplied by 64
|
109
|
+
run_length = ((run_length_word / 0b1_0) & 0xFFFF_FFFF) * 64
|
115
110
|
# the next 31 bits
|
116
|
-
literal_length = (run_length_word
|
111
|
+
literal_length = (run_length_word / 0b100000000_00000000_00000000_00000000_0)
|
117
112
|
|
118
113
|
if run_bit == 1
|
119
114
|
run_length.times do
|
@@ -124,9 +119,11 @@ module GitLS # rubocop:disable Metrics/ModuleLength
|
|
124
119
|
uncompressed_pos += run_length
|
125
120
|
end
|
126
121
|
|
127
|
-
literal_length
|
128
|
-
|
129
|
-
|
122
|
+
next unless literal_length > 0
|
123
|
+
|
124
|
+
words = file.read(8 * literal_length, buf).unpack('B64' * literal_length)
|
125
|
+
words.each do |word|
|
126
|
+
word.each_char.reverse_each do |char|
|
130
127
|
yield(uncompressed_pos) if char == '1'
|
131
128
|
|
132
129
|
uncompressed_pos += 1
|
@@ -134,47 +131,52 @@ module GitLS # rubocop:disable Metrics/ModuleLength
|
|
134
131
|
end
|
135
132
|
end
|
136
133
|
|
137
|
-
file.
|
134
|
+
file.seek(4, 1) # bitmap metadata for adding to bitmaps
|
138
135
|
end
|
139
136
|
|
140
137
|
def files_2(files, file) # rubocop:disable Metrics/MethodLength
|
141
138
|
files.map! do
|
142
|
-
file.
|
143
|
-
|
139
|
+
file.seek(60, 1) # skip 60 bytes (40 bytes of stat, 20 bytes of sha)
|
140
|
+
|
141
|
+
length = (file.getbyte & 0xF) * 256 + file.getbyte # find the 12 byte length
|
144
142
|
if length < 0xFFF
|
145
143
|
path = file.read(length)
|
146
144
|
# :nocov:
|
147
145
|
else
|
148
146
|
# i can't test this i just get ENAMETOOLONG a lot
|
149
|
-
path
|
150
|
-
|
147
|
+
# I'm not sure it's even possible to get to this path, PATH_MAX is 4096 bytes on linux, 1024 on mac
|
148
|
+
# and length is a 12-bit number: 4095 max.
|
149
|
+
path = file.readline("\0").chop!
|
150
|
+
file.seek(-1, 1)
|
151
151
|
# :nocov:
|
152
152
|
end
|
153
|
-
file.
|
153
|
+
file.seek(8 - ((length - 2) % 8), 1) # 1-8 bytes padding of nuls
|
154
|
+
path.force_encoding(Encoding::UTF_8)
|
154
155
|
path
|
155
156
|
end
|
156
157
|
end
|
157
158
|
|
158
159
|
def files_3(files, file) # rubocop:disable Metrics/MethodLength, Metrics/AbcSize
|
159
160
|
files.map! do
|
160
|
-
file.
|
161
|
+
file.seek(60, 1) # skip 60 bytes (40 bytes of stat, 20 bytes of sha)
|
162
|
+
flags = file.getbyte
|
163
|
+
extended_flag = (flags & 0b0100_0000) > 0
|
164
|
+
length = (flags & 0xF) * 256 + file.getbyte # find the 12 byte length
|
165
|
+
file.seek(2, 1) if extended_flag
|
161
166
|
|
162
|
-
flags = file.getbyte * 256 + file.getbyte
|
163
|
-
extended_flag = (flags & 0b0100_0000_0000_0000).positive?
|
164
|
-
file.pos += 2 if extended_flag
|
165
|
-
|
166
|
-
length = flags & 0b0000_1111_1111_1111
|
167
167
|
if length < 0xFFF
|
168
168
|
path = file.read(length)
|
169
169
|
# :nocov:
|
170
170
|
else
|
171
171
|
# i can't test this i just get ENAMETOOLONG a lot
|
172
|
-
path
|
173
|
-
|
172
|
+
# I'm not sure it's even possible to get to this path, PATH_MAX is 4096 bytes on linux, 1024 on mac
|
173
|
+
# and length is a 12-bit number: 4095 max.
|
174
|
+
path = file.readline("\0").chop!
|
175
|
+
file.seek(-1, 1)
|
174
176
|
# :nocov:
|
175
177
|
end
|
176
|
-
|
177
|
-
|
178
|
+
file.seek(8 - ((path.bytesize - (extended_flag ? 0 : 2)) % 8), 1) # 1-8 bytes padding of nuls
|
179
|
+
path.force_encoding(Encoding::UTF_8)
|
178
180
|
path
|
179
181
|
end
|
180
182
|
end
|
@@ -182,11 +184,11 @@ module GitLS # rubocop:disable Metrics/ModuleLength
|
|
182
184
|
def files_4(files, file) # rubocop:disable Metrics/MethodLength, Metrics/AbcSize
|
183
185
|
prev_entry_path = ''
|
184
186
|
files.map! do
|
185
|
-
file.
|
186
|
-
flags = file.getbyte
|
187
|
-
|
188
|
-
|
189
|
-
|
187
|
+
file.seek(60, 1) # skip 60 bytes (40 bytes of stat, 20 bytes of sha)
|
188
|
+
flags = file.getbyte
|
189
|
+
extended_flag = (flags & 0b0100_0000) > 0
|
190
|
+
length = (flags & 0xF) * 256 + file.getbyte # find the 12 byte length
|
191
|
+
file.seek(2, 1) if extended_flag
|
190
192
|
|
191
193
|
# documentation for this number from
|
192
194
|
# https://git-scm.com/docs/pack-format#_original_version_1_pack_idx_files_have_the_following_format
|
@@ -199,8 +201,8 @@ module GitLS # rubocop:disable Metrics/ModuleLength
|
|
199
201
|
read_offset = 0
|
200
202
|
prev_read_offset = file.getbyte
|
201
203
|
n = 1
|
202
|
-
while (prev_read_offset & 0b1000_0000)
|
203
|
-
read_offset += (prev_read_offset
|
204
|
+
while (prev_read_offset & 0b1000_0000) > 0
|
205
|
+
read_offset += (prev_read_offset & 0b0111_1111)
|
204
206
|
read_offset += 2**(7 * n)
|
205
207
|
n += 1
|
206
208
|
prev_read_offset = file.getbyte
|
@@ -211,15 +213,19 @@ module GitLS # rubocop:disable Metrics/ModuleLength
|
|
211
213
|
|
212
214
|
if length < 0xFFF
|
213
215
|
rest = file.read(length - initial_part_length)
|
214
|
-
file.
|
216
|
+
file.seek(1, 1) # the NUL
|
215
217
|
# :nocov:
|
216
218
|
else
|
217
219
|
# i can't test this i just get ENAMETOOLONG a lot
|
218
|
-
|
220
|
+
# I'm not sure it's even possible to get to this path, PATH_MAX is 4096 bytes on linux, 1024 on mac
|
221
|
+
# and length is a 12-bit number: 4095 max.
|
222
|
+
rest = file.readline("\0").chop!
|
223
|
+
file.seek(-1, 1)
|
219
224
|
# :nocov:
|
220
225
|
end
|
221
226
|
|
222
227
|
prev_entry_path = prev_entry_path.byteslice(0, initial_part_length) + rest
|
228
|
+
prev_entry_path.force_encoding(Encoding::UTF_8)
|
223
229
|
end
|
224
230
|
end
|
225
231
|
end
|
data/lib/git_ls/version.rb
CHANGED
metadata
CHANGED
@@ -1,15 +1,29 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: git_ls
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.5.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Dana Sherson
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2020-09-
|
11
|
+
date: 2020-09-12 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: benchmark-ips
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - ">="
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: '0'
|
20
|
+
type: :development
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - ">="
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: '0'
|
13
27
|
- !ruby/object:Gem::Dependency
|
14
28
|
name: bundler
|
15
29
|
requirement: !ruby/object:Gem::Requirement
|
@@ -66,6 +80,20 @@ dependencies:
|
|
66
80
|
- - ">="
|
67
81
|
- !ruby/object:Gem::Version
|
68
82
|
version: 12.3.3
|
83
|
+
- !ruby/object:Gem::Dependency
|
84
|
+
name: rspec
|
85
|
+
requirement: !ruby/object:Gem::Requirement
|
86
|
+
requirements:
|
87
|
+
- - ">="
|
88
|
+
- !ruby/object:Gem::Version
|
89
|
+
version: '0'
|
90
|
+
type: :development
|
91
|
+
prerelease: false
|
92
|
+
version_requirements: !ruby/object:Gem::Requirement
|
93
|
+
requirements:
|
94
|
+
- - ">="
|
95
|
+
- !ruby/object:Gem::Version
|
96
|
+
version: '0'
|
69
97
|
- !ruby/object:Gem::Dependency
|
70
98
|
name: rubocop
|
71
99
|
requirement: !ruby/object:Gem::Requirement
|
@@ -183,7 +211,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
183
211
|
- !ruby/object:Gem::Version
|
184
212
|
version: '0'
|
185
213
|
requirements: []
|
186
|
-
rubygems_version: 3.
|
214
|
+
rubygems_version: 3.1.2
|
187
215
|
signing_key:
|
188
216
|
specification_version: 4
|
189
217
|
summary: Read a .git/index file and list the files
|