git_ls 0.4.0 → 0.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rubocop.yml +18 -0
- data/.spellr_wordlists/english.txt +3 -0
- data/.spellr_wordlists/ruby.txt +2 -0
- data/CHANGELOG.md +3 -0
- data/Gemfile +0 -3
- data/README.md +14 -8
- data/git_index.gemspec +2 -0
- data/lib/git_ls.rb +86 -80
- data/lib/git_ls/version.rb +1 -1
- metadata +31 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 7736e59f5f16469e453f3f085e1473cef343eb79e51e75cb94c013de62a311ca
|
4
|
+
data.tar.gz: c9715b5c0080aea1d487b349d825f113d1ecf7d75a85fe76fb2cd9648f3c16c0
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: ca243045d1d33fbc49532f35c63d8ea2bc924d119934a39407a9bc427e0e9de493997b6e93be6b3d5245c4d3df129b8bb1b696f0df6849bebdd4fc76febb718a
|
7
|
+
data.tar.gz: 8ebacdfaa8a7723fd87cf650ea72cf70140633b7140403bd0cbc0f320bc1bdfb28bfe8eeeb9fe950d697f2357b4352e5029ac9c749684bca097a2f05b02b4a1a
|
data/.rubocop.yml
CHANGED
@@ -118,6 +118,7 @@ Metrics/BlockLength:
|
|
118
118
|
- describe
|
119
119
|
- context
|
120
120
|
- shared_examples
|
121
|
+
- benchmark
|
121
122
|
|
122
123
|
Metrics/CyclomaticComplexity:
|
123
124
|
Enabled: false
|
@@ -224,3 +225,20 @@ Style/HashTransformValues:
|
|
224
225
|
|
225
226
|
Style/CommentedKeyword:
|
226
227
|
Enabled: false
|
228
|
+
|
229
|
+
Style/CaseLikeIf:
|
230
|
+
Enabled: false
|
231
|
+
|
232
|
+
Style/NumericPredicate:
|
233
|
+
Enabled: false
|
234
|
+
|
235
|
+
Style/Semicolon:
|
236
|
+
Exclude:
|
237
|
+
- bin/benchmark
|
238
|
+
|
239
|
+
Naming/VariableNumber:
|
240
|
+
Enabled: false
|
241
|
+
|
242
|
+
Lint/Void:
|
243
|
+
Exclude:
|
244
|
+
- bin/benchmark
|
data/.spellr_wordlists/ruby.txt
CHANGED
data/CHANGELOG.md
CHANGED
data/Gemfile
CHANGED
data/README.md
CHANGED
@@ -2,10 +2,9 @@
|
|
2
2
|
|
3
3
|
Parses the .git/index file like `git ls-files` does.
|
4
4
|
|
5
|
-
-
|
6
|
-
it can be faster than doing the system call to git
|
7
|
-
- still takes less than half a second for very large repos e.g. the linux repo
|
5
|
+
- faster than doing the system call to git
|
8
6
|
- doesn't require git to be installed
|
7
|
+
- tested against ruby 2.4 - 2.7 and jruby
|
9
8
|
|
10
9
|
## Installation
|
11
10
|
|
@@ -23,23 +22,30 @@ Or install it yourself as:
|
|
23
22
|
|
24
23
|
$ gem install git_ls
|
25
24
|
|
25
|
+
And require
|
26
|
+
```ruby
|
27
|
+
require 'git_ls'
|
28
|
+
```
|
29
|
+
|
26
30
|
## Usage
|
27
31
|
|
28
|
-
`GitLS.files`
|
32
|
+
`GitLS.files` reads the `.git/index` file to return an array of file paths, equivalent to `` `git ls-files`.split("\n") ``, but faster, and without requiring git to be installed.
|
33
|
+
|
29
34
|
Use `GitLS.files("path/to/repo")` if the repo is not at `$PWD`.
|
30
35
|
|
36
|
+
Strictly speaking, it's equivalent to `` `git -c core.quotepath=off ls-files -z`.split("\0") ``, handling file paths with spaces and non-ASCII characters, and returning file paths as UTF-8 strings.
|
37
|
+
|
31
38
|
## Development
|
32
39
|
|
33
|
-
Have a look in the bin dir for some useful tools.
|
34
|
-
To install this gem onto your local machine, run `bundle exec rake install`.
|
40
|
+
- Have a look in the bin dir for some useful tools.
|
41
|
+
- To install this gem onto your local machine, run `bundle exec rake install`.
|
42
|
+
- Run `rake` to run all tests & linters.
|
35
43
|
|
36
44
|
## Contributing
|
37
45
|
|
38
46
|
Bug reports and pull requests are welcome on GitHub at https://github.com/robotdana/git_ls.
|
39
47
|
If you're comfortable, please attach `.git/index` (and `.git/sharedindex.<sha>` if applicable) and the output of `git ls-files` where it doesn't match.
|
40
48
|
|
41
|
-
|
42
49
|
## License
|
43
50
|
|
44
51
|
The gem is available as open source under the terms of the [MIT License](https://opensource.org/licenses/MIT).
|
45
|
-
# git_ls
|
data/git_index.gemspec
CHANGED
@@ -27,10 +27,12 @@ Gem::Specification.new do |spec|
|
|
27
27
|
|
28
28
|
spec.require_paths = ['lib']
|
29
29
|
|
30
|
+
spec.add_development_dependency 'benchmark-ips'
|
30
31
|
spec.add_development_dependency 'bundler', '>= 2'
|
31
32
|
spec.add_development_dependency 'leftovers'
|
32
33
|
spec.add_development_dependency 'pry', '> 0'
|
33
34
|
spec.add_development_dependency 'rake', '>= 12.3.3'
|
35
|
+
spec.add_development_dependency 'rspec'
|
34
36
|
spec.add_development_dependency 'rubocop'
|
35
37
|
spec.add_development_dependency 'rubocop-performance'
|
36
38
|
spec.add_development_dependency 'rubocop-rspec'
|
data/lib/git_ls.rb
CHANGED
@@ -3,105 +3,100 @@
|
|
3
3
|
# Usage:
|
4
4
|
# GitLS.files -> Array of strings as files.
|
5
5
|
# This will be identical output to git ls-files
|
6
|
+
require 'stringio'
|
7
|
+
|
6
8
|
module GitLS # rubocop:disable Metrics/ModuleLength
|
7
9
|
class Error < StandardError; end
|
8
10
|
|
9
11
|
class << self
|
10
|
-
def files(path =
|
11
|
-
|
12
|
-
end
|
12
|
+
def files(path = nil)
|
13
|
+
path = path ? ::File.join(path, '.git/index') : '.git/index'
|
13
14
|
|
14
|
-
|
15
|
-
read(path, true)
|
15
|
+
read(path, false)
|
16
16
|
end
|
17
17
|
|
18
18
|
private
|
19
19
|
|
20
|
-
def read(path,
|
21
|
-
|
22
|
-
|
20
|
+
def read(path, _return_headers_only) # rubocop:disable Metrics/MethodLength, Metrics/AbcSize
|
21
|
+
begin
|
22
|
+
# reading the whole file into memory is faster than lots of ::File#read
|
23
|
+
# the biggest it's going to be is 10s of megabytes, well within ram.
|
24
|
+
file = ::StringIO.new(::File.read(path, mode: 'rb'))
|
25
|
+
rescue ::Errno::ENOENT => e
|
26
|
+
raise ::GitLS::Error, "Not a git directory: #{e.message}"
|
27
|
+
end
|
28
|
+
|
23
29
|
buf = ::String.new
|
24
30
|
# 4-byte signature:
|
25
31
|
# The signature is { 'D', 'I', 'R', 'C' } (stands for "dircache")
|
26
32
|
# 4-byte version number:
|
27
33
|
# The current supported versions are 2, 3 and 4.
|
28
34
|
# 32-bit number of index entries.
|
29
|
-
sig
|
30
|
-
raise ::GitLS::Error, ".git/index file not found at #{path}" unless sig == 'DIRC'
|
35
|
+
sig = file.read(4, buf)
|
36
|
+
raise ::GitLS::Error, ".git/index file not found at '#{path}'" unless sig == 'DIRC'
|
31
37
|
|
32
|
-
|
38
|
+
git_index_version = file.read(4, buf).unpack1('N')
|
39
|
+
entries = file.read(4, buf).unpack1('N')
|
33
40
|
|
34
|
-
files = ::Array.new(
|
35
|
-
case git_index_version
|
41
|
+
files = ::Array.new(entries)
|
42
|
+
files = case git_index_version
|
36
43
|
when 2 then files_2(files, file)
|
37
44
|
when 3 then files_3(files, file)
|
38
45
|
when 4 then files_4(files, file)
|
39
|
-
else raise ::GitLS::Error,
|
46
|
+
else raise ::GitLS::Error, "Unrecognized git index version '#{git_index_version}'"
|
40
47
|
end
|
41
48
|
|
42
|
-
|
43
|
-
files
|
44
|
-
rescue ::Errno::ENOENT => e
|
45
|
-
raise ::GitLS::Error, "Not a git directory: #{e.message}"
|
46
|
-
ensure
|
47
|
-
# :nocov:
|
48
|
-
# coverage tracking for branches in ensure blocks is weird
|
49
|
-
file&.close
|
50
|
-
# :nocov:
|
51
|
-
files
|
49
|
+
read_extensions(files, file, path, buf)
|
52
50
|
end
|
53
51
|
|
54
|
-
def
|
55
|
-
|
56
|
-
|
57
|
-
|
52
|
+
def read_extensions(files, file, path, buf) # rubocop:disable Metrics/MethodLength
|
53
|
+
extension = file.read(4, buf)
|
54
|
+
if extension == 'link'
|
55
|
+
read_link_extension(files, file, path, buf)
|
56
|
+
elsif extension.match?(/\A[A-Z]{4}\z/)
|
57
|
+
size = file.read(4, buf).unpack1('N')
|
58
|
+
file.seek(size, 1)
|
59
|
+
read_extensions(files, file, path, buf)
|
58
60
|
else
|
59
|
-
return if
|
61
|
+
return files if file.seek(16, 1) && file.eof?
|
60
62
|
|
61
|
-
raise ::GitLS::Error, "Unrecognized .git/index extension #{
|
63
|
+
raise ::GitLS::Error, "Unrecognized .git/index extension #{extension.inspect}"
|
62
64
|
end
|
63
65
|
end
|
64
66
|
|
65
|
-
def
|
66
|
-
|
67
|
-
file.pos += size
|
68
|
-
extensions(files, file, buf)
|
69
|
-
end
|
70
|
-
|
71
|
-
def link_extension(files, file, buf) # rubocop:disable Metrics/MethodLength, Metrics/AbcSize
|
72
|
-
file.pos += 4 # size = file.read(4, buf).unpack1('N')
|
67
|
+
def read_link_extension(files, file, path, buf) # rubocop:disable Metrics/MethodLength
|
68
|
+
file.seek(4, 1) # skip size
|
73
69
|
|
74
70
|
sha = file.read(20, buf)
|
75
71
|
|
76
|
-
|
77
|
-
|
78
|
-
files.replace files("#{::File.dirname(file.path)}/sharedindex.#{sha.unpack1('H*')}")
|
72
|
+
split_files = read("#{::File.dirname(path)}/sharedindex.#{sha.unpack1('H*')}", false)
|
79
73
|
|
80
74
|
ewah_each_value(file, buf) do |pos|
|
81
|
-
|
75
|
+
split_files[pos] = nil
|
82
76
|
end
|
83
77
|
|
84
78
|
ewah_each_value(file, buf) do |pos|
|
85
|
-
replacement_file =
|
79
|
+
replacement_file = files.shift
|
86
80
|
# the documentation *implies* that this *may* get a new filename
|
87
81
|
# i can't get it to happen though
|
88
82
|
# :nocov:
|
89
|
-
|
83
|
+
split_files[pos] = replacement_file unless replacement_file.empty?
|
90
84
|
# :nocov:
|
91
85
|
end
|
92
86
|
|
93
|
-
|
94
|
-
|
95
|
-
|
87
|
+
split_files.compact!
|
88
|
+
split_files.concat(files)
|
89
|
+
split_files.sort!
|
96
90
|
|
97
|
-
|
91
|
+
read_extensions(split_files, file, path, buf)
|
98
92
|
end
|
99
93
|
|
100
94
|
# format is defined here:
|
101
95
|
# https://git-scm.com/docs/bitmap-format#_appendix_a_serialization_format_for_an_ewah_bitmap
|
102
96
|
def ewah_each_value(file, buf) # rubocop:disable Metrics/MethodLength, Metrics/AbcSize
|
103
97
|
uncompressed_pos = 0
|
104
|
-
|
98
|
+
|
99
|
+
file.seek(4, 1) # skip 4 byte uncompressed_bits_count.
|
105
100
|
compressed_bytes = file.read(4, buf).unpack1('N') * 8
|
106
101
|
|
107
102
|
final_file_pos = file.pos + compressed_bytes
|
@@ -110,10 +105,10 @@ module GitLS # rubocop:disable Metrics/ModuleLength
|
|
110
105
|
run_length_word = file.read(8, buf).unpack1('Q>')
|
111
106
|
# 1st bit
|
112
107
|
run_bit = run_length_word & 1
|
113
|
-
# the next 32 bits, masked, multiplied by 64
|
114
|
-
run_length = ((run_length_word
|
108
|
+
# the next 32 bits, masked, multiplied by 64
|
109
|
+
run_length = ((run_length_word / 0b1_0) & 0xFFFF_FFFF) * 64
|
115
110
|
# the next 31 bits
|
116
|
-
literal_length = (run_length_word
|
111
|
+
literal_length = (run_length_word / 0b100000000_00000000_00000000_00000000_0)
|
117
112
|
|
118
113
|
if run_bit == 1
|
119
114
|
run_length.times do
|
@@ -124,9 +119,11 @@ module GitLS # rubocop:disable Metrics/ModuleLength
|
|
124
119
|
uncompressed_pos += run_length
|
125
120
|
end
|
126
121
|
|
127
|
-
literal_length
|
128
|
-
|
129
|
-
|
122
|
+
next unless literal_length > 0
|
123
|
+
|
124
|
+
words = file.read(8 * literal_length, buf).unpack('B64' * literal_length)
|
125
|
+
words.each do |word|
|
126
|
+
word.each_char.reverse_each do |char|
|
130
127
|
yield(uncompressed_pos) if char == '1'
|
131
128
|
|
132
129
|
uncompressed_pos += 1
|
@@ -134,47 +131,52 @@ module GitLS # rubocop:disable Metrics/ModuleLength
|
|
134
131
|
end
|
135
132
|
end
|
136
133
|
|
137
|
-
file.
|
134
|
+
file.seek(4, 1) # bitmap metadata for adding to bitmaps
|
138
135
|
end
|
139
136
|
|
140
137
|
def files_2(files, file) # rubocop:disable Metrics/MethodLength
|
141
138
|
files.map! do
|
142
|
-
file.
|
143
|
-
|
139
|
+
file.seek(60, 1) # skip 60 bytes (40 bytes of stat, 20 bytes of sha)
|
140
|
+
|
141
|
+
length = (file.getbyte & 0xF) * 256 + file.getbyte # find the 12 byte length
|
144
142
|
if length < 0xFFF
|
145
143
|
path = file.read(length)
|
146
144
|
# :nocov:
|
147
145
|
else
|
148
146
|
# i can't test this i just get ENAMETOOLONG a lot
|
149
|
-
path
|
150
|
-
|
147
|
+
# I'm not sure it's even possible to get to this path, PATH_MAX is 4096 bytes on linux, 1024 on mac
|
148
|
+
# and length is a 12 bit number: 4095 max.
|
149
|
+
path = file.readline("\0").chop!
|
150
|
+
file.seek(-1, 1)
|
151
151
|
# :nocov:
|
152
152
|
end
|
153
|
-
file.
|
153
|
+
file.seek(8 - ((length - 2) % 8), 1) # 1-8 bytes padding of nuls
|
154
|
+
path.force_encoding(Encoding::UTF_8)
|
154
155
|
path
|
155
156
|
end
|
156
157
|
end
|
157
158
|
|
158
159
|
def files_3(files, file) # rubocop:disable Metrics/MethodLength, Metrics/AbcSize
|
159
160
|
files.map! do
|
160
|
-
file.
|
161
|
+
file.seek(60, 1) # skip 60 bytes (40 bytes of stat, 20 bytes of sha)
|
162
|
+
flags = file.getbyte
|
163
|
+
extended_flag = (flags & 0b0100_0000) > 0
|
164
|
+
length = (flags & 0xF) * 256 + file.getbyte # find the 12 byte length
|
165
|
+
file.seek(2, 1) if extended_flag
|
161
166
|
|
162
|
-
flags = file.getbyte * 256 + file.getbyte
|
163
|
-
extended_flag = (flags & 0b0100_0000_0000_0000).positive?
|
164
|
-
file.pos += 2 if extended_flag
|
165
|
-
|
166
|
-
length = flags & 0b0000_1111_1111_1111
|
167
167
|
if length < 0xFFF
|
168
168
|
path = file.read(length)
|
169
169
|
# :nocov:
|
170
170
|
else
|
171
171
|
# i can't test this i just get ENAMETOOLONG a lot
|
172
|
-
path
|
173
|
-
|
172
|
+
# I'm not sure it's even possible to get to this path, PATH_MAX is 4096 bytes on linux, 1024 on mac
|
173
|
+
# and length is a 12 bit number: 4095 max.
|
174
|
+
path = file.readline("\0").chop!
|
175
|
+
file.seek(-1, 1)
|
174
176
|
# :nocov:
|
175
177
|
end
|
176
|
-
|
177
|
-
|
178
|
+
file.seek(8 - ((path.bytesize - (extended_flag ? 0 : 2)) % 8), 1) # 1-8 bytes padding of nuls
|
179
|
+
path.force_encoding(Encoding::UTF_8)
|
178
180
|
path
|
179
181
|
end
|
180
182
|
end
|
@@ -182,11 +184,11 @@ module GitLS # rubocop:disable Metrics/ModuleLength
|
|
182
184
|
def files_4(files, file) # rubocop:disable Metrics/MethodLength, Metrics/AbcSize
|
183
185
|
prev_entry_path = ''
|
184
186
|
files.map! do
|
185
|
-
file.
|
186
|
-
flags = file.getbyte
|
187
|
-
|
188
|
-
|
189
|
-
|
187
|
+
file.seek(60, 1) # skip 60 bytes (40 bytes of stat, 20 bytes of sha)
|
188
|
+
flags = file.getbyte
|
189
|
+
extended_flag = (flags & 0b0100_0000) > 0
|
190
|
+
length = (flags & 0xF) * 256 + file.getbyte # find the 12 byte length
|
191
|
+
file.seek(2, 1) if extended_flag
|
190
192
|
|
191
193
|
# documentation for this number from
|
192
194
|
# https://git-scm.com/docs/pack-format#_original_version_1_pack_idx_files_have_the_following_format
|
@@ -199,8 +201,8 @@ module GitLS # rubocop:disable Metrics/ModuleLength
|
|
199
201
|
read_offset = 0
|
200
202
|
prev_read_offset = file.getbyte
|
201
203
|
n = 1
|
202
|
-
while (prev_read_offset & 0b1000_0000)
|
203
|
-
read_offset += (prev_read_offset
|
204
|
+
while (prev_read_offset & 0b1000_0000) > 0
|
205
|
+
read_offset += (prev_read_offset & 0b0111_1111)
|
204
206
|
read_offset += 2**(7 * n)
|
205
207
|
n += 1
|
206
208
|
prev_read_offset = file.getbyte
|
@@ -211,15 +213,19 @@ module GitLS # rubocop:disable Metrics/ModuleLength
|
|
211
213
|
|
212
214
|
if length < 0xFFF
|
213
215
|
rest = file.read(length - initial_part_length)
|
214
|
-
file.
|
216
|
+
file.seek(1, 1) # the NUL
|
215
217
|
# :nocov:
|
216
218
|
else
|
217
219
|
# i can't test this i just get ENAMETOOLONG a lot
|
218
|
-
|
220
|
+
# I'm not sure it's even possible to get to this path, PATH_MAX is 4096 bytes on linux, 1024 on mac
|
221
|
+
# and length is a 12 bit number: 4095 max.
|
222
|
+
rest = file.readline("\0").chop!
|
223
|
+
file.seek(-1, 1)
|
219
224
|
# :nocov:
|
220
225
|
end
|
221
226
|
|
222
227
|
prev_entry_path = prev_entry_path.byteslice(0, initial_part_length) + rest
|
228
|
+
prev_entry_path.force_encoding(Encoding::UTF_8)
|
223
229
|
end
|
224
230
|
end
|
225
231
|
end
|
data/lib/git_ls/version.rb
CHANGED
metadata
CHANGED
@@ -1,15 +1,29 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: git_ls
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.5.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Dana Sherson
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2020-09-
|
11
|
+
date: 2020-09-12 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: benchmark-ips
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - ">="
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: '0'
|
20
|
+
type: :development
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - ">="
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: '0'
|
13
27
|
- !ruby/object:Gem::Dependency
|
14
28
|
name: bundler
|
15
29
|
requirement: !ruby/object:Gem::Requirement
|
@@ -66,6 +80,20 @@ dependencies:
|
|
66
80
|
- - ">="
|
67
81
|
- !ruby/object:Gem::Version
|
68
82
|
version: 12.3.3
|
83
|
+
- !ruby/object:Gem::Dependency
|
84
|
+
name: rspec
|
85
|
+
requirement: !ruby/object:Gem::Requirement
|
86
|
+
requirements:
|
87
|
+
- - ">="
|
88
|
+
- !ruby/object:Gem::Version
|
89
|
+
version: '0'
|
90
|
+
type: :development
|
91
|
+
prerelease: false
|
92
|
+
version_requirements: !ruby/object:Gem::Requirement
|
93
|
+
requirements:
|
94
|
+
- - ">="
|
95
|
+
- !ruby/object:Gem::Version
|
96
|
+
version: '0'
|
69
97
|
- !ruby/object:Gem::Dependency
|
70
98
|
name: rubocop
|
71
99
|
requirement: !ruby/object:Gem::Requirement
|
@@ -183,7 +211,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
183
211
|
- !ruby/object:Gem::Version
|
184
212
|
version: '0'
|
185
213
|
requirements: []
|
186
|
-
rubygems_version: 3.
|
214
|
+
rubygems_version: 3.1.2
|
187
215
|
signing_key:
|
188
216
|
specification_version: 4
|
189
217
|
summary: Read a .git/index file and list the files
|