git_ls 0.4.0 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 4ef6ee0fb0ab70df2190d65de7389011c84036fff2993ebf65d90913746f7ff3
4
- data.tar.gz: 25c8eb1f6d58e830311b531f3aedb5f5302801bec42aded9e3df8d8e06f938a1
3
+ metadata.gz: 7736e59f5f16469e453f3f085e1473cef343eb79e51e75cb94c013de62a311ca
4
+ data.tar.gz: c9715b5c0080aea1d487b349d825f113d1ecf7d75a85fe76fb2cd9648f3c16c0
5
5
  SHA512:
6
- metadata.gz: 31f293dba3526adcad121f36dd7afe09ca114a559581566e25ea6f60b071c566f29840aa2be46b61f6c3291f3da896b81f9b199e341ea535bb25695ae67381e8
7
- data.tar.gz: 79401fdfd2f6ac2497f3554ceff5d27b9e690f31e2395dbf84a429f4ae8b2c257cd3c6a4cff35f3f5768f662566951edb1e28f59b8efe4abeb22f4ca9569b922
6
+ metadata.gz: ca243045d1d33fbc49532f35c63d8ea2bc924d119934a39407a9bc427e0e9de493997b6e93be6b3d5245c4d3df129b8bb1b696f0df6849bebdd4fc76febb718a
7
+ data.tar.gz: 8ebacdfaa8a7723fd87cf650ea72cf70140633b7140403bd0cbc0f320bc1bdfb28bfe8eeeb9fe950d697f2357b4352e5029ac9c749684bca097a2f05b02b4a1a
@@ -118,6 +118,7 @@ Metrics/BlockLength:
118
118
  - describe
119
119
  - context
120
120
  - shared_examples
121
+ - benchmark
121
122
 
122
123
  Metrics/CyclomaticComplexity:
123
124
  Enabled: false
@@ -224,3 +225,20 @@ Style/HashTransformValues:
224
225
 
225
226
  Style/CommentedKeyword:
226
227
  Enabled: false
228
+
229
+ Style/CaseLikeIf:
230
+ Enabled: false
231
+
232
+ Style/NumericPredicate:
233
+ Enabled: false
234
+
235
+ Style/Semicolon:
236
+ Exclude:
237
+ - bin/benchmark
238
+
239
+ Naming/VariableNumber:
240
+ Enabled: false
241
+
242
+ Lint/Void:
243
+ Exclude:
244
+ - bin/benchmark
@@ -20,6 +20,7 @@ params
20
20
  pwd
21
21
  quotepath
22
22
  rdoc
23
+ readme
23
24
  rspec
24
25
  rubo
25
26
  rubocop
@@ -30,4 +31,6 @@ simplecov
30
31
  sudo
31
32
  tmp
32
33
  usr
34
+ utf
35
+ warmup
33
36
  yardoc
@@ -1,2 +1,4 @@
1
1
  ewah
2
+ ips
2
3
  untr
4
+ webpack
@@ -1,3 +1,6 @@
1
+ # 0.5.0
2
+ - Major Performance Improvements
3
+
1
4
  # 0.4.0
2
5
  - Handles split index files
3
6
 
data/Gemfile CHANGED
@@ -4,6 +4,3 @@ source 'https://rubygems.org'
4
4
 
5
5
  # Specify your gem's dependencies in git_index.gemspec
6
6
  gemspec
7
-
8
- gem 'rake', '>= 12.3.3'
9
- gem 'rspec', '~> 3.0'
data/README.md CHANGED
@@ -2,10 +2,9 @@
2
2
 
3
3
  Parses the .git/index file like `git ls-files` does.
4
4
 
5
- - for small repos (as in, anything smaller than rails),
6
- it can be faster than doing the system call to git
7
- - still takes less than half a second for very large repos e.g. the linux repo
5
+ - faster than doing the system call to git
8
6
  - doesn't require git to be installed
7
+ - tested against ruby 2.4 - 2.7 and jruby
9
8
 
10
9
  ## Installation
11
10
 
@@ -23,23 +22,30 @@ Or install it yourself as:
23
22
 
24
23
  $ gem install git_ls
25
24
 
25
+ And require
26
+ ```ruby
27
+ require 'git_ls'
28
+ ```
29
+
26
30
  ## Usage
27
31
 
28
- `GitLS.files` returns an array of filenames, equivalent to `` `git ls-files -z`.split("\0") ``
32
+ `GitLS.files` reads the `.git/index` file to return an array of file paths, equivalent to `` `git ls-files`.split("\n") ``, but faster, and without requiring git to be installed.
33
+
29
34
  `GitLS.files("path/to/repo")` if the repo is not $PWD.
30
35
 
36
+ Strictly speaking it's equivalent to `` `git -c core.quotepath=off ls-files -z`.split("\0") ``, handling file paths with spaces and non-ascii characters, and returning file paths as UTF-8 strings.
37
+
31
38
  ## Development
32
39
 
33
- Have a look in the bin dir for some useful tools.
34
- To install this gem onto your local machine, run `bundle exec rake install`.
40
+ - Have a look in the bin dir for some useful tools.
41
+ - To install this gem onto your local machine, run `bundle exec rake install`.
42
+ - Run `rake` to run all tests & linters.
35
43
 
36
44
  ## Contributing
37
45
 
38
46
  Bug reports and pull requests are welcome on GitHub at https://github.com/robotdana/git_ls.
39
47
  If you're comfortable, please attach `.git/index` (and `.git/sharedindex.<sha>` if applicable) and the output of `git ls-files` where it doesn't match.
40
48
 
41
-
42
49
  ## License
43
50
 
44
51
  The gem is available as open source under the terms of the [MIT License](https://opensource.org/licenses/MIT).
45
- # git_ls
@@ -27,10 +27,12 @@ Gem::Specification.new do |spec|
27
27
 
28
28
  spec.require_paths = ['lib']
29
29
 
30
+ spec.add_development_dependency 'benchmark-ips'
30
31
  spec.add_development_dependency 'bundler', '>= 2'
31
32
  spec.add_development_dependency 'leftovers'
32
33
  spec.add_development_dependency 'pry', '> 0'
33
34
  spec.add_development_dependency 'rake', '>= 12.3.3'
35
+ spec.add_development_dependency 'rspec'
34
36
  spec.add_development_dependency 'rubocop'
35
37
  spec.add_development_dependency 'rubocop-performance'
36
38
  spec.add_development_dependency 'rubocop-rspec'
@@ -3,105 +3,100 @@
3
3
  # Usage:
4
4
  # GitLS.files -> Array of strings as files.
5
5
  # This will be identical output to git ls-files
6
+ require 'stringio'
7
+
6
8
  module GitLS # rubocop:disable Metrics/ModuleLength
7
9
  class Error < StandardError; end
8
10
 
9
11
  class << self
10
- def files(path = ::Dir.pwd)
11
- read(path, false)
12
- end
12
+ def files(path = nil)
13
+ path = path ? ::File.join(path, '.git/index') : '.git/index'
13
14
 
14
- def headers(path = ::Dir.pwd)
15
- read(path, true)
15
+ read(path, false)
16
16
  end
17
17
 
18
18
  private
19
19
 
20
- def read(path, return_headers_only) # rubocop:disable Metrics/MethodLength, Metrics/AbcSize
21
- path = ::File.join(path, '.git/index') if ::File.directory?(path)
22
- file = ::File.new(path)
20
+ def read(path, _return_headers_only) # rubocop:disable Metrics/MethodLength, Metrics/AbcSize
21
+ begin
22
+ # reading the whole file into memory is faster than lots of ::File#read
23
+ # the biggest it's going to be is 10s of megabytes, well within ram.
24
+ file = ::StringIO.new(::File.read(path, mode: 'rb'))
25
+ rescue ::Errno::ENOENT => e
26
+ raise ::GitLS::Error, "Not a git directory: #{e.message}"
27
+ end
28
+
23
29
  buf = ::String.new
24
30
  # 4-byte signature:
25
31
  # The signature is { 'D', 'I', 'R', 'C' } (stands for "dircache")
26
32
  # 4-byte version number:
27
33
  # The current supported versions are 2, 3 and 4.
28
34
  # 32-bit number of index entries.
29
- sig, git_index_version, length = file.read(12, buf).unpack('a4NN')
30
- raise ::GitLS::Error, ".git/index file not found at #{path}" unless sig == 'DIRC'
35
+ sig = file.read(4, buf)
36
+ raise ::GitLS::Error, ".git/index file not found at '#{path}'" unless sig == 'DIRC'
31
37
 
32
- return { git_index_version: git_index_version, length: length } if return_headers_only
38
+ git_index_version = file.read(4, buf).unpack1('N')
39
+ entries = file.read(4, buf).unpack1('N')
33
40
 
34
- files = ::Array.new(length)
35
- case git_index_version
41
+ files = ::Array.new(entries)
42
+ files = case git_index_version
36
43
  when 2 then files_2(files, file)
37
44
  when 3 then files_3(files, file)
38
45
  when 4 then files_4(files, file)
39
- else raise ::GitLS::Error, 'Unrecognized git index version'
46
+ else raise ::GitLS::Error, "Unrecognized git index version '#{git_index_version}'"
40
47
  end
41
48
 
42
- extensions(files, file, buf)
43
- files
44
- rescue ::Errno::ENOENT => e
45
- raise ::GitLS::Error, "Not a git directory: #{e.message}"
46
- ensure
47
- # :nocov:
48
- # coverage tracking for branches in ensure blocks is weird
49
- file&.close
50
- # :nocov:
51
- files
49
+ read_extensions(files, file, path, buf)
52
50
  end
53
51
 
54
- def extensions(files, file, buf)
55
- case file.read(4, buf)
56
- when 'link' then link_extension(files, file, buf)
57
- when /[A-Z]{4}/ then ignored_extension(files, file, buf)
52
+ def read_extensions(files, file, path, buf) # rubocop:disable Metrics/MethodLength
53
+ extension = file.read(4, buf)
54
+ if extension == 'link'
55
+ read_link_extension(files, file, path, buf)
56
+ elsif extension.match?(/\A[A-Z]{4}\z/)
57
+ size = file.read(4, buf).unpack1('N')
58
+ file.seek(size, 1)
59
+ read_extensions(files, file, path, buf)
58
60
  else
59
- return if (file.pos += 16) && file.eof?
61
+ return files if file.seek(16, 1) && file.eof?
60
62
 
61
- raise ::GitLS::Error, "Unrecognized .git/index extension #{buf.inspect}"
63
+ raise ::GitLS::Error, "Unrecognized .git/index extension #{extension.inspect}"
62
64
  end
63
65
  end
64
66
 
65
- def ignored_extension(files, file, buf)
66
- size = file.read(4, buf).unpack1('N')
67
- file.pos += size
68
- extensions(files, file, buf)
69
- end
70
-
71
- def link_extension(files, file, buf) # rubocop:disable Metrics/MethodLength, Metrics/AbcSize
72
- file.pos += 4 # size = file.read(4, buf).unpack1('N')
67
+ def read_link_extension(files, file, path, buf) # rubocop:disable Metrics/MethodLength
68
+ file.seek(4, 1) # skip size
73
69
 
74
70
  sha = file.read(20, buf)
75
71
 
76
- new_files = files.dup
77
-
78
- files.replace files("#{::File.dirname(file.path)}/sharedindex.#{sha.unpack1('H*')}")
72
+ split_files = read("#{::File.dirname(path)}/sharedindex.#{sha.unpack1('H*')}", false)
79
73
 
80
74
  ewah_each_value(file, buf) do |pos|
81
- files[pos] = nil
75
+ split_files[pos] = nil
82
76
  end
83
77
 
84
78
  ewah_each_value(file, buf) do |pos|
85
- replacement_file = new_files.shift
79
+ replacement_file = files.shift
86
80
  # the documentation *implies* that this *may* get a new filename
87
81
  # i can't get it to happen though
88
82
  # :nocov:
89
- files[pos] = replacement_file unless replacement_file.empty?
83
+ split_files[pos] = replacement_file unless replacement_file.empty?
90
84
  # :nocov:
91
85
  end
92
86
 
93
- files.compact!
94
- files.concat(new_files)
95
- files.sort!
87
+ split_files.compact!
88
+ split_files.concat(files)
89
+ split_files.sort!
96
90
 
97
- extensions(files, file, buf)
91
+ read_extensions(split_files, file, path, buf)
98
92
  end
99
93
 
100
94
  # format is defined here:
101
95
  # https://git-scm.com/docs/bitmap-format#_appendix_a_serialization_format_for_an_ewah_bitmap
102
96
  def ewah_each_value(file, buf) # rubocop:disable Metrics/MethodLength, Metrics/AbcSize
103
97
  uncompressed_pos = 0
104
- file.pos += 4 # uncompressed_bits_count = file.read(4, buf).unpack1('N')
98
+
99
+ file.seek(4, 1) # skip 4 byte uncompressed_bits_count.
105
100
  compressed_bytes = file.read(4, buf).unpack1('N') * 8
106
101
 
107
102
  final_file_pos = file.pos + compressed_bytes
@@ -110,10 +105,10 @@ module GitLS # rubocop:disable Metrics/ModuleLength
110
105
  run_length_word = file.read(8, buf).unpack1('Q>')
111
106
  # 1st bit
112
107
  run_bit = run_length_word & 1
113
- # the next 32 bits, masked, multiplied by 64 (which is shifted by 6 places)
114
- run_length = ((run_length_word >> 1) & 0xFFFF_FFFF) << 6
108
+ # the next 32 bits, masked, multiplied by 64
109
+ run_length = ((run_length_word / 0b1_0) & 0xFFFF_FFFF) * 64
115
110
  # the next 31 bits
116
- literal_length = (run_length_word >> 33)
111
+ literal_length = (run_length_word / 0b100000000_00000000_00000000_00000000_0)
117
112
 
118
113
  if run_bit == 1
119
114
  run_length.times do
@@ -124,9 +119,11 @@ module GitLS # rubocop:disable Metrics/ModuleLength
124
119
  uncompressed_pos += run_length
125
120
  end
126
121
 
127
- literal_length.times do
128
- word = file.read(8, buf).unpack1('B*').reverse
129
- word.each_char do |char|
122
+ next unless literal_length > 0
123
+
124
+ words = file.read(8 * literal_length, buf).unpack('B64' * literal_length)
125
+ words.each do |word|
126
+ word.each_char.reverse_each do |char|
130
127
  yield(uncompressed_pos) if char == '1'
131
128
 
132
129
  uncompressed_pos += 1
@@ -134,47 +131,52 @@ module GitLS # rubocop:disable Metrics/ModuleLength
134
131
  end
135
132
  end
136
133
 
137
- file.pos += 4 # bitmap metadata for adding to bitmaps
134
+ file.seek(4, 1) # bitmap metadata for adding to bitmaps
138
135
  end
139
136
 
140
137
  def files_2(files, file) # rubocop:disable Metrics/MethodLength
141
138
  files.map! do
142
- file.pos += 60 # skip 60 bytes (40 bytes of stat, 20 bytes of sha)
143
- length = ((file.getbyte & 0b0000_1111) << 8) + file.getbyte # find the 12 byte length
139
+ file.seek(60, 1) # skip 60 bytes (40 bytes of stat, 20 bytes of sha)
140
+
141
+ length = (file.getbyte & 0xF) * 256 + file.getbyte # find the 12 byte length
144
142
  if length < 0xFFF
145
143
  path = file.read(length)
146
144
  # :nocov:
147
145
  else
148
146
  # i can't test this i just get ENAMETOOLONG a lot
149
- path = file.readline("\0").chop
150
- file.pos -= 1
147
+ # I'm not sure it's even possible to get to this path, PATH_MAX is 4096 bytes on linux, 1024 on mac
148
+ # and length is a 12 bit number: 4095 max.
149
+ path = file.readline("\0").chop!
150
+ file.seek(-1, 1)
151
151
  # :nocov:
152
152
  end
153
- file.pos += 8 - ((length - 2) % 8) # 1-8 bytes padding of nuls
153
+ file.seek(8 - ((length - 2) % 8), 1) # 1-8 bytes padding of nuls
154
+ path.force_encoding(Encoding::UTF_8)
154
155
  path
155
156
  end
156
157
  end
157
158
 
158
159
  def files_3(files, file) # rubocop:disable Metrics/MethodLength, Metrics/AbcSize
159
160
  files.map! do
160
- file.pos += 60 # skip 60 bytes (40 bytes of stat, 20 bytes of sha)
161
+ file.seek(60, 1) # skip 60 bytes (40 bytes of stat, 20 bytes of sha)
162
+ flags = file.getbyte
163
+ extended_flag = (flags & 0b0100_0000) > 0
164
+ length = (flags & 0xF) * 256 + file.getbyte # find the 12 byte length
165
+ file.seek(2, 1) if extended_flag
161
166
 
162
- flags = file.getbyte * 256 + file.getbyte
163
- extended_flag = (flags & 0b0100_0000_0000_0000).positive?
164
- file.pos += 2 if extended_flag
165
-
166
- length = flags & 0b0000_1111_1111_1111
167
167
  if length < 0xFFF
168
168
  path = file.read(length)
169
169
  # :nocov:
170
170
  else
171
171
  # i can't test this i just get ENAMETOOLONG a lot
172
- path = file.readline("\0").chop
173
- file.pos -= 1
172
+ # I'm not sure it's even possible to get to this path, PATH_MAX is 4096 bytes on linux, 1024 on mac
173
+ # and length is a 12 bit number: 4095 max.
174
+ path = file.readline("\0").chop!
175
+ file.seek(-1, 1)
174
176
  # :nocov:
175
177
  end
176
-
177
- file.pos += 8 - ((path.bytesize - (extended_flag ? 0 : 2)) % 8) # 1-8 bytes padding of nuls
178
+ file.seek(8 - ((path.bytesize - (extended_flag ? 0 : 2)) % 8), 1) # 1-8 bytes padding of nuls
179
+ path.force_encoding(Encoding::UTF_8)
178
180
  path
179
181
  end
180
182
  end
@@ -182,11 +184,11 @@ module GitLS # rubocop:disable Metrics/ModuleLength
182
184
  def files_4(files, file) # rubocop:disable Metrics/MethodLength, Metrics/AbcSize
183
185
  prev_entry_path = ''
184
186
  files.map! do
185
- file.pos += 60 # skip 60 bytes (40 bytes of stat, 20 bytes of sha)
186
- flags = file.getbyte * 256 + file.getbyte
187
- file.pos += 2 if (flags & 0b0100_0000_0000_0000).positive?
188
-
189
- length = flags & 0b0000_1111_1111_1111
187
+ file.seek(60, 1) # skip 60 bytes (40 bytes of stat, 20 bytes of sha)
188
+ flags = file.getbyte
189
+ extended_flag = (flags & 0b0100_0000) > 0
190
+ length = (flags & 0xF) * 256 + file.getbyte # find the 12 byte length
191
+ file.seek(2, 1) if extended_flag
190
192
 
191
193
  # documentation for this number from
192
194
  # https://git-scm.com/docs/pack-format#_original_version_1_pack_idx_files_have_the_following_format
@@ -199,8 +201,8 @@ module GitLS # rubocop:disable Metrics/ModuleLength
199
201
  read_offset = 0
200
202
  prev_read_offset = file.getbyte
201
203
  n = 1
202
- while (prev_read_offset & 0b1000_0000).positive?
203
- read_offset += (prev_read_offset - 0b1000_0000)
204
+ while (prev_read_offset & 0b1000_0000) > 0
205
+ read_offset += (prev_read_offset & 0b0111_1111)
204
206
  read_offset += 2**(7 * n)
205
207
  n += 1
206
208
  prev_read_offset = file.getbyte
@@ -211,15 +213,19 @@ module GitLS # rubocop:disable Metrics/ModuleLength
211
213
 
212
214
  if length < 0xFFF
213
215
  rest = file.read(length - initial_part_length)
214
- file.pos += 1 # the NUL
216
+ file.seek(1, 1) # the NUL
215
217
  # :nocov:
216
218
  else
217
219
  # i can't test this i just get ENAMETOOLONG a lot
218
- rest = file.readline("\0").chop
220
+ # I'm not sure it's even possible to get to this path, PATH_MAX is 4096 bytes on linux, 1024 on mac
221
+ # and length is a 12 bit number: 4095 max.
222
+ rest = file.readline("\0").chop!
223
+ file.seek(-1, 1)
219
224
  # :nocov:
220
225
  end
221
226
 
222
227
  prev_entry_path = prev_entry_path.byteslice(0, initial_part_length) + rest
228
+ prev_entry_path.force_encoding(Encoding::UTF_8)
223
229
  end
224
230
  end
225
231
  end
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module GitLS
4
- VERSION = '0.4.0'
4
+ VERSION = '0.5.0'
5
5
  end
metadata CHANGED
@@ -1,15 +1,29 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: git_ls
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.4.0
4
+ version: 0.5.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Dana Sherson
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2020-09-05 00:00:00.000000000 Z
11
+ date: 2020-09-12 00:00:00.000000000 Z
12
12
  dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: benchmark-ips
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - ">="
18
+ - !ruby/object:Gem::Version
19
+ version: '0'
20
+ type: :development
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - ">="
25
+ - !ruby/object:Gem::Version
26
+ version: '0'
13
27
  - !ruby/object:Gem::Dependency
14
28
  name: bundler
15
29
  requirement: !ruby/object:Gem::Requirement
@@ -66,6 +80,20 @@ dependencies:
66
80
  - - ">="
67
81
  - !ruby/object:Gem::Version
68
82
  version: 12.3.3
83
+ - !ruby/object:Gem::Dependency
84
+ name: rspec
85
+ requirement: !ruby/object:Gem::Requirement
86
+ requirements:
87
+ - - ">="
88
+ - !ruby/object:Gem::Version
89
+ version: '0'
90
+ type: :development
91
+ prerelease: false
92
+ version_requirements: !ruby/object:Gem::Requirement
93
+ requirements:
94
+ - - ">="
95
+ - !ruby/object:Gem::Version
96
+ version: '0'
69
97
  - !ruby/object:Gem::Dependency
70
98
  name: rubocop
71
99
  requirement: !ruby/object:Gem::Requirement
@@ -183,7 +211,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
183
211
  - !ruby/object:Gem::Version
184
212
  version: '0'
185
213
  requirements: []
186
- rubygems_version: 3.0.3
214
+ rubygems_version: 3.1.2
187
215
  signing_key:
188
216
  specification_version: 4
189
217
  summary: Read a .git/index file and list the files