git_ls 0.4.0 → 0.5.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 4ef6ee0fb0ab70df2190d65de7389011c84036fff2993ebf65d90913746f7ff3
4
- data.tar.gz: 25c8eb1f6d58e830311b531f3aedb5f5302801bec42aded9e3df8d8e06f938a1
3
+ metadata.gz: 7736e59f5f16469e453f3f085e1473cef343eb79e51e75cb94c013de62a311ca
4
+ data.tar.gz: c9715b5c0080aea1d487b349d825f113d1ecf7d75a85fe76fb2cd9648f3c16c0
5
5
  SHA512:
6
- metadata.gz: 31f293dba3526adcad121f36dd7afe09ca114a559581566e25ea6f60b071c566f29840aa2be46b61f6c3291f3da896b81f9b199e341ea535bb25695ae67381e8
7
- data.tar.gz: 79401fdfd2f6ac2497f3554ceff5d27b9e690f31e2395dbf84a429f4ae8b2c257cd3c6a4cff35f3f5768f662566951edb1e28f59b8efe4abeb22f4ca9569b922
6
+ metadata.gz: ca243045d1d33fbc49532f35c63d8ea2bc924d119934a39407a9bc427e0e9de493997b6e93be6b3d5245c4d3df129b8bb1b696f0df6849bebdd4fc76febb718a
7
+ data.tar.gz: 8ebacdfaa8a7723fd87cf650ea72cf70140633b7140403bd0cbc0f320bc1bdfb28bfe8eeeb9fe950d697f2357b4352e5029ac9c749684bca097a2f05b02b4a1a
@@ -118,6 +118,7 @@ Metrics/BlockLength:
118
118
  - describe
119
119
  - context
120
120
  - shared_examples
121
+ - benchmark
121
122
 
122
123
  Metrics/CyclomaticComplexity:
123
124
  Enabled: false
@@ -224,3 +225,20 @@ Style/HashTransformValues:
224
225
 
225
226
  Style/CommentedKeyword:
226
227
  Enabled: false
228
+
229
+ Style/CaseLikeIf:
230
+ Enabled: false
231
+
232
+ Style/NumericPredicate:
233
+ Enabled: false
234
+
235
+ Style/Semicolon:
236
+ Exclude:
237
+ - bin/benchmark
238
+
239
+ Naming/VariableNumber:
240
+ Enabled: false
241
+
242
+ Lint/Void:
243
+ Exclude:
244
+ - bin/benchmark
@@ -20,6 +20,7 @@ params
20
20
  pwd
21
21
  quotepath
22
22
  rdoc
23
+ readme
23
24
  rspec
24
25
  rubo
25
26
  rubocop
@@ -30,4 +31,6 @@ simplecov
30
31
  sudo
31
32
  tmp
32
33
  usr
34
+ utf
35
+ warmup
33
36
  yardoc
@@ -1,2 +1,4 @@
1
1
  ewah
2
+ ips
2
3
  untr
4
+ webpack
@@ -1,3 +1,6 @@
1
+ # 0.5.0
2
+ - Major Performance Improvements
3
+
1
4
  # 0.4.0
2
5
  - Handles split index files
3
6
 
data/Gemfile CHANGED
@@ -4,6 +4,3 @@ source 'https://rubygems.org'
4
4
 
5
5
  # Specify your gem's dependencies in git_index.gemspec
6
6
  gemspec
7
-
8
- gem 'rake', '>= 12.3.3'
9
- gem 'rspec', '~> 3.0'
data/README.md CHANGED
@@ -2,10 +2,9 @@
2
2
 
3
3
  Parses the .git/index file like `git ls-files` does.
4
4
 
5
- - for small repos (as in, anything smaller than rails),
6
- it can be faster than doing the system call to git
7
- - still takes less than half a second for very large repos e.g. the linux repo
5
+ - faster than doing the system call to git
8
6
  - doesn't require git to be installed
7
+ - tested against ruby 2.4 - 2.7 and jruby
9
8
 
10
9
  ## Installation
11
10
 
@@ -23,23 +22,30 @@ Or install it yourself as:
23
22
 
24
23
  $ gem install git_ls
25
24
 
25
+ And require
26
+ ```ruby
27
+ require 'git_ls'
28
+ ```
29
+
26
30
  ## Usage
27
31
 
28
- `GitLS.files` returns an array of filenames, equivalent to `` `git ls-files -z`.split("\0") ``
32
+ `GitLS.files` reads the `.git/index` file to return an array of file paths, equivalent to `` `git ls-files`.split("\n") ``, but faster, and without requiring git to be installed.
33
+
29
34
  `GitLS.files("path/to/repo")` if the repo is not $PWD.
30
35
 
36
+ Strictly speaking, it's equivalent to `` `git -c core.quotepath=off ls-files -z`.split("\0") ``, handling file paths with spaces and non-ASCII characters, and returning file paths as UTF-8 strings.
37
+
31
38
  ## Development
32
39
 
33
- Have a look in the bin dir for some useful tools.
34
- To install this gem onto your local machine, run `bundle exec rake install`.
40
+ - Have a look in the bin dir for some useful tools.
41
+ - To install this gem onto your local machine, run `bundle exec rake install`.
42
+ - Run `rake` to run all tests & linters.
35
43
 
36
44
  ## Contributing
37
45
 
38
46
  Bug reports and pull requests are welcome on GitHub at https://github.com/robotdana/git_ls.
39
47
  If you're comfortable, please attach `.git/index` (and `.git/sharedindex.<sha>` if applicable) and the output of `git ls-files` where it doesn't match.
40
48
 
41
-
42
49
  ## License
43
50
 
44
51
  The gem is available as open source under the terms of the [MIT License](https://opensource.org/licenses/MIT).
45
- # git_ls
@@ -27,10 +27,12 @@ Gem::Specification.new do |spec|
27
27
 
28
28
  spec.require_paths = ['lib']
29
29
 
30
+ spec.add_development_dependency 'benchmark-ips'
30
31
  spec.add_development_dependency 'bundler', '>= 2'
31
32
  spec.add_development_dependency 'leftovers'
32
33
  spec.add_development_dependency 'pry', '> 0'
33
34
  spec.add_development_dependency 'rake', '>= 12.3.3'
35
+ spec.add_development_dependency 'rspec'
34
36
  spec.add_development_dependency 'rubocop'
35
37
  spec.add_development_dependency 'rubocop-performance'
36
38
  spec.add_development_dependency 'rubocop-rspec'
@@ -3,105 +3,100 @@
3
3
  # Usage:
4
4
  # GitLS.files -> Array of strings as files.
5
5
  # This will be identical output to git ls-files
6
+ require 'stringio'
7
+
6
8
  module GitLS # rubocop:disable Metrics/ModuleLength
7
9
  class Error < StandardError; end
8
10
 
9
11
  class << self
10
- def files(path = ::Dir.pwd)
11
- read(path, false)
12
- end
12
+ def files(path = nil)
13
+ path = path ? ::File.join(path, '.git/index') : '.git/index'
13
14
 
14
- def headers(path = ::Dir.pwd)
15
- read(path, true)
15
+ read(path, false)
16
16
  end
17
17
 
18
18
  private
19
19
 
20
- def read(path, return_headers_only) # rubocop:disable Metrics/MethodLength, Metrics/AbcSize
21
- path = ::File.join(path, '.git/index') if ::File.directory?(path)
22
- file = ::File.new(path)
20
+ def read(path, _return_headers_only) # rubocop:disable Metrics/MethodLength, Metrics/AbcSize
21
+ begin
22
+ # reading the whole file into memory is faster than lots of ::File#read
23
+ # the biggest it's going to be is 10s of megabytes, well within ram.
24
+ file = ::StringIO.new(::File.read(path, mode: 'rb'))
25
+ rescue ::Errno::ENOENT => e
26
+ raise ::GitLS::Error, "Not a git directory: #{e.message}"
27
+ end
28
+
23
29
  buf = ::String.new
24
30
  # 4-byte signature:
25
31
  # The signature is { 'D', 'I', 'R', 'C' } (stands for "dircache")
26
32
  # 4-byte version number:
27
33
  # The current supported versions are 2, 3 and 4.
28
34
  # 32-bit number of index entries.
29
- sig, git_index_version, length = file.read(12, buf).unpack('a4NN')
30
- raise ::GitLS::Error, ".git/index file not found at #{path}" unless sig == 'DIRC'
35
+ sig = file.read(4, buf)
36
+ raise ::GitLS::Error, ".git/index file not found at '#{path}'" unless sig == 'DIRC'
31
37
 
32
- return { git_index_version: git_index_version, length: length } if return_headers_only
38
+ git_index_version = file.read(4, buf).unpack1('N')
39
+ entries = file.read(4, buf).unpack1('N')
33
40
 
34
- files = ::Array.new(length)
35
- case git_index_version
41
+ files = ::Array.new(entries)
42
+ files = case git_index_version
36
43
  when 2 then files_2(files, file)
37
44
  when 3 then files_3(files, file)
38
45
  when 4 then files_4(files, file)
39
- else raise ::GitLS::Error, 'Unrecognized git index version'
46
+ else raise ::GitLS::Error, "Unrecognized git index version '#{git_index_version}'"
40
47
  end
41
48
 
42
- extensions(files, file, buf)
43
- files
44
- rescue ::Errno::ENOENT => e
45
- raise ::GitLS::Error, "Not a git directory: #{e.message}"
46
- ensure
47
- # :nocov:
48
- # coverage tracking for branches in ensure blocks is weird
49
- file&.close
50
- # :nocov:
51
- files
49
+ read_extensions(files, file, path, buf)
52
50
  end
53
51
 
54
- def extensions(files, file, buf)
55
- case file.read(4, buf)
56
- when 'link' then link_extension(files, file, buf)
57
- when /[A-Z]{4}/ then ignored_extension(files, file, buf)
52
+ def read_extensions(files, file, path, buf) # rubocop:disable Metrics/MethodLength
53
+ extension = file.read(4, buf)
54
+ if extension == 'link'
55
+ read_link_extension(files, file, path, buf)
56
+ elsif extension.match?(/\A[A-Z]{4}\z/)
57
+ size = file.read(4, buf).unpack1('N')
58
+ file.seek(size, 1)
59
+ read_extensions(files, file, path, buf)
58
60
  else
59
- return if (file.pos += 16) && file.eof?
61
+ return files if file.seek(16, 1) && file.eof?
60
62
 
61
- raise ::GitLS::Error, "Unrecognized .git/index extension #{buf.inspect}"
63
+ raise ::GitLS::Error, "Unrecognized .git/index extension #{extension.inspect}"
62
64
  end
63
65
  end
64
66
 
65
- def ignored_extension(files, file, buf)
66
- size = file.read(4, buf).unpack1('N')
67
- file.pos += size
68
- extensions(files, file, buf)
69
- end
70
-
71
- def link_extension(files, file, buf) # rubocop:disable Metrics/MethodLength, Metrics/AbcSize
72
- file.pos += 4 # size = file.read(4, buf).unpack1('N')
67
+ def read_link_extension(files, file, path, buf) # rubocop:disable Metrics/MethodLength
68
+ file.seek(4, 1) # skip size
73
69
 
74
70
  sha = file.read(20, buf)
75
71
 
76
- new_files = files.dup
77
-
78
- files.replace files("#{::File.dirname(file.path)}/sharedindex.#{sha.unpack1('H*')}")
72
+ split_files = read("#{::File.dirname(path)}/sharedindex.#{sha.unpack1('H*')}", false)
79
73
 
80
74
  ewah_each_value(file, buf) do |pos|
81
- files[pos] = nil
75
+ split_files[pos] = nil
82
76
  end
83
77
 
84
78
  ewah_each_value(file, buf) do |pos|
85
- replacement_file = new_files.shift
79
+ replacement_file = files.shift
86
80
  # the documentation *implies* that this *may* get a new filename
87
81
  # i can't get it to happen though
88
82
  # :nocov:
89
- files[pos] = replacement_file unless replacement_file.empty?
83
+ split_files[pos] = replacement_file unless replacement_file.empty?
90
84
  # :nocov:
91
85
  end
92
86
 
93
- files.compact!
94
- files.concat(new_files)
95
- files.sort!
87
+ split_files.compact!
88
+ split_files.concat(files)
89
+ split_files.sort!
96
90
 
97
- extensions(files, file, buf)
91
+ read_extensions(split_files, file, path, buf)
98
92
  end
99
93
 
100
94
  # format is defined here:
101
95
  # https://git-scm.com/docs/bitmap-format#_appendix_a_serialization_format_for_an_ewah_bitmap
102
96
  def ewah_each_value(file, buf) # rubocop:disable Metrics/MethodLength, Metrics/AbcSize
103
97
  uncompressed_pos = 0
104
- file.pos += 4 # uncompressed_bits_count = file.read(4, buf).unpack1('N')
98
+
99
+ file.seek(4, 1) # skip 4 byte uncompressed_bits_count.
105
100
  compressed_bytes = file.read(4, buf).unpack1('N') * 8
106
101
 
107
102
  final_file_pos = file.pos + compressed_bytes
@@ -110,10 +105,10 @@ module GitLS # rubocop:disable Metrics/ModuleLength
110
105
  run_length_word = file.read(8, buf).unpack1('Q>')
111
106
  # 1st bit
112
107
  run_bit = run_length_word & 1
113
- # the next 32 bits, masked, multiplied by 64 (which is shifted by 6 places)
114
- run_length = ((run_length_word >> 1) & 0xFFFF_FFFF) << 6
108
+ # the next 32 bits, masked, multiplied by 64
109
+ run_length = ((run_length_word / 0b1_0) & 0xFFFF_FFFF) * 64
115
110
  # the next 31 bits
116
- literal_length = (run_length_word >> 33)
111
+ literal_length = (run_length_word / 0b100000000_00000000_00000000_00000000_0)
117
112
 
118
113
  if run_bit == 1
119
114
  run_length.times do
@@ -124,9 +119,11 @@ module GitLS # rubocop:disable Metrics/ModuleLength
124
119
  uncompressed_pos += run_length
125
120
  end
126
121
 
127
- literal_length.times do
128
- word = file.read(8, buf).unpack1('B*').reverse
129
- word.each_char do |char|
122
+ next unless literal_length > 0
123
+
124
+ words = file.read(8 * literal_length, buf).unpack('B64' * literal_length)
125
+ words.each do |word|
126
+ word.each_char.reverse_each do |char|
130
127
  yield(uncompressed_pos) if char == '1'
131
128
 
132
129
  uncompressed_pos += 1
@@ -134,47 +131,52 @@ module GitLS # rubocop:disable Metrics/ModuleLength
134
131
  end
135
132
  end
136
133
 
137
- file.pos += 4 # bitmap metadata for adding to bitmaps
134
+ file.seek(4, 1) # bitmap metadata for adding to bitmaps
138
135
  end
139
136
 
140
137
  def files_2(files, file) # rubocop:disable Metrics/MethodLength
141
138
  files.map! do
142
- file.pos += 60 # skip 60 bytes (40 bytes of stat, 20 bytes of sha)
143
- length = ((file.getbyte & 0b0000_1111) << 8) + file.getbyte # find the 12 byte length
139
+ file.seek(60, 1) # skip 60 bytes (40 bytes of stat, 20 bytes of sha)
140
+
141
+ length = (file.getbyte & 0xF) * 256 + file.getbyte # find the 12 byte length
144
142
  if length < 0xFFF
145
143
  path = file.read(length)
146
144
  # :nocov:
147
145
  else
148
146
  # i can't test this i just get ENAMETOOLONG a lot
149
- path = file.readline("\0").chop
150
- file.pos -= 1
147
+ # I'm not sure it's even possible to get to this path, PATH_MAX is 4096 bytes on linux, 1024 on mac
148
+ # and length is a 12 bit number: 4095 max.
149
+ path = file.readline("\0").chop!
150
+ file.seek(-1, 1)
151
151
  # :nocov:
152
152
  end
153
- file.pos += 8 - ((length - 2) % 8) # 1-8 bytes padding of nuls
153
+ file.seek(8 - ((length - 2) % 8), 1) # 1-8 bytes padding of nuls
154
+ path.force_encoding(Encoding::UTF_8)
154
155
  path
155
156
  end
156
157
  end
157
158
 
158
159
  def files_3(files, file) # rubocop:disable Metrics/MethodLength, Metrics/AbcSize
159
160
  files.map! do
160
- file.pos += 60 # skip 60 bytes (40 bytes of stat, 20 bytes of sha)
161
+ file.seek(60, 1) # skip 60 bytes (40 bytes of stat, 20 bytes of sha)
162
+ flags = file.getbyte
163
+ extended_flag = (flags & 0b0100_0000) > 0
164
+ length = (flags & 0xF) * 256 + file.getbyte # find the 12 byte length
165
+ file.seek(2, 1) if extended_flag
161
166
 
162
- flags = file.getbyte * 256 + file.getbyte
163
- extended_flag = (flags & 0b0100_0000_0000_0000).positive?
164
- file.pos += 2 if extended_flag
165
-
166
- length = flags & 0b0000_1111_1111_1111
167
167
  if length < 0xFFF
168
168
  path = file.read(length)
169
169
  # :nocov:
170
170
  else
171
171
  # i can't test this i just get ENAMETOOLONG a lot
172
- path = file.readline("\0").chop
173
- file.pos -= 1
172
+ # I'm not sure it's even possible to get to this path, PATH_MAX is 4096 bytes on linux, 1024 on mac
173
+ # and length is a 12 byte number: 4096 max.
174
+ path = file.readline("\0").chop!
175
+ file.seek(-1, 1)
174
176
  # :nocov:
175
177
  end
176
-
177
- file.pos += 8 - ((path.bytesize - (extended_flag ? 0 : 2)) % 8) # 1-8 bytes padding of nuls
178
+ file.seek(8 - ((path.bytesize - (extended_flag ? 0 : 2)) % 8), 1) # 1-8 bytes padding of nuls
179
+ path.force_encoding(Encoding::UTF_8)
178
180
  path
179
181
  end
180
182
  end
@@ -182,11 +184,11 @@ module GitLS # rubocop:disable Metrics/ModuleLength
182
184
  def files_4(files, file) # rubocop:disable Metrics/MethodLength, Metrics/AbcSize
183
185
  prev_entry_path = ''
184
186
  files.map! do
185
- file.pos += 60 # skip 60 bytes (40 bytes of stat, 20 bytes of sha)
186
- flags = file.getbyte * 256 + file.getbyte
187
- file.pos += 2 if (flags & 0b0100_0000_0000_0000).positive?
188
-
189
- length = flags & 0b0000_1111_1111_1111
187
+ file.seek(60, 1) # skip 60 bytes (40 bytes of stat, 20 bytes of sha)
188
+ flags = file.getbyte
189
+ extended_flag = (flags & 0b0100_0000) > 0
190
+ length = (flags & 0xF) * 256 + file.getbyte # find the 12 byte length
191
+ file.seek(2, 1) if extended_flag
190
192
 
191
193
  # documentation for this number from
192
194
  # https://git-scm.com/docs/pack-format#_original_version_1_pack_idx_files_have_the_following_format
@@ -199,8 +201,8 @@ module GitLS # rubocop:disable Metrics/ModuleLength
199
201
  read_offset = 0
200
202
  prev_read_offset = file.getbyte
201
203
  n = 1
202
- while (prev_read_offset & 0b1000_0000).positive?
203
- read_offset += (prev_read_offset - 0b1000_0000)
204
+ while (prev_read_offset & 0b1000_0000) > 0
205
+ read_offset += (prev_read_offset & 0b0111_1111)
204
206
  read_offset += 2**(7 * n)
205
207
  n += 1
206
208
  prev_read_offset = file.getbyte
@@ -211,15 +213,19 @@ module GitLS # rubocop:disable Metrics/ModuleLength
211
213
 
212
214
  if length < 0xFFF
213
215
  rest = file.read(length - initial_part_length)
214
- file.pos += 1 # the NUL
216
+ file.seek(1, 1) # the NUL
215
217
  # :nocov:
216
218
  else
217
219
  # i can't test this i just get ENAMETOOLONG a lot
218
- rest = file.readline("\0").chop
220
+ # I'm not sure it's even possible to get to this path, PATH_MAX is 4096 bytes on linux, 1024 on mac
221
+ # and length is a 12 bit number: 4095 max.
222
+ rest = file.readline("\0").chop!
223
+ file.seek(-1, 1)
219
224
  # :nocov:
220
225
  end
221
226
 
222
227
  prev_entry_path = prev_entry_path.byteslice(0, initial_part_length) + rest
228
+ prev_entry_path.force_encoding(Encoding::UTF_8)
223
229
  end
224
230
  end
225
231
  end
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module GitLS
4
- VERSION = '0.4.0'
4
+ VERSION = '0.5.0'
5
5
  end
metadata CHANGED
@@ -1,15 +1,29 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: git_ls
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.4.0
4
+ version: 0.5.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Dana Sherson
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2020-09-05 00:00:00.000000000 Z
11
+ date: 2020-09-12 00:00:00.000000000 Z
12
12
  dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: benchmark-ips
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - ">="
18
+ - !ruby/object:Gem::Version
19
+ version: '0'
20
+ type: :development
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - ">="
25
+ - !ruby/object:Gem::Version
26
+ version: '0'
13
27
  - !ruby/object:Gem::Dependency
14
28
  name: bundler
15
29
  requirement: !ruby/object:Gem::Requirement
@@ -66,6 +80,20 @@ dependencies:
66
80
  - - ">="
67
81
  - !ruby/object:Gem::Version
68
82
  version: 12.3.3
83
+ - !ruby/object:Gem::Dependency
84
+ name: rspec
85
+ requirement: !ruby/object:Gem::Requirement
86
+ requirements:
87
+ - - ">="
88
+ - !ruby/object:Gem::Version
89
+ version: '0'
90
+ type: :development
91
+ prerelease: false
92
+ version_requirements: !ruby/object:Gem::Requirement
93
+ requirements:
94
+ - - ">="
95
+ - !ruby/object:Gem::Version
96
+ version: '0'
69
97
  - !ruby/object:Gem::Dependency
70
98
  name: rubocop
71
99
  requirement: !ruby/object:Gem::Requirement
@@ -183,7 +211,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
183
211
  - !ruby/object:Gem::Version
184
212
  version: '0'
185
213
  requirements: []
186
- rubygems_version: 3.0.3
214
+ rubygems_version: 3.1.2
187
215
  signing_key:
188
216
  specification_version: 4
189
217
  summary: Read a .git/index file and list the files