packfile_reader 0.0.1 → 0.0.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 3b8e93c293d659c69561f903785550ebdcf9cbb4eff620d5f711a2eb55e2452f
4
- data.tar.gz: 504bec783784ff1bd3233d7e39b1e53e3621239c79e54d6c54375358d69d504c
3
+ metadata.gz: 8eb4f8bc34379b5872964c517b718bbe1d5ee2488f02bc18983f3678be2b83da
4
+ data.tar.gz: 17effd8b76eca4b3bd7902b96b1a0c7494bdd977dbd11d9482ea31798cff5742
5
5
  SHA512:
6
- metadata.gz: 06f757f92ed4ea72c888784ce94ede9d4502da4ee840f986ee74fb4fc984387387eba316ed7fc54dbb604d7df295f38bf5df589c0fee5bb962b49eac9fb5ebf7
7
- data.tar.gz: 23dbeb6424004becebd49eb8823d14230a95fcd14e842054d3be46b54acfbee887e1d47d97f4f6f66e01d63b435d97331fe21daa27b6267f142230e5bdd14fad
6
+ metadata.gz: 66fda3747bc5d05ad5ae7ccd529831f5bf5491e4558fd1147bd562b4a7d9d157a9829d5fa5c5d9ae53dae36e66ecc8f4dec790f7eed7e35835cce44510fec0db
7
+ data.tar.gz: 7767f4c076cbfcafb99c4fd5219684eeba31b2081a9d86fcc4e9f8328159df86ee4950b497f8cb29148d7691803105a6e1e4e73f337df6800d9a1280b24073b7
data/README.md CHANGED
@@ -6,7 +6,7 @@ Git packs up several of "loose" objects into a single binary file called a “pa
6
6
 
7
7
  The `.idx` file contains offsets for all the objects in the `.pack` file, so it is easier to find the content you are looking for on the packfile.
8
8
 
9
- When we have both files, we can use `git verify-pack' command to read the content and metadata about the objects in the packfile, but sometimes we only have the `.pack` file, and in this case `git` is not really helpful.
9
+ When we have both files, we can use `git verify-pack` command to read the content and metadata about the objects in the packfile, but sometimes we only have the `.pack` file, and in this case `git` is not really helpful.
10
10
 
11
11
  ![packfile](packfile-format.png?raw=true "Packfile Format")
12
12
 
@@ -39,9 +39,10 @@ where [options] are:
39
39
  -i, --filter-by-ids=<s> Comma separated list of object ids to look for (default: any)
40
40
  -e, --expand-objects Whether to expand objects data
41
41
  -o, --output-dir=<s> Directory to store the expanded objects (default: .)
42
- -v, --version Print version and exit
42
+ -w, --window-size=<i> Size of the window (in bytes) to use when looking for objects (default: 10000)
43
+ -v, --verbose Log some debugging information to stderr
44
+ -r, --version Print version and exit
43
45
  -l, --help Show this message
44
-
45
46
  ```
46
47
 
47
48
  ## Example:
@@ -114,5 +115,27 @@ $ cat /tmp/5297f8f21ad868d9eb6a9c01ad09a9d186177047.txt
114
115
  # test-git%
115
116
  ```
116
117
 
118
+ ### Debugging information
119
+
120
+ Passing `--verbose` to the command will add some debugging information to the output as well as the timestamp when the entry got processed at the beginning of the entry line.
121
+
122
+ ```
123
+ packfile_reader --no-headers --verbose pack.sample
124
+ ```
125
+
126
+ ```
127
+ [2020-12-01 21:24:43 -0800] 96438dd1e26e6963fa65be0012e8f6e84209bc5d OBJ_COMMIT 653
128
+ [2020-12-01 21:24:43 -0800] 5297f8f21ad868d9eb6a9c01ad09a9d186177047 OBJ_BLOB 10
129
+ [2020-12-01 21:24:44 -0800] bf195faf9d23ce0615cdefd2b746a077ef82f03f OBJ_TREE 37
130
+ ```
131
+
132
+ # Development
133
+
134
+ ## Testing
135
+
136
+ ```
137
+ rake test
138
+ ```
139
+
117
140
  # References
118
141
  - http://shafiul.github.io/gitbook/7_the_packfile.html
data/bin/packfile_reader CHANGED
@@ -4,27 +4,29 @@ require 'optimist'
4
4
  require 'packfile_reader'
5
5
 
6
6
  opts = Optimist::options do
7
- version "v0.0.1 (c) 2020 Robison WR Santos"
7
+ version "v0.0.2 (c) 2020 Robison WR Santos"
8
8
  banner <<~EOS
9
9
  This tool is used to parse and extract data from git packfiles without a .idx file.
10
10
  By default, the script will only report the object ids, their type and their deflated sizes.
11
11
  You can also make the script expand the content of the objects in the local directory or a directory
12
12
  of your choice.
13
-
13
+
14
14
  Usage:
15
15
  packfile_reader [options] <packfile>
16
16
  where [options] are:
17
17
  EOS
18
-
18
+
19
19
  opt :headers_only, 'Display only the headers of the packfile'
20
20
  opt :no_headers, 'Skip displaying the headers of the packfile'
21
21
  opt :filter_by_ids, 'Comma separated list of object ids to look for', :default => 'any', :short => '-i', :type => String
22
22
  opt :expand_objects, 'Whether to expand objects data', :default => false, :short => '-e'
23
23
  opt :output_dir, 'Directory to store the expanded objects', :default => '.', :short => '-o', :type => String
24
+ opt :window_size, 'Size of the window (in bytes) to use when looking for objects', :default => 10_000, :short => '-w', :type => Integer
25
+ opt :verbose, 'Log some debugging informaiton to stderr', :default => false, :short => '-v'
24
26
  end
25
-
27
+
26
28
  (puts "You must inform a single packfile, found #{ARGV.size}"; exit 1) if ARGV.size > 1 or ARGV.empty?
27
-
29
+
28
30
  packfile = ARGV.first
29
31
  (puts 'Packfile not found'; exit 2) unless File.exist?(packfile)
30
32
 
@@ -50,13 +52,14 @@ File.open(packfile, 'rb') do |f|
50
52
  entries_processed = 0
51
53
  limit = objects_to_find == :any ? header.n_entries : objects_to_find.size
52
54
  (0...limit).each do
53
- entry = PackfileReader::PackfileEntry.next_entry(f, objects_to_find) do |c,u,id|
55
+ entry = PackfileReader::PackfileEntry.next_entry(f, objects_to_find, opts[:verbose], opts[:window_size]) do |c,u,id|
54
56
  if opts[:expand_objects]
55
57
  dir = opts[:output_dir]
56
58
  File.open(File.join(dir, "#{id}.txt"), 'w') {|o| o.write u}
57
59
  end
58
60
  end
59
61
 
60
- puts "#{entry.id}\t#{entry.type}\t#{entry.size}"
62
+ timestamp = opts[:verbose] ? "[#{Time.now}] " : ''
63
+ puts "\u001b[0K#{timestamp}#{entry.id}\t#{entry.type}\t#{entry.size}"
61
64
  end
62
- end
65
+ end
@@ -16,8 +16,11 @@ module PackfileReader
16
16
  ]
17
17
 
18
18
  # Accepts a block that will receive the compressed data, uncompressed data and
19
- # the computed object id
20
- def self.next_entry(packfile_io, objects_to_find=:any)
19
+ # the computed object id. Window size is the amount of bytes to read at once
20
+ # while searching for the compressed data
21
+ def self.next_entry(packfile_io, objects_to_find=:any, log_verbose=false, window_size=10_000)
22
+ raise 'Object id must be a valid sha1' unless objects_to_find == :any || objects_to_find.all? {|id| /^[0-9a-f]{40}$/.match? id }
23
+
21
24
  loop do
22
25
  return nil if packfile_io.eof?
23
26
 
@@ -27,15 +30,19 @@ module PackfileReader
27
30
  size = hunk.size
28
31
  offset = hunk.offset_size
29
32
 
33
+ # Clean the current line before printing the message
34
+ $stderr.puts "\u001b[0K>>>> Processing new entry [#{type}]" if log_verbose
30
35
  while hunk.continuation?
31
36
  hunk = PackfileReader::Hunk.new_without_type(packfile_io)
32
37
  size = (hunk.size << offset) | size # Data size is a combination of all hunk sizes
33
38
  offset += hunk.offset_size
34
39
  end
35
40
 
36
- compressed_data, uncompressed_data = find_data(packfile_io)
41
+ compressed_data, uncompressed_data = find_data(packfile_io, log_verbose, window_size)
37
42
  object_id = compute_id(type, size, uncompressed_data)
38
43
 
44
+ type = "#{type} [CORRUPTED] " if uncompressed_data.nil?
45
+
39
46
  if objects_to_find == :any || objects_to_find.member?(object_id)
40
47
  yield compressed_data, uncompressed_data, object_id if block_given?
41
48
  return PackfileEntry.new(type, size, object_id)
@@ -44,23 +51,35 @@ module PackfileReader
44
51
  end
45
52
 
46
53
  private
47
- def self.find_data(packfile_io)
54
+ def self.find_data(packfile_io, log_verbose, window_size)
48
55
  data_header = find_zlib_data_header(packfile_io)
49
56
 
50
57
  # since we don't have the index file that accompanies pack files
51
58
  # we need to use brute force to find where the compressed data ends
52
- # to do that, we go byte by byte and try to deflate the data, when
59
+ # to do that, we go <window_size> bytes by <window_size> bytes and try to deflate the data, when
53
60
  # that succeeds, we know we got it all
54
61
  compressed_data = data_header
55
- compressed_data += packfile_io.read(1)
62
+ compressed_data += packfile_io.read(window_size)
56
63
 
64
+ bytes_read = compressed_data.size
57
65
  begin
58
66
  uncompressed_data = Zlib.inflate(compressed_data)
59
67
  rescue Zlib::BufError
60
- compressed_data += packfile_io.read(1)
68
+ compressed_data += packfile_io.read(window_size)
69
+ bytes_read = compressed_data.size
70
+ $stderr.print " .... retrying on data gathering [#{bytes_read}] bytes read\r" if log_verbose
61
71
  retry
72
+ rescue Zlib::DataError
73
+ uncompressed_data = nil
62
74
  end
63
75
 
76
+ $stderr.puts " .... data read [#{bytes_read}] bytes" if log_verbose
77
+ $stderr.print " .... repositioning file pointer to the end of current compressed data\r" if log_verbose
78
+ compressed_data = Zlib.deflate(uncompressed_data) if uncompressed_data
79
+
80
+ # reposition the file pointer to end of compressed data
81
+ packfile_io.seek(packfile_io.pos - (bytes_read - compressed_data.size))
82
+
64
83
  [compressed_data, uncompressed_data]
65
84
  end
66
85
 
@@ -92,7 +111,7 @@ module PackfileReader
92
111
  end
93
112
 
94
113
  return '000' if header_type.empty?
95
-
114
+
96
115
  header = "#{header_type} #{size}\0"
97
116
  store = "#{header}#{uncompressed_data}"
98
117
  Digest::SHA1.hexdigest(store)
@@ -104,4 +123,4 @@ module PackfileReader
104
123
  @id = id
105
124
  end
106
125
  end
107
- end
126
+ end
metadata CHANGED
@@ -1,11 +1,11 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: packfile_reader
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.1
4
+ version: 0.0.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - Robison WR Santos
8
- autorequire:
8
+ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
11
  date: 2020-11-30 00:00:00.000000000 Z
@@ -16,14 +16,14 @@ dependencies:
16
16
  requirements:
17
17
  - - "~>"
18
18
  - !ruby/object:Gem::Version
19
- version: 3.0.1
19
+ version: 3.1.0
20
20
  type: :runtime
21
21
  prerelease: false
22
22
  version_requirements: !ruby/object:Gem::Requirement
23
23
  requirements:
24
24
  - - "~>"
25
25
  - !ruby/object:Gem::Version
26
- version: 3.0.1
26
+ version: 3.1.0
27
27
  description: A tool to parse git packfile when idx files are not present
28
28
  email: ''
29
29
  executables:
@@ -41,7 +41,7 @@ homepage: https://github.com/robisonsantos/packfile_reader
41
41
  licenses:
42
42
  - MIT
43
43
  metadata: {}
44
- post_install_message:
44
+ post_install_message:
45
45
  rdoc_options: []
46
46
  require_paths:
47
47
  - lib
@@ -56,8 +56,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
56
56
  - !ruby/object:Gem::Version
57
57
  version: '0'
58
58
  requirements: []
59
- rubygems_version: 3.1.2
60
- signing_key:
59
+ rubygems_version: 3.2.33
60
+ signing_key:
61
61
  specification_version: 4
62
62
  summary: Parses git packfiles without the help of idx companion
63
63
  test_files: []