packfile_reader 0.0.1 → 0.0.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +26 -3
- data/bin/packfile_reader +11 -8
- data/lib/packfile_reader/packfile_entry.rb +28 -9
- metadata +7 -7
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 8eb4f8bc34379b5872964c517b718bbe1d5ee2488f02bc18983f3678be2b83da
|
4
|
+
data.tar.gz: 17effd8b76eca4b3bd7902b96b1a0c7494bdd977dbd11d9482ea31798cff5742
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 66fda3747bc5d05ad5ae7ccd529831f5bf5491e4558fd1147bd562b4a7d9d157a9829d5fa5c5d9ae53dae36e66ecc8f4dec790f7eed7e35835cce44510fec0db
|
7
|
+
data.tar.gz: 7767f4c076cbfcafb99c4fd5219684eeba31b2081a9d86fcc4e9f8328159df86ee4950b497f8cb29148d7691803105a6e1e4e73f337df6800d9a1280b24073b7
|
data/README.md
CHANGED
@@ -6,7 +6,7 @@ Git packs up several of "loose" objects into a single binary file called a “pa
|
|
6
6
|
|
7
7
|
The `.idx` file contains offsets for all the objects in the `.pack` file, so it is easier to find the content you are looking for on the packfile.
|
8
8
|
|
9
|
-
When we have both files, we can use `git verify-pack
|
9
|
+
When we have both files, we can use the `git verify-pack` command to read the content and metadata about the objects in the packfile, but sometimes we only have the `.pack` file, and in this case `git` is not really helpful.
|
10
10
|
|
11
11
|

|
12
12
|
|
@@ -39,9 +39,10 @@ where [options] are:
|
|
39
39
|
-i, --filter-by-ids=<s> Comma separated list of object ids to look for (default: any)
|
40
40
|
-e, --expand-objects Whether to expand objects data
|
41
41
|
-o, --output-dir=<s> Directory to store the expanded objects (default: .)
|
42
|
-
-
|
42
|
+
-w, --window-size=<i> Size of the window (in bytes) to use when looking for objects (default: 10000)
|
43
|
+
-v, --verbose Log some debugging informaiton to stderr
|
44
|
+
-r, --version Print version and exit
|
43
45
|
-l, --help Show this message
|
44
|
-
|
45
46
|
```
|
46
47
|
|
47
48
|
## Example:
|
@@ -114,5 +115,27 @@ $ cat /tmp/5297f8f21ad868d9eb6a9c01ad09a9d186177047.txt
|
|
114
115
|
# test-git%
|
115
116
|
```
|
116
117
|
|
118
|
+
### Debugging information
|
119
|
+
|
120
|
+
Passing `--verbose` to the command logs some debugging information to stderr and prefixes each entry line with the timestamp at which the entry was processed.
|
121
|
+
|
122
|
+
```
|
123
|
+
packfile_reader --no-headers --verbose pack.sample
|
124
|
+
```
|
125
|
+
|
126
|
+
```
|
127
|
+
[2020-12-01 21:24:43 -0800] 96438dd1e26e6963fa65be0012e8f6e84209bc5d OBJ_COMMIT 653
|
128
|
+
[2020-12-01 21:24:43 -0800] 5297f8f21ad868d9eb6a9c01ad09a9d186177047 OBJ_BLOB 10
|
129
|
+
[2020-12-01 21:24:44 -0800] bf195faf9d23ce0615cdefd2b746a077ef82f03f OBJ_TREE 37
|
130
|
+
```
|
131
|
+
|
132
|
+
# Development
|
133
|
+
|
134
|
+
## Testing
|
135
|
+
|
136
|
+
```
|
137
|
+
rake test
|
138
|
+
```
|
139
|
+
|
117
140
|
# References
|
118
141
|
- http://shafiul.github.io/gitbook/7_the_packfile.html
|
data/bin/packfile_reader
CHANGED
@@ -4,27 +4,29 @@ require 'optimist'
|
|
4
4
|
require 'packfile_reader'
|
5
5
|
|
6
6
|
opts = Optimist::options do
|
7
|
-
version "v0.0.
|
7
|
+
version "v0.0.2 (c) 2020 Robison WR Santos"
|
8
8
|
banner <<~EOS
|
9
9
|
This tool is used to parse and extract data from git packfiles without a .idx file.
|
10
10
|
By default, the script will only report the object ids, their type and their deflated sizes.
|
11
11
|
You can also make the script expand the content of the objects in the local directory or a directory
|
12
12
|
of your choice.
|
13
|
-
|
13
|
+
|
14
14
|
Usage:
|
15
15
|
packfile_reader [options] <packfile>
|
16
16
|
where [options] are:
|
17
17
|
EOS
|
18
|
-
|
18
|
+
|
19
19
|
opt :headers_only, 'Display only the headers of the packfile'
|
20
20
|
opt :no_headers, 'Skip displaying the headers of the packfile'
|
21
21
|
opt :filter_by_ids, 'Comma separated list of object ids to look for', :default => 'any', :short => '-i', :type => String
|
22
22
|
opt :expand_objects, 'Whether to expand objects data', :default => false, :short => '-e'
|
23
23
|
opt :output_dir, 'Directory to store the expanded objects', :default => '.', :short => '-o', :type => String
|
24
|
+
opt :window_size, 'Size of the window (in bytes) to use when looking for objects', :default => 10_000, :short => '-w', :type => Integer
|
25
|
+
opt :verbose, 'Log some debugging informaiton to stderr', :default => false, :short => '-v'
|
24
26
|
end
|
25
|
-
|
27
|
+
|
26
28
|
(puts "You must inform a single packfile, found #{ARGV.size}"; exit 1) if ARGV.size > 1 or ARGV.empty?
|
27
|
-
|
29
|
+
|
28
30
|
packfile = ARGV.first
|
29
31
|
(puts 'Packfile not found'; exit 2) unless File.exist?(packfile)
|
30
32
|
|
@@ -50,13 +52,14 @@ File.open(packfile, 'rb') do |f|
|
|
50
52
|
entries_processed = 0
|
51
53
|
limit = objects_to_find == :any ? header.n_entries : objects_to_find.size
|
52
54
|
(0...limit).each do
|
53
|
-
entry = PackfileReader::PackfileEntry.next_entry(f, objects_to_find) do |c,u,id|
|
55
|
+
entry = PackfileReader::PackfileEntry.next_entry(f, objects_to_find, opts[:verbose], opts[:window_size]) do |c,u,id|
|
54
56
|
if opts[:expand_objects]
|
55
57
|
dir = opts[:output_dir]
|
56
58
|
File.open(File.join(dir, "#{id}.txt"), 'w') {|o| o.write u}
|
57
59
|
end
|
58
60
|
end
|
59
61
|
|
60
|
-
|
62
|
+
timestamp = opts[:verbose] ? "[#{Time.now}] " : ''
|
63
|
+
puts "\u001b[0K#{timestamp}#{entry.id}\t#{entry.type}\t#{entry.size}"
|
61
64
|
end
|
62
|
-
end
|
65
|
+
end
|
@@ -16,8 +16,11 @@ module PackfileReader
|
|
16
16
|
]
|
17
17
|
|
18
18
|
# Accepts a block that will receive the compressed data, uncompressed data and
|
19
|
-
# the computed object id
|
20
|
-
|
19
|
+
# the computed object id. Window size is the amount of bytes to read at once
|
20
|
+
# while searching for the compressed data
|
21
|
+
def self.next_entry(packfile_io, objects_to_find=:any, log_verbose=false, window_size=10_000)
|
22
|
+
raise 'Object id must be a valid sha1' unless objects_to_find == :any || objects_to_find.all? {|id| /^[0-9a-f]{40}$/.match? id }
|
23
|
+
|
21
24
|
loop do
|
22
25
|
return nil if packfile_io.eof?
|
23
26
|
|
@@ -27,15 +30,19 @@ module PackfileReader
|
|
27
30
|
size = hunk.size
|
28
31
|
offset = hunk.offset_size
|
29
32
|
|
33
|
+
# Clean the current line before printing the message
|
34
|
+
$stderr.puts "\u001b[0K>>>> Processing new entry [#{type}]" if log_verbose
|
30
35
|
while hunk.continuation?
|
31
36
|
hunk = PackfileReader::Hunk.new_without_type(packfile_io)
|
32
37
|
size = (hunk.size << offset) | size # Data size is a combination of all hunk sizes
|
33
38
|
offset += hunk.offset_size
|
34
39
|
end
|
35
40
|
|
36
|
-
compressed_data, uncompressed_data = find_data(packfile_io)
|
41
|
+
compressed_data, uncompressed_data = find_data(packfile_io, log_verbose, window_size)
|
37
42
|
object_id = compute_id(type, size, uncompressed_data)
|
38
43
|
|
44
|
+
type = "#{type} [CORRUPTED] " if uncompressed_data.nil?
|
45
|
+
|
39
46
|
if objects_to_find == :any || objects_to_find.member?(object_id)
|
40
47
|
yield compressed_data, uncompressed_data, object_id if block_given?
|
41
48
|
return PackfileEntry.new(type, size, object_id)
|
@@ -44,23 +51,35 @@ module PackfileReader
|
|
44
51
|
end
|
45
52
|
|
46
53
|
private
|
47
|
-
def self.find_data(packfile_io)
|
54
|
+
def self.find_data(packfile_io, log_verbose, window_size)
|
48
55
|
data_header = find_zlib_data_header(packfile_io)
|
49
56
|
|
50
57
|
# since we don't have the index file that accompanies pack files
|
51
58
|
# we need to use brute force to find where the compressed data ends
|
52
|
-
# to do that, we go
|
59
|
+
# to do that, we read <window_size> bytes at a time and try to inflate the data, when
|
53
60
|
# that succeeds, we know we got it all
|
54
61
|
compressed_data = data_header
|
55
|
-
compressed_data += packfile_io.read(
|
62
|
+
compressed_data += packfile_io.read(window_size)
|
56
63
|
|
64
|
+
bytes_read = compressed_data.size
|
57
65
|
begin
|
58
66
|
uncompressed_data = Zlib.inflate(compressed_data)
|
59
67
|
rescue Zlib::BufError
|
60
|
-
compressed_data += packfile_io.read(
|
68
|
+
compressed_data += packfile_io.read(window_size)
|
69
|
+
bytes_read = compressed_data.size
|
70
|
+
$stderr.print " .... retrying on data gathering [#{bytes_read}] bytes read\r" if log_verbose
|
61
71
|
retry
|
72
|
+
rescue Zlib::DataError
|
73
|
+
uncompressed_data = nil
|
62
74
|
end
|
63
75
|
|
76
|
+
$stderr.puts " .... data read [#{bytes_read}] bytes" if log_verbose
|
77
|
+
$stderr.print " .... repositioning file pointer to the end of current compressed data\r" if log_verbose
|
78
|
+
compressed_data = Zlib.deflate(uncompressed_data) if uncompressed_data
|
79
|
+
|
80
|
+
# reposition the file pointer to end of compressed data
|
81
|
+
packfile_io.seek(packfile_io.pos - (bytes_read - compressed_data.size))
|
82
|
+
|
64
83
|
[compressed_data, uncompressed_data]
|
65
84
|
end
|
66
85
|
|
@@ -92,7 +111,7 @@ module PackfileReader
|
|
92
111
|
end
|
93
112
|
|
94
113
|
return '000' if header_type.empty?
|
95
|
-
|
114
|
+
|
96
115
|
header = "#{header_type} #{size}\0"
|
97
116
|
store = "#{header}#{uncompressed_data}"
|
98
117
|
Digest::SHA1.hexdigest(store)
|
@@ -104,4 +123,4 @@ module PackfileReader
|
|
104
123
|
@id = id
|
105
124
|
end
|
106
125
|
end
|
107
|
-
end
|
126
|
+
end
|
metadata
CHANGED
@@ -1,11 +1,11 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: packfile_reader
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.
|
4
|
+
version: 0.0.3
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Robison WR Santos
|
8
|
-
autorequire:
|
8
|
+
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
11
|
date: 2020-11-30 00:00:00.000000000 Z
|
@@ -16,14 +16,14 @@ dependencies:
|
|
16
16
|
requirements:
|
17
17
|
- - "~>"
|
18
18
|
- !ruby/object:Gem::Version
|
19
|
-
version: 3.0
|
19
|
+
version: 3.1.0
|
20
20
|
type: :runtime
|
21
21
|
prerelease: false
|
22
22
|
version_requirements: !ruby/object:Gem::Requirement
|
23
23
|
requirements:
|
24
24
|
- - "~>"
|
25
25
|
- !ruby/object:Gem::Version
|
26
|
-
version: 3.0
|
26
|
+
version: 3.1.0
|
27
27
|
description: A tool to parse git packfile when idx files are not present
|
28
28
|
email: ''
|
29
29
|
executables:
|
@@ -41,7 +41,7 @@ homepage: https://github.com/robisonsantos/packfile_reader
|
|
41
41
|
licenses:
|
42
42
|
- MIT
|
43
43
|
metadata: {}
|
44
|
-
post_install_message:
|
44
|
+
post_install_message:
|
45
45
|
rdoc_options: []
|
46
46
|
require_paths:
|
47
47
|
- lib
|
@@ -56,8 +56,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
56
56
|
- !ruby/object:Gem::Version
|
57
57
|
version: '0'
|
58
58
|
requirements: []
|
59
|
-
rubygems_version: 3.
|
60
|
-
signing_key:
|
59
|
+
rubygems_version: 3.2.33
|
60
|
+
signing_key:
|
61
61
|
specification_version: 4
|
62
62
|
summary: Parses git packfiles without the help of idx companion
|
63
63
|
test_files: []
|