packfile_reader 0.0.2 → 0.0.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +11 -2
- data/bin/packfile_reader +3 -2
- data/lib/packfile_reader/packfile_entry.rb +17 -9
- metadata +7 -7
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 8eb4f8bc34379b5872964c517b718bbe1d5ee2488f02bc18983f3678be2b83da
|
4
|
+
data.tar.gz: 17effd8b76eca4b3bd7902b96b1a0c7494bdd977dbd11d9482ea31798cff5742
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 66fda3747bc5d05ad5ae7ccd529831f5bf5491e4558fd1147bd562b4a7d9d157a9829d5fa5c5d9ae53dae36e66ecc8f4dec790f7eed7e35835cce44510fec0db
|
7
|
+
data.tar.gz: 7767f4c076cbfcafb99c4fd5219684eeba31b2081a9d86fcc4e9f8328159df86ee4950b497f8cb29148d7691803105a6e1e4e73f337df6800d9a1280b24073b7
|
data/README.md
CHANGED
@@ -6,7 +6,7 @@ Git packs up several of "loose" objects into a single binary file called a “pa
|
|
6
6
|
|
7
7
|
The `.idx` file contains offsets for all the objects in the `.pack` file, so it is easier to find the content you are looking for in the packfile.
|
8
8
|
|
9
|
-
When we have both files, we can use `git verify-pack
|
9
|
+
When we have both files, we can use the `git verify-pack` command to read the content and metadata about the objects in the packfile, but sometimes we only have the `.pack` file, and in this case `git` is not really helpful.
|
10
10
|
|
11
11
|

|
12
12
|
|
@@ -39,10 +39,10 @@ where [options] are:
|
|
39
39
|
-i, --filter-by-ids=<s> Comma separated list of object ids to look for (default: any)
|
40
40
|
-e, --expand-objects Whether to expand objects data
|
41
41
|
-o, --output-dir=<s> Directory to store the expanded objects (default: .)
|
42
|
+
-w, --window-size=<i> Size of the window (in bytes) to use when looking for objects (default: 10000)
|
42
43
|
-v, --verbose Log some debugging informaiton to stderr
|
43
44
|
-r, --version Print version and exit
|
44
45
|
-l, --help Show this message
|
45
|
-
|
46
46
|
```
|
47
47
|
|
48
48
|
## Example:
|
@@ -128,5 +128,14 @@ packfile_reader --no-headers -verbose pack.sample
|
|
128
128
|
[2020-12-01 21:24:43 -0800] 5297f8f21ad868d9eb6a9c01ad09a9d186177047 OBJ_BLOB 10
|
129
129
|
[2020-12-01 21:24:44 -0800] bf195faf9d23ce0615cdefd2b746a077ef82f03f OBJ_TREE 37
|
130
130
|
```
|
131
|
+
|
132
|
+
# Development
|
133
|
+
|
134
|
+
## Testing
|
135
|
+
|
136
|
+
```
|
137
|
+
rake test
|
138
|
+
```
|
139
|
+
|
131
140
|
# References
|
132
141
|
- http://shafiul.github.io/gitbook/7_the_packfile.html
|
data/bin/packfile_reader
CHANGED
@@ -4,7 +4,7 @@ require 'optimist'
|
|
4
4
|
require 'packfile_reader'
|
5
5
|
|
6
6
|
opts = Optimist::options do
|
7
|
-
version "v0.0.
|
7
|
+
version "v0.0.2 (c) 2020 Robison WR Santos"
|
8
8
|
banner <<~EOS
|
9
9
|
This tool is used to parse and extract data from git packfiles without a .idx file.
|
10
10
|
By default, the script will only report the object ids, their type and their deflated sizes.
|
@@ -21,6 +21,7 @@ opts = Optimist::options do
|
|
21
21
|
opt :filter_by_ids, 'Comma separated list of object ids to look for', :default => 'any', :short => '-i', :type => String
|
22
22
|
opt :expand_objects, 'Whether to expand objects data', :default => false, :short => '-e'
|
23
23
|
opt :output_dir, 'Directory to store the expanded objects', :default => '.', :short => '-o', :type => String
|
24
|
+
opt :window_size, 'Size of the window (in bytes) to use when looking for objects', :default => 10_000, :short => '-w', :type => Integer
|
24
25
|
opt :verbose, 'Log some debugging informaiton to stderr', :default => false, :short => '-v'
|
25
26
|
end
|
26
27
|
|
@@ -51,7 +52,7 @@ File.open(packfile, 'rb') do |f|
|
|
51
52
|
entries_processed = 0
|
52
53
|
limit = objects_to_find == :any ? header.n_entries : objects_to_find.size
|
53
54
|
(0...limit).each do
|
54
|
-
entry = PackfileReader::PackfileEntry.next_entry(f, objects_to_find, opts[:verbose]) do |c,u,id|
|
55
|
+
entry = PackfileReader::PackfileEntry.next_entry(f, objects_to_find, opts[:verbose], opts[:window_size]) do |c,u,id|
|
55
56
|
if opts[:expand_objects]
|
56
57
|
dir = opts[:output_dir]
|
57
58
|
File.open(File.join(dir, "#{id}.txt"), 'w') {|o| o.write u}
|
@@ -16,8 +16,9 @@ module PackfileReader
|
|
16
16
|
]
|
17
17
|
|
18
18
|
# Accepts a block that will receive the compressed data, uncompressed data and
|
19
|
-
# the computed object id
|
20
|
-
|
19
|
+
# the computed object id. Window size is the amount of bytes to read at once
|
20
|
+
# while searching for the compressed data
|
21
|
+
def self.next_entry(packfile_io, objects_to_find=:any, log_verbose=false, window_size=10_000)
|
21
22
|
raise 'Object id must be a valid sha1' unless objects_to_find == :any || objects_to_find.all? {|id| /^[0-9a-f]{40}$/.match? id }
|
22
23
|
|
23
24
|
loop do
|
@@ -37,7 +38,7 @@ module PackfileReader
|
|
37
38
|
offset += hunk.offset_size
|
38
39
|
end
|
39
40
|
|
40
|
-
compressed_data, uncompressed_data = find_data(packfile_io, log_verbose)
|
41
|
+
compressed_data, uncompressed_data = find_data(packfile_io, log_verbose, window_size)
|
41
42
|
object_id = compute_id(type, size, uncompressed_data)
|
42
43
|
|
43
44
|
type = "#{type} [CORRUPTED] " if uncompressed_data.nil?
|
@@ -50,28 +51,35 @@ module PackfileReader
|
|
50
51
|
end
|
51
52
|
|
52
53
|
private
|
53
|
-
def self.find_data(packfile_io, log_verbose)
|
54
|
+
def self.find_data(packfile_io, log_verbose, window_size)
|
54
55
|
data_header = find_zlib_data_header(packfile_io)
|
55
56
|
|
56
57
|
# since we don't have the index file that accompanies pack files
|
57
58
|
# we need to use brute force to find where the compressed data ends
|
58
|
-
# to do that, we go
|
59
|
+
# to do that, we go <window_size> bytes by <window_size> bytes and try to inflate the data, when
|
59
60
|
# that succeeds, we know we got it all
|
60
61
|
compressed_data = data_header
|
61
|
-
compressed_data += packfile_io.read(
|
62
|
+
compressed_data += packfile_io.read(window_size)
|
62
63
|
|
63
|
-
bytes_read =
|
64
|
+
bytes_read = compressed_data.size
|
64
65
|
begin
|
65
66
|
uncompressed_data = Zlib.inflate(compressed_data)
|
66
67
|
rescue Zlib::BufError
|
67
|
-
compressed_data += packfile_io.read(
|
68
|
-
bytes_read
|
68
|
+
compressed_data += packfile_io.read(window_size)
|
69
|
+
bytes_read = compressed_data.size
|
69
70
|
$stderr.print " .... retrying on data gathering [#{bytes_read}] bytes read\r" if log_verbose
|
70
71
|
retry
|
71
72
|
rescue Zlib::DataError
|
72
73
|
uncompressed_data = nil
|
73
74
|
end
|
74
75
|
|
76
|
+
$stderr.puts " .... data read [#{bytes_read}] bytes" if log_verbose
|
77
|
+
$stderr.print " .... repositioning file pointer to the end of current compressed data\r" if log_verbose
|
78
|
+
compressed_data = Zlib.deflate(uncompressed_data) if uncompressed_data
|
79
|
+
|
80
|
+
# reposition the file pointer to end of compressed data
|
81
|
+
packfile_io.seek(packfile_io.pos - (bytes_read - compressed_data.size))
|
82
|
+
|
75
83
|
[compressed_data, uncompressed_data]
|
76
84
|
end
|
77
85
|
|
metadata
CHANGED
@@ -1,11 +1,11 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: packfile_reader
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.
|
4
|
+
version: 0.0.3
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Robison WR Santos
|
8
|
-
autorequire:
|
8
|
+
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
11
|
date: 2020-11-30 00:00:00.000000000 Z
|
@@ -16,14 +16,14 @@ dependencies:
|
|
16
16
|
requirements:
|
17
17
|
- - "~>"
|
18
18
|
- !ruby/object:Gem::Version
|
19
|
-
version: 3.0
|
19
|
+
version: 3.1.0
|
20
20
|
type: :runtime
|
21
21
|
prerelease: false
|
22
22
|
version_requirements: !ruby/object:Gem::Requirement
|
23
23
|
requirements:
|
24
24
|
- - "~>"
|
25
25
|
- !ruby/object:Gem::Version
|
26
|
-
version: 3.0
|
26
|
+
version: 3.1.0
|
27
27
|
description: A tool to parse git packfile when idx files are not present
|
28
28
|
email: ''
|
29
29
|
executables:
|
@@ -41,7 +41,7 @@ homepage: https://github.com/robisonsantos/packfile_reader
|
|
41
41
|
licenses:
|
42
42
|
- MIT
|
43
43
|
metadata: {}
|
44
|
-
post_install_message:
|
44
|
+
post_install_message:
|
45
45
|
rdoc_options: []
|
46
46
|
require_paths:
|
47
47
|
- lib
|
@@ -56,8 +56,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
56
56
|
- !ruby/object:Gem::Version
|
57
57
|
version: '0'
|
58
58
|
requirements: []
|
59
|
-
rubygems_version: 3.
|
60
|
-
signing_key:
|
59
|
+
rubygems_version: 3.2.33
|
60
|
+
signing_key:
|
61
61
|
specification_version: 4
|
62
62
|
summary: Parses git packfiles without the help of idx companion
|
63
63
|
test_files: []
|