packfile_reader 0.0.2 → 0.0.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: ea469cdf74d5f5c3d95fc87de089c9ffe622f159200003d423e5c8d00e273ad5
4
- data.tar.gz: aeea839368939c2b328c737c67acf987a94f67c1e3bfb5a6bd5a84c8afcf2d97
3
+ metadata.gz: 8eb4f8bc34379b5872964c517b718bbe1d5ee2488f02bc18983f3678be2b83da
4
+ data.tar.gz: 17effd8b76eca4b3bd7902b96b1a0c7494bdd977dbd11d9482ea31798cff5742
5
5
  SHA512:
6
- metadata.gz: e5b3980bd08e06c19e8ca9ab9d46104b1a8282cf2e54129c3f63ee204eb0d477ea8b43316c361a70bba3a788c4d2fbfc7c66ea1812aa32581255f181ac0e7492
7
- data.tar.gz: 95e3c21fa8c71e6bae77018fb18472055e0ceab34490103a683787e3f189615499eda5dadb850d28ad0c242400ca853f6acb1dfdaffb3445b3c705c17f395ade
6
+ metadata.gz: 66fda3747bc5d05ad5ae7ccd529831f5bf5491e4558fd1147bd562b4a7d9d157a9829d5fa5c5d9ae53dae36e66ecc8f4dec790f7eed7e35835cce44510fec0db
7
+ data.tar.gz: 7767f4c076cbfcafb99c4fd5219684eeba31b2081a9d86fcc4e9f8328159df86ee4950b497f8cb29148d7691803105a6e1e4e73f337df6800d9a1280b24073b7
data/README.md CHANGED
@@ -6,7 +6,7 @@ Git packs up several of "loose" objects into a single binary file called a “pa
6
6
 
7
7
  The `.idx` file contains offsets for all the objects in the `.pack` file, so it is easier to find the content you are looking for on the packfile.
8
8
 
9
- When we have both files, we can use `git verify-pack' command to read the content and metadata about the objects in the packfile, but sometimes we only have the `.pack` file, and in this case `git` is not really helpful.
9
+ When we have both files, we can use the `git verify-pack` command to read the content and metadata about the objects in the packfile, but sometimes we only have the `.pack` file, and in this case `git` is not really helpful.
10
10
 
11
11
  ![packfile](packfile-format.png?raw=true "Packfile Format")
12
12
 
@@ -39,10 +39,10 @@ where [options] are:
39
39
  -i, --filter-by-ids=<s> Comma separated list of object ids to look for (default: any)
40
40
  -e, --expand-objects Whether to expand objects data
41
41
  -o, --output-dir=<s> Directory to store the expanded objects (default: .)
42
+ -w, --window-size=<i> Size of the window (in bytes) to use when looking for objects (default: 10000)
42
43
  -v, --verbose Log some debugging informaiton to stderr
43
44
  -r, --version Print version and exit
44
45
  -l, --help Show this message
45
-
46
46
  ```
47
47
 
48
48
  ## Example:
@@ -128,5 +128,14 @@ packfile_reader --no-headers -verbose pack.sample
128
128
  [2020-12-01 21:24:43 -0800] 5297f8f21ad868d9eb6a9c01ad09a9d186177047 OBJ_BLOB 10
129
129
  [2020-12-01 21:24:44 -0800] bf195faf9d23ce0615cdefd2b746a077ef82f03f OBJ_TREE 37
130
130
  ```
131
+
132
+ # Development
133
+
134
+ ## Testing
135
+
136
+ ```
137
+ rake test
138
+ ```
139
+
131
140
  # References
132
141
  - http://shafiul.github.io/gitbook/7_the_packfile.html
data/bin/packfile_reader CHANGED
@@ -4,7 +4,7 @@ require 'optimist'
4
4
  require 'packfile_reader'
5
5
 
6
6
  opts = Optimist::options do
7
- version "v0.0.1 (c) 2020 Robison WR Santos"
7
+ version "v0.0.2 (c) 2020 Robison WR Santos"
8
8
  banner <<~EOS
9
9
  This tool is used to parse and extract data from git packfiles without a .idx file.
10
10
  By default, the script will only report the object ids, their type and their deflated sizes.
@@ -21,6 +21,7 @@ opts = Optimist::options do
21
21
  opt :filter_by_ids, 'Comma separated list of object ids to look for', :default => 'any', :short => '-i', :type => String
22
22
  opt :expand_objects, 'Whether to expand objects data', :default => false, :short => '-e'
23
23
  opt :output_dir, 'Directory to store the expanded objects', :default => '.', :short => '-o', :type => String
24
+ opt :window_size, 'Size of the window (in bytes) to use when looking for objects', :default => 10_000, :short => '-w', :type => Integer
24
25
  opt :verbose, 'Log some debugging informaiton to stderr', :default => false, :short => '-v'
25
26
  end
26
27
 
@@ -51,7 +52,7 @@ File.open(packfile, 'rb') do |f|
51
52
  entries_processed = 0
52
53
  limit = objects_to_find == :any ? header.n_entries : objects_to_find.size
53
54
  (0...limit).each do
54
- entry = PackfileReader::PackfileEntry.next_entry(f, objects_to_find, opts[:verbose]) do |c,u,id|
55
+ entry = PackfileReader::PackfileEntry.next_entry(f, objects_to_find, opts[:verbose], opts[:window_size]) do |c,u,id|
55
56
  if opts[:expand_objects]
56
57
  dir = opts[:output_dir]
57
58
  File.open(File.join(dir, "#{id}.txt"), 'w') {|o| o.write u}
@@ -16,8 +16,9 @@ module PackfileReader
16
16
  ]
17
17
 
18
18
  # Accepts a block that will receive the compressed data, uncompressed data and
19
- # the computed object id
20
- def self.next_entry(packfile_io, objects_to_find=:any, log_verbose=false)
19
+ # the computed object id. Window size is the amount of bytes to read at once
20
+ # while searching for the compressed data
21
+ def self.next_entry(packfile_io, objects_to_find=:any, log_verbose=false, window_size=10_000)
21
22
  raise 'Object id must be a valid sha1' unless objects_to_find == :any || objects_to_find.all? {|id| /^[0-9a-f]{40}$/.match? id }
22
23
 
23
24
  loop do
@@ -37,7 +38,7 @@ module PackfileReader
37
38
  offset += hunk.offset_size
38
39
  end
39
40
 
40
- compressed_data, uncompressed_data = find_data(packfile_io, log_verbose)
41
+ compressed_data, uncompressed_data = find_data(packfile_io, log_verbose, window_size)
41
42
  object_id = compute_id(type, size, uncompressed_data)
42
43
 
43
44
  type = "#{type} [CORRUPTED] " if uncompressed_data.nil?
@@ -50,28 +51,35 @@ module PackfileReader
50
51
  end
51
52
 
52
53
  private
53
- def self.find_data(packfile_io, log_verbose)
54
+ def self.find_data(packfile_io, log_verbose, window_size)
54
55
  data_header = find_zlib_data_header(packfile_io)
55
56
 
56
57
  # since we don't have the index file that accompanies pack files
57
58
  # we need to use brute force to find where the compressed data ends
58
- # to do that, we go byte by byte and try to deflate the data, when
59
+ # to do that, we read <window_size> bytes at a time and try to inflate the data; when
59
60
  # that succeedes, we know we go it all
60
61
  compressed_data = data_header
61
- compressed_data += packfile_io.read(1)
62
+ compressed_data += packfile_io.read(window_size)
62
63
 
63
- bytes_read = 1
64
+ bytes_read = compressed_data.size
64
65
  begin
65
66
  uncompressed_data = Zlib.inflate(compressed_data)
66
67
  rescue Zlib::BufError
67
- compressed_data += packfile_io.read(1)
68
- bytes_read += 1
68
+ compressed_data += packfile_io.read(window_size)
69
+ bytes_read = compressed_data.size
69
70
  $stderr.print " .... retrying on data gathering [#{bytes_read}] bytes read\r" if log_verbose
70
71
  retry
71
72
  rescue Zlib::DataError
72
73
  uncompressed_data = nil
73
74
  end
74
75
 
76
+ $stderr.puts " .... data read [#{bytes_read}] bytes" if log_verbose
77
+ $stderr.print " .... repositioning file pointer to the end of current compressed data\r" if log_verbose
78
+ compressed_data = Zlib.deflate(uncompressed_data) if uncompressed_data
79
+
80
+ # reposition the file pointer to end of compressed data
81
+ packfile_io.seek(packfile_io.pos - (bytes_read - compressed_data.size))
82
+
75
83
  [compressed_data, uncompressed_data]
76
84
  end
77
85
 
metadata CHANGED
@@ -1,11 +1,11 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: packfile_reader
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.2
4
+ version: 0.0.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - Robison WR Santos
8
- autorequire:
8
+ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
11
  date: 2020-11-30 00:00:00.000000000 Z
@@ -16,14 +16,14 @@ dependencies:
16
16
  requirements:
17
17
  - - "~>"
18
18
  - !ruby/object:Gem::Version
19
- version: 3.0.1
19
+ version: 3.1.0
20
20
  type: :runtime
21
21
  prerelease: false
22
22
  version_requirements: !ruby/object:Gem::Requirement
23
23
  requirements:
24
24
  - - "~>"
25
25
  - !ruby/object:Gem::Version
26
- version: 3.0.1
26
+ version: 3.1.0
27
27
  description: A tool to parse git packfile when idx files are not present
28
28
  email: ''
29
29
  executables:
@@ -41,7 +41,7 @@ homepage: https://github.com/robisonsantos/packfile_reader
41
41
  licenses:
42
42
  - MIT
43
43
  metadata: {}
44
- post_install_message:
44
+ post_install_message:
45
45
  rdoc_options: []
46
46
  require_paths:
47
47
  - lib
@@ -56,8 +56,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
56
56
  - !ruby/object:Gem::Version
57
57
  version: '0'
58
58
  requirements: []
59
- rubygems_version: 3.0.3
60
- signing_key:
59
+ rubygems_version: 3.2.33
60
+ signing_key:
61
61
  specification_version: 4
62
62
  summary: Parses git packfiles without the help of idx companion
63
63
  test_files: []