snappy-ruby 1.0.0 → 1.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CLAUDE.md +117 -0
- data/ext/snappy/extconf.rb +20 -70
- data/ext/snappy/snappy-src/snappy-stubs-public.h +5 -2
- data/ext/snappy/snappy-src/snappy_benchmark.cc +398 -0
- data/ext/snappy/snappy-src/snappy_compress_fuzzer.cc +64 -0
- data/ext/snappy/snappy-src/snappy_uncompress_fuzzer.cc +58 -0
- data/ext/snappy/snappy-src/testdata/alice29.txt +3609 -0
- data/ext/snappy/snappy-src/testdata/asyoulik.txt +4122 -0
- data/ext/snappy/snappy-src/testdata/baddata1.snappy +0 -0
- data/ext/snappy/snappy-src/testdata/baddata2.snappy +0 -0
- data/ext/snappy/snappy-src/testdata/baddata3.snappy +0 -0
- data/ext/snappy/snappy-src/testdata/fireworks.jpeg +0 -0
- data/ext/snappy/snappy-src/testdata/geo.protodata +0 -0
- data/ext/snappy/snappy-src/testdata/html +1 -0
- data/ext/snappy/snappy-src/testdata/html_x_4 +1 -0
- data/ext/snappy/snappy-src/testdata/kppkn.gtb +0 -0
- data/ext/snappy/snappy-src/testdata/lcet10.txt +7519 -0
- data/ext/snappy/snappy-src/testdata/paper-100k.pdf +600 -2
- data/ext/snappy/snappy-src/testdata/plrabn12.txt +10699 -0
- data/ext/snappy/snappy-src/testdata/urls.10K +10000 -0
- data/ext/snappy/snappy_ext.cpp +218 -0
- data/lib/snappy/snappy.so +0 -0
- data/lib/snappy/version.rb +3 -0
- data/lib/snappy.rb +92 -2
- metadata +35 -31
- data/LICENSE +0 -21
- data/README.md +0 -77
- data/Rakefile +0 -12
- data/ext/snappy/snappy-src/config.h +0 -78
- data/ext/snappy/snappy.c +0 -282
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 814ece9efc09833256f299bcef7a97d36524ab0cbdd1c4cde3f5e38960ac79eb
|
|
4
|
+
data.tar.gz: 42d07303e3ba0b508bb4c15ef41bf7408bbd3ea3fe662700a9bbdf61dee2bf0b
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 9498f0ccf790e7ccdaeaec8e75f1fadf6d21f679f1bf06b1fb1f26e20e9b27b0ab17886d24a7cc1b6f0812e208f816bb46566002df41282337cbffa7c283d8ef
|
|
7
|
+
data.tar.gz: 074c38a93b2d7f99c5353dabffaeba4e419aeb8f6eb529243f73213564135730b2adf7e4cbfefa7a22a4040193692f02f7e9e8ab9165b21569a1a5f22df3d717
|
data/CLAUDE.md
ADDED
|
@@ -0,0 +1,117 @@
|
|
|
1
|
+
# CLAUDE.md
|
|
2
|
+
|
|
3
|
+
This file provides guidance to Claude Code (claude.ai/code) when working with code in this repository.
|
|
4
|
+
|
|
5
|
+
## Project Overview
|
|
6
|
+
|
|
7
|
+
snappy-ruby is a Ruby gem that provides bindings to Google's Snappy compression library. The gem bundles the Snappy C++ source code and provides a complete Ruby interface for all external functions defined in snappy.h.
|
|
8
|
+
|
|
9
|
+
## Repository Structure
|
|
10
|
+
|
|
11
|
+
- `ext/snappy/` - Native C++ extension code that wraps the Snappy library
|
|
12
|
+
- `snappy-src/` - Bundled Google Snappy C++ source code
|
|
13
|
+
- `extconf.rb` - Configuration script for building the native extension
|
|
14
|
+
- `lib/snappy/` - Ruby code and version information
|
|
15
|
+
- `test/` - Test suite using Minitest or RSpec
|
|
16
|
+
- `snappy-ruby.gemspec` - Gem specification file
|
|
17
|
+
|
|
18
|
+
## Development Commands
|
|
19
|
+
|
|
20
|
+
### Building the Extension
|
|
21
|
+
|
|
22
|
+
```bash
|
|
23
|
+
# Build the native extension
|
|
24
|
+
cd ext/snappy
|
|
25
|
+
ruby extconf.rb
|
|
26
|
+
make
|
|
27
|
+
cd ../..
|
|
28
|
+
|
|
29
|
+
# Or use rake to build
|
|
30
|
+
rake compile
|
|
31
|
+
```
|
|
32
|
+
|
|
33
|
+
### Running Tests
|
|
34
|
+
|
|
35
|
+
```bash
|
|
36
|
+
# Run the full test suite
|
|
37
|
+
rake test
|
|
38
|
+
|
|
39
|
+
# Run a single test file
|
|
40
|
+
ruby -Ilib:test test/test_snappy.rb
|
|
41
|
+
|
|
42
|
+
# Run a specific test method
|
|
43
|
+
ruby -Ilib:test test/test_snappy.rb -n test_compress
|
|
44
|
+
```
|
|
45
|
+
|
|
46
|
+
### Building the Gem
|
|
47
|
+
|
|
48
|
+
```bash
|
|
49
|
+
# Build the gem package
|
|
50
|
+
gem build snappy-ruby.gemspec
|
|
51
|
+
|
|
52
|
+
# Install the gem locally for testing
|
|
53
|
+
gem install ./snappy-ruby-1.0.1.gem
|
|
54
|
+
|
|
55
|
+
# Clean build artifacts
|
|
56
|
+
rake clean
|
|
57
|
+
rake clobber
|
|
58
|
+
```
|
|
59
|
+
|
|
60
|
+
### Git Operations
|
|
61
|
+
|
|
62
|
+
Git is configured with:
|
|
63
|
+
- User: John Greninger
|
|
64
|
+
- Email: jgreninger@hotmail.com
|
|
65
|
+
|
|
66
|
+
## Architecture
|
|
67
|
+
|
|
68
|
+
### Native Extension Layer
|
|
69
|
+
|
|
70
|
+
The C++ extension (`ext/snappy/snappy_ext.cpp`) provides Ruby method bindings for all Snappy operations:
|
|
71
|
+
- Compression: `Snappy.compress(data)`
|
|
72
|
+
- Decompression: `Snappy.uncompress(data)` / `Snappy.decompress(data)`
|
|
73
|
+
- Raw compression: `Snappy.raw_compress(data)`
|
|
74
|
+
- Raw decompression: `Snappy.raw_uncompress(data)`
|
|
75
|
+
- Utilities: `Snappy.max_compressed_length(source_length)`, `Snappy.uncompressed_length(compressed)`
|
|
76
|
+
- Validation: `Snappy.valid?(compressed_data)`
|
|
77
|
+
|
|
78
|
+
All snappy.h external interfaces are mapped to corresponding Ruby methods.
|
|
79
|
+
|
|
80
|
+
### Ruby Layer
|
|
81
|
+
|
|
82
|
+
The Ruby layer (`lib/snappy.rb`) provides:
|
|
83
|
+
- Module structure and organization
|
|
84
|
+
- Version information
|
|
85
|
+
- High-level convenience methods
|
|
86
|
+
- Error handling and type validation
|
|
87
|
+
|
|
88
|
+
### Bundled Source Code
|
|
89
|
+
|
|
90
|
+
The Snappy C++ source code is bundled in `ext/snappy/snappy-src/`. During gem installation, the extension is compiled against this bundled source, ensuring the gem works without requiring a system-level Snappy installation.
|
|
91
|
+
|
|
92
|
+
## Testing Strategy
|
|
93
|
+
|
|
94
|
+
Tests are organized to cover:
|
|
95
|
+
1. Basic compression/decompression operations
|
|
96
|
+
2. All snappy.h interface methods
|
|
97
|
+
3. Edge cases (empty data, invalid data, large data)
|
|
98
|
+
4. Error handling and validation
|
|
99
|
+
5. Round-trip compression/decompression verification
|
|
100
|
+
6. Raw compression API
|
|
101
|
+
7. Utility functions (length calculations, validation)
|
|
102
|
+
|
|
103
|
+
## Gem Publishing Configuration
|
|
104
|
+
|
|
105
|
+
The gem is configured for RubyGems.org publication:
|
|
106
|
+
- Version: 1.0.1
|
|
107
|
+
- Name: snappy-ruby
|
|
108
|
+
- Required Ruby version and dependencies specified in gemspec
|
|
109
|
+
- Native extension properly configured for compilation during installation
|
|
110
|
+
|
|
111
|
+
## Important Implementation Details
|
|
112
|
+
|
|
113
|
+
- The native extension must properly handle memory management for C++ objects
|
|
114
|
+
- Ruby strings need proper encoding handling (BINARY/ASCII-8BIT for compressed data)
|
|
115
|
+
- Error handling must convert C++ exceptions to Ruby exceptions
|
|
116
|
+
- Buffer size calculations must use Snappy's provided functions to avoid overflows
|
|
117
|
+
- The gemspec must include all necessary files (lib, ext, bundled source) in the package
|
data/ext/snappy/extconf.rb
CHANGED
|
@@ -1,83 +1,33 @@
|
|
|
1
|
-
require
|
|
1
|
+
require 'mkmf'
|
|
2
2
|
|
|
3
|
-
#
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
abort <<-ERROR
|
|
8
|
-
|
|
9
|
-
ERROR: Snappy source directory not found at #{SNAPPY_DIR}
|
|
10
|
-
|
|
11
|
-
The Snappy library source should be bundled with this gem.
|
|
12
|
-
Please ensure the gem was installed correctly.
|
|
13
|
-
|
|
14
|
-
ERROR
|
|
15
|
-
end
|
|
3
|
+
# Set C++ compiler
|
|
4
|
+
CONFIG['CC'] = 'g++'
|
|
5
|
+
CONFIG['CXX'] = 'g++'
|
|
6
|
+
CONFIG['LDSHARED'] = 'g++ -shared'
|
|
16
7
|
|
|
17
8
|
# Add Snappy source directory to include path
|
|
18
|
-
$INCFLAGS << " -I
|
|
19
|
-
|
|
20
|
-
# Add C++11 support (required by Snappy)
|
|
21
|
-
$CXXFLAGS << " -std=c++11 -fno-exceptions -fno-rtti"
|
|
22
|
-
|
|
23
|
-
# Link against C++ standard library
|
|
24
|
-
$LIBS << " -lstdc++"
|
|
25
|
-
|
|
26
|
-
# Define HAVE_CONFIG_H to use our config.h
|
|
27
|
-
$CPPFLAGS << " -DHAVE_CONFIG_H"
|
|
9
|
+
$INCFLAGS << " -I$(srcdir)/snappy-src"
|
|
28
10
|
|
|
29
|
-
#
|
|
30
|
-
$CXXFLAGS << " -
|
|
11
|
+
# Add C++11 standard flag
|
|
12
|
+
$CXXFLAGS << " -std=c++11"
|
|
31
13
|
|
|
32
|
-
#
|
|
33
|
-
|
|
34
|
-
# The Snappy library (snappy-lib.cc) will compile to snappy-lib.o
|
|
35
|
-
snappy_lib_link = File.join(SNAPPY_DIR, "snappy-lib.cc")
|
|
36
|
-
snappy_lib_src = File.join(SNAPPY_DIR, "snappy.cc")
|
|
14
|
+
# Add optimization flags
|
|
15
|
+
$CXXFLAGS << " -O2"
|
|
37
16
|
|
|
38
|
-
#
|
|
39
|
-
|
|
40
|
-
File.symlink("snappy.cc", snappy_lib_link)
|
|
17
|
+
# Add VPATH for source files
|
|
18
|
+
$VPATH << "$(srcdir)/snappy-src"
|
|
41
19
|
|
|
42
|
-
#
|
|
20
|
+
# Source files to compile (without path prefix, since we're using VPATH)
|
|
43
21
|
snappy_sources = %w[
|
|
44
|
-
snappy
|
|
45
|
-
snappy-c.cc
|
|
22
|
+
snappy.cc
|
|
46
23
|
snappy-sinksource.cc
|
|
47
24
|
snappy-stubs-internal.cc
|
|
25
|
+
snappy-c.cc
|
|
48
26
|
]
|
|
49
27
|
|
|
50
|
-
#
|
|
51
|
-
$
|
|
52
|
-
$
|
|
53
|
-
|
|
54
|
-
# Initialize $objs if it's not already initialized
|
|
55
|
-
$objs ||= []
|
|
56
|
-
|
|
57
|
-
# First, add the main Ruby binding (snappy.c -> snappy.o)
|
|
58
|
-
# This file is in ext/snappy/, not in snappy-src/
|
|
59
|
-
$objs << "snappy.o"
|
|
60
|
-
|
|
61
|
-
# Add Snappy library source files
|
|
62
|
-
snappy_sources.each do |src|
|
|
63
|
-
$objs << src.sub(/\.cc$/, ".o")
|
|
64
|
-
end
|
|
65
|
-
|
|
66
|
-
# Verify all required source files exist
|
|
67
|
-
missing_files = snappy_sources.reject { |src| File.exist?(File.join(SNAPPY_DIR, src)) || File.symlink?(File.join(SNAPPY_DIR, src)) }
|
|
68
|
-
unless missing_files.empty?
|
|
69
|
-
abort "ERROR: Missing Snappy source files:\n #{missing_files.join("\n ")}"
|
|
70
|
-
end
|
|
71
|
-
|
|
72
|
-
puts "=" * 70
|
|
73
|
-
puts "Building snappy-ruby with bundled Snappy library"
|
|
74
|
-
puts "=" * 70
|
|
75
|
-
puts "Snappy source: #{SNAPPY_DIR}"
|
|
76
|
-
puts "Include flags: #{$INCFLAGS}"
|
|
77
|
-
puts "C++ flags: #{$CXXFLAGS}"
|
|
78
|
-
puts "Main Ruby binding: snappy.c"
|
|
79
|
-
puts "Compiling Snappy sources:"
|
|
80
|
-
snappy_sources.each { |src| puts " - #{src}" }
|
|
81
|
-
puts "=" * 70
|
|
28
|
+
# Set the object files list
|
|
29
|
+
$objs = snappy_sources.map { |src| src.sub(/\.cc$/, '.o') }
|
|
30
|
+
$objs << 'snappy_ext.o'
|
|
82
31
|
|
|
83
|
-
|
|
32
|
+
# Create Makefile
|
|
33
|
+
create_makefile('snappy/snappy')
|
|
@@ -27,7 +27,10 @@
|
|
|
27
27
|
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
28
28
|
//
|
|
29
29
|
// Various type stubs for the open-source version of Snappy.
|
|
30
|
-
//
|
|
30
|
+
//
|
|
31
|
+
// This file cannot include config.h, as it is included from snappy.h,
|
|
32
|
+
// which is a public header. Instead, snappy-stubs-public.h is generated
|
|
33
|
+
// from snappy-stubs-public.h.in at configure time.
|
|
31
34
|
|
|
32
35
|
#ifndef THIRD_PARTY_SNAPPY_OPENSOURCE_SNAPPY_STUBS_PUBLIC_H_
|
|
33
36
|
#define THIRD_PARTY_SNAPPY_OPENSOURCE_SNAPPY_STUBS_PUBLIC_H_
|
|
@@ -40,7 +43,7 @@
|
|
|
40
43
|
|
|
41
44
|
#define SNAPPY_MAJOR 1
|
|
42
45
|
#define SNAPPY_MINOR 2
|
|
43
|
-
#define SNAPPY_PATCHLEVEL
|
|
46
|
+
#define SNAPPY_PATCHLEVEL 2
|
|
44
47
|
#define SNAPPY_VERSION \
|
|
45
48
|
((SNAPPY_MAJOR << 16) | (SNAPPY_MINOR << 8) | SNAPPY_PATCHLEVEL)
|
|
46
49
|
|
|
@@ -0,0 +1,398 @@
|
|
|
1
|
+
// Copyright 2020 Google Inc. All Rights Reserved.
|
|
2
|
+
//
|
|
3
|
+
// Redistribution and use in source and binary forms, with or without
|
|
4
|
+
// modification, are permitted provided that the following conditions are
|
|
5
|
+
// met:
|
|
6
|
+
//
|
|
7
|
+
// * Redistributions of source code must retain the above copyright
|
|
8
|
+
// notice, this list of conditions and the following disclaimer.
|
|
9
|
+
// * Redistributions in binary form must reproduce the above
|
|
10
|
+
// copyright notice, this list of conditions and the following disclaimer
|
|
11
|
+
// in the documentation and/or other materials provided with the
|
|
12
|
+
// distribution.
|
|
13
|
+
// * Neither the name of Google Inc. nor the names of its
|
|
14
|
+
// contributors may be used to endorse or promote products derived from
|
|
15
|
+
// this software without specific prior written permission.
|
|
16
|
+
//
|
|
17
|
+
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
|
18
|
+
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
|
19
|
+
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
|
20
|
+
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
|
21
|
+
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
|
22
|
+
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
|
23
|
+
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
|
24
|
+
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
|
25
|
+
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
|
26
|
+
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
|
27
|
+
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
28
|
+
|
|
29
|
+
#include <cstddef>
|
|
30
|
+
#include <cstdint>
|
|
31
|
+
#include <string>
|
|
32
|
+
#include <vector>
|
|
33
|
+
|
|
34
|
+
#include "benchmark/benchmark.h"
|
|
35
|
+
#include "snappy-internal.h"
|
|
36
|
+
#include "snappy-sinksource.h"
|
|
37
|
+
#include "snappy-test.h"
|
|
38
|
+
#include "snappy.h"
|
|
39
|
+
#include "snappy_test_data.h"
|
|
40
|
+
|
|
41
|
+
namespace snappy {
|
|
42
|
+
|
|
43
|
+
namespace {
|
|
44
|
+
|
|
45
|
+
void FilesAndLevels(benchmark::internal::Benchmark* benchmark) {
|
|
46
|
+
for (int i = 0; i < ARRAYSIZE(kTestDataFiles); ++i) {
|
|
47
|
+
for (int level = snappy::CompressionOptions::MinCompressionLevel();
|
|
48
|
+
level <= snappy::CompressionOptions::MaxCompressionLevel(); ++level) {
|
|
49
|
+
benchmark->ArgPair(i, level);
|
|
50
|
+
}
|
|
51
|
+
}
|
|
52
|
+
}
|
|
53
|
+
|
|
54
|
+
void BM_UFlat(benchmark::State& state) {
|
|
55
|
+
// Pick file to process based on state.range(0).
|
|
56
|
+
int file_index = state.range(0);
|
|
57
|
+
|
|
58
|
+
CHECK_GE(file_index, 0);
|
|
59
|
+
CHECK_LT(file_index, ARRAYSIZE(kTestDataFiles));
|
|
60
|
+
std::string contents =
|
|
61
|
+
ReadTestDataFile(kTestDataFiles[file_index].filename,
|
|
62
|
+
kTestDataFiles[file_index].size_limit);
|
|
63
|
+
|
|
64
|
+
std::string zcontents;
|
|
65
|
+
snappy::Compress(
|
|
66
|
+
contents.data(), contents.size(), &zcontents,
|
|
67
|
+
snappy::CompressionOptions{/*level=*/static_cast<int>(state.range(1))});
|
|
68
|
+
char* dst = new char[contents.size()];
|
|
69
|
+
|
|
70
|
+
for (auto s : state) {
|
|
71
|
+
CHECK(snappy::RawUncompress(zcontents.data(), zcontents.size(), dst));
|
|
72
|
+
benchmark::DoNotOptimize(dst);
|
|
73
|
+
}
|
|
74
|
+
state.SetBytesProcessed(static_cast<int64_t>(state.iterations()) *
|
|
75
|
+
static_cast<int64_t>(contents.size()));
|
|
76
|
+
state.SetLabel(kTestDataFiles[file_index].label);
|
|
77
|
+
|
|
78
|
+
delete[] dst;
|
|
79
|
+
}
|
|
80
|
+
BENCHMARK(BM_UFlat)->Apply(FilesAndLevels);
|
|
81
|
+
|
|
82
|
+
struct SourceFiles {
|
|
83
|
+
SourceFiles() {
|
|
84
|
+
for (int i = 0; i < kFiles; i++) {
|
|
85
|
+
std::string contents = ReadTestDataFile(kTestDataFiles[i].filename,
|
|
86
|
+
kTestDataFiles[i].size_limit);
|
|
87
|
+
max_size = std::max(max_size, contents.size());
|
|
88
|
+
sizes[i] = contents.size();
|
|
89
|
+
snappy::Compress(contents.data(), contents.size(), &zcontents[i]);
|
|
90
|
+
}
|
|
91
|
+
}
|
|
92
|
+
static constexpr int kFiles = ARRAYSIZE(kTestDataFiles);
|
|
93
|
+
std::string zcontents[kFiles];
|
|
94
|
+
size_t sizes[kFiles];
|
|
95
|
+
size_t max_size = 0;
|
|
96
|
+
};
|
|
97
|
+
|
|
98
|
+
void BM_UFlatMedley(benchmark::State& state) {
|
|
99
|
+
static const SourceFiles* const source = new SourceFiles();
|
|
100
|
+
|
|
101
|
+
std::vector<char> dst(source->max_size);
|
|
102
|
+
|
|
103
|
+
for (auto s : state) {
|
|
104
|
+
for (int i = 0; i < SourceFiles::kFiles; i++) {
|
|
105
|
+
CHECK(snappy::RawUncompress(source->zcontents[i].data(),
|
|
106
|
+
source->zcontents[i].size(), dst.data()));
|
|
107
|
+
benchmark::DoNotOptimize(dst);
|
|
108
|
+
}
|
|
109
|
+
}
|
|
110
|
+
|
|
111
|
+
int64_t source_sizes = 0;
|
|
112
|
+
for (int i = 0; i < SourceFiles::kFiles; i++) {
|
|
113
|
+
source_sizes += static_cast<int64_t>(source->sizes[i]);
|
|
114
|
+
}
|
|
115
|
+
state.SetBytesProcessed(static_cast<int64_t>(state.iterations()) *
|
|
116
|
+
source_sizes);
|
|
117
|
+
}
|
|
118
|
+
BENCHMARK(BM_UFlatMedley);
|
|
119
|
+
|
|
120
|
+
void BM_UValidate(benchmark::State& state) {
|
|
121
|
+
// Pick file to process based on state.range(0).
|
|
122
|
+
int file_index = state.range(0);
|
|
123
|
+
|
|
124
|
+
CHECK_GE(file_index, 0);
|
|
125
|
+
CHECK_LT(file_index, ARRAYSIZE(kTestDataFiles));
|
|
126
|
+
std::string contents =
|
|
127
|
+
ReadTestDataFile(kTestDataFiles[file_index].filename,
|
|
128
|
+
kTestDataFiles[file_index].size_limit);
|
|
129
|
+
|
|
130
|
+
std::string zcontents;
|
|
131
|
+
snappy::Compress(
|
|
132
|
+
contents.data(), contents.size(), &zcontents,
|
|
133
|
+
snappy::CompressionOptions{/*level=*/static_cast<int>(state.range(1))});
|
|
134
|
+
|
|
135
|
+
for (auto s : state) {
|
|
136
|
+
CHECK(snappy::IsValidCompressedBuffer(zcontents.data(), zcontents.size()));
|
|
137
|
+
}
|
|
138
|
+
state.SetBytesProcessed(static_cast<int64_t>(state.iterations()) *
|
|
139
|
+
static_cast<int64_t>(contents.size()));
|
|
140
|
+
state.SetLabel(kTestDataFiles[file_index].label);
|
|
141
|
+
}
|
|
142
|
+
BENCHMARK(BM_UValidate)->Apply(FilesAndLevels);
|
|
143
|
+
|
|
144
|
+
void BM_UValidateMedley(benchmark::State& state) {
|
|
145
|
+
static const SourceFiles* const source = new SourceFiles();
|
|
146
|
+
|
|
147
|
+
for (auto s : state) {
|
|
148
|
+
for (int i = 0; i < SourceFiles::kFiles; i++) {
|
|
149
|
+
CHECK(snappy::IsValidCompressedBuffer(source->zcontents[i].data(),
|
|
150
|
+
source->zcontents[i].size()));
|
|
151
|
+
}
|
|
152
|
+
}
|
|
153
|
+
|
|
154
|
+
int64_t source_sizes = 0;
|
|
155
|
+
for (int i = 0; i < SourceFiles::kFiles; i++) {
|
|
156
|
+
source_sizes += static_cast<int64_t>(source->sizes[i]);
|
|
157
|
+
}
|
|
158
|
+
state.SetBytesProcessed(static_cast<int64_t>(state.iterations()) *
|
|
159
|
+
source_sizes);
|
|
160
|
+
}
|
|
161
|
+
BENCHMARK(BM_UValidateMedley);
|
|
162
|
+
|
|
163
|
+
void BM_UIOVecSource(benchmark::State& state) {
|
|
164
|
+
// Pick file to process based on state.range(0).
|
|
165
|
+
int file_index = state.range(0);
|
|
166
|
+
int level = state.range(1);
|
|
167
|
+
|
|
168
|
+
CHECK_GE(file_index, 0);
|
|
169
|
+
CHECK_LT(file_index, ARRAYSIZE(kTestDataFiles));
|
|
170
|
+
std::string contents =
|
|
171
|
+
ReadTestDataFile(kTestDataFiles[file_index].filename,
|
|
172
|
+
kTestDataFiles[file_index].size_limit);
|
|
173
|
+
|
|
174
|
+
// Create `iovec`s of the `contents`.
|
|
175
|
+
const int kNumEntries = 10;
|
|
176
|
+
struct iovec iov[kNumEntries];
|
|
177
|
+
size_t used_so_far = 0;
|
|
178
|
+
for (int i = 0; i < kNumEntries; ++i) {
|
|
179
|
+
iov[i].iov_base = const_cast<char*>(contents.data()) + used_so_far;
|
|
180
|
+
if (used_so_far == contents.size()) {
|
|
181
|
+
iov[i].iov_len = 0;
|
|
182
|
+
continue;
|
|
183
|
+
}
|
|
184
|
+
if (i == kNumEntries - 1) {
|
|
185
|
+
iov[i].iov_len = contents.size() - used_so_far;
|
|
186
|
+
} else {
|
|
187
|
+
iov[i].iov_len = contents.size() / kNumEntries;
|
|
188
|
+
}
|
|
189
|
+
used_so_far += iov[i].iov_len;
|
|
190
|
+
}
|
|
191
|
+
|
|
192
|
+
char* dst = new char[snappy::MaxCompressedLength(contents.size())];
|
|
193
|
+
size_t zsize = 0;
|
|
194
|
+
for (auto s : state) {
|
|
195
|
+
snappy::RawCompressFromIOVec(iov, contents.size(), dst, &zsize,
|
|
196
|
+
snappy::CompressionOptions{/*level=*/level});
|
|
197
|
+
benchmark::DoNotOptimize(iov);
|
|
198
|
+
}
|
|
199
|
+
state.SetBytesProcessed(static_cast<int64_t>(state.iterations()) *
|
|
200
|
+
static_cast<int64_t>(contents.size()));
|
|
201
|
+
const double compression_ratio =
|
|
202
|
+
static_cast<double>(zsize) / std::max<size_t>(1, contents.size());
|
|
203
|
+
state.SetLabel(StrFormat("%s (%.2f %%)", kTestDataFiles[file_index].label,
|
|
204
|
+
100.0 * compression_ratio));
|
|
205
|
+
VLOG(0) << StrFormat("compression for %s: %d -> %d bytes",
|
|
206
|
+
kTestDataFiles[file_index].label, contents.size(),
|
|
207
|
+
zsize);
|
|
208
|
+
|
|
209
|
+
delete[] dst;
|
|
210
|
+
}
|
|
211
|
+
BENCHMARK(BM_UIOVecSource)->Apply(FilesAndLevels);
|
|
212
|
+
|
|
213
|
+
void BM_UIOVecSink(benchmark::State& state) {
|
|
214
|
+
// Pick file to process based on state.range(0).
|
|
215
|
+
int file_index = state.range(0);
|
|
216
|
+
|
|
217
|
+
CHECK_GE(file_index, 0);
|
|
218
|
+
CHECK_LT(file_index, ARRAYSIZE(kTestDataFiles));
|
|
219
|
+
std::string contents =
|
|
220
|
+
ReadTestDataFile(kTestDataFiles[file_index].filename,
|
|
221
|
+
kTestDataFiles[file_index].size_limit);
|
|
222
|
+
|
|
223
|
+
std::string zcontents;
|
|
224
|
+
snappy::Compress(contents.data(), contents.size(), &zcontents);
|
|
225
|
+
|
|
226
|
+
// Uncompress into an iovec containing ten entries.
|
|
227
|
+
const int kNumEntries = 10;
|
|
228
|
+
struct iovec iov[kNumEntries];
|
|
229
|
+
char* dst = new char[contents.size()];
|
|
230
|
+
size_t used_so_far = 0;
|
|
231
|
+
for (int i = 0; i < kNumEntries; ++i) {
|
|
232
|
+
iov[i].iov_base = dst + used_so_far;
|
|
233
|
+
if (used_so_far == contents.size()) {
|
|
234
|
+
iov[i].iov_len = 0;
|
|
235
|
+
continue;
|
|
236
|
+
}
|
|
237
|
+
|
|
238
|
+
if (i == kNumEntries - 1) {
|
|
239
|
+
iov[i].iov_len = contents.size() - used_so_far;
|
|
240
|
+
} else {
|
|
241
|
+
iov[i].iov_len = contents.size() / kNumEntries;
|
|
242
|
+
}
|
|
243
|
+
used_so_far += iov[i].iov_len;
|
|
244
|
+
}
|
|
245
|
+
|
|
246
|
+
for (auto s : state) {
|
|
247
|
+
CHECK(snappy::RawUncompressToIOVec(zcontents.data(), zcontents.size(), iov,
|
|
248
|
+
kNumEntries));
|
|
249
|
+
benchmark::DoNotOptimize(iov);
|
|
250
|
+
}
|
|
251
|
+
state.SetBytesProcessed(static_cast<int64_t>(state.iterations()) *
|
|
252
|
+
static_cast<int64_t>(contents.size()));
|
|
253
|
+
state.SetLabel(kTestDataFiles[file_index].label);
|
|
254
|
+
|
|
255
|
+
delete[] dst;
|
|
256
|
+
}
|
|
257
|
+
BENCHMARK(BM_UIOVecSink)->DenseRange(0, 4);
|
|
258
|
+
|
|
259
|
+
void BM_UFlatSink(benchmark::State& state) {
|
|
260
|
+
// Pick file to process based on state.range(0).
|
|
261
|
+
int file_index = state.range(0);
|
|
262
|
+
|
|
263
|
+
CHECK_GE(file_index, 0);
|
|
264
|
+
CHECK_LT(file_index, ARRAYSIZE(kTestDataFiles));
|
|
265
|
+
std::string contents =
|
|
266
|
+
ReadTestDataFile(kTestDataFiles[file_index].filename,
|
|
267
|
+
kTestDataFiles[file_index].size_limit);
|
|
268
|
+
|
|
269
|
+
std::string zcontents;
|
|
270
|
+
snappy::Compress(
|
|
271
|
+
contents.data(), contents.size(), &zcontents,
|
|
272
|
+
snappy::CompressionOptions{/*level=*/static_cast<int>(state.range(1))});
|
|
273
|
+
char* dst = new char[contents.size()];
|
|
274
|
+
|
|
275
|
+
for (auto s : state) {
|
|
276
|
+
snappy::ByteArraySource source(zcontents.data(), zcontents.size());
|
|
277
|
+
snappy::UncheckedByteArraySink sink(dst);
|
|
278
|
+
CHECK(snappy::Uncompress(&source, &sink));
|
|
279
|
+
benchmark::DoNotOptimize(sink);
|
|
280
|
+
}
|
|
281
|
+
state.SetBytesProcessed(static_cast<int64_t>(state.iterations()) *
|
|
282
|
+
static_cast<int64_t>(contents.size()));
|
|
283
|
+
state.SetLabel(kTestDataFiles[file_index].label);
|
|
284
|
+
|
|
285
|
+
std::string s(dst, contents.size());
|
|
286
|
+
CHECK_EQ(contents, s);
|
|
287
|
+
|
|
288
|
+
delete[] dst;
|
|
289
|
+
}
|
|
290
|
+
|
|
291
|
+
BENCHMARK(BM_UFlatSink)->Apply(FilesAndLevels);
|
|
292
|
+
|
|
293
|
+
void BM_ZFlat(benchmark::State& state) {
|
|
294
|
+
// Pick file to process based on state.range(0).
|
|
295
|
+
int file_index = state.range(0);
|
|
296
|
+
int level = state.range(1);
|
|
297
|
+
|
|
298
|
+
CHECK_GE(file_index, 0);
|
|
299
|
+
CHECK_LT(file_index, ARRAYSIZE(kTestDataFiles));
|
|
300
|
+
std::string contents =
|
|
301
|
+
ReadTestDataFile(kTestDataFiles[file_index].filename,
|
|
302
|
+
kTestDataFiles[file_index].size_limit);
|
|
303
|
+
char* dst = new char[snappy::MaxCompressedLength(contents.size())];
|
|
304
|
+
|
|
305
|
+
size_t zsize = 0;
|
|
306
|
+
for (auto s : state) {
|
|
307
|
+
snappy::RawCompress(contents.data(), contents.size(), dst, &zsize,
|
|
308
|
+
snappy::CompressionOptions{/*level=*/level});
|
|
309
|
+
benchmark::DoNotOptimize(dst);
|
|
310
|
+
}
|
|
311
|
+
state.SetBytesProcessed(static_cast<int64_t>(state.iterations()) *
|
|
312
|
+
static_cast<int64_t>(contents.size()));
|
|
313
|
+
const double compression_ratio =
|
|
314
|
+
static_cast<double>(zsize) / std::max<size_t>(1, contents.size());
|
|
315
|
+
state.SetLabel(StrFormat("%s (%.2f %%)", kTestDataFiles[file_index].label,
|
|
316
|
+
100.0 * compression_ratio));
|
|
317
|
+
VLOG(0) << StrFormat("compression for %s: %d -> %d bytes",
|
|
318
|
+
kTestDataFiles[file_index].label, contents.size(),
|
|
319
|
+
zsize);
|
|
320
|
+
delete[] dst;
|
|
321
|
+
}
|
|
322
|
+
|
|
323
|
+
BENCHMARK(BM_ZFlat)->Apply(FilesAndLevels);
|
|
324
|
+
|
|
325
|
+
void BM_ZFlatAll(benchmark::State& state) {
|
|
326
|
+
const int num_files = ARRAYSIZE(kTestDataFiles);
|
|
327
|
+
int level = state.range(0);
|
|
328
|
+
|
|
329
|
+
std::vector<std::string> contents(num_files);
|
|
330
|
+
std::vector<char*> dst(num_files);
|
|
331
|
+
|
|
332
|
+
int64_t total_contents_size = 0;
|
|
333
|
+
for (int i = 0; i < num_files; ++i) {
|
|
334
|
+
contents[i] = ReadTestDataFile(kTestDataFiles[i].filename,
|
|
335
|
+
kTestDataFiles[i].size_limit);
|
|
336
|
+
dst[i] = new char[snappy::MaxCompressedLength(contents[i].size())];
|
|
337
|
+
total_contents_size += contents[i].size();
|
|
338
|
+
}
|
|
339
|
+
|
|
340
|
+
size_t zsize = 0;
|
|
341
|
+
for (auto s : state) {
|
|
342
|
+
for (int i = 0; i < num_files; ++i) {
|
|
343
|
+
snappy::RawCompress(contents[i].data(), contents[i].size(), dst[i],
|
|
344
|
+
&zsize, snappy::CompressionOptions{/*level=*/level});
|
|
345
|
+
benchmark::DoNotOptimize(dst);
|
|
346
|
+
}
|
|
347
|
+
}
|
|
348
|
+
|
|
349
|
+
state.SetBytesProcessed(static_cast<int64_t>(state.iterations()) *
|
|
350
|
+
total_contents_size);
|
|
351
|
+
|
|
352
|
+
for (char* dst_item : dst) {
|
|
353
|
+
delete[] dst_item;
|
|
354
|
+
}
|
|
355
|
+
state.SetLabel(StrFormat("%d kTestDataFiles", num_files));
|
|
356
|
+
}
|
|
357
|
+
BENCHMARK(BM_ZFlatAll)->DenseRange(1, 2);
|
|
358
|
+
|
|
359
|
+
void BM_ZFlatIncreasingTableSize(benchmark::State& state) {
|
|
360
|
+
CHECK_GT(ARRAYSIZE(kTestDataFiles), 0);
|
|
361
|
+
int level = state.range(0);
|
|
362
|
+
const std::string base_content = ReadTestDataFile(
|
|
363
|
+
kTestDataFiles[0].filename, kTestDataFiles[0].size_limit);
|
|
364
|
+
|
|
365
|
+
std::vector<std::string> contents;
|
|
366
|
+
std::vector<char*> dst;
|
|
367
|
+
int64_t total_contents_size = 0;
|
|
368
|
+
for (int table_bits = kMinHashTableBits; table_bits <= kMaxHashTableBits;
|
|
369
|
+
++table_bits) {
|
|
370
|
+
std::string content = base_content;
|
|
371
|
+
content.resize(1 << table_bits);
|
|
372
|
+
dst.push_back(new char[snappy::MaxCompressedLength(content.size())]);
|
|
373
|
+
total_contents_size += content.size();
|
|
374
|
+
contents.push_back(std::move(content));
|
|
375
|
+
}
|
|
376
|
+
|
|
377
|
+
size_t zsize = 0;
|
|
378
|
+
for (auto s : state) {
|
|
379
|
+
for (size_t i = 0; i < contents.size(); ++i) {
|
|
380
|
+
snappy::RawCompress(contents[i].data(), contents[i].size(), dst[i],
|
|
381
|
+
&zsize, snappy::CompressionOptions{/*level=*/level});
|
|
382
|
+
benchmark::DoNotOptimize(dst);
|
|
383
|
+
}
|
|
384
|
+
}
|
|
385
|
+
|
|
386
|
+
state.SetBytesProcessed(static_cast<int64_t>(state.iterations()) *
|
|
387
|
+
total_contents_size);
|
|
388
|
+
|
|
389
|
+
for (char* dst_item : dst) {
|
|
390
|
+
delete[] dst_item;
|
|
391
|
+
}
|
|
392
|
+
state.SetLabel(StrFormat("%d tables", contents.size()));
|
|
393
|
+
}
|
|
394
|
+
BENCHMARK(BM_ZFlatIncreasingTableSize)->DenseRange(1, 2);
|
|
395
|
+
|
|
396
|
+
} // namespace
|
|
397
|
+
|
|
398
|
+
} // namespace snappy
|