dedup 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: 021b0183f99ad0cc33215d7162fc85eab71c399c1c13fd80db913c5fa80a2667
4
+ data.tar.gz: a708b1ae9f1333b4efad03efedad3a292568f881ba13796619ee1b7df24f1967
5
+ SHA512:
6
+ metadata.gz: 4db7a0a4be18afdd7458d06d330087f015fd05425bad1a4be0832029d6ef038066a2f0fc8128e99b5b923cdd555e2e5fb15d14b5f7cd89dc723292dca53da043
7
+ data.tar.gz: 5f74f2056de6d70128ce87c4a124844d677dbbe7bddd550f6f0de7554d1a495b185419797f25c0a4af250b9ad99d3a775a744ef1ffad64ac6d7b4eb0acb7f41b
@@ -0,0 +1,23 @@
1
+ name: CI
2
+
3
+ on: [push, pull_request]
4
+
5
+ jobs:
6
+ build:
7
+ runs-on: ubuntu-latest
8
+ strategy:
9
+ fail-fast: false
10
+ matrix:
11
+ ruby: [ ruby-head, 2.7, 2.6, 2.5 ]
12
+ steps:
13
+ - uses: actions/checkout@v2
14
+ - name: Set up Ruby
15
+ uses: ruby/setup-ruby@v1
16
+ with:
17
+ ruby-version: ${{ matrix.ruby }}
18
+ - name: Install dependencies
19
+ run: bundle install
20
+ - name: Run test
21
+ run: rake
22
+ - name: Install gem
23
+ run: rake install
@@ -0,0 +1,11 @@
1
+ /.bundle/
2
+ /.yardoc
3
+ /_yardoc/
4
+ /coverage/
5
+ /doc/
6
+ /pkg/
7
+ /spec/reports/
8
+ /tmp/
9
+ *.bundle
10
+ *.so
11
+ .byebug_history
@@ -0,0 +1,6 @@
1
+ ---
2
+ language: ruby
3
+ cache: bundler
4
+ rvm:
5
+ - 2.7.1
6
+ before_install: gem install bundler -v 2.1.4
data/Gemfile ADDED
@@ -0,0 +1,13 @@
1
+ source "https://rubygems.org"
2
+
3
+ # Specify your gem's dependencies in dedup.gemspec
4
+ gemspec
5
+
6
+ gem "rake", "~> 12.0"
7
+ gem "rake-compiler"
8
+ gem "minitest", "~> 5.0"
9
+ gem "byebug"
10
+ gem "rubocop-shopify", require: false
11
+ gem "benchmark-ips"
12
+ gem "stackprof"
13
+ gem "bootsnap"
@@ -0,0 +1,57 @@
1
+ PATH
2
+ remote: .
3
+ specs:
4
+ dedup (0.1.0)
5
+
6
+ GEM
7
+ remote: https://rubygems.org/
8
+ specs:
9
+ ast (2.4.1)
10
+ benchmark-ips (2.8.2)
11
+ bootsnap (1.4.8)
12
+ msgpack (~> 1.0)
13
+ byebug (11.1.3)
14
+ minitest (5.14.2)
15
+ msgpack (1.3.3)
16
+ parallel (1.19.2)
17
+ parser (2.7.1.4)
18
+ ast (~> 2.4.1)
19
+ rainbow (3.0.0)
20
+ rake (12.3.3)
21
+ rake-compiler (1.1.1)
22
+ rake
23
+ regexp_parser (1.7.1)
24
+ rexml (3.2.4)
25
+ rubocop (0.86.0)
26
+ parallel (~> 1.10)
27
+ parser (>= 2.7.0.1)
28
+ rainbow (>= 2.2.2, < 4.0)
29
+ regexp_parser (>= 1.7)
30
+ rexml
31
+ rubocop-ast (>= 0.0.3, < 1.0)
32
+ ruby-progressbar (~> 1.7)
33
+ unicode-display_width (>= 1.4.0, < 2.0)
34
+ rubocop-ast (0.3.0)
35
+ parser (>= 2.7.1.4)
36
+ rubocop-shopify (1.0.4)
37
+ rubocop (>= 0.85, < 0.87)
38
+ ruby-progressbar (1.10.1)
39
+ stackprof (0.2.15)
40
+ unicode-display_width (1.7.0)
41
+
42
+ PLATFORMS
43
+ ruby
44
+
45
+ DEPENDENCIES
46
+ benchmark-ips
47
+ bootsnap
48
+ byebug
49
+ dedup!
50
+ minitest (~> 5.0)
51
+ rake (~> 12.0)
52
+ rake-compiler
53
+ rubocop-shopify
54
+ stackprof
55
+
56
+ BUNDLED WITH
57
+ 2.1.4
@@ -0,0 +1,21 @@
1
+ The MIT License (MIT)
2
+
3
+ Copyright (c) 2020 Jean Boussier
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in
13
+ all copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21
+ THE SOFTWARE.
@@ -0,0 +1,49 @@
1
+ # Dedup
2
+
3
+ Deep object deduplication.
4
+
5
+ If your app keeps lots of static data in memory, such as i18n data or large configurations,
6
+ this can reduce memory retention.
7
+
8
+ ## Installation
9
+
10
+ Add this line to your application's Gemfile:
11
+
12
+ ```ruby
13
+ gem 'dedup'
14
+ ```
15
+
16
+ And then execute:
17
+
18
+ $ bundle install
19
+
20
+ Or install it yourself as:
21
+
22
+ $ gem install dedup
23
+
24
+ ## Usage
25
+
26
+ This library is meant to be called on large static data structures loaded during boot:
27
+
28
+ ```ruby
29
+ SOME_DATA = Dedup.deep_intern!(YAML.load_file('path.yml'))
30
+ ```
31
+
32
+ Keep in mind that it trades CPU time during boot for reduced memory retention.
33
+ It isn't meant to be applied on runtime data with reduced lifetime, but on
34
+ static data loaded during boot.
35
+
36
+ ## Development
37
+
38
+ After checking out the repo, run `bin/setup` to install dependencies. Then, run `rake test` to run the tests. You can also run `bin/console` for an interactive prompt that will allow you to experiment.
39
+
40
+ To install this gem onto your local machine, run `bundle exec rake install`. To release a new version, update the version number in `version.rb`, and then run `bundle exec rake release`, which will create a git tag for the version, push git commits and tags, and push the `.gem` file to [rubygems.org](https://rubygems.org).
41
+
42
+ ## Contributing
43
+
44
+ Bug reports and pull requests are welcome on GitHub at https://github.com/Shopify/dedup.
45
+
46
+
47
+ ## License
48
+
49
+ The gem is available as open source under the terms of the [MIT License](https://opensource.org/licenses/MIT).
@@ -0,0 +1,22 @@
1
+ # frozen_string_literal: true
2
+ require "bundler/gem_tasks"
3
+ require "rake/testtask"
4
+
5
# `rake test` runs every test/**/*_test.rb file with test/ and lib/ on the load path.
Rake::TestTask.new(:test) do |t|
  t.libs << "test"
  t.libs << "lib"
  t.test_files = FileList["test/**/*_test.rb"]
end

# The C extension is only compiled on CRuby 2.7 and newer. Versions are
# compared as Gem::Version objects because a plain String comparison is
# lexicographic ("2.10" >= "2.7" is false).
native_extension_supported =
  RUBY_ENGINE == 'ruby' && Gem::Version.new(RUBY_VERSION) >= Gem::Version.new('2.7')

if native_extension_supported
  require "rake/extensiontask"

  Rake::ExtensionTask.new("dedup") do |ext|
    ext.ext_dir = 'ext/dedup'
    ext.lib_dir = "lib/dedup"
  end

  # Build the extension before running the tests.
  task default: %i(compile test)
else
  task default: %i(test)
end
@@ -0,0 +1,26 @@
1
+ #!/usr/bin/env ruby
2
+ # frozen_string_literal: true
3
+
4
# bundler/setup is required first so that the requires below resolve against
# the gems declared in the Gemfile.
require 'bundler/setup'
require 'tmpdir'
require 'yaml'
require 'bootsnap'
require 'benchmark/ips'
require 'dedup'

# Enable Bootsnap's YAML compile cache, stored in a fresh temporary directory.
Bootsnap.setup(
  cache_dir: Dir.mktmpdir,
  development_mode: false,
  compile_cache_yaml: true,
  autoload_paths_cache: false,
)

# Path of the YAML document to load, taken from the first CLI argument.
YAML_PATH = ARGV.fetch(0) do
  abort("usage: #{$PROGRAM_NAME} path/to/file.yml")
end

# Compare plain YAML loading against loading followed by deep interning.
Benchmark.ips do |x|
  x.time = 30
  x.report('baseline') { YAML.load_file(YAML_PATH) }
  x.report('ruby') { Dedup::Ruby.deep_intern!(YAML.load_file(YAML_PATH)) }
  # Dedup::Native is only defined when the C extension could be loaded.
  if defined?(Dedup::Native)
    x.report('native') { Dedup::Native.deep_intern!(YAML.load_file(YAML_PATH)) }
  end
  x.compare!
end
@@ -0,0 +1,14 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require "bundler/setup"
4
+ require "dedup"
5
+
6
+ # You can add fixtures and/or initialization code here to make experimenting
7
+ # with your gem easier. You can also use a different console, if you like.
8
+
9
+ # (If you use this, don't forget to add pry to your Gemfile!)
10
+ # require "pry"
11
+ # Pry.start
12
+
13
+ require "irb"
14
+ IRB.start(__FILE__)
@@ -0,0 +1,8 @@
1
+ #!/usr/bin/env bash
2
+ set -euo pipefail
3
+ IFS=$'\n\t'
4
+ set -vx
5
+
6
+ bundle install
7
+
8
+ # Do any other automated setup that you need to do here
@@ -0,0 +1,36 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative 'lib/dedup/version'
4
+
5
Gem::Specification.new do |spec|
  spec.name = "dedup"
  spec.version = Dedup::VERSION
  spec.authors = ["Jean Boussier"]
  spec.email = ["jean.boussier@gmail.com"]

  spec.summary = %q{Fast object deduplication}
  spec.description = %q{If your app keeps lots of static data in memory, such as i18n data or large configurations, this can reduce memory retention.}
  spec.homepage = "https://github.com/Shopify/dedup"
  spec.license = "MIT"
  spec.required_ruby_version = Gem::Requirement.new(">= 2.5.0")

  spec.metadata["allowed_push_host"] = "https://rubygems.org"

  spec.metadata["homepage_uri"] = spec.homepage
  spec.metadata["source_code_uri"] = spec.homepage

  # Specify which files should be added to the gem when it is released.
  # The `git ls-files -z` loads the files in the RubyGem that have been added into git.
  spec.files = Dir.chdir(File.expand_path('..', __FILE__)) do
    `git ls-files -z`.split("\x0").reject { |f| f.match?(%r{\A(test|spec|features)/}) }
  end

  # The C extension is only declared on CRuby 2.7+. Versions are compared as
  # Gem::Version objects; a String comparison is lexicographic and would
  # misorder e.g. "2.10" and "2.7".
  #
  # NOTE(review): this condition runs when the gem is packaged, so the result
  # depends on the Ruby used to build the release (the published 0.1.0
  # metadata lists `extensions: []`). Confirm releases are built on CRuby >= 2.7.
  if RUBY_ENGINE == 'ruby' && Gem::Version.new(RUBY_VERSION) >= Gem::Version.new('2.7')
    spec.platform = Gem::Platform::RUBY
    spec.extensions = ['ext/dedup/extconf.rb']
  end

  spec.bindir = "exe"
  spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
  spec.require_paths = ["lib"]
end
data/dev.yml ADDED
@@ -0,0 +1,20 @@
1
+ name: dedup
2
+
3
+ type: ruby
4
+
5
+ up:
6
+ - ruby: 2.7.1
7
+ - bundler
8
+
9
+ commands:
10
+ console:
11
+ desc: 'start a console'
12
+ run: bin/console
13
+ run:
14
+ desc: 'start the application'
15
+ run: bin/run
16
+ test:
17
+ syntax:
18
+ argument: file
19
+ optional: args...
20
+ run: bin/testunit
@@ -0,0 +1,111 @@
1
+ #include <ruby.h>
2
+ #include <ruby/st.h>
3
+ #include <stdbool.h>
4
+
5
+ #define RHASH_AR_TABLE_MAX_SIZE SIZEOF_VALUE
6
+
7
+ static ID id_uminus;
8
+ static VALUE empty_array, empty_hash;
9
+
10
+ static VALUE deep_intern(VALUE);
11
+
12
+ typedef struct {
13
+ bool changed;
14
+ long index;
15
+ VALUE *pairs;
16
+ } hash_iter_arg;
17
+
18
+ int
19
+ dedup_hash_iter_callback(VALUE key, VALUE value, VALUE arg)
20
+ {
21
+ hash_iter_arg *iter_arg = (hash_iter_arg *)arg;
22
+
23
+ VALUE new_key = deep_intern(key);
24
+ iter_arg->pairs[iter_arg->index] = new_key;
25
+ iter_arg->index++;
26
+ iter_arg->changed |= new_key != key;
27
+
28
+ VALUE new_value = deep_intern(value);
29
+ iter_arg->pairs[iter_arg->index] = new_value;
30
+ iter_arg->index++;
31
+ iter_arg->changed |= new_value != value;
32
+
33
+ return ST_CONTINUE;
34
+ }
35
+
36
+ static VALUE
37
+ deep_intern(VALUE data)
38
+ {
39
+ if (RB_SPECIAL_CONST_P(data)) {
40
+ return data;
41
+ }
42
+
43
+ int type = BUILTIN_TYPE(data);
44
+
45
+ if (RB_OBJ_FROZEN(data) && (type != T_STRING || FL_TEST(data, RSTRING_FSTR))) {
46
+ return data;
47
+ }
48
+
49
+ switch (type) {
50
+ case T_STRING:
51
+ return rb_funcall(rb_str_freeze(data), id_uminus, 0);
52
+ break;
53
+ case T_HASH:
54
+ {
55
+ long size = RHASH_SIZE(data);
56
+ if (size == 0) {
57
+ return empty_hash;
58
+ }
59
+
60
+ hash_iter_arg arg;
61
+ arg.changed = false;
62
+ arg.index = 0;
63
+ arg.pairs = alloca(sizeof(VALUE) * 2 * size);
64
+ rb_hash_foreach(data, dedup_hash_iter_callback, (VALUE)&arg);
65
+ if (arg.changed) {
66
+ rb_hash_clear(data);
67
+ rb_hash_bulk_insert(arg.index, arg.pairs, data);
68
+ }
69
+
70
+ rb_obj_freeze(data);
71
+ }
72
+ break;
73
+ case T_ARRAY:
74
+ {
75
+ long size = RARRAY_LEN(data);
76
+ if (size == 0) {
77
+ return empty_array;
78
+ }
79
+ for (long index = 0; index < size; index++) {
80
+ RARRAY_ASET(data, index, deep_intern(RARRAY_AREF(data, index)));
81
+ }
82
+ rb_obj_freeze(data);
83
+ }
84
+ break;
85
+ default:
86
+ rb_obj_freeze(data);
87
+ }
88
+ return data;
89
+ }
90
+
91
+ static VALUE
92
+ dedup_deep_intern_bang(VALUE self, VALUE data)
93
+ {
94
+
95
+ return deep_intern(data);
96
+ }
97
+
98
+ void
99
+ Init_dedup()
100
+ {
101
+ id_uminus = rb_intern("-@");
102
+
103
+ VALUE rb_mDedup = rb_const_get(rb_cObject, rb_intern("Dedup"));
104
+ empty_hash = rb_const_get(rb_mDedup, rb_intern("EMPTY_HASH"));
105
+ rb_global_variable(&empty_hash);
106
+ empty_array = rb_const_get(rb_mDedup, rb_intern("EMPTY_ARRAY"));
107
+ rb_global_variable(&empty_array);
108
+
109
+ VALUE rb_mNative = rb_define_module_under(rb_mDedup, "Native");
110
+ rb_define_method(rb_mNative, "deep_intern!", dedup_deep_intern_bang, 1);
111
+ }
@@ -0,0 +1,7 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'mkmf'
4
+
5
# Append to $CFLAGS rather than overwriting it, so whatever flags mkmf has
# already placed there are kept.
$CFLAGS << " -O3 -Wall"

# Generates the Makefile that builds the extension as dedup/dedup.
create_makefile('dedup/dedup')
@@ -0,0 +1,63 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "dedup/version"
4
+
5
module Dedup
  # Shared frozen instances returned in place of any empty Hash / Array.
  EMPTY_HASH = {}.freeze
  EMPTY_ARRAY = [].freeze
  class Error < StandardError; end

  # Pure-Ruby implementation of deep interning.
  module Ruby
    extend self

    # Recursively freezes +data+ and interns every String it contains.
    #
    # Hashes and arrays are modified in place and then frozen; empty ones are
    # replaced by the shared EMPTY_HASH / EMPTY_ARRAY constants, and
    # already-frozen non-empty ones are returned untouched. Any other object
    # is frozen.
    #
    # @param data [Object] the structure to intern
    # @return [Object] the interned object, which may not be +data+ itself
    def deep_intern!(data)
      case data
      when Hash
        return EMPTY_HASH if data.empty?
        return data if data.frozen?

        data.transform_keys! { |k| deep_intern!(k) }
        data.transform_values! { |v| deep_intern!(v) }
        data.freeze
      when Array
        return EMPTY_ARRAY if data.empty?
        return data if data.frozen?

        data.map! { |d| deep_intern!(d) }.freeze
      when String
        intern_string(data)
      else
        data.freeze
      end
    end

    private

    # Only the String step differs between Ruby versions, so only that step
    # is chosen at load time.
    if DEDUP_FROZEN_STRINGS
      # String#-@ interns frozen strings here: freeze in place, then intern.
      def intern_string(string)
        -string.freeze
      end
    else
      # String#-@ does not intern an already-frozen string here, so always
      # hand it an unfrozen one (String#+@ returns a copy when frozen).
      def intern_string(string)
        -(+string)
      end
    end
  end
end
56
+
57
# Prefer the C extension; fall back to the pure-Ruby implementation when the
# compiled library cannot be loaded.
begin
  require "dedup/dedup"
rescue LoadError
  Dedup.extend(Dedup::Ruby)
else
  # The extension defines deep_intern! as an instance method of Dedup::Native;
  # extending makes it callable on both Dedup::Native and Dedup.
  Dedup::Native.extend(Dedup::Native)
  Dedup.extend(Dedup::Native)
end
@@ -0,0 +1,28 @@
1
+ # frozen_string_literal: true
2
+
3
module Dedup
  VERSION = "0.1.0"

  # True when String#-@ returns the very same object for an already-frozen
  # string and for an unfrozen string with the same content.
  DEDUP_FROZEN_STRINGS = begin
    sample = rand.to_s
    from_frozen = -sample.freeze
    from_unfrozen = -(+sample)
    from_frozen.equal?(from_unfrozen)
  end

  # True when two hashes that are each given their own unfrozen copy of the
  # same text as a key end up holding the very same key object.
  DEDUP_HASH_ASET = begin
    seed = rand.to_s
    first = {}
    second = {}
    first[%W(#{seed}).join('')] = 1
    second[%W(#{seed}).join('')] = 1
    second.keys.first.equal?(first.keys.first)
  end

  # Same probe as DEDUP_HASH_ASET, but each key copy is frozen before it is
  # stored.
  DEDUP_HASH_ASET_FROZEN = begin
    frozen_seed = rand.to_s
    first_frozen = {}
    second_frozen = {}
    first_frozen[%W(#{frozen_seed}).join('').freeze] = 1
    second_frozen[%W(#{frozen_seed}).join('').freeze] = 1
    second_frozen.keys.first.equal?(first_frozen.keys.first)
  end
end
metadata ADDED
@@ -0,0 +1,64 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: dedup
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ platform: ruby
6
+ authors:
7
+ - Jean Boussier
8
+ autorequire:
9
+ bindir: exe
10
+ cert_chain: []
11
+ date: 2020-09-08 00:00:00.000000000 Z
12
+ dependencies: []
13
+ description: If your app keeps lots of static data in memory, such as i18n data or
14
+ large configurations, this can reduce memory retention.
15
+ email:
16
+ - jean.boussier@gmail.com
17
+ executables: []
18
+ extensions: []
19
+ extra_rdoc_files: []
20
+ files:
21
+ - ".github/workflows/ci.yml"
22
+ - ".gitignore"
23
+ - ".travis.yml"
24
+ - Gemfile
25
+ - Gemfile.lock
26
+ - LICENSE.txt
27
+ - README.md
28
+ - Rakefile
29
+ - benchmark/deep_intern.rb
30
+ - bin/console
31
+ - bin/setup
32
+ - dedup.gemspec
33
+ - dev.yml
34
+ - ext/dedup/dedup.c
35
+ - ext/dedup/extconf.rb
36
+ - lib/dedup.rb
37
+ - lib/dedup/version.rb
38
+ homepage: https://github.com/Shopify/dedup
39
+ licenses:
40
+ - MIT
41
+ metadata:
42
+ allowed_push_host: https://rubygems.org
43
+ homepage_uri: https://github.com/Shopify/dedup
44
+ source_code_uri: https://github.com/Shopify/dedup
45
+ post_install_message:
46
+ rdoc_options: []
47
+ require_paths:
48
+ - lib
49
+ required_ruby_version: !ruby/object:Gem::Requirement
50
+ requirements:
51
+ - - ">="
52
+ - !ruby/object:Gem::Version
53
+ version: 2.5.0
54
+ required_rubygems_version: !ruby/object:Gem::Requirement
55
+ requirements:
56
+ - - ">="
57
+ - !ruby/object:Gem::Version
58
+ version: '0'
59
+ requirements: []
60
+ rubygems_version: 3.0.2
61
+ signing_key:
62
+ specification_version: 4
63
+ summary: Fast object deduplication
64
+ test_files: []