dedup 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: 021b0183f99ad0cc33215d7162fc85eab71c399c1c13fd80db913c5fa80a2667
4
+ data.tar.gz: a708b1ae9f1333b4efad03efedad3a292568f881ba13796619ee1b7df24f1967
5
+ SHA512:
6
+ metadata.gz: 4db7a0a4be18afdd7458d06d330087f015fd05425bad1a4be0832029d6ef038066a2f0fc8128e99b5b923cdd555e2e5fb15d14b5f7cd89dc723292dca53da043
7
+ data.tar.gz: 5f74f2056de6d70128ce87c4a124844d677dbbe7bddd550f6f0de7554d1a495b185419797f25c0a4af250b9ad99d3a775a744ef1ffad64ac6d7b4eb0acb7f41b
@@ -0,0 +1,23 @@
1
# Runs the test suite and a local gem install on every push and pull request.
name: CI

on: [push, pull_request]

jobs:
  build:
    runs-on: ubuntu-latest
    strategy:
      # Keep testing the remaining Ruby versions when one of them fails.
      fail-fast: false
      matrix:
        # Versions are quoted so YAML keeps them as strings; an unquoted
        # "x.0" would be parsed as a float and lose its ".0".
        ruby: [ruby-head, '2.7', '2.6', '2.5']
    steps:
      - uses: actions/checkout@v2
      - name: Set up Ruby
        uses: ruby/setup-ruby@v1
        with:
          ruby-version: ${{ matrix.ruby }}
      - name: Install dependencies
        run: bundle install
      - name: Run test
        run: rake
      - name: Install gem
        run: rake install
@@ -0,0 +1,11 @@
1
+ /.bundle/
2
+ /.yardoc
3
+ /_yardoc/
4
+ /coverage/
5
+ /doc/
6
+ /pkg/
7
+ /spec/reports/
8
+ /tmp/
9
+ *.bundle
10
+ *.so
11
+ .byebug_history
@@ -0,0 +1,6 @@
1
+ ---
2
+ language: ruby
3
+ cache: bundler
4
+ rvm:
5
+ - 2.7.1
6
+ before_install: gem install bundler -v 2.1.4
data/Gemfile ADDED
@@ -0,0 +1,13 @@
1
+ source "https://rubygems.org"
2
+
3
+ # Specify your gem's dependencies in dedup.gemspec
4
+ gemspec
5
+
6
+ gem "rake", "~> 12.0"
7
+ gem "rake-compiler"
8
+ gem "minitest", "~> 5.0"
9
+ gem "byebug"
10
+ gem "rubocop-shopify", require: false
11
+ gem "benchmark-ips"
12
+ gem "stackprof"
13
+ gem "bootsnap"
@@ -0,0 +1,57 @@
1
+ PATH
2
+ remote: .
3
+ specs:
4
+ dedup (0.1.0)
5
+
6
+ GEM
7
+ remote: https://rubygems.org/
8
+ specs:
9
+ ast (2.4.1)
10
+ benchmark-ips (2.8.2)
11
+ bootsnap (1.4.8)
12
+ msgpack (~> 1.0)
13
+ byebug (11.1.3)
14
+ minitest (5.14.2)
15
+ msgpack (1.3.3)
16
+ parallel (1.19.2)
17
+ parser (2.7.1.4)
18
+ ast (~> 2.4.1)
19
+ rainbow (3.0.0)
20
+ rake (12.3.3)
21
+ rake-compiler (1.1.1)
22
+ rake
23
+ regexp_parser (1.7.1)
24
+ rexml (3.2.4)
25
+ rubocop (0.86.0)
26
+ parallel (~> 1.10)
27
+ parser (>= 2.7.0.1)
28
+ rainbow (>= 2.2.2, < 4.0)
29
+ regexp_parser (>= 1.7)
30
+ rexml
31
+ rubocop-ast (>= 0.0.3, < 1.0)
32
+ ruby-progressbar (~> 1.7)
33
+ unicode-display_width (>= 1.4.0, < 2.0)
34
+ rubocop-ast (0.3.0)
35
+ parser (>= 2.7.1.4)
36
+ rubocop-shopify (1.0.4)
37
+ rubocop (>= 0.85, < 0.87)
38
+ ruby-progressbar (1.10.1)
39
+ stackprof (0.2.15)
40
+ unicode-display_width (1.7.0)
41
+
42
+ PLATFORMS
43
+ ruby
44
+
45
+ DEPENDENCIES
46
+ benchmark-ips
47
+ bootsnap
48
+ byebug
49
+ dedup!
50
+ minitest (~> 5.0)
51
+ rake (~> 12.0)
52
+ rake-compiler
53
+ rubocop-shopify
54
+ stackprof
55
+
56
+ BUNDLED WITH
57
+ 2.1.4
@@ -0,0 +1,21 @@
1
+ The MIT License (MIT)
2
+
3
+ Copyright (c) 2020 Jean Boussier
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in
13
+ all copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21
+ THE SOFTWARE.
@@ -0,0 +1,49 @@
1
+ # Dedup
2
+
3
+ Deep object deduplication.
4
+
5
+ If your app keeps lots of static data in memory, such as i18n data or large configurations,
6
+ this can reduce memory retention.
7
+
8
+ ## Installation
9
+
10
+ Add this line to your application's Gemfile:
11
+
12
+ ```ruby
13
+ gem 'dedup'
14
+ ```
15
+
16
+ And then execute:
17
+
18
+ $ bundle install
19
+
20
+ Or install it yourself as:
21
+
22
+ $ gem install dedup
23
+
24
+ ## Usage
25
+
26
+ This library is meant to be called on large static data structures loaded during boot:
27
+
28
+ ```ruby
29
+ SOME_DATA = Dedup.deep_intern!(YAML.load_file('path.yml'))
30
+ ```
31
+
32
+ Keep in mind that it trades CPU time during boot for reduced memory retention.
33
+ It isn't meant to be applied on runtime data with reduced lifetime, but on
34
+ static data loaded during boot.
35
+
36
+ ## Development
37
+
38
+ After checking out the repo, run `bin/setup` to install dependencies. Then, run `rake test` to run the tests. You can also run `bin/console` for an interactive prompt that will allow you to experiment.
39
+
40
+ To install this gem onto your local machine, run `bundle exec rake install`. To release a new version, update the version number in `version.rb`, and then run `bundle exec rake release`, which will create a git tag for the version, push git commits and tags, and push the `.gem` file to [rubygems.org](https://rubygems.org).
41
+
42
+ ## Contributing
43
+
44
+ Bug reports and pull requests are welcome on GitHub at https://github.com/Shopify/dedup.
45
+
46
+
47
+ ## License
48
+
49
+ The gem is available as open source under the terms of the [MIT License](https://opensource.org/licenses/MIT).
@@ -0,0 +1,22 @@
1
# frozen_string_literal: true
require "bundler/gem_tasks"
require "rake/testtask"

# `rake test` runs every test/**/*_test.rb file with test/ and lib/ on the load path.
Rake::TestTask.new(:test) do |t|
  t.libs << "test"
  t.libs << "lib"
  t.test_files = FileList["test/**/*_test.rb"]
end

# The C extension is only compiled on MRI 2.7 or newer. Versions are compared
# as Gem::Version objects because comparing the raw strings is lexicographic
# (for instance '10.0' < '2.7' as strings).
native_extension_supported =
  RUBY_ENGINE == 'ruby' &&
  Gem::Version.new(RUBY_VERSION) >= Gem::Version.new('2.7')

if native_extension_supported
  require "rake/extensiontask"

  Rake::ExtensionTask.new("dedup") do |ext|
    ext.ext_dir = 'ext/dedup'
    ext.lib_dir = "lib/dedup"
  end

  # Compile the extension before running the tests.
  task default: %i(compile test)
else
  task default: %i(test)
end
@@ -0,0 +1,26 @@
1
#!/usr/bin/env ruby
# frozen_string_literal: true

# Compares plain YAML loading against loading followed by deep interning with
# the pure-Ruby and the native implementations.
#
# The path of the YAML file to load is taken from the first command line
# argument; ARGV.fetch raises when it is missing.

require 'tmpdir'
require 'yaml'
require 'bootsnap'
require 'benchmark/ips'
require 'bundler/setup'
require 'dedup'

# Bootsnap's YAML compile cache is enabled, backed by a fresh temporary directory.
bootsnap_cache_dir = Dir.mktmpdir
Bootsnap.setup(
  cache_dir: bootsnap_cache_dir,
  development_mode: false,
  compile_cache_yaml: true,
  autoload_paths_cache: false,
)

YAML_PATH = ARGV.fetch(0)

Benchmark.ips do |bench|
  bench.time = 30

  bench.report('baseline') do
    YAML.load_file(YAML_PATH)
  end

  bench.report('ruby') do
    Dedup::Ruby.deep_intern!(YAML.load_file(YAML_PATH))
  end

  bench.report('native') do
    Dedup::Native.deep_intern!(YAML.load_file(YAML_PATH))
  end

  bench.compare!
end
@@ -0,0 +1,14 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require "bundler/setup"
4
+ require "dedup"
5
+
6
+ # You can add fixtures and/or initialization code here to make experimenting
7
+ # with your gem easier. You can also use a different console, if you like.
8
+
9
+ # (If you use this, don't forget to add pry to your Gemfile!)
10
+ # require "pry"
11
+ # Pry.start
12
+
13
+ require "irb"
14
+ IRB.start(__FILE__)
@@ -0,0 +1,8 @@
1
+ #!/usr/bin/env bash
2
+ set -euo pipefail
3
+ IFS=$'\n\t'
4
+ set -vx
5
+
6
+ bundle install
7
+
8
+ # Do any other automated setup that you need to do here
@@ -0,0 +1,36 @@
1
+ # frozen_string_literal: true
2
+
3
require_relative 'lib/dedup/version'

Gem::Specification.new do |spec|
  spec.name          = "dedup"
  spec.version       = Dedup::VERSION
  spec.authors       = ["Jean Boussier"]
  spec.email         = ["jean.boussier@gmail.com"]

  spec.summary       = %q{Fast object deduplication}
  spec.description   = %q{If your app keeps lots of static data in memory, such as i18n data or large configurations, this can reduce memory retention.}
  spec.homepage      = "https://github.com/Shopify/dedup"
  spec.license       = "MIT"
  spec.required_ruby_version = Gem::Requirement.new(">= 2.5.0")

  spec.metadata["allowed_push_host"] = "https://rubygems.org"

  spec.metadata["homepage_uri"] = spec.homepage
  spec.metadata["source_code_uri"] = spec.homepage

  # Specify which files should be added to the gem when it is released.
  # The `git ls-files -z` loads the files in the RubyGem that have been added into git.
  spec.files = Dir.chdir(File.expand_path('..', __FILE__)) do
    `git ls-files -z`.split("\x0").reject { |f| f.match(%r{^(test|spec|features)/}) }
  end

  # The native extension is only declared on MRI 2.7 or newer. Versions are
  # compared as Gem::Version objects because comparing the raw strings is
  # lexicographic (for instance '10.0' < '2.7' as strings).
  #
  # NOTE(review): this condition runs on the Ruby that *builds* the gem, so a
  # package built on an older Ruby ships without the extension no matter which
  # Ruby later installs it. Confirm this is the intended behaviour.
  if RUBY_ENGINE == 'ruby' && Gem::Version.new(RUBY_VERSION) >= Gem::Version.new('2.7')
    spec.platform = Gem::Platform::RUBY
    spec.extensions = ['ext/dedup/extconf.rb']
  end

  spec.bindir        = "exe"
  spec.executables   = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
  spec.require_paths = ["lib"]
end
data/dev.yml ADDED
@@ -0,0 +1,20 @@
1
+ name: dedup
2
+
3
+ type: ruby
4
+
5
+ up:
6
+ - ruby: 2.7.1
7
+ - bundler
8
+
9
+ commands:
10
+ console:
11
+ desc: 'start a console'
12
+ run: bin/console
13
+ run:
14
+ desc: 'start the application'
15
+ run: bin/run
16
+ test:
17
+ syntax:
18
+ argument: file
19
+ optional: args...
20
+ run: bin/testunit
@@ -0,0 +1,111 @@
1
+ #include <ruby.h>
2
+ #include <ruby/st.h>
3
+ #include <stdbool.h>
4
+
5
+ #define RHASH_AR_TABLE_MAX_SIZE SIZEOF_VALUE
6
+
7
+ static ID id_uminus;
8
+ static VALUE empty_array, empty_hash;
9
+
10
+ static VALUE deep_intern(VALUE);
11
+
12
+ typedef struct {
13
+ bool changed;
14
+ long index;
15
+ VALUE *pairs;
16
+ } hash_iter_arg;
17
+
18
+ int
19
+ dedup_hash_iter_callback(VALUE key, VALUE value, VALUE arg)
20
+ {
21
+ hash_iter_arg *iter_arg = (hash_iter_arg *)arg;
22
+
23
+ VALUE new_key = deep_intern(key);
24
+ iter_arg->pairs[iter_arg->index] = new_key;
25
+ iter_arg->index++;
26
+ iter_arg->changed |= new_key != key;
27
+
28
+ VALUE new_value = deep_intern(value);
29
+ iter_arg->pairs[iter_arg->index] = new_value;
30
+ iter_arg->index++;
31
+ iter_arg->changed |= new_value != value;
32
+
33
+ return ST_CONTINUE;
34
+ }
35
+
36
+ static VALUE
37
+ deep_intern(VALUE data)
38
+ {
39
+ if (RB_SPECIAL_CONST_P(data)) {
40
+ return data;
41
+ }
42
+
43
+ int type = BUILTIN_TYPE(data);
44
+
45
+ if (RB_OBJ_FROZEN(data) && (type != T_STRING || FL_TEST(data, RSTRING_FSTR))) {
46
+ return data;
47
+ }
48
+
49
+ switch (type) {
50
+ case T_STRING:
51
+ return rb_funcall(rb_str_freeze(data), id_uminus, 0);
52
+ break;
53
+ case T_HASH:
54
+ {
55
+ long size = RHASH_SIZE(data);
56
+ if (size == 0) {
57
+ return empty_hash;
58
+ }
59
+
60
+ hash_iter_arg arg;
61
+ arg.changed = false;
62
+ arg.index = 0;
63
+ arg.pairs = alloca(sizeof(VALUE) * 2 * size);
64
+ rb_hash_foreach(data, dedup_hash_iter_callback, (VALUE)&arg);
65
+ if (arg.changed) {
66
+ rb_hash_clear(data);
67
+ rb_hash_bulk_insert(arg.index, arg.pairs, data);
68
+ }
69
+
70
+ rb_obj_freeze(data);
71
+ }
72
+ break;
73
+ case T_ARRAY:
74
+ {
75
+ long size = RARRAY_LEN(data);
76
+ if (size == 0) {
77
+ return empty_array;
78
+ }
79
+ for (long index = 0; index < size; index++) {
80
+ RARRAY_ASET(data, index, deep_intern(RARRAY_AREF(data, index)));
81
+ }
82
+ rb_obj_freeze(data);
83
+ }
84
+ break;
85
+ default:
86
+ rb_obj_freeze(data);
87
+ }
88
+ return data;
89
+ }
90
+
91
+ static VALUE
92
+ dedup_deep_intern_bang(VALUE self, VALUE data)
93
+ {
94
+
95
+ return deep_intern(data);
96
+ }
97
+
98
+ void
99
+ Init_dedup()
100
+ {
101
+ id_uminus = rb_intern("-@");
102
+
103
+ VALUE rb_mDedup = rb_const_get(rb_cObject, rb_intern("Dedup"));
104
+ empty_hash = rb_const_get(rb_mDedup, rb_intern("EMPTY_HASH"));
105
+ rb_global_variable(&empty_hash);
106
+ empty_array = rb_const_get(rb_mDedup, rb_intern("EMPTY_ARRAY"));
107
+ rb_global_variable(&empty_array);
108
+
109
+ VALUE rb_mNative = rb_define_module_under(rb_mDedup, "Native");
110
+ rb_define_method(rb_mNative, "deep_intern!", dedup_deep_intern_bang, 1);
111
+ }
@@ -0,0 +1,7 @@
1
+ # frozen_string_literal: true
2
+
3
require 'mkmf'

# Append to the compiler flags instead of replacing them, so the flags Ruby
# was configured with (and any flags supplied by the user) are preserved.
$CFLAGS << " -O3 -Wall"

create_makefile('dedup/dedup')
@@ -0,0 +1,63 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "dedup/version"
4
+
5
module Dedup
  # Shared frozen objects returned in place of any empty Hash / Array.
  EMPTY_HASH = {}.freeze
  EMPTY_ARRAY = [].freeze
  class Error < StandardError; end

  # Pure-Ruby implementation of deep interning.
  module Ruby
    extend self

    # Two variants of the same method are defined; they only differ in how a
    # String is interned, depending on the DEDUP_FROZEN_STRINGS feature probe.
    if DEDUP_FROZEN_STRINGS
      # Recursively freezes +data+ and returns the object to use in its place.
      #
      # * empty hashes and arrays are replaced by EMPTY_HASH / EMPTY_ARRAY
      # * already frozen hashes and arrays are returned untouched
      # * other hashes and arrays have their content interned in place and
      #   are then frozen
      # * strings are frozen and passed through String#-@
      # * any other object is frozen and returned
      def deep_intern!(data)
        case data
        when Hash
          return EMPTY_HASH if data.empty?
          return data if data.frozen?

          data.transform_keys! { |k| deep_intern!(k) }
          data.transform_values! { |v| deep_intern!(v) }
          data.freeze
        when Array
          return EMPTY_ARRAY if data.empty?
          return data if data.frozen?

          data.map! { |d| deep_intern!(d) }.freeze
        when String
          -data.freeze
        else
          data.freeze
        end
      end
    else
      # Same contract as the variant above. Strings go through +@ before -@
      # because String#-@ does not deduplicate frozen strings here.
      def deep_intern!(data)
        case data
        when Hash
          # Empty collections are shared, consistently with the other variant.
          return EMPTY_HASH if data.empty?
          return data if data.frozen?

          data.transform_keys! { |k| deep_intern!(k) }
          data.transform_values! { |v| deep_intern!(v) }
          data.freeze
        when Array
          return EMPTY_ARRAY if data.empty?
          return data if data.frozen?

          data.map! { |d| deep_intern!(d) }.freeze
        when String
          -(+data)
        else
          data.freeze
        end
      end
    end
  end
end
56
+
57
# Pick the implementation exposed directly on Dedup: the native extension when
# it can be loaded, the pure-Ruby module when requiring it raises LoadError.
implementation =
  begin
    require "dedup/dedup"
    # Make deep_intern! callable on Dedup::Native itself.
    Dedup::Native.extend(Dedup::Native)
    Dedup::Native
  rescue LoadError
    Dedup::Ruby
  end

Dedup.extend(implementation)
@@ -0,0 +1,28 @@
1
+ # frozen_string_literal: true
2
+
3
module Dedup
  VERSION = "0.1.0"

  # True when String#-@ returns the very same object for a frozen string and
  # for an unfrozen string with the same content.
  DEDUP_FROZEN_STRINGS = begin
    sample = rand.to_s
    from_frozen = -sample.freeze
    from_unfrozen = -(+sample)
    from_frozen.equal?(from_unfrozen)
  end

  # True when two hashes given distinct unfrozen strings with the same content
  # as keys end up holding the very same key object.
  DEDUP_HASH_ASET = begin
    content = rand.to_s
    first = {}
    second = {}
    first[content.dup] = 1
    second[content.dup] = 1
    second.keys.first.equal?(first.keys.first)
  end

  # Same probe as DEDUP_HASH_ASET, with keys that are frozen before insertion.
  DEDUP_HASH_ASET_FROZEN = begin
    content = rand.to_s
    first = {}
    second = {}
    first[content.dup.freeze] = 1
    second[content.dup.freeze] = 1
    second.keys.first.equal?(first.keys.first)
  end
end
metadata ADDED
@@ -0,0 +1,64 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: dedup
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ platform: ruby
6
+ authors:
7
+ - Jean Boussier
8
+ autorequire:
9
+ bindir: exe
10
+ cert_chain: []
11
+ date: 2020-09-08 00:00:00.000000000 Z
12
+ dependencies: []
13
+ description: If your app keeps lots of static data in memory, such as i18n data or
14
+ large configurations, this can reduce memory retention.
15
+ email:
16
+ - jean.boussier@gmail.com
17
+ executables: []
18
+ extensions: []
19
+ extra_rdoc_files: []
20
+ files:
21
+ - ".github/workflows/ci.yml"
22
+ - ".gitignore"
23
+ - ".travis.yml"
24
+ - Gemfile
25
+ - Gemfile.lock
26
+ - LICENSE.txt
27
+ - README.md
28
+ - Rakefile
29
+ - benchmark/deep_intern.rb
30
+ - bin/console
31
+ - bin/setup
32
+ - dedup.gemspec
33
+ - dev.yml
34
+ - ext/dedup/dedup.c
35
+ - ext/dedup/extconf.rb
36
+ - lib/dedup.rb
37
+ - lib/dedup/version.rb
38
+ homepage: https://github.com/Shopify/dedup
39
+ licenses:
40
+ - MIT
41
+ metadata:
42
+ allowed_push_host: https://rubygems.org
43
+ homepage_uri: https://github.com/Shopify/dedup
44
+ source_code_uri: https://github.com/Shopify/dedup
45
+ post_install_message:
46
+ rdoc_options: []
47
+ require_paths:
48
+ - lib
49
+ required_ruby_version: !ruby/object:Gem::Requirement
50
+ requirements:
51
+ - - ">="
52
+ - !ruby/object:Gem::Version
53
+ version: 2.5.0
54
+ required_rubygems_version: !ruby/object:Gem::Requirement
55
+ requirements:
56
+ - - ">="
57
+ - !ruby/object:Gem::Version
58
+ version: '0'
59
+ requirements: []
60
+ rubygems_version: 3.0.2
61
+ signing_key:
62
+ specification_version: 4
63
+ summary: Fast object deduplication
64
+ test_files: []