cord 0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/README ADDED
@@ -0,0 +1,29 @@
1
+ Simple Concat Tree / Rope / Cord implementation for Ruby MRI
2
+ (c) 2010 Lourens Naudé (methodmissing)
3
+
4
+ http://github.com/methodmissing/cord
5
+
6
+ See http://www.cs.ubc.ca/local/reading/proceedings/spe91-95/spe/vol25/issue12/spe986.pdf and http://en.wikipedia.org/wiki/Rope_(computer_science) for context.
7
+
8
+ This library works with Ruby 1.8 and 1.9 and exposes the following API :
9
+
10
+ c = Cord.new('test')
11
+ c.depth #=> 1
12
+ c << 'cord'
13
+ c.depth #=> 2
14
+
15
+ c = Cord.new('aaaa')
16
+ c << 'bbbb'
17
+ c.to_s #=> 'aaaabbbb'
18
+
19
+ To run the test suite:
20
+
21
+ rake
22
+
23
+ Todo:
24
+
25
+ Tree rebalance / normalization
26
+ Exploit shared string / COW semantics further
27
+ Cord#each
28
+
29
+ Work in progress, thanks for watching!
@@ -0,0 +1,78 @@
1
+ #!/usr/bin/env rake
2
+ require 'rake/testtask'
3
+ require 'rake/clean'
4
# Put ./lib on the load path and record where the C extension sources live.
$LOAD_PATH.unshift(File.expand_path('lib'))
CORD_ROOT = 'ext/cord'

desc 'Default: test'
task :default => [:test]

# The test task loads the compiled extension from CORD_ROOT and preloads
# test/test.rb (via -rtest) before running every test/test_*.rb file.
desc 'Run cord tests.'
Rake::TestTask.new(:test) do |runner|
  runner.libs = [CORD_ROOT, 'test']
  runner.pattern = 'test/test_*.rb'
  runner.ruby_opts << '-rtest'
  runner.warning = true
  runner.verbose = true
end

# The extension has to be compiled before the tests can load it.
task :test => [:build]
20
+
21
# Tasks that generate the Makefile, compile the C extension and clean up
# the build products. All paths are relative to CORD_ROOT.
namespace :build do
  # RbConfig replaces the removed top-level `Config` alias.
  require 'rbconfig'

  file "#{CORD_ROOT}/cord.c"
  file "#{CORD_ROOT}/extconf.rb"

  # Regenerate the Makefile whenever the C source or extconf.rb changes.
  file "#{CORD_ROOT}/Makefile" => %W(#{CORD_ROOT}/cord.c #{CORD_ROOT}/extconf.rb) do
    Dir.chdir(CORD_ROOT) do
      ruby 'extconf.rb'
    end
  end

  desc "generate makefile"
  task :makefile => %W(#{CORD_ROOT}/Makefile #{CORD_ROOT}/cord.c)

  # Platform-specific shared-library extension reported by RbConfig.
  dlext = RbConfig::CONFIG['DLEXT']
  file "#{CORD_ROOT}/cord.#{dlext}" => %W(#{CORD_ROOT}/Makefile #{CORD_ROOT}/cord.c) do
    Dir.chdir(CORD_ROOT) do
      sh 'make' # TODO - is there a config for which make somewhere?
    end
  end

  desc "compile cord extension"
  task :compile => "#{CORD_ROOT}/cord.#{dlext}"

  # `make clean` only makes sense once a Makefile has been generated.
  # File.exist? replaces the removed File.exists? alias.
  task :clean do
    if File.exist?("#{CORD_ROOT}/Makefile")
      Dir.chdir(CORD_ROOT) do
        sh 'make clean'
      end
    end
  end

  CLEAN.include("#{CORD_ROOT}/Makefile")
  CLEAN.include("#{CORD_ROOT}/cord.#{dlext}")
end
52
+
53
# Hook the namespaced build tasks into the conventional top-level names.
task :clean => ['build:clean']

desc "compile"
task :build => ['build:compile']

# Installs the compiled extension through the generated Makefile.
task :install do
  Dir.chdir(CORD_ROOT) { sh 'sudo make install' }
end

desc "clean build install"
task :setup => ['clean', 'build', 'install']
66
+
67
# Both benchmark tasks need the compiled extension, hence the :build prerequisite.
desc 'Run benchmarks'
task :bench => :build do
  ruby "bench/cord.rb"
end

# Runs the quiz benchmark once per text class: first String, then Cord.
desc 'Run qsort benchmarks from RubyQuiz'
task :bench_qsort => :build do
  %w[String Cord].each do |text_class|
    ruby "bench/quiz.rb #{text_class}"
  end
end
@@ -0,0 +1,25 @@
1
+ $:.unshift "."
2
+ require 'benchmark'
3
+ require File.dirname(__FILE__) + '/../ext/cord/cord'
4
+
5
# Number of appends performed per benchmark report.
TESTS = 100_000

# Payloads of increasing size that are appended at random.
PAYLOADS = [
  ('a' * 5),
  ('b' * 10),
  ('c' * 20),
  ('d' * 40),
  ('e' * 80),
  ('f' * 120),
  ('g' * 200),
  ('h' * 300)
].freeze

string = ''
cord = Cord.new
ary = []

# Compare the cost of `<<` on a Cord, a String and an Array. The index is
# drawn from the whole PAYLOADS list; the previous hard-coded rand(6) never
# selected the two largest payloads.
Benchmark.bmbm do |results|
  results.report("[cord] obj << a") { TESTS.times { cord << PAYLOADS[rand(PAYLOADS.size)] } }
  results.report("[str] obj << a") { TESTS.times { string << PAYLOADS[rand(PAYLOADS.size)] } }
  results.report("[ary] obj << a") { TESTS.times { ary << PAYLOADS[rand(PAYLOADS.size)] } }
end
@@ -0,0 +1,52 @@
1
+ # from http://www.rubyquiz.com/quiz137.html
2
+ $:.unshift "."
3
+ require 'benchmark'
4
+ require File.dirname(__FILE__) + '/../ext/cord/cord'
5
+
6
+ #This code makes a String/Rope of CHUNCKS chunks of text.
7
+ #Each chunk is an 8 byte number followed by SIZE bytes of
8
+ #text. Initially the chunks are shuffled; the qsort method
9
+ #then sorts them into ascending order.
10
+ #
11
+ #pass the name of the class to use as a parameter
12
+ #ruby -r rope.rb this_file Rope
13
+
14
# Announce start-up, then resolve the class under test from the first
# command-line argument (String when no argument is given).
puts 'preparing data...'
class_name = ARGV.shift || :String
TextClass = Object.const_get(class_name)
16
+
17
# Quicksort over a text made of fixed-size records.
#
# Every record is an 8 character decimal key followed by SIZE characters.
# The first record is the pivot; the remaining records are split into those
# with a smaller key and the rest, each part is sorted recursively, and the
# result is assembled as less + pivot record + more. One "*" is printed per
# non-empty call as a progress marker.
#
# text - a TextClass-like object responding to length, slice and <<.
#
# Returns a new TextClass instance holding the records in ascending key order.
def qsort(text)
  return TextClass.new if text.length == 0

  record_size = 8 + SIZE
  pivot = text.slice(0, 8).to_s.to_i
  less = TextClass.new
  more = TextClass.new

  # Walk every record after the pivot, one record_size step at a time.
  (record_size...text.length).step(record_size) do |offset|
    key = text.slice(offset, 8).to_s.to_i
    bucket = key < pivot ? less : more
    bucket << text.slice(offset, record_size)
  end

  print "*"
  qsort(less) << text.slice(0, record_size) << qsort(more)
end
31
+
32
# Payload characters per record (each record also carries an 8 character key).
SIZE = 512 * 1024
# Number of records to build and sort.
CHUNCKS = 128
CHARS = %w[R O P E]

data = TextClass.new
# One random payload, shared by every record.
bulk_string =
  TextClass.new(Array.new(SIZE) { CHARS[rand(CHARS.size)] }.join)

puts 'Building Text...'
build = Benchmark.measure do
  # Exclusive range: exactly CHUNCKS records, keyed 0 up to CHUNCKS - 1, in
  # random order. The previous inclusive range built one record too many.
  (0...CHUNCKS).sort_by { rand }.each do |n|
    data << sprintf("%08i", n) << bulk_string
  end
  data.normalize if data.respond_to? :normalize
end

# Collect garbage from the build phase before timing the sort.
GC.start
sort = Benchmark.measure do
  puts "Sorting Text..."
  qsort(data)
  puts "\nEND"
end

puts "Build: #{build}Sort: #{sort}"
@@ -0,0 +1,14 @@
1
# Gem specification for the cord C extension.
Gem::Specification.new do |s|
  s.name = 'cord'
  s.version = '0.1'
  s.date = '2010-10-23'
  s.authors = ['Lourens Naudé']
  s.email = ['lourens@methodmissing.com']
  s.description = 'WIP implementation of a Concat Tree / Rope / Cord for Ruby MRI.'
  # Point at this project's repository (the README's URL), not at "rehash".
  s.homepage = 'http://github.com/methodmissing/cord'
  s.summary = 'WIP implementation of a Concat Tree / Rope / Cord for Ruby MRI.'
  # `extensions` is a list of extconf scripts.
  s.extensions = ['ext/cord/extconf.rb']
  s.files = Dir.glob("{ext,test,bench}/**/*") + %w[README Rakefile cord.gemspec]
  # has_rdoc= is a deprecated no-op setter and is omitted.
  s.extra_rdoc_files = Dir['ext/cord/*.c']
end
@@ -0,0 +1,180 @@
1
+ #include <ruby.h>
2
+ #include "cord.h"
3
+
4
+ static void rb_mark_cord_i(void *c)
5
+ {
6
+ CORD_WALK_GC(STRING_CORD_P(c), rb_mark_cord_i, rb_gc_mark_maybe, VALUE);
7
+ }
8
+
9
+ static void rb_mark_cord(RCord *c)
10
+ {
11
+ if (c) rb_mark_cord_i(c->cord);
12
+ }
13
+
14
+ static void rb_free_cord_i(void *c)
15
+ {
16
+ CORD_WALK_GC((!STRING_CORD_P(c)), rb_free_cord_i, xfree, void*);
17
+ }
18
+
19
+ static void rb_free_cord(RCord *c)
20
+ {
21
+ if (c){
22
+ rb_free_cord_i(c->cord);
23
+ xfree(c);
24
+ }
25
+ }
26
+
27
+ static VALUE rb_cord_alloc(VALUE klass)
28
+ {
29
+ RCord *cs = NULL;
30
+ VALUE c;
31
+ c = Data_Make_Struct(klass, RCord, rb_mark_cord, rb_free_cord, cs);
32
+ cs->len = 0;
33
+ cs->depth = 0;
34
+ cs->cord = EMPTY_CORD;
35
+ return c;
36
+ }
37
+
38
/*
 * Rebalancing hook applied to every tree produced by an append.
 * Currently a pass-through: the tree is handed back untouched.
 */
static void *rb_cord_balance(void *cord)
{
    void *balanced = cord;
    return balanced;
}
42
+
43
+ static char *rb_merge_cords(void *left, void *right)
44
+ {
45
+ char *cord = NULL;
46
+ long left_len, right_len, len;
47
+ left_len = CORD_LENGTH(left);
48
+ right_len = CORD_LENGTH(right);
49
+ len = left_len + right_len;
50
+ cord = ALLOC_N(char, len + 1);
51
+ memcpy(cord, RSTRING_PTR(left), left_len);
52
+ memcpy(cord + left_len, RSTRING_PTR(right), right_len);
53
+ cord[len] = '\0';
54
+ return cord;
55
+ }
56
+
57
+ static VALUE rb_new_flat_cord(void *left, void *right)
58
+ {
59
+ char *cord = NULL;
60
+ cord = rb_merge_cords(left, right);
61
+ return rb_str_new2(cord);
62
+ }
63
+
64
+ static void *rb_new_concat_cord(void * left, void * right){
65
+ RConcatCord *cord = NULL;
66
+ short int depth;
67
+ cord = ALLOC(RConcatCord);
68
+ cord->left = (void*)left;
69
+ cord->right = (void*)right;
70
+ cord->len = CORD_LENGTH(left) + RSTRING_LEN((VALUE)right);
71
+ depth = (CORD_DEPTH(right) > CORD_DEPTH(left)) ? CORD_DEPTH(right) : CORD_DEPTH(left);
72
+ cord->depth = depth + 1;
73
+ return cord;
74
+ }
75
+
76
+ static void *rb_append_cords(void * left, void *right)
77
+ {
78
+ RConcatCord *new_left = NULL;
79
+ RConcatCord *new_right = NULL;
80
+ if (CORD_LENGTH(left) + CORD_LENGTH(right) < STRING_CORD_THRESHOLD){
81
+ return (void *)rb_new_flat_cord(left, right);
82
+ }
83
+ if (!CONCAT_CORD_P(left) && CONCAT_CORD_P(right)){
84
+ new_right = CONCAT_CORD(right);
85
+ if (CORD_LENGTH(left) + CORD_LENGTH(new_right->left) < STRING_CORD_THRESHOLD){
86
+ return rb_new_concat_cord((void *)rb_new_flat_cord(left, new_right->left), new_right->right);
87
+ }
88
+ }
89
+ if (!CONCAT_CORD_P(right) && CONCAT_CORD_P(left)){
90
+ new_left = CONCAT_CORD(left);
91
+ if (CORD_LENGTH(right) + CORD_LENGTH(new_left->right) < STRING_CORD_THRESHOLD){
92
+ return rb_new_concat_cord(new_left->left, (void *)rb_new_flat_cord(new_left->right, right));
93
+ }
94
+ }
95
+ return rb_new_concat_cord(left, right);
96
+ }
97
+
98
+ static void concat_cord_to_s(VALUE buffer, void * cord)
99
+ {
100
+ if (EMPTY_CORD_P(cord) || STRING_CORD_P(cord)) return;
101
+ concat_cord_to_s(buffer, CONCAT_CORD(cord)->left);
102
+ concat_cord_to_s(buffer, CONCAT_CORD(cord)->right);
103
+ if (STRING_CORD_P(CONCAT_CORD(cord)->left)) rb_str_cat2(buffer, RSTRING_PTR(CONCAT_CORD(cord)->left));
104
+ rb_str_cat2(buffer, RSTRING_PTR(CONCAT_CORD(cord)->right));
105
+ }
106
+
107
+ static VALUE rb_cord_to_s(VALUE obj)
108
+ {
109
+ VALUE buffer;
110
+ RCord *c = GetCord(obj);
111
+ if (EMPTY_CORD_P(c->cord)) return rb_str_new2("");
112
+ if (STRING_CORD_P(c->cord)) return (VALUE)c->cord;
113
+ if (CONCAT_CORD_P(c->cord)){
114
+ buffer = rb_str_buf_new((long)CORD_LENGTH(c->cord));
115
+ concat_cord_to_s(buffer, c->cord);
116
+ return buffer;
117
+ }
118
+ return Qnil;
119
+ }
120
+
121
+ static VALUE rb_cord_append(VALUE obj, VALUE str)
122
+ {
123
+ RCord *c = GetCord(obj);
124
+ if (rb_obj_is_kind_of(str, rb_cCord)) str = rb_cord_to_s(str);
125
+ Check_Type(str, T_STRING);
126
+ str = rb_str_new4(str);
127
+ if (RSTRING_LEN(str) == 0) return Qnil;
128
+ if (EMPTY_CORD_P(c->cord)){
129
+ c->cord = (void *)str;
130
+ }else{
131
+ c->cord = rb_cord_balance(rb_append_cords((void *)c->cord, (void *)str));
132
+ }
133
+ return obj;
134
+ }
135
+
136
+ static VALUE rb_cord_length(VALUE obj)
137
+ {
138
+ RCord *c = GetCord(obj);
139
+ if (EMPTY_CORD_P(c->cord)) return INT2FIX(0);
140
+ return INT2FIX(CORD_LENGTH(c->cord));
141
+ }
142
+
143
+ static VALUE rb_cord_depth(VALUE obj)
144
+ {
145
+ RCord *c = GetCord(obj);
146
+ if (EMPTY_CORD_P(c->cord)) return INT2FIX(0);
147
+ return INT2FIX(CORD_DEPTH(c->cord));
148
+ }
149
+
150
+ static VALUE rb_cord_initialize(int argc, VALUE *argv, VALUE obj)
151
+ {
152
+ VALUE val;
153
+ RCord *c = GetCord(obj);
154
+ rb_scan_args(argc, argv, "01", &val);
155
+ if (!NIL_P(val)) rb_cord_append(obj, val);
156
+ return obj;
157
+ }
158
+
159
+ static VALUE
160
+ rb_cord_slice(int argc, VALUE *argv, VALUE obj)
161
+ {
162
+ VALUE str;
163
+ str = rb_cord_to_s(obj);
164
+ return rb_funcall2(str, rb_intern("slice"), argc, argv);
165
+ }
166
+
167
+ void
168
+ Init_cord()
169
+ {
170
+ rb_cCord = rb_define_class("Cord", rb_cObject);
171
+ rb_define_alloc_func(rb_cCord, rb_cord_alloc);
172
+
173
+ rb_define_method(rb_cCord, "initialize", rb_cord_initialize, -1);
174
+ rb_define_method(rb_cCord, "<<", rb_cord_append, 1);
175
+ rb_define_method(rb_cCord, "to_s", rb_cord_to_s, 0);
176
+ rb_define_method(rb_cCord, "to_str", rb_cord_to_s, 0);
177
+ rb_define_method(rb_cCord, "length", rb_cord_length, 0);
178
+ rb_define_method(rb_cCord, "depth", rb_cord_depth, 0);
179
+ rb_define_method(rb_cCord, "slice", rb_cord_slice, -1);
180
+ }
@@ -0,0 +1,35 @@
1
+ typedef struct {
2
+ long len;
3
+ short int depth;
4
+ void *left, *right;
5
+ } RConcatCord;
6
+
7
+ typedef struct {
8
+ long len;
9
+ long depth;
10
+ void *cord;
11
+ } RCord;
12
+
13
+ VALUE rb_cCord;
14
+
15
+ #define STRING_CORD_THRESHOLD 17
16
+ #define MAX_CORD_DEPTH 96
17
+
18
+ #define GetCord(obj) (Check_Type(obj, T_DATA), (RCord*)DATA_PTR(obj))
19
+
20
+ #define EMPTY_CORD NULL
21
+ #define STRING_CORD(c) RSTRING(c)
22
+ #define CONCAT_CORD(c) ((RConcatCord *)(c))
23
+ #define EMPTY_CORD_P(c) (!(c))
24
+ #define STRING_CORD_P(c) (BUILTIN_TYPE(c) == T_STRING)
25
+ #define CONCAT_CORD_P(c) (!EMPTY_CORD_P(c) && !(STRING_CORD_P(c)))
26
+ #define CORD_LENGTH(c) (STRING_CORD_P(c) ? RSTRING_LEN(c) : CONCAT_CORD(c)->len)
27
+ #define CORD_DEPTH(c) (STRING_CORD_P(c) ? 1 : CONCAT_CORD(c)->depth)
28
+ #define CORD_WALK_GC(cond, iter, func, cast) \
29
+ if (c == NULL) return; \
30
+ if (CONCAT_CORD_P(c)){ \
31
+ (iter)(CONCAT_CORD(c)->left); \
32
+ (iter)(CONCAT_CORD(c)->right); \
33
+ }else{ \
34
+ if((cond)) (func)((cast)c); \
35
+ }
@@ -0,0 +1,6 @@
1
+ require 'mkmf'
2
+
3
# Accept --with-cord-dir / --with-cord-include / --with-cord-lib options.
dir_config('cord')
have_func('rb_thread_blocking_region')

# -pedantic is a compiler warning flag, so it belongs in the C flags rather
# than in $defs, which holds preprocessor definitions.
$CFLAGS = "#{$CFLAGS} -pedantic"

create_makefile('cord')
@@ -0,0 +1,3 @@
1
+ $:.unshift "."
2
+ require 'ext/cord/cord'
3
+ require 'test/unit'
@@ -0,0 +1,71 @@
1
# Unit tests for the public Cord API: construction, <<, to_s, slice,
# length and depth.
class TestCord < Test::Unit::TestCase
  # Appending a String and appending another Cord both extend the contents.
  def test_append
    cord = Cord.new
    assert_equal('string', (cord << 'string').to_s)
    other = Cord.new('test')
    assert_equal('stringtest', (cord << other).to_s)
  end

  def test_to_s_empty_cord
    assert_equal('', Cord.new.to_s)
  end

  # Anything that is neither a String nor a Cord is rejected.
  def test_append_non_string
    cord = Cord.new
    assert_raises(TypeError) { cord << :symbol }
  end

  # slice accepts the same argument forms as String#slice.
  def test_slice
    cord = Cord.new('test')
    assert_equal('te', cord.slice(0..1))
    assert_equal('es', cord.slice(1, 2))
  end

  def test_length
    cord = Cord.new
    assert_equal(0, cord.length)
    cord << 'cord'
    assert_equal(4, cord.length)
    cord << 'test'
    assert_equal(8, cord.length)
  end

  # Depth is 0 when empty, 1 for a single string, and grows by one for each
  # long append.
  def test_depth
    cord = Cord.new
    assert_equal(0, cord.depth)
    pieces = ['aaaa', 'bbbbbbbbbbbbbbbbbbbbbbbbbbb', 'ccccccccccccccccccccccccccc']
    pieces.each_with_index do |piece, index|
      cord << piece
      assert_equal(index + 1, cord.depth)
    end
  end

  def test_initialize
    empty = Cord.new
    assert_instance_of(Cord, empty)
    assert_equal('', empty.to_s)
    seeded = Cord.new('test')
    assert_equal('test', seeded.to_s)
  end

  # After every append, to_s must return everything appended so far.
  def test_to_s
    cord = Cord.new
    steps = [
      ['aaaa', 'aaaa'],
      ['bbbb', 'aaaabbbb'],
      ['', 'aaaabbbb'],
      ['cccccccccccccccccccccccccccccccccccc',
       "aaaabbbbcccccccccccccccccccccccccccccccccccc"],
      ['ddddddddddddddddddddddddddddddddddddddd',
       'aaaabbbbccccccccccccccccccccccccccccccccccccddddddddddddddddddddddddddddddddddddddd'],
      ['eeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeee',
       'aaaabbbbccccccccccccccccccccccccccccccccccccdddddddddddddddddddddddddddddddddddddddeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeee'],
      ['ffff',
       'aaaabbbbccccccccccccccccccccccccccccccccccccdddddddddddddddddddddddddddddddddddddddeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeffff']
    ]
    steps.each do |piece, expected|
      cord << piece
      assert_equal(expected, cord.to_s)
    end
  end
end
metadata ADDED
@@ -0,0 +1,76 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: cord
3
+ version: !ruby/object:Gem::Version
4
+ hash: 9
5
+ prerelease: false
6
+ segments:
7
+ - 0
8
+ - 1
9
+ version: "0.1"
10
+ platform: ruby
11
+ authors:
12
+ - "Lourens Naud\xC3\xA9"
13
+ autorequire:
14
+ bindir: bin
15
+ cert_chain: []
16
+
17
+ date: 2010-10-23 00:00:00 +01:00
18
+ default_executable:
19
+ dependencies: []
20
+
21
+ description: WIP implementation of a Concat Tree / Rope / Cord for Ruby MRI.
22
+ email:
23
+ - lourens@methodmissing.com
24
+ executables: []
25
+
26
+ extensions:
27
+ - ext/cord/extconf.rb
28
+ extra_rdoc_files:
29
+ - ext/cord/cord.c
30
+ files:
31
+ - ext/cord/cord.c
32
+ - ext/cord/cord.h
33
+ - ext/cord/extconf.rb
34
+ - test/test.rb
35
+ - test/test_cord.rb
36
+ - bench/cord.rb
37
+ - bench/quiz.rb
38
+ - README
39
+ - Rakefile
40
+ - cord.gemspec
41
+ has_rdoc: true
42
+ homepage: http://github.com/methodmissing/rehash
43
+ licenses: []
44
+
45
+ post_install_message:
46
+ rdoc_options: []
47
+
48
+ require_paths:
49
+ - lib
50
+ required_ruby_version: !ruby/object:Gem::Requirement
51
+ none: false
52
+ requirements:
53
+ - - ">="
54
+ - !ruby/object:Gem::Version
55
+ hash: 3
56
+ segments:
57
+ - 0
58
+ version: "0"
59
+ required_rubygems_version: !ruby/object:Gem::Requirement
60
+ none: false
61
+ requirements:
62
+ - - ">="
63
+ - !ruby/object:Gem::Version
64
+ hash: 3
65
+ segments:
66
+ - 0
67
+ version: "0"
68
+ requirements: []
69
+
70
+ rubyforge_project:
71
+ rubygems_version: 1.3.7
72
+ signing_key:
73
+ specification_version: 3
74
+ summary: WIP implementation of a Concat Tree / Rope / Cord for Ruby MRI.
75
+ test_files: []
76
+