dbd 0.0.8 → 0.0.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 8ceec5332dfdb3174ee303dcd09cf401186990c8
4
- data.tar.gz: a7e61a7b4acc1dcb9a966369ba1c4d8a853b5f9f
3
+ metadata.gz: 0b21e77e8316f18a011e2356b79a28a35e5dde7d
4
+ data.tar.gz: d8a0adeebbc7311a512ef08bfbc63902e16f2159
5
5
  SHA512:
6
- metadata.gz: f1da941433fd3f5dc0f077992bec69d6dc2a3dc761d52549934dd707e713d6317495ae54659e764c21bab079789ed3e74e260ff639bf237715f14222bc267e9b
7
- data.tar.gz: cfdd88e0c3ed9e012f354008ab32685c052fffef7897492d78663d2d243c76abd9ae0a3f3a0c2b8b4f5c54ab8371553a9f129436a36a25ea57e46b86d555d8ef
6
+ metadata.gz: c8e9c42c082ad4bdc8c6af283c9a73fad3c35a75e5ebace47aefbbe127cb0ee001ef066d17ef6e9b253dcd2a6311ccbb520810bdb061ab233c77950be7a99542
7
+ data.tar.gz: a63bb4462ebdc8fa4a6c2e0f3a1513146f7aac1e85a9d748c3126f5352464a613ee9bf956e27e0fad06edc4816e7226e78a37a1c5218196e6ea0f4ccf13db5e5
data/.gitignore CHANGED
@@ -15,3 +15,4 @@ spec/reports
15
15
  test/tmp
16
16
  test/version_tmp
17
17
  tmp
18
+ /data/foo.csv
@@ -4,6 +4,8 @@ script: "bundle exec rspec spec"
4
4
  rvm:
5
5
  - 2.0.0
6
6
  - 1.9.3
7
- - 1.9.2
8
7
  - jruby-19mode
9
8
  - jruby-head
9
+ branches:
10
+ only:
11
+ - master
@@ -43,3 +43,10 @@
43
43
 
44
44
  * bin/test_1.rb was used for first successful writing of 10M facts using
45
45
  ruby-1.9.3-p429 (will report issues on ruby-2.0.0-p195 and jruby-1.7.4)
46
+
47
+ 0.0.9 (30 June 2013)
48
+ =====
49
+
50
+ * new function graph#to_CSV_file
51
+ * bin/test_5.rb was used to write 10M facts using ruby-2.0.0, 1.9.3 and jruby-1.7.4
52
+ * jruby is 3 times faster, but has 10% more memory consumption
data/README.md CHANGED
@@ -44,7 +44,7 @@ Open Source [MIT]
44
44
 
45
45
  ## Installation
46
46
 
47
- $ gem install dbd # Ruby 1.9.2, 1.9.3, 2.0.x, jruby (see .travis.yml)
47
+ $ gem install dbd # Ruby 1.9.3, 2.0.x, jruby (see .travis.yml)
48
48
 
49
49
  ## Examples
50
50
 
@@ -118,6 +118,25 @@ puts imported_graph.map(&:short)
118
118
  # 5eb1ea27 : 3767c493 : todo:story : A long period of peace,_ that is a "bliss".
119
119
  ```
120
120
 
121
+ ## Performance tests on 10 M facts
122
+
123
+ In version 0.0.9 a number of test programs were added (e.g. ../bin/test_5.rb)
124
+ that were used to populate in memory and write to disk a data set with 10 M facts.
125
+
126
+ This function was tested on ruby-2.0.0, ruby-1.9.3 and jruby-1.7.4. The facts
127
+ had an approximate size of 250 Bytes each (80 Bytes object).
128
+
129
+ The time needed and memory size (RSS) for populating the in-memory dataset was:
130
+
131
+ 10 M facts (of 250 Bytes; 2.5 GB net data):
132
+
133
+ | ruby | time | memory (RSS) |
134
+ |------------|-------------| ------------:|
135
+ | ruby-1.9.3 | 863 seconds | 8.1 GB |
136
+ | ruby-2.0.0 | 862 seconds | 9.0 GB |
137
+ |jruby-1.7.4 | 345 seconds | 10.8 GB |
138
+
139
+
121
140
  [RDF]: http://www.w3.org/RDF/
122
141
  [Rationale]: http://github.com/petervandenabeele/dbd/blob/master/docs/rationale.md
123
142
  [MIT]: https://github.com/petervandenabeele/dbd/blob/master/LICENSE.txt
@@ -12,7 +12,7 @@ unless count > 0
12
12
  end
13
13
 
14
14
  filename = ARGV[1]
15
- unless filename.size > 0
15
+ unless filename
16
16
  puts "Give a 'filename' as second argument."
17
17
  exit(1)
18
18
  end
@@ -0,0 +1,11 @@
1
+ # encoding=us-ascii
2
+
3
+ # this is a test program for an exception that is thrown in JRuby
4
+ # see http://markmail.org/message/e2ote7rkwht2quel?q=list:org.codehaus.jruby.user
5
+
6
+ #row = "A" * 300 # does NOT fail with this value of `row`
7
+ row = "A" * 301
8
+ count = 5_000_000
9
+
10
+ csv_string = row * count
11
+ encoded_string = csv_string.encode("utf-8")
@@ -0,0 +1,32 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ # This is a test program for an issue with CSV.generate
4
+ # in ruby-2.0.0 and ruby-head, see http://bugs.ruby-lang.org/issues/8585
5
+
6
+ count = ARGV[0].to_i
7
+ unless count > 0
8
+ puts "Give a 'count' as first argument."
9
+ exit(1)
10
+ end
11
+
12
+ require 'csv'
13
+
14
+ row_data = [
15
+ "59ffbb3b-1e48-4c1f-81d8-d93afc84c966",
16
+ "2013-06-28 19:14:55.975000806 UTC",
17
+ "a11f290e-c441-41bc-8b8c-4e6c27b1b6fc",
18
+ "c73e6241-d46f-4952-8377-c11372346d15",
19
+ "test",
20
+ "BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB 0"]
21
+
22
+ puts "starting CSV.open"
23
+
24
+ start_time = Time.now
25
+
26
+ csv_string = CSV.generate(force_quotes: true) do |csv|
27
+ count.times do
28
+ csv << row_data
29
+ end
30
+ end
31
+
32
+ puts "CSV.open took #{Time.now - start_time} seconds"
@@ -0,0 +1,40 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ # This implementation now streams to disk.
4
+
5
+ FACTS_PER_RESOURCE = 1000
6
+
7
+ count = ARGV[0].to_i
8
+ unless count > 0
9
+ puts "Give a 'count' as first argument."
10
+ exit(1)
11
+ end
12
+
13
+ filename = ARGV[1]
14
+ unless filename
15
+ puts "Give a 'filename' as second argument."
16
+ exit(1)
17
+ end
18
+
19
+ require 'dbd'
20
+
21
+ start = Time.now
22
+
23
+ graph = Dbd::Graph.new
24
+
25
+ (0...count).each do |i|
26
+ provenance_resource = Dbd::ProvenanceResource.new
27
+ provenance_resource << Dbd::ProvenanceFact.new(predicate: "prov:test" , object: "A" * 10)
28
+
29
+ resource = Dbd::Resource.new(provenance_subject: provenance_resource.subject)
30
+ (0...FACTS_PER_RESOURCE).each do |j|
31
+ resource << Dbd::Fact.new(predicate: "test", object: "#{'B' * 80} #{i * FACTS_PER_RESOURCE + j}")
32
+ end
33
+
34
+ graph << provenance_resource << resource
35
+ puts ("added resource #{i} to the graph")
36
+ end
37
+
38
+ puts "Graph is ready (took #{Time.now - start}s), now starting the write to disk"
39
+
40
+ graph.to_CSV_file(filename)
@@ -0,0 +1,21 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ # This implementation streams from disk
4
+
5
+ filename = ARGV[0]
6
+ unless filename
7
+ puts "Give a 'filename' as argument."
8
+ exit(1)
9
+ end
10
+
11
+ require 'dbd'
12
+
13
+ start = Time.now
14
+
15
+ graph = File.open(filename) do |f|
16
+ Dbd::Graph.from_CSV(f)
17
+ end
18
+
19
+ puts "Graph is ready (took #{Time.now - start}s), now starting the write to disk"
20
+
21
+ puts "graph.size is #{graph.size}"
File without changes
@@ -156,7 +156,7 @@ module Dbd
156
156
  # Constructs a Fact or ProvenanceFact from a string values array
157
157
  # (e.g. pulled from a CSV row).
158
158
  #
159
- # @param [Array] values Required : the array with values, organized as in attributes
159
+ # @param [Array] string_values Required : the array with values, organized as in attributes
160
160
  # @return [Fact, ProvenanceFact] the constructed fact
161
161
  def self.from_string_values(string_values)
162
162
  string_hash = hash_from_values(string_values)
@@ -32,17 +32,25 @@ module Dbd
32
32
  #
33
33
  # @return [String] comma separated string with double quoted cells
34
34
  def to_CSV
35
- CSV.generate(force_quotes: true) do |csv|
36
- @internal_collection.each do |fact|
37
- csv << fact.values
38
- end
39
- end.encode("utf-8")
35
+ CSV.generate(csv_defaults) do |csv|
36
+ push_facts(csv)
37
+ end
38
+ end
39
+
40
+ ##
41
+ # Export the graph to a CSV file
42
+ #
43
+ # @param [String] filename the filename to stream the CSV to
44
+ def to_CSV_file(filename)
45
+ CSV.open(filename, 'w', csv_defaults) do |csv|
46
+ push_facts(csv)
47
+ end
40
48
  end
41
49
 
42
50
  ##
43
- # Import a graph from a CSV string.
51
+ # Import a graph from a CSV IO stream
44
52
  #
45
- # @param [String] csv a string that contains the CSV serialization
53
+ # @param [IO Stream] csv an IO Stream that contains the CSV serialization
46
54
  # @return [Graph] the imported graph
47
55
  def self.from_CSV(csv)
48
56
  new.tap do |graph|
@@ -63,5 +71,16 @@ module Dbd
63
71
  fact.time_stamp = TimeStamp.new(larger_than: newest_time_stamp) unless fact.time_stamp
64
72
  end
65
73
 
74
+ def csv_defaults
75
+ {force_quotes: true,
76
+ encoding: 'utf-8'}
77
+ end
78
+
79
+ def push_facts(target)
80
+ @internal_collection.each do |fact|
81
+ target << fact.values
82
+ end
83
+ end
84
+
66
85
  end
67
86
  end
@@ -18,14 +18,14 @@ module Dbd
18
18
  # Store a SecureRandom.uuid.
19
19
  # @return [void]
20
20
  def initialize
21
- @uuid = SecureRandom.uuid
21
+ @uuid = SecureRandom.uuid.encode('utf-8')
22
22
  end
23
23
 
24
24
  ##
25
25
  # The to_s of the uuid.
26
26
  # @return [String]
27
27
  def to_s
28
- @uuid.to_s
28
+ @uuid
29
29
  end
30
30
 
31
31
  end
@@ -1,3 +1,3 @@
1
1
  module Dbd
2
- VERSION = "0.0.8"
2
+ VERSION = "0.0.9"
3
3
  end
@@ -1,3 +1,4 @@
1
+ # encoding=utf-8
1
2
  module Factories
2
3
  module ProvenanceFact
3
4
 
@@ -1,3 +1,4 @@
1
+ # encoding=utf-8
1
2
  require 'spec_helper'
2
3
 
3
4
  module Dbd
@@ -10,6 +11,7 @@ module Dbd
10
11
  let(:provenance_facts) { Factories::Fact::Collection.provenance_facts(new_subject) }
11
12
  let(:provenance_fact_1) { provenance_facts.first }
12
13
  let(:fact_2_3) { Factories::Fact::Collection.fact_2_3(provenance_fact_1.subject) }
14
+ let(:fact_special_characters) { Factories::Fact::fact_with_special_chars(provenance_fact_1.subject, new_subject) }
13
15
 
14
16
  let(:subject_regexp) { Fact::Subject.regexp }
15
17
  let(:id_regexp) { Fact::ID.regexp }
@@ -157,5 +159,34 @@ module Dbd
157
159
  subject.to_CSV.lines.count.should == 6
158
160
  end
159
161
  end
162
+
163
+ describe "#to_CSV_file" do
164
+
165
+ before do
166
+ provenance_facts.each do |provenance_fact|
167
+ subject << provenance_fact
168
+ end
169
+ fact_2_3.each do |fact|
170
+ subject << fact
171
+ end
172
+ subject << fact_special_characters
173
+ end
174
+
175
+ it "has eight lines" do
176
+ filename = 'data/foo.csv'
177
+ subject.to_CSV_file(filename)
178
+ File.open(filename) do |f|
179
+ f.readlines.count.should == 8
180
+ end
181
+ end
182
+
183
+ it "reads back UTF-8 characters correctly" do
184
+ filename = 'data/foo.csv'
185
+ subject.to_CSV_file(filename)
186
+ File.open(filename) do |f|
187
+ f.readlines.detect{|l| l.match(%r{really with a comma, a double quote "" and a non-ASCII char éà Über.})}.should_not be_nil
188
+ end
189
+ end
190
+ end
160
191
  end
161
192
  end
@@ -10,6 +10,10 @@ module Dbd
10
10
  it ".new creates a new random UUID" do
11
11
  described_class.new.to_s.should match(UUID.regexp)
12
12
  end
13
+
14
+ it ".new creates a new random UUID with UTF-8 encoding" do
15
+ described_class.new.to_s.encoding.should == Encoding::UTF_8
16
+ end
13
17
  end
14
18
  end
15
19
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: dbd
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.8
4
+ version: 0.0.9
5
5
  platform: ruby
6
6
  authors:
7
7
  - Peter Vandenabeele
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2013-06-23 00:00:00.000000000 Z
11
+ date: 2013-06-30 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -141,6 +141,10 @@ email:
141
141
  - peter@vandenabeele.com
142
142
  executables:
143
143
  - test_1.rb
144
+ - test_3.rb
145
+ - test_4.rb
146
+ - test_5.rb
147
+ - test_6.rb
144
148
  extensions: []
145
149
  extra_rdoc_files: []
146
150
  files:
@@ -155,6 +159,11 @@ files:
155
159
  - README.md
156
160
  - Rakefile
157
161
  - bin/test_1.rb
162
+ - bin/test_3.rb
163
+ - bin/test_4.rb
164
+ - bin/test_5.rb
165
+ - bin/test_6.rb
166
+ - data/.gitkeep
158
167
  - dbd.gemspec
159
168
  - docs/rationale.md
160
169
  - docs/stories/001_create_a_fact.txt