dbd 0.0.8 → 0.0.9

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 8ceec5332dfdb3174ee303dcd09cf401186990c8
4
- data.tar.gz: a7e61a7b4acc1dcb9a966369ba1c4d8a853b5f9f
3
+ metadata.gz: 0b21e77e8316f18a011e2356b79a28a35e5dde7d
4
+ data.tar.gz: d8a0adeebbc7311a512ef08bfbc63902e16f2159
5
5
  SHA512:
6
- metadata.gz: f1da941433fd3f5dc0f077992bec69d6dc2a3dc761d52549934dd707e713d6317495ae54659e764c21bab079789ed3e74e260ff639bf237715f14222bc267e9b
7
- data.tar.gz: cfdd88e0c3ed9e012f354008ab32685c052fffef7897492d78663d2d243c76abd9ae0a3f3a0c2b8b4f5c54ab8371553a9f129436a36a25ea57e46b86d555d8ef
6
+ metadata.gz: c8e9c42c082ad4bdc8c6af283c9a73fad3c35a75e5ebace47aefbbe127cb0ee001ef066d17ef6e9b253dcd2a6311ccbb520810bdb061ab233c77950be7a99542
7
+ data.tar.gz: a63bb4462ebdc8fa4a6c2e0f3a1513146f7aac1e85a9d748c3126f5352464a613ee9bf956e27e0fad06edc4816e7226e78a37a1c5218196e6ea0f4ccf13db5e5
data/.gitignore CHANGED
@@ -15,3 +15,4 @@ spec/reports
15
15
  test/tmp
16
16
  test/version_tmp
17
17
  tmp
18
+ /data/foo.csv
@@ -4,6 +4,8 @@ script: "bundle exec rspec spec"
4
4
  rvm:
5
5
  - 2.0.0
6
6
  - 1.9.3
7
- - 1.9.2
8
7
  - jruby-19mode
9
8
  - jruby-head
9
+ branches:
10
+ only:
11
+ - master
@@ -43,3 +43,10 @@
43
43
 
44
44
  * bin/test_1.rb was used for first successful writing of 10M facts using
45
45
  ruby-1.9.3-p429 (will report issues on ruby-2.0.0-p195 and jruby-1.7.4)
46
+
47
+ 0.0.9 (30 June 2013)
48
+ =====
49
+
50
+ * new function graph#to_CSV_file
51
+ * bin/test_5.rb was used to write 10M facts using ruby-2.0.0, 1.9.3 and jruby-1.7.4
52
+ * jruby is 3 times faster, but 10% more memory consumption
data/README.md CHANGED
@@ -44,7 +44,7 @@ Open Source [MIT]
44
44
 
45
45
  ## Installation
46
46
 
47
- $ gem install dbd # Ruby 1.9.2, 1.9.3, 2.0.x, jruby (see .travis.yml)
47
+ $ gem install dbd # Ruby 1.9.3, 2.0.x, jruby (see .travis.yml)
48
48
 
49
49
  ## Examples
50
50
 
@@ -118,6 +118,25 @@ puts imported_graph.map(&:short)
118
118
  # 5eb1ea27 : 3767c493 : todo:story : A long period of peace,_ that is a "bliss".
119
119
  ```
120
120
 
121
+ ## Performance tests on 10 M facts
122
+
123
+ In version 0.0.9 a number of test programs were added (e.g. ../bin/test_5.rb)
124
+ that were used to populate in memory and write to disk a data set with 10 M facts.
125
+
126
+ This function was tested on ruby-2.0.0, ruby-1.9.3 and jruby-1.7.4. The facts
127
+ had an approximate size of 250 Bytes each (80 Bytes object).
128
+
129
+ The time needed and memory size (RSS) for populating the in-memory dataset was:
130
+
131
+ 10 M facts (of 250 Bytes; 2.5 GB net data):
132
+
133
+ | ruby | time | memory (RSS) |
134
+ |------------|-------------| ------------:|
135
+ | ruby-1.9.3 | 863 seconds | 8.1 GB |
136
+ | ruby-2.0.0 | 862 seconds | 9.0 GB |
137
+ |jruby-1.7.4 | 345 seconds | 10.8 GB |
138
+
139
+
121
140
  [RDF]: http://www.w3.org/RDF/
122
141
  [Rationale]: http://github.com/petervandenabeele/dbd/blob/master/docs/rationale.md
123
142
  [MIT]: https://github.com/petervandenabeele/dbd/blob/master/LICENSE.txt
@@ -12,7 +12,7 @@ unless count > 0
12
12
  end
13
13
 
14
14
  filename = ARGV[1]
15
- unless filename.size > 0
15
+ unless filename
16
16
  puts "Give a 'filename' as second argument."
17
17
  exit(1)
18
18
  end
@@ -0,0 +1,11 @@
1
+ # encoding=us-ascii
2
+
3
+ # this is a test program for an exception that is thrown in JRuby
4
+ # see http://markmail.org/message/e2ote7rkwht2quel?q=list:org.codehaus.jruby.user
5
+
6
+ #row = "A" * 300 # does NOT fail with this value of `row`
7
+ row = "A" * 301
8
+ count = 5_000_000
9
+
10
+ csv_string = row * count
11
+ encoded_string = csv_string.encode("utf-8")
@@ -0,0 +1,32 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ # This is a test program for an issue with CSV.generate
4
+ # in ruby-2.0.0 and ruby-head, see http://bugs.ruby-lang.org/issues/8585
5
+
6
+ count = ARGV[0].to_i
7
+ unless count > 0
8
+ puts "Give a 'count' as first argument."
9
+ exit(1)
10
+ end
11
+
12
+ require 'csv'
13
+
14
+ row_data = [
15
+ "59ffbb3b-1e48-4c1f-81d8-d93afc84c966",
16
+ "2013-06-28 19:14:55.975000806 UTC",
17
+ "a11f290e-c441-41bc-8b8c-4e6c27b1b6fc",
18
+ "c73e6241-d46f-4952-8377-c11372346d15",
19
+ "test",
20
+ "BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB 0"]
21
+
22
+ puts "starting CSV.open"
23
+
24
+ start_time = Time.now
25
+
26
+ csv_string = CSV.generate(force_quotes: true) do |csv|
27
+ count.times do
28
+ csv << row_data
29
+ end
30
+ end
31
+
32
+ puts "CSV.open took #{Time.now - start_time} seconds"
@@ -0,0 +1,40 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ # This implementation now streams to disk.
4
+
5
+ FACTS_PER_RESOURCE = 1000
6
+
7
+ count = ARGV[0].to_i
8
+ unless count > 0
9
+ puts "Give a 'count' as first argument."
10
+ exit(1)
11
+ end
12
+
13
+ filename = ARGV[1]
14
+ unless filename
15
+ puts "Give a 'filename' as second argument."
16
+ exit(1)
17
+ end
18
+
19
+ require 'dbd'
20
+
21
+ start = Time.now
22
+
23
+ graph = Dbd::Graph.new
24
+
25
+ (0...count).each do |i|
26
+ provenance_resource = Dbd::ProvenanceResource.new
27
+ provenance_resource << Dbd::ProvenanceFact.new(predicate: "prov:test" , object: "A" * 10)
28
+
29
+ resource = Dbd::Resource.new(provenance_subject: provenance_resource.subject)
30
+ (0...FACTS_PER_RESOURCE).each do |j|
31
+ resource << Dbd::Fact.new(predicate: "test", object: "#{'B' * 80} #{i * FACTS_PER_RESOURCE + j}")
32
+ end
33
+
34
+ graph << provenance_resource << resource
35
+ puts ("added resource #{i} to the graph")
36
+ end
37
+
38
+ puts "Graph is ready (took #{Time.now - start}s), now starting the write to disk"
39
+
40
+ graph.to_CSV_file(filename)
@@ -0,0 +1,21 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ # This implementation streams from disk
4
+
5
+ filename = ARGV[0]
6
+ unless filename
7
+ puts "Give a 'filename' as argument."
8
+ exit(1)
9
+ end
10
+
11
+ require 'dbd'
12
+
13
+ start = Time.now
14
+
15
+ graph = File.open(filename) do |f|
16
+ Dbd::Graph.from_CSV(f)
17
+ end
18
+
19
+ puts "Graph is ready (took #{Time.now - start}s), now starting the write to disk"
20
+
21
+ puts "graph.size is #{graph.size}"
File without changes
@@ -156,7 +156,7 @@ module Dbd
156
156
  # Constructs a Fact or ProvenanceFact from a string values array
157
157
  # (e.g. pulled from a CSV row).
158
158
  #
159
- # @param [Array] values Required : the array with values, organized as in attributes
159
+ # @param [Array] string_values Required : the array with values, organized as in attributes
160
160
  # @return [Fact, ProvenanceFact] the constructed fact
161
161
  def self.from_string_values(string_values)
162
162
  string_hash = hash_from_values(string_values)
@@ -32,17 +32,25 @@ module Dbd
32
32
  #
33
33
  # @return [String] comma separated string with double quoted cells
34
34
  def to_CSV
35
- CSV.generate(force_quotes: true) do |csv|
36
- @internal_collection.each do |fact|
37
- csv << fact.values
38
- end
39
- end.encode("utf-8")
35
+ CSV.generate(csv_defaults) do |csv|
36
+ push_facts(csv)
37
+ end
38
+ end
39
+
40
+ ##
41
+ # Export the graph to a CSV file
42
+ #
43
+ # @param [String] filename the filename to stream the CSV to
44
+ def to_CSV_file(filename)
45
+ CSV.open(filename, 'w', csv_defaults) do |csv|
46
+ push_facts(csv)
47
+ end
40
48
  end
41
49
 
42
50
  ##
43
- # Import a graph from a CSV string.
51
+ # Import a graph from a CSV IO stream
44
52
  #
45
- # @param [String] csv a string that contains the CSV serialization
53
+ # @param [IO Stream] csv an IO Stream that contains the CSV serialization
46
54
  # @return [Graph] the imported graph
47
55
  def self.from_CSV(csv)
48
56
  new.tap do |graph|
@@ -63,5 +71,16 @@ module Dbd
63
71
  fact.time_stamp = TimeStamp.new(larger_than: newest_time_stamp) unless fact.time_stamp
64
72
  end
65
73
 
74
+ def csv_defaults
75
+ {force_quotes: true,
76
+ encoding: 'utf-8'}
77
+ end
78
+
79
+ def push_facts(target)
80
+ @internal_collection.each do |fact|
81
+ target << fact.values
82
+ end
83
+ end
84
+
66
85
  end
67
86
  end
@@ -18,14 +18,14 @@ module Dbd
18
18
  # Store a SecureRandom.uuid.
19
19
  # @return [void]
20
20
  def initialize
21
- @uuid = SecureRandom.uuid
21
+ @uuid = SecureRandom.uuid.encode('utf-8')
22
22
  end
23
23
 
24
24
  ##
25
25
  # The to_s of the uuid.
26
26
  # @return [String]
27
27
  def to_s
28
- @uuid.to_s
28
+ @uuid
29
29
  end
30
30
 
31
31
  end
@@ -1,3 +1,3 @@
1
1
  module Dbd
2
- VERSION = "0.0.8"
2
+ VERSION = "0.0.9"
3
3
  end
@@ -1,3 +1,4 @@
1
+ # encoding=utf-8
1
2
  module Factories
2
3
  module ProvenanceFact
3
4
 
@@ -1,3 +1,4 @@
1
+ # encoding=utf-8
1
2
  require 'spec_helper'
2
3
 
3
4
  module Dbd
@@ -10,6 +11,7 @@ module Dbd
10
11
  let(:provenance_facts) { Factories::Fact::Collection.provenance_facts(new_subject) }
11
12
  let(:provenance_fact_1) { provenance_facts.first }
12
13
  let(:fact_2_3) { Factories::Fact::Collection.fact_2_3(provenance_fact_1.subject) }
14
+ let(:fact_special_characters) { Factories::Fact::fact_with_special_chars(provenance_fact_1.subject, new_subject) }
13
15
 
14
16
  let(:subject_regexp) { Fact::Subject.regexp }
15
17
  let(:id_regexp) { Fact::ID.regexp }
@@ -157,5 +159,34 @@ module Dbd
157
159
  subject.to_CSV.lines.count.should == 6
158
160
  end
159
161
  end
162
+
163
+ describe "#to_CSV_file" do
164
+
165
+ before do
166
+ provenance_facts.each do |provenance_fact|
167
+ subject << provenance_fact
168
+ end
169
+ fact_2_3.each do |fact|
170
+ subject << fact
171
+ end
172
+ subject << fact_special_characters
173
+ end
174
+
175
+ it "has eight lines" do
176
+ filename = 'data/foo.csv'
177
+ subject.to_CSV_file(filename)
178
+ File.open(filename) do |f|
179
+ f.readlines.count.should == 8
180
+ end
181
+ end
182
+
183
+ it "reads back UTF-8 characters correctly" do
184
+ filename = 'data/foo.csv'
185
+ subject.to_CSV_file(filename)
186
+ File.open(filename) do |f|
187
+ f.readlines.detect{|l| l.match(%r{really with a comma, a double quote "" and a non-ASCII char éà Über.})}.should_not be_nil
188
+ end
189
+ end
190
+ end
160
191
  end
161
192
  end
@@ -10,6 +10,10 @@ module Dbd
10
10
  it ".new creates a new random UUID" do
11
11
  described_class.new.to_s.should match(UUID.regexp)
12
12
  end
13
+
14
+ it ".new creates a new random UUID with UTF-8 encoding" do
15
+ described_class.new.to_s.encoding.should == Encoding::UTF_8
16
+ end
13
17
  end
14
18
  end
15
19
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: dbd
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.8
4
+ version: 0.0.9
5
5
  platform: ruby
6
6
  authors:
7
7
  - Peter Vandenabeele
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2013-06-23 00:00:00.000000000 Z
11
+ date: 2013-06-30 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -141,6 +141,10 @@ email:
141
141
  - peter@vandenabeele.com
142
142
  executables:
143
143
  - test_1.rb
144
+ - test_3.rb
145
+ - test_4.rb
146
+ - test_5.rb
147
+ - test_6.rb
144
148
  extensions: []
145
149
  extra_rdoc_files: []
146
150
  files:
@@ -155,6 +159,11 @@ files:
155
159
  - README.md
156
160
  - Rakefile
157
161
  - bin/test_1.rb
162
+ - bin/test_3.rb
163
+ - bin/test_4.rb
164
+ - bin/test_5.rb
165
+ - bin/test_6.rb
166
+ - data/.gitkeep
158
167
  - dbd.gemspec
159
168
  - docs/rationale.md
160
169
  - docs/stories/001_create_a_fact.txt