dbd 0.0.12 → 0.0.13

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 49a3db0e5342a8c507f107cad3afd4ae3e21964c
4
- data.tar.gz: 0118bb6bddcdc103577080f5deddc42ff3d2d5d8
3
+ metadata.gz: b783665f6d90bb2b8e08d8f30b16b5ad93839442
4
+ data.tar.gz: 74a917c72399200d7a4c1ac257d2a3490cd7c38d
5
5
  SHA512:
6
- metadata.gz: 04d8a0525f3acf92a9203d63a846c829922f4363a65f7b1cfe900794f2a2ba0725de11a8a6cf330c3b6308a30e031c37d37052180c61adf99c8ab74d7b0a92f2
7
- data.tar.gz: 03087f0151bbbe1d1b8c7f7e6a9dd090122136d597453459a8ee96e082f741c9bf4d86970255bc40f93bdd317ab9a1a113d2b2f940b9358ebec5bff11bffae9b
6
+ metadata.gz: f55e7abd315c57ebd50b4bdba532ef36fdd2b352c29e0d7ae37e1aba68adbff061ca895b491807ec2a8926bf7eb9a6c30761a5ae2270997d66f344c87163ecae
7
+ data.tar.gz: 7229d01d566e2455b76e64b6bc07fbb908212bebf95d9f1ca18ebe94da76a802aceab78c53646ada5004c35937d5580c8312862e1d46af464f0a6e2555100c87
data/.gitignore CHANGED
@@ -16,3 +16,4 @@ test/tmp
16
16
  test/version_tmp
17
17
  tmp
18
18
  /data/foo.csv
19
+ /.idea
@@ -76,3 +76,12 @@
76
76
  * ProvenanceResource => Context
77
77
  * because the context of a fact is much more than the provenance
78
78
  (also visibility, encryption, license, ...)
79
+
80
+ 0.0.13 (7 Aug 2013)
81
+ ======
82
+
83
+ * text inside quotes in the CSV format now has newlines and backslashes escaped
84
+ * newline => '\n'
85
+ * backslash => '\\\\' (double backslash)
86
+ => now 1 fact is 1 "line" in the CSV file
87
+ (allowing file operations that are line oriented)
@@ -1,6 +1,32 @@
1
1
  #!/usr/bin/env ruby
2
2
 
3
3
  # This implementation now streams to disk.
4
+ #
5
+ # Some performance figures (ruby 2.0 on MacBook Pro)
6
+ # /Users/peter_v/dbd/bin $ time ./test_5.rb 100 t_5_100
7
+ # added resource 0 to the graph
8
+ # ...
9
+ # added resource 99 to the graph
10
+ # Graph is ready (took 4.285428s), now starting the write to disk
11
+ #
12
+ # real 0m8.515s
13
+ # user 0m8.331s
14
+ # sys 0m0.181s
15
+ # ...
16
+ # /Users/peter_v/dbd/bin $ time ./test_6.rb t_5_100
17
+ # Graph is ready (took 14.455278s).
18
+ # graph.size is 100100
19
+ #
20
+ # real 0m14.922s
21
+ # user 0m14.728s
22
+ # sys 0m0.189s
23
+ #
24
+ # From version 0.0.13 with newline escaping, the times went up:
25
+ # writing (test_5)
26
+ # real 0m11.656s
27
+ #
28
+ # reading back (test_6)
29
+ # real 0m18.442s
4
30
 
5
31
  FACTS_PER_RESOURCE = 1000
6
32
 
@@ -28,7 +54,7 @@ graph = Dbd::Graph.new
28
54
 
29
55
  resource = Dbd::Resource.new(context_subject: context.subject)
30
56
  (0...FACTS_PER_RESOURCE).each do |j|
31
- resource << Dbd::Fact.new(predicate: "test", object: "#{'B' * 80} #{i * FACTS_PER_RESOURCE + j}")
57
+ resource << Dbd::Fact.new(predicate: "test", object: "#{'B' * 75} #{i * FACTS_PER_RESOURCE + j} \n CD")
32
58
  end
33
59
 
34
60
  graph << context << resource
@@ -1,6 +1,8 @@
1
1
  #!/usr/bin/env ruby
2
2
 
3
3
  # This implementation streams from disk
4
+ #
5
+ # See test_5.rb for usage and basic performance test
4
6
 
5
7
  filename = ARGV[0]
6
8
  unless filename
@@ -0,0 +1,19 @@
1
+ 014_escape_newlines_in_csv
2
+
3
+ As a user of the system
4
+ I can manipulate CSV files that are written by Dbd with basic
5
+ UNIX command line tools (e.g. split by dates, merge, ...)
6
+ And the facts in that format are aligned by a newline
7
+ So, the newlines in the String object need to be escaped
8
+
9
+ * minimal profiling of write + read speed
10
+ * escape on writing, suggest:
11
+ * "\n" => "\\n" (a back slash and a letter n)
12
+ "C:\nuby" => "C:\\nuby"
13
+ * do we need to escape a "real" backslash (or backslash + n) now ??
14
+
15
+ * escape on reading, suggest:
16
+ * "\\n" => "\n" (a newline)
17
+ * what about a backslash + n ?
18
+
19
+ * validate that write + read performance is not degraded too much
@@ -0,0 +1,10 @@
1
+ 015_change_order_of_fields
2
+
3
+ As a user of the system
4
+ When playing with the files
5
+ I want to sort on time_stamp if needed
6
+
7
+ * have time_stamp first
8
+ * then the ID
9
+ * then the context_subject
10
+ * then subject, predicate, object
@@ -140,9 +140,12 @@ module Dbd
140
140
 
141
141
  ##
142
142
  # @return [Array] The 6 values of a Fact converted to a string.
143
- # This is similar to the 6 entries in the to_CSV mapping
143
+ # The individual strings are escaped:
144
+ # * newlines are escaped to '\n'
145
+ # This is used for the 6 entries in the to_CSV mapping.
146
+ #
144
147
  def string_values
145
- values.map(&:to_s)
148
+ values.map{ |value| escaped_string(value.to_s) }
146
149
  end
147
150
 
148
151
  ##
@@ -241,5 +244,11 @@ module Dbd
241
244
  end
242
245
  end
243
246
 
247
+ def escaped_string(string)
248
+ string.
249
+ gsub(%r{\\}, "\\\\\\\\"). # single \ => double \\
250
+ gsub(%r{\n}, '\n') # newline => \n
251
+ end
252
+
244
253
  end
245
254
  end
@@ -48,10 +48,23 @@ module Dbd
48
48
 
49
49
  private
50
50
 
51
+ def unescaped_string_values(string_values)
52
+ string_values.map{ |string_value| unescaped_string(string_value) }
53
+ end
54
+
55
+ def unescaped_string(string)
56
+ r = %r{(\\\\|\\n)}
57
+ repl = {
58
+ "\\\\" => "\\", # double backslash => single backslash
59
+ "\\n" => "\n"} # backslash n => newline
60
+ string.gsub(r, repl)
61
+ end
62
+
51
63
  def string_hash_from_values(string_values)
52
- attributes_strings_array = [top_class.attributes, string_values].transpose
64
+ unescaped_values = unescaped_string_values(string_values)
65
+ attributes_strings_array = [top_class.attributes, unescaped_values].transpose
53
66
  # Remove empty values (e.g. the context_subject for a ContextFact).
54
- attributes_strings_array.delete_if{|a,v| v.nil? || v == ''}
67
+ attributes_strings_array.delete_if{ |a, v| v == '' }
55
68
  Hash[attributes_strings_array]
56
69
  end
57
70
 
@@ -30,6 +30,9 @@ module Dbd
30
30
  ##
31
31
  # Export the graph to a CSV string
32
32
  #
33
+ # Newlines in the fields are escaped to "backslash n".
34
+ # Backslashes in the field are escape to "double backslash".
35
+ #
33
36
  # @return [String] comma separated string with double quoted cells
34
37
  def to_CSV
35
38
  CSV.generate(csv_defaults) do |csv|
@@ -40,6 +43,9 @@ module Dbd
40
43
  ##
41
44
  # Export the graph to a CSV file
42
45
  #
46
+ # Newlines in the fields are escaped to "backslash n".
47
+ # Backslashes in the fields are escaped to "double backslash".
48
+ #
43
49
  # @param [String] filename the filename to stream the CSV to
44
50
  def to_CSV_file(filename)
45
51
  CSV.open(filename, 'w', csv_defaults) do |csv|
@@ -50,6 +56,9 @@ module Dbd
50
56
  ##
51
57
  # Import a graph from a CSV IO stream
52
58
  #
59
+ # Tokens "backslash n" in the CSV fields will be unescaped to newlines.
60
+ # Tokens "double backslash" in the CSV fields will be unescaped to a single backslash.
61
+ #
53
62
  # @param [IO Stream] csv an IO Stream that contains the CSV serialization
54
63
  # @return [Graph] the imported graph
55
64
  def from_CSV(csv)
@@ -76,7 +85,7 @@ module Dbd
76
85
 
77
86
  def push_facts(target)
78
87
  @internal_collection.each do |fact|
79
- target << fact.values
88
+ target << fact.string_values
80
89
  end
81
90
  end
82
91
 
@@ -1,3 +1,3 @@
1
1
  module Dbd
2
- VERSION = "0.0.12"
2
+ VERSION = "0.0.13"
3
3
  end
@@ -39,6 +39,16 @@ module Dbd
39
39
  fact.string_values.should == string_values
40
40
  end
41
41
 
42
+ it 'converts a \n (backslash n, no newline) to newline' do
43
+ fact = described_class.from_string_values(string_values)
44
+ fact.object.should match(/\n/) # a newline
45
+ end
46
+
47
+ it 'converts a \\\\ (double backslash) into a single backslash' do
48
+ fact = described_class.from_string_values(string_values)
49
+ fact.object.should match(%r{[^\\]\\n}) # a single backslash followed by the letter n
50
+ end
51
+
42
52
  it 'calls validate_string_hash if options[:validate]' do
43
53
  described_class.should_receive(:validate_string_hash)
44
54
  described_class.from_string_values(string_values, validate: true)
@@ -61,11 +71,6 @@ module Dbd
61
71
  with_validation(string_values)
62
72
  end
63
73
 
64
- it 'for a nil context_subject (for context_facts)' do
65
- string_values[2] = nil
66
- with_validation(string_values)
67
- end
68
-
69
74
  it 'for an empty context_subject (for context_facts)' do
70
75
  string_values[2] = ''
71
76
  with_validation(string_values)
@@ -137,6 +137,10 @@ module Dbd
137
137
  it 'the second element (time_stamp) is a String' do
138
138
  full_fact.string_values[1].should be_a(String)
139
139
  end
140
+
141
+ it 'escapes a newline into \n and \n into \\\\n (two backslashes and a n)' do
142
+ full_fact.string_values[5].should == "Gandhi\\nKing\\\\n" # backslash newline
143
+ end
140
144
  end
141
145
 
142
146
  describe 'context_fact?' do
@@ -84,7 +84,7 @@ module Dbd
84
84
  resource << special_fact
85
85
  graph = described_class.new << resource
86
86
  csv = graph.to_CSV
87
- csv.should match(%r{A long story with a newline\nreally with a comma, a double quote "" and a non-ASCII char éà Über.})
87
+ csv.should match(%r{A long story with a newline\\nreally with a comma, a double quote "" and a non-ASCII char éà Über.})
88
88
  graph_from_CSV = described_class.new.from_CSV(csv)
89
89
  graph_from_CSV.first.should be_equivalent(graph.first)
90
90
  end
@@ -38,7 +38,7 @@ module Dbd
38
38
 
39
39
  describe 'with a single context_fact collection' do
40
40
  it 'has three logical lines (but one with embedded newline)' do
41
- subject.to_CSV.lines.count.should == 4
41
+ subject.to_CSV.lines.count.should == 3
42
42
  end
43
43
 
44
44
  it 'ends with a newline' do
@@ -79,7 +79,7 @@ module Dbd
79
79
 
80
80
  describe 'handles comma, double quote and newline correctly' do
81
81
  it 'has original_source with special characters and double quote escaped' do
82
- subject.to_CSV.should match(/"this has a comma , a newline \n and a double quote """/)
82
+ subject.to_CSV.should match(/"this has a comma , a newline \\n and a double quote """/)
83
83
  end
84
84
  end
85
85
  end
@@ -156,8 +156,8 @@ module Dbd
156
156
  end
157
157
  end
158
158
 
159
- it 'has six lines' do
160
- subject.to_CSV.lines.count.should == 6
159
+ it 'has 5 lines' do
160
+ subject.to_CSV.lines.count.should == 5
161
161
  end
162
162
  end
163
163
 
@@ -173,11 +173,11 @@ module Dbd
173
173
  subject << fact_special_characters
174
174
  end
175
175
 
176
- it 'has eight lines' do
176
+ it 'has six lines' do
177
177
  filename = 'data/foo.csv'
178
178
  subject.to_CSV_file(filename)
179
179
  File.open(filename) do |f|
180
- f.readlines.count.should == 8
180
+ f.readlines.count.should == 6
181
181
  end
182
182
  end
183
183
 
@@ -33,7 +33,7 @@ module Dbd
33
33
  duration = Time.now - start
34
34
  puts "\nDuration for inserting #{NUMBER_OF_FACTS} facts in the in-memory graph was #{duration*1000_000/NUMBER_OF_FACTS} us PER FACT"
35
35
  graph.size.should == 2 * NUMBER_OF_FACTS + 1
36
- duration.should < 0.000_15 * NUMBER_OF_FACTS
36
+ duration.should < 0.000_20 * NUMBER_OF_FACTS
37
37
  # typ. 37 us on Mac Ruby 2.0.0 (on 2013-05-15 over 15K iterations)
38
38
  # typ. 45 us on Mac Ruby 2.0.0 (on 2013-06-05 over 10K iterations)
39
39
  # typ. 47 us on Mac Ruby 2.0.0 (on 2013-06-21 over 10K iterations)
@@ -9,6 +9,9 @@ RSpec.configure do |config|
9
9
 
10
10
  config.order = 'random'
11
11
 
12
+ config.filter_run :focus => true
13
+ config.run_all_when_everything_filtered = true
14
+
12
15
  # exclude neo4j tests for now (not working on Travis)
13
16
  config.filter_run_excluding :neo4j => true
14
17
  config.filter_run_excluding :neo4j_performance => true
@@ -15,7 +15,7 @@ module TestFactories
15
15
  '40fab407-9b04-4a51-9a52-d978abfcbb1f',
16
16
  '2e9fbc87-2e94-47e9-a8fd-121cc4bc3e8f',
17
17
  'http://example.org/test/name',
18
- 'Gandhi']
18
+ "Gandhi\\nKing\\\\n"]
19
19
  end
20
20
 
21
21
  def self.fact_1(context_subject = nil)
@@ -29,7 +29,7 @@ module TestFactories
29
29
  factory_for.new(
30
30
  id: forced_id,
31
31
  predicate: 'http://example.org/test/name',
32
- object: 'Gandhi')
32
+ object: "Gandhi\nKing\\n") # newline and \n
33
33
  end
34
34
 
35
35
  def self.fact_with_special_chars(context_subject = nil, subject = nil)
@@ -76,7 +76,7 @@ module TestFactories
76
76
  context_subject: context_subject,
77
77
  subject: subject,
78
78
  predicate: 'http://example.org/test/name',
79
- object: 'European Union')
79
+ object: "\\n\n\\n\n\\\n\\\\\n\\\\\\\nEuropean\nUnion\\n")
80
80
  end
81
81
 
82
82
  def self.fact_with_newline(context_subject = nil, subject = nil)
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: dbd
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.12
4
+ version: 0.0.13
5
5
  platform: ruby
6
6
  authors:
7
7
  - Peter Vandenabeele
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2013-07-10 00:00:00.000000000 Z
11
+ date: 2013-08-07 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -179,6 +179,8 @@ files:
179
179
  - docs/stories/011_store_resource_in_graph.txt
180
180
  - docs/stories/012_provenance_fact_properties_from_provenance_ontology.txt
181
181
  - docs/stories/013_read_graph_from_CSV.txt
182
+ - docs/stories/014_escape_newlines_in_CSV.txt
183
+ - docs/stories/015_change_order_of_fields.txt
182
184
  - docs/test.rb
183
185
  - lib/dbd.rb
184
186
  - lib/dbd/context.rb