simple-data 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: ac192701653932fa65dfad509a17ff7b1e97ff530cf0d7adc7fced1c8a97474f
4
+ data.tar.gz: 28deb77f46642a96c789ed0b0e83ac9ecc069416bf57c4ecf452ca714ce57940
5
+ SHA512:
6
+ metadata.gz: 62e1ea2a685d816c61be2c4c9d0724a9bf7f9f34aa23341f94009c135704d453af9a7c00f8ff6c4b225f069e47682bc1557290540f622051fc7771bea6a30053
7
+ data.tar.gz: cb3f6b31f8dfeafe924eec3fb89f6672501363accfae6d8150471c111f53654a9c9e97b54f349ed15d43254634a7ce0f933fae9c8aec07434d931e89a183d404
data/README.md ADDED
@@ -0,0 +1,31 @@
1
+ This is the Ruby implementation of the [simple-data spec][sda].
2
+
3
+ # Example
4
+
5
+ ~~~ruby
6
+ require 'simple-data'
7
+
8
+ SimpleData.generate('spacecraft.sda',
9
+ [ [ :cstr, 'name', 'spacecraft' ],
10
+ [ :cstr, 'origin', 'serie or movie' ],
11
+ [ :f64, 'x' ],
12
+ [ :f64, 'y' ],
13
+ [ :f64, 'z' ] ],
14
+ tags: { author: "Stephane D'Alu",
15
+ url: "http://www.sdalu.com/",
16
+ license: "MIT" }) do |sda|
17
+ sda.put("Enterprise", "Star Trek", 1.0, 2.0, 3.0);
18
+ sda.put("Rocinante", "The Expanse", 1e6, 2e6, 3e6);
19
+ sda.put("Serenity", "Firefly", 0.0, 0.0, 0.0);
20
+ sda.put("Bebop", "Cowboy Bebop", 4.0, 6.8, 8.0);
21
+ end
22
+
23
+ SimpleData.open('spacecraft.sda') do |sda|
24
+ while row = sda.get do
25
+ puts row.inspect
26
+ end
27
+ end
28
+ ~~~
29
+
30
+ [sda]: https://gitlab.inria.fr/dalu/simple-data/
31
+
@@ -0,0 +1,107 @@
1
+
2
+ # Loading supported compressor
3
+ begin
4
+ require 'zstd-ruby'
5
+ require 'stringio'
6
+ rescue LoadError
7
+ return
8
+ end
9
+
10
+
11
# Optional Zstd compression layer for the data section of a simple-data
# file. The two wrappers below expose just the IO methods SimpleData
# uses (write/close and read/each_byte/eof?/close).
class SimpleData
  # Magic numbers: leading bytes of a compressed stream => compressor
  MAGIC = { "(\xB5/\xFD".b.freeze => :zstd
          }.freeze

  # Write-side wrapper: everything written is fed to a Zstd streaming
  # compressor and the compressed output is forwarded to the wrapped IO.
  class IOCompressedWrite
    # @param io    [#write, #close] destination for compressed bytes
    # @param write [Integer] number of uncompressed bytes to accumulate
    #                        before asking the compressor to flush
    def initialize(io, write: 16 * 1024)
      @io         = io
      @write_size = write
      @written    = 0
      @zstd       = Zstd::StreamingCompress.new
    end

    # Finish the compressed stream, write what remains and close the
    # wrapped IO.
    def close
      @io.write(@zstd.finish)
      @io.close
    end

    # Compress +data+.
    #
    # The data is always handed to the compressor first (it used to be
    # dropped whenever a flush was due), and the threshold is compared
    # against the byte counter itself (not Integer#size, which is the
    # byte width of the integer and never reaches the threshold).
    #
    # @param data [String] raw bytes
    # @return [Integer] number of uncompressed bytes accepted
    def write(data)
      @zstd << data
      @written += data.bytesize

      if @written > @write_size
        @io.write(@zstd.flush)
        @written = 0                  # start counting a new chunk
      end

      data.bytesize
    end
  end

  # Read-side wrapper: reads compressed chunks from the wrapped IO,
  # decompresses them into an in-memory buffer and serves reads from it.
  class IOCompressedRead
    # @param io   [#read, #tell, #seek, #eof?, #close] compressed source,
    #             positioned at the start of the compressed data
    # @param read [Integer] size of the compressed chunks fetched from +io+
    # @raise [RuntimeError] if the data doesn't start with the Zstd magic
    def initialize(io, read: 16 * 1024)
      @io        = io
      @sio       = StringIO.new
      @read_size = read

      # Peek at data for magic number (position is restored afterwards).
      # An empty stream gives nil, hence the #to_s; #b makes the
      # comparison with the binary magic independent of the IO encoding.
      pos  = io.tell
      head = io.read(MAGIC.keys.map(&:size).max).to_s.b
      io.seek(pos)

      # Magic number lookup
      type = MAGIC.find { |magic, _| head.start_with?(magic) }&.last

      # Sanity check
      raise "data is not Zstd compressed" if type != :zstd

      # Decoder
      zstd     = Zstd::StreamingDecompress.new
      @decoder = ->(str) { zstd.decompress(str) }
    end

    # @return [Boolean] true once both the compressed source and the
    #                   decompressed buffer are exhausted
    def eof?
      @io.eof? && @sio.eof?
    end

    # Close the wrapped IO.
    def close
      @io.close
    end

    # Read up to +size+ decompressed bytes.
    #
    # @param size [Integer]
    # @return [String, nil] the bytes read (fewer than +size+ only at the
    #                       end of the stream), or nil at end of stream
    def read(size)
      data = @sio.read(size)

      # End of buffer
      if data.nil?
        if cstr = @io.read(@read_size)
          # Refill buffer
          @sio.string = @decoder.call(cstr)
          read(size)
        else
          # End of stream
          nil
        end

      # Partial buffer
      elsif data.size < size
        # Force a new read (will trigger a refill)
        odata = read(size - data.size)
        odata.nil? ? data : (data + odata)

      # Full data
      else
        data
      end
    end

    # Iterate over the decompressed bytes, refilling the buffer from the
    # compressed source whenever it runs dry.
    #
    # @yield [Integer] each byte
    # @return [Enumerator] if no block is given
    def each_byte(&block)
      return to_enum(:each_byte) if block.nil?
      loop do
        @sio.each_byte(&block)
        break unless cstr = @io.read(@read_size)
        @sio.string = @decoder.call(cstr)
      end
    end
  end

end
@@ -0,0 +1,4 @@
1
class SimpleData
  # Gem version (frozen so the constant cannot be mutated in place)
  VERSION = '0.1.0'.freeze
end
4
+
@@ -0,0 +1,308 @@
1
# Reader / writer for simple-data files: a textual header (magic line,
# tags, field specification, free-form sections) followed by a binary
# data section holding fixed-layout records.
class SimpleData
  # Current file version
  FILE_VERSION  = 1

  # Various regex
  REGEX_MAGIC   = /\A# simple-data:(?<version>\d+)\s*\z/
  REGEX_SECTION = /\A# --<(?<section>[^>:]+)(?::(?<extra>[^>]+))?>--+\s*\z/
  REGEX_FIELD   = /\A\#\s* (?<type>\w+      ) \s*:\s*
                           (?<name>[\w\-.]+ )
                     \s* (?:\((?<desc>.*    )\))? \s*\z
                  /ix
  REGEX_TAG     = /\A@(?<tag>\w+)\s+(?<value>.*?)\s*\z/
  REGEX_EMPTY   = /\A#\s*\z/

  # Supported tags / sections / types
  TAGS     = %i(title summary author license url doi keywords).freeze
  SECTIONS = %i(spec description data).freeze
  TYPES    = %i(i8 i16 i32 i64 u8 u16 u32 u64 f32 f64
                cstr blob char bool).freeze

  # Error classes
  class Error < StandardError
  end
  class ParserError < Error
  end

  # Attributes
  attr_reader :version
  attr_reader :fields
  attr_reader :tags
  attr_reader :sections

  # @param io     [IO] stream positioned at the start of the records
  # @param fields [Array<Array>] field definitions: [type, name] or
  #                              [type, name, description]
  # @param mode   [Symbol] :create, :append (write only) or :read
  # @raise [Error] on unknown mode
  def initialize(io, fields, mode, version: FILE_VERSION,
                 tags: {}, sections: {})
    @io         = io
    @mode       = mode
    @fields     = fields
    @fields_key = fields.map {|(_, name)| name }
    @tags       = tags
    @sections   = sections
    @version    = version

    @read_ok, @write_ok =
      case mode
      when :create, :append then [ false, true  ]
      when :read            then [ true,  false ]
      else raise Error,
                 'mode must be one of (:create, :append, or :read)'
      end
  end


  # Write one record.
  #
  # Accepts the values as separate arguments, as a single Array, or as
  # a single Hash keyed by field name (String or Symbol keys).
  #
  # @raise [Error] if writing is not allowed, or the data doesn't match
  #                the field definition
  def put(*data)
    # Checking mode
    raise Error, "write is not allowed in #{@mode} mode" unless @write_ok

    if data.one? && (data.first.is_a?(Array) || data.first.is_a?(Hash))
      data = data.first
    end

    if data.size != @fields.size
      raise Error, 'dataset size doesn\'t match definition'
    end

    data = row_from_hash(data) if data.is_a?(Hash)

    record = @fields.each_with_index.map {|(type, name), i|
      value = data.fetch(i) { raise Error, "missing data (#{name})" }
      encode(type, name, value)
    }.join

    @io.write(record)
  end

  # Read the next record.
  #
  # @return [Array, nil] field values in definition order,
  #                      or nil at end of data
  # @raise [Error]       if reading is not allowed
  # @raise [ParserError] if the data ends in the middle of a record
  def get
    # Checking mode
    raise Error, "read is not allowed in #{@mode} mode" unless @read_ok

    # No-op if end of file
    return if @io.eof?

    # Retrieve data
    @fields.map {|(type)| decode(type) }
  end

  # Close the underlying stream.
  def close
    @io.close
  end

  # Generating file
  #
  # Writes the textual header to +file+ and returns a SimpleData opened
  # in :create mode. With a block, the instance is yielded and closed
  # when the block exits (the block value is returned).
  #
  # @param file     [String] path of the file to create
  # @param fields   [Array<Array>] field definitions
  # @param compress [Boolean] compress the data section
  # @raise [Error] if compression is requested but not available
  def self.generate(file, fields, compress = false,
                    tags: nil, sections: nil, &block)
    # Sanity check
    if compress && !const_defined?(:IOCompressedWrite)
      raise Error, 'compression not supported (add zstd-ruby gem)'
    end

    # Open file (binary mode: records are raw bytes)
    io = File.open(file, 'wb')

    # Magic string
    io.puts "# simple-data:#{FILE_VERSION}"

    # Tags
    if tags && !tags.empty?
      io.puts "#"
      tags.each do |name, value|
        io.puts "# @%-8s %s" % [ name, value ]
      end
    end

    # Spec
    io.puts "#"
    io.puts "# --<spec>--"
    maxlen = fields.map {|(_, name)| name.size }.max
    fields.each do |(type, name, desc)|
      if desc
        io.puts "# %-4s : %-*s (%s)" % [ type, maxlen, name, desc ]
      else
        io.puts "# %-4s : %s"        % [ type,         name       ]
      end
    end

    # Custom sections
    if sections && !sections.empty?
      io.puts "#"
      sections.each do |name, value|
        io.puts "# --<#{name}>--"
        value.split(/\r?\n/).each do |line|
          io.puts "# #{line}"
        end
      end
    end

    # Data
    io.puts "#"
    io.puts "# --<%s>--" % [ compress ? 'data:compressed' : 'data' ]

    # Deal with compression
    io = IOCompressedWrite.new(io) if compress

    # Instantiate SimpleData
    sda = self.new(io, fields, :create, tags: tags, sections: sections)
    block ? block.call(sda) : sda
  ensure
    if sda
      sda.close if block
    elsif io
      # Failure before the instance was handed out: don't leak the file
      io.close
    end
  end

  # Open file for reading (or appending records)
  #
  # With a block, the instance is yielded and closed when the block
  # exits (the block value is returned).
  #
  # @param file [String] path of an existing simple-data file
  # @param mode [Symbol] :read or :append
  # @raise [ArgumentError] on unknown mode
  # @raise [ParserError]   if the header is malformed
  # @raise [Error]         if the data is compressed and that is not
  #                        supported (missing gem, or :append mode)
  def self.open(file, mode = :read, &block)
    # Open file
    io = case mode
         when :read   then File.open(file, 'rb')
         when :append then File.open(file, 'r+b')
         else raise ArgumentError,
                    "mode must be one of :read, :append"
         end

    # Read textual information
    version = self.get_magic(io)
    fields, tags, sections, dataopt = self.get_metadata(io)

    # A plain "data" section carries no option at all
    dataopt = Array(dataopt)

    # Deal with compression
    if dataopt.include?(:compressed)
      unless const_defined?(:IOCompressedRead)
        raise Error, 'compression not supported (add zstd-ruby gem)'
      end
      if mode == :append
        raise Error, 'appending to compressed data is not supported'
      end
      io = IOCompressedRead.new(io)
    end

    # Appending: the header had to be parsed first, only now can we
    # move to the end of the existing records
    io.seek(0, :END) if mode == :append

    # Instantiate SimpleData
    sda = self.new(io, fields, mode, version: version,
                   tags: tags, sections: sections)
    block ? block.call(sda) : sda
  ensure
    if sda
      sda.close if block
    elsif io
      # Failure before the instance was handed out: don't leak the file
      io.close
    end
  end

  # NOTE: the two parsing helpers below were written after a bare
  #       `private`, which has no effect on `def self.` methods.
  #       They are kept public so that existing callers keep working.

  # Read and check the magic line.
  #
  # @return [String] file version, as found in the file
  # @raise [ParserError] if the first line is not a simple-data magic
  def self.get_magic(io)
    # IO#gets gives nil on an empty file (IO#readline would raise)
    unless m = REGEX_MAGIC.match(io.gets.to_s.chomp)
      raise ParserError, 'not a simple-data file'
    end
    m[:version]
  end

  # Parse the header, up to and including the data section marker.
  #
  # @return [Array] fields, tags, sections and the options of the data
  #                 section (nil if it has none)
  # @raise [ParserError] on malformed spec or tag
  def self.get_metadata(io)
    tags     = {}
    fields   = []
    sections = {}
    dataopts = nil

    # Retrieve meta data
    # (lines are consumed until the data section marker, whose options
    #  are captured on the way)
    meta = io.each_line.lazy.map {|l| l.chomp }.take_while {|l|
      ! ((m = REGEX_SECTION.match(l)) &&
         (m[:section] == 'data'     )).tap {|is_data|
        dataopts = m[:extra]&.split(',')&.map(&:to_sym) if is_data
      }
    }.drop_while {|l| l =~ REGEX_EMPTY }

    # Parse
    meta.slice_before {|l| REGEX_SECTION =~ l }.each do |grp|
      if m = REGEX_SECTION.match(grp.first)
        grp.shift
        case s = m[:section].to_sym
        # Extract spec
        when :spec
          fields = grp.reject {|l| l =~ REGEX_EMPTY }.map {|l|
            field = REGEX_FIELD.match(l)&.captures
            raise ParserError, "wrong spec" if field.nil?

            # Normalize
            t, n, d = field
            t = t.downcase.to_sym
            n .force_encoding('UTF-8')
            d&.force_encoding('UTF-8')

            # Sanity check
            if !TYPES.include?(t)
              raise ParserError, "unknown type (#{t})"
            end

            # Cleaned-up field description
            [ t, n, d ].compact
          }
        # Extract description
        else
          sections[s] = grp.join("\n")
        end
      else
        # Extract tags
        # (a tag value may continue on the following lines)
        tags = grp.map {|l| l.sub(/\A\#\s*/, '') }
                 .slice_before {|l| l =~ /\A@\w+/ }
                 .map {|g| g.join(' ') }
                 .map {|tagline|
                   REGEX_TAG.match(tagline)&.captures ||
                     raise(ParserError, "wrong tag")
                 }.reduce({}) { |obj, (t,v)|
                   obj.merge(t.to_sym => v) {|k, o, n| Array(o) + [ n ] }
                 }

        # Normalize tags
        tags[:keywords] = Array(tags[:keywords]).flat_map {|e|
          e.split(/\s*,\s*|\s+/).map(&:strip).uniq
        }

        # Tags sanitizing
        tags.each do |k, v|
          if !TAGS.include?(k)
            raise ParserError, "unknown tag (#{k})"
          end
        end
        tags.reject! {|k, v| v.nil? || v.empty? }

      end
    end

    [ fields, tags, sections, dataopts ]
  end

  private

  # Turn a Hash record into an Array ordered like the field definition.
  # Keys are matched on their string form, so Symbol keys are accepted.
  def row_from_hash(data)
    by_name = data.each_with_object({}) {|(k, v), h| h[k.to_s] = v }
    names   = @fields_key.map(&:to_s)

    if ! (by_name.keys - names).empty?
      raise Error, 'dataset key mismatch'
    end

    names.map {|n| by_name[n] }
  end

  # Binary encoding of a single value (little-endian numbers).
  def encode(type, name, d)
    case type
    when :i8   then [ d ].pack('c' )
    when :i16  then [ d ].pack('s<')
    when :i32  then [ d ].pack('l<')
    when :i64  then [ d ].pack('q<')
    when :u8   then [ d ].pack('C' )
    when :u16  then [ d ].pack('S<')
    when :u32  then [ d ].pack('L<')
    when :u64  then [ d ].pack('Q<')      # unsigned (was signed 'q<')
    when :f32  then [ d ].pack('e' )
    when :f64  then [ d ].pack('E' )
    when :cstr
      # The string is NUL-terminated on disk: an embedded NUL would
      # shift every following field when read back
      if d.is_a?(String) && d.include?("\0")
        raise Error, "NUL byte not allowed in cstr (#{name})"
      end
      [ d ].pack('Z*')
    when :blob then raise ParserError, 'not implemented'
    # One byte, given either as an Integer or as a String
    # (#get returns it as a 1-byte String)
    when :char then d.is_a?(Integer) ? [ d ].pack('c') : [ d ].pack('a')
    # Stored as the letter read back by #get ('c' only packs integers)
    when :bool then d ? 'T' : 'F'
    else raise Error, "unknown type (#{type})"
    end
  end

  # Read and decode a single value of the given type.
  def decode(type)
    case type
    when :i8   then read_bytes(1).unpack1('c' )
    when :i16  then read_bytes(2).unpack1('s<')
    when :i32  then read_bytes(4).unpack1('l<')
    when :i64  then read_bytes(8).unpack1('q<')
    when :u8   then read_bytes(1).unpack1('C' )
    when :u16  then read_bytes(2).unpack1('S<')
    when :u32  then read_bytes(4).unpack1('L<')
    when :u64  then read_bytes(8).unpack1('Q<')   # unsigned (was 'q<')
    when :f32  then read_bytes(4).unpack1('e' )
    when :f64  then read_bytes(8).unpack1('E' )
    when :cstr then @io.each_byte.lazy.take_while {|b| !b.zero? }
                                      .map {|b| b.chr }.to_a.join
    when :blob then raise ParserError, 'not implemented'
    when :char then read_bytes(1)
    when :bool then read_bytes(1) == 'T'
    end
  end

  # Read exactly +count+ bytes, failing on a truncated record instead
  # of handing nil (or a short string) to the decoder.
  def read_bytes(count)
    bytes = @io.read(count)
    if bytes.nil? || bytes.bytesize < count
      raise ParserError, 'unexpected end of data'
    end
    bytes
  end

end
306
+
307
+ require_relative 'simple-data/version'
308
+ require_relative 'simple-data/compression'
@@ -0,0 +1,29 @@
1
+ # -*- encoding: utf-8 -*-
2
+
3
+ require_relative 'lib/simple-data/version'
4
+
5
# Packaging description of the simple-data gem
Gem::Specification.new do |spec|
  # Identity
  spec.name    = 'simple-data'
  spec.version = SimpleData::VERSION

  # Presentation
  spec.summary     = "Simple Data (CSV alternative)"
  spec.description = <<~EOF

    An alternative to CSV format for storing data.
    Provides metadata and field description support, and has a
    reasonable hunger for disk space.

  EOF
  spec.homepage = 'https://gitlab.inria.fr/dalu/simple-data'
  spec.license  = 'MIT'

  # People
  spec.authors = [ "Stéphane D'Alu" ]
  spec.email   = [ 'stephane.dalu@insa-lyon.fr' ]

  # Packaged files: documentation, this gemspec, and the library
  spec.files = [ 'README.md', 'simple-data.gemspec', *Dir['lib/**/*.rb'] ]

  # Dependencies
  spec.add_dependency             'zstd-ruby'
  spec.add_development_dependency 'yard', '~>0'
  spec.add_development_dependency 'rake', '~>13'
end
metadata ADDED
@@ -0,0 +1,96 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: simple-data
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ platform: ruby
6
+ authors:
7
+ - Stéphane D'Alu
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2022-10-07 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: zstd-ruby
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - ">="
18
+ - !ruby/object:Gem::Version
19
+ version: '0'
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - ">="
25
+ - !ruby/object:Gem::Version
26
+ version: '0'
27
+ - !ruby/object:Gem::Dependency
28
+ name: yard
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - "~>"
32
+ - !ruby/object:Gem::Version
33
+ version: '0'
34
+ type: :development
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - "~>"
39
+ - !ruby/object:Gem::Version
40
+ version: '0'
41
+ - !ruby/object:Gem::Dependency
42
+ name: rake
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - "~>"
46
+ - !ruby/object:Gem::Version
47
+ version: '13'
48
+ type: :development
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - "~>"
53
+ - !ruby/object:Gem::Version
54
+ version: '13'
55
+ description: |2+
56
+
57
+ An alternative to CSV format for storing data.
58
+ Provides metadata and field description support, and has a
59
+ reasonable hunger for disk space.
60
+
61
+ email:
62
+ - stephane.dalu@insa-lyon.fr
63
+ executables: []
64
+ extensions: []
65
+ extra_rdoc_files: []
66
+ files:
67
+ - README.md
68
+ - lib/simple-data.rb
69
+ - lib/simple-data/compression.rb
70
+ - lib/simple-data/version.rb
71
+ - simple-data.gemspec
72
+ homepage: https://gitlab.inria.fr/dalu/simple-data
73
+ licenses:
74
+ - MIT
75
+ metadata: {}
76
+ post_install_message:
77
+ rdoc_options: []
78
+ require_paths:
79
+ - lib
80
+ required_ruby_version: !ruby/object:Gem::Requirement
81
+ requirements:
82
+ - - ">="
83
+ - !ruby/object:Gem::Version
84
+ version: '0'
85
+ required_rubygems_version: !ruby/object:Gem::Requirement
86
+ requirements:
87
+ - - ">="
88
+ - !ruby/object:Gem::Version
89
+ version: '0'
90
+ requirements: []
91
+ rubygems_version: 3.3.21
92
+ signing_key:
93
+ specification_version: 4
94
+ summary: Simple Data (CSV alternative)
95
+ test_files: []
96
+ ...