simple-data 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: ac192701653932fa65dfad509a17ff7b1e97ff530cf0d7adc7fced1c8a97474f
4
+ data.tar.gz: 28deb77f46642a96c789ed0b0e83ac9ecc069416bf57c4ecf452ca714ce57940
5
+ SHA512:
6
+ metadata.gz: 62e1ea2a685d816c61be2c4c9d0724a9bf7f9f34aa23341f94009c135704d453af9a7c00f8ff6c4b225f069e47682bc1557290540f622051fc7771bea6a30053
7
+ data.tar.gz: cb3f6b31f8dfeafe924eec3fb89f6672501363accfae6d8150471c111f53654a9c9e97b54f349ed15d43254634a7ce0f933fae9c8aec07434d931e89a183d404
data/README.md ADDED
@@ -0,0 +1,31 @@
1
+ This is the Ruby implementation of the [simple-data spec][sda].
2
+
3
+ # Example
4
+
5
+ ~~~ruby
6
+ require 'simple-data'
7
+
8
+ SimpleData.generate('spacecraft.sda',
9
+ [ [ :cstr, 'name', 'spacecraft' ],
10
+ [ :cstr, 'origin', 'serie or movie' ],
11
+ [ :f64, 'x' ],
12
+ [ :f64, 'y' ],
13
+ [ :f64, 'z' ] ],
14
+ tags: { author: "Stephane D'Alu",
15
+ url: "http://www.sdalu.com/",
16
+ license: "MIT" }) do |sda|
17
+ sda.put("Enterprise", "Star Trek", 1.0, 2.0, 3.0);
18
+ sda.put("Rocinante", "The Expanse", 1e6, 2e6, 3e6);
19
+ sda.put("Serenity", "Firefly", 0.0, 0.0, 0.0);
20
+ sda.put("Bebop", "Cowboy Bebop", 4.0, 6.8, 8.0);
21
+ end
22
+
23
+ SimpleData.open('spacecraft.sda') do |sda|
24
+ while row = sda.get do
25
+ puts row.inspect
26
+ end
27
+ end
28
+ ~~~
29
+
30
+ [sda]: https://gitlab.inria.fr/dalu/simple-data/
31
+
@@ -0,0 +1,107 @@
1
+
2
+ # Loading supported compressor
3
+ begin
4
+ require 'zstd-ruby'
5
+ require 'stringio'
6
+ rescue LoadError
7
+ return
8
+ end
9
+
10
+
11
class SimpleData
  # Magic numbers found at the very beginning of a compressed data
  # stream, mapped to the compressor that produced it.
  MAGIC = { "(\xB5/\xFD".b => :zstd }.freeze

  # Write-side compressed I/O wrapper.
  #
  # Everything given to {#write} is fed to a Zstd streaming compressor;
  # the compressed output is pushed to the underlying IO each time more
  # than +write+ uncompressed bytes have been accumulated, and one last
  # time on {#close}.
  class IOCompressedWrite
    # @param io    [#write, #close] destination for the compressed bytes
    # @param write [Integer] number of uncompressed bytes to accumulate
    #                        before flushing the compressor to +io+
    def initialize(io, write: 16 * 1024)
      @io         = io
      @write_size = write
      @written    = 0
      @zstd       = Zstd::StreamingCompress.new
    end

    # Terminate the compressed stream, write what is left and close
    # the underlying IO.
    def close
      @io.write(@zstd.finish)
      @io.close
    end

    # Compress +data+.
    #
    # The data is always handed to the compressor first, so nothing is
    # lost when the flush threshold is crossed.
    #
    # @param data [String] raw bytes to compress
    # @return [Integer] number of (uncompressed) bytes accepted
    def write(data)
      @zstd << data
      @written += data.bytesize

      # Compare the byte counter itself (not Integer#size, which is the
      # width of the machine representation of the counter)
      if @written > @write_size
        @io.write(@zstd.flush)
        @written = 0
      end

      data.bytesize
    end
  end

  # Read-side compressed I/O wrapper.
  #
  # Compressed chunks of +read+ bytes are pulled from the underlying IO,
  # decompressed, and served from an in-memory buffer.
  class IOCompressedRead
    # @param io   [#read, #tell, #seek, #eof?, #close] compressed source,
    #             positioned at the beginning of the compressed stream
    # @param read [Integer] size of the compressed chunks to pull
    # @raise [RuntimeError] if the stream doesn't start with a Zstd magic
    def initialize(io, read: 16 * 1024)
      @io        = io
      @sio       = StringIO.new
      @read_size = read

      # Peek at data for magic number (the position is restored)
      pos = io.tell
      max = MAGIC.keys.map(&:bytesize).max
      str = io.read(max)
      io.seek(pos)

      # Magic number lookup (str is nil for an empty stream)
      type = str && MAGIC.find {|k, _| str.start_with?(k) }&.last

      # Sanity check
      raise "data is not Zstd compressed" if type != :zstd

      # Decoder
      zstd     = Zstd::StreamingDecompress.new
      @decoder = ->(cstr) { zstd.decompress(cstr) }
    end

    # @return [Boolean] true once both the compressed source and the
    #                   decompressed buffer are exhausted
    def eof?
      @io.eof? && @sio.eof?
    end

    # Close the underlying IO.
    def close
      @io.close
    end

    # Read up to +size+ decompressed bytes.
    #
    # @param size [Integer] number of bytes wanted
    # @return [String, nil] the bytes read (fewer than +size+ only if the
    #                       stream ended), or nil if the stream was
    #                       already at its end
    def read(size)
      out       = nil
      remaining = size

      loop do
        chunk = @sio.read(remaining)

        # End of buffer: refill it, or stop at end of stream
        if chunk.nil?
          cstr = @io.read(@read_size)
          break if cstr.nil?
          @sio.string = @decoder.call(cstr)
          next
        end

        out        = out ? (out << chunk) : chunk
        remaining -= chunk.bytesize
        break if remaining <= 0
      end

      out
    end

    # Iterate over every remaining decompressed byte.
    #
    # @yieldparam byte [Integer]
    # @return [Enumerator] if no block is given
    def each_byte(&block)
      return to_enum(:each_byte) if block.nil?
      loop do
        @sio.each_byte(&block)
        break unless (cstr = @io.read(@read_size))
        @sio.string = @decoder.call(cstr)
      end
    end
  end

end
@@ -0,0 +1,4 @@
1
class SimpleData
  # Version of the gem (frozen so the shared constant can't be mutated)
  VERSION = '0.1.0'.freeze
end
4
+
@@ -0,0 +1,308 @@
1
# Reader / writer for the simple-data file format: a textual header
# (magic line, tags, field specification, free-form sections) followed
# by a binary data section made of fixed-layout records.
class SimpleData
  # Current file version
  FILE_VERSION  = 1

  # Various regex
  REGEX_MAGIC   = /\A# simple-data:(?<version>\d+)\s*\z/
  REGEX_SECTION = /\A# --<(?<section>[^>:]+)(?::(?<extra>[^>]+))?>--+\s*\z/
  REGEX_FIELD   = /\A\#\s* (?<type>\w+ ) \s*:\s*
                      (?<name>[\w\-.]+)
                      \s* (?:\((?<desc>.* )\))? \s*\z
                  /ix
  REGEX_TAG     = /\A@(?<tag>\w+)\s+(?<value>.*?)\s*\z/
  REGEX_EMPTY   = /\A#\s*\z/

  # Supported tags / sections / types
  TAGS     = %i(title summary author license url doi keywords).freeze
  SECTIONS = %i(spec description data).freeze
  TYPES    = %i(i8 i16 i32 i64 u8 u16 u32 u64 f32 f64
                cstr blob char bool).freeze

  # Pack directive and byte width of the fixed-size numeric types
  # (all multi-byte values are little-endian)
  NUMERIC_FORMATS = {
    i8:  [ 'c',  1 ], i16: [ 's<', 2 ], i32: [ 'l<', 4 ], i64: [ 'q<', 8 ],
    u8:  [ 'C',  1 ], u16: [ 'S<', 2 ], u32: [ 'L<', 4 ], u64: [ 'Q<', 8 ],
    f32: [ 'e',  4 ], f64: [ 'E',  8 ]
  }.freeze

  # Error classes
  class Error < StandardError
  end
  class ParserError < Error
  end

  # Attributes
  attr_reader :version
  attr_reader :fields
  attr_reader :tags
  attr_reader :sections

  # @param io       [#read, #write, #eof?, #each_byte, #close]
  #                 stream positioned at the beginning of the data
  # @param fields   [Array<Array>] field definitions: [type, name, desc?]
  # @param mode     [:create, :append, :read]
  # @param version  [Integer] file format version
  # @param tags     [Hash, nil]
  # @param sections [Hash, nil]
  # @raise [Error] if mode is not supported
  def initialize(io, fields, mode, version: FILE_VERSION,
                 tags: {}, sections: {})
    @read_ok, @write_ok =
      case mode
      when :create, :append then [ false, true  ]
      when :read            then [ true,  false ]
      else raise Error,
                 'mode must be one of (:create, :append, or :read)'
      end

    @io         = io
    @mode       = mode
    @fields     = fields
    @fields_key = fields.map {|(_, name)| name }
    @tags       = tags     || {}
    @sections   = sections || {}
    @version    = version
  end


  # Append one record.
  #
  # The record is given as a list of values, a single Array, or a single
  # Hash keyed by field name (String or Symbol).
  #
  # @raise [Error] if not writable, or if the record doesn't match the
  #                field definition
  # @return whatever the underlying IO returns from #write
  def put(*data)
    # Checking mode
    raise Error, "write is not allowed in #{@mode} mode" unless @write_ok

    if data.one? && (data.first.is_a?(Array) || data.first.is_a?(Hash))
      data = data.first
    end

    if data.size != @fields.size
      raise Error, 'dataset size doesn\'t match definition'
    end

    data = values_from_hash(data) if data.is_a?(Hash)

    record = @fields.each_with_index.map {|(type, _), i|
      encode(type, data[i])
    }.join

    @io.write(record)
  end

  # Read the next record.
  #
  # @raise [Error] if not readable
  # @raise [ParserError] if the data ends in the middle of a record
  # @return [Array, nil] one value per field, or nil at end of data
  def get
    # Checking mode
    raise Error, "read is not allowed in #{@mode} mode" unless @read_ok

    # No-op if end of file
    return if @io.eof?

    # Retrieve data
    @fields.map {|field| decode(field.first) }
  end

  # Close the underlying stream.
  def close
    @io.close
  end

  # Generating file
  #
  # Writes the textual header, then either yields a writable instance
  # (closed when the block returns) or returns it (the caller closes it).
  #
  # @param file     [String] path of the file to create
  # @param fields   [Array<Array>] field definitions: [type, name, desc?]
  # @param compress [Boolean] compress the data section (needs zstd-ruby)
  # @param tags     [Hash, nil] tag name => value (or list of values)
  # @param sections [Hash, nil] section name => text
  # @raise [Error] if compression is requested but not available
  # @return the block result, or the SimpleData instance without block
  def self.generate(file, fields, compress = false,
                    tags: nil, sections: nil, &block)
    # Sanity check
    if compress && !const_defined?(:IOCompressedWrite)
      raise Error, 'compression not supported (add zstd-ruby gem)'
    end

    # Open file (binary: the data section must be written untouched)
    raw = File.open(file, 'wb')

    # Textual part
    write_header(raw, fields, compress, tags, sections)

    # Deal with compression
    io = compress ? IOCompressedWrite.new(raw) : raw

    # Instantiate SimpleData
    sda = self.new(io, fields, :create, tags: tags, sections: sections)
    block ? block.call(sda) : sda
  ensure
    if sda
      sda.close if block
    elsif raw
      # Failed before the instance took ownership of the file
      raw.close
    end
  end

  # Open file for reading (or appending)
  #
  # @param file [String] path of an existing simple-data file
  # @param mode [:read, :append]
  # @raise [ArgumentError] if mode is not supported
  # @raise [ParserError]   if the header is malformed
  # @raise [Error]         if the data is compressed and can't be handled
  # @return the block result, or the SimpleData instance without block
  def self.open(file, mode = :read, &block)
    # Open file
    fmode = case mode
            when :read   then 'rb'
            when :append then 'r+b'
            else raise ArgumentError,
                       "mode must be one of :read, :append"
            end
    raw = File.open(file, fmode)

    # Read textual information
    version = get_magic(raw)
    fields, tags, sections, dataopts = get_metadata(raw)

    # Deal with compression
    io = raw
    if dataopts.include?(:compressed)
      if mode == :append
        raise Error, 'appending to compressed data is not supported'
      end
      unless const_defined?(:IOCompressedRead)
        raise Error, 'compression not supported (add zstd-ruby gem)'
      end
      io = IOCompressedRead.new(raw)
    end

    # New records go after the existing ones (the header had to be
    # parsed first, so the move is done only now)
    raw.seek(0, IO::SEEK_END) if mode == :append

    # Instantiate SimpleData
    sda = self.new(io, fields, mode, version: version,
                   tags: tags, sections: sections)
    block ? block.call(sda) : sda
  ensure
    if sda
      sda.close if block
    elsif raw
      # Failed before the instance took ownership of the file
      raw.close
    end
  end

  # Internal helper: read and check the magic line.
  # (left public, as it always was callable from outside)
  #
  # @raise [ParserError] if the first line is not a simple-data magic
  # @return [Integer] file format version
  def self.get_magic(io)
    line = io.gets
    m    = line && REGEX_MAGIC.match(line.chomp)
    raise ParserError, 'not a simple-data file' if m.nil?
    m[:version].to_i
  end

  # Internal helper: parse the header up to the data section marker,
  # which is consumed, leaving +io+ at the first byte of data.
  # (left public, as it always was callable from outside)
  #
  # @raise [ParserError] if a field or tag line is malformed
  # @return [Array] fields, tags (Hash), sections (Hash) and the
  #                 options of the data section (Array of Symbol)
  def self.get_metadata(io)
    tags     = {}
    fields   = []
    sections = {}
    dataopts = []

    # Retrieve meta data
    meta = []
    io.each_line do |raw|
      line = raw.chomp
      m    = REGEX_SECTION.match(line)
      if m && m[:section] == 'data'
        dataopts = (m[:extra] || '').split(',').map {|o| o.strip.to_sym }
        break
      end
      meta << line
    end
    meta = meta.drop_while {|l| REGEX_EMPTY.match?(l) }

    # Parse
    meta.slice_before {|l| REGEX_SECTION.match?(l) }.each do |grp|
      m = REGEX_SECTION.match(grp.first)
      if m.nil?
        # Only the lines before the first section can land here
        tags = parse_tags(grp)
      elsif m[:section] == 'spec'
        fields = parse_spec(grp.drop(1))
      else
        sections[m[:section].to_sym] = section_text(grp.drop(1))
      end
    end

    [ fields, tags, sections, dataopts ]
  end

  # Write the textual header, including the data section marker.
  def self.write_header(io, fields, compress, tags, sections)
    # Magic string
    io.puts "# simple-data:#{FILE_VERSION}"

    # Tags (a list of values is written as one line per value)
    if tags && !tags.empty?
      io.puts "#"
      tags.each do |name, value|
        Array(value).each do |v|
          io.puts "# @%-8s %s" % [ name, v ]
        end
      end
    end

    # Spec
    io.puts "#"
    io.puts "# --<spec>--"
    maxlen = fields.map {|(_, name)| name.size }.max
    fields.each do |(type, name, desc)|
      if desc
        io.puts "# %-4s : %-*s (%s)" % [ type, maxlen, name, desc ]
      else
        io.puts "# %-4s : %s" % [ type, name ]
      end
    end

    # Custom sections
    if sections && !sections.empty?
      io.puts "#"
      sections.each do |name, value|
        io.puts "# --<#{name}>--"
        value.split(/\r?\n/).each do |line|
          io.puts "# #{line}"
        end
      end
    end

    # Data
    io.puts "#"
    io.puts "# --<%s>--" % [ compress ? 'data:compressed' : 'data' ]
  end

  # Turn the lines of the spec section into field definitions.
  def self.parse_spec(lines)
    lines.reject {|l| REGEX_EMPTY.match?(l) }.map {|l|
      m = REGEX_FIELD.match(l)
      raise ParserError, "wrong spec" if m.nil?

      # Normalize
      type = m[:type].downcase.to_sym
      name = m[:name] .force_encoding('UTF-8')
      desc = m[:desc]&.force_encoding('UTF-8')

      # Sanity check
      if !TYPES.include?(type)
        raise ParserError, "unknown type (#{type})"
      end

      # Cleaned-up field description
      [ type, name, desc ].compact
    }
  end

  # Turn the lines found before the first section into a tag Hash.
  # A tag given several times ends up with an Array of values.
  def self.parse_tags(lines)
    tags = lines.map {|l| l.sub(/\A\#\s*/, '') }
                .slice_before {|l| l =~ /\A@\w+/ }
                .map {|g| g.join(' ') }
                .each_with_object({}) {|tagline, acc|
      m = REGEX_TAG.match(tagline)
      raise ParserError, "malformed tag (#{tagline})" if m.nil?

      key      = m[:tag].to_sym
      value    = m[:value].force_encoding('UTF-8')
      acc[key] = acc.key?(key) ? Array(acc[key]) + [ value ] : value
    }

    # Tags sanitizing
    unknown = tags.keys.find {|k| !TAGS.include?(k) }
    raise ParserError, "unknown tag (#{unknown})" if unknown

    # Normalize tags
    tags[:keywords] = Array(tags[:keywords]).flat_map {|e|
      e.split(/\s*,\s*|\s+/)
    }.map(&:strip).reject(&:empty?).uniq

    tags.reject {|_, v| v.nil? || v.empty? }
  end

  # Rebuild the text of a custom section: the comment prefix written
  # by generate is removed, as well as the trailing separator lines.
  def self.section_text(lines)
    text = lines.map {|l| l.sub(/\A# ?/, '') }
    text.pop while text.last && text.last.strip.empty?
    text.join("\n").force_encoding('UTF-8')
  end

  private_class_method :write_header, :parse_spec, :parse_tags,
                       :section_text

  private

  # Order the values of a Hash record according to the field definition.
  def values_from_hash(data)
    row  = data.each_with_object({}) {|(k, v), h| h[k.to_s] = v }
    keys = @fields_key.map(&:to_s)

    if row.size != keys.size || !(row.keys - keys).empty?
      raise Error, 'dataset key mismatch'
    end

    keys.map {|k| row[k] }
  end

  # Binary representation of a value.
  def encode(type, value)
    if (fmt = NUMERIC_FORMATS[type])
      return [ value ].pack(fmt.first)
    end

    case type
    when :cstr then [ value ].pack('Z*')
    when :blob then raise ParserError, 'not implemented'
    # A char is read back as a 1-byte string, so accept that form
    # as well as the integer code
    when :char then value.is_a?(Integer) ? [ value      ].pack('c')
                                         : [ value.to_s ].pack('a')
    when :bool then value ? 'T' : 'F'
    else raise Error, "unknown type (#{type})"
    end
  end

  # Read the next value of the given type from the stream.
  def decode(type)
    if (fmt = NUMERIC_FORMATS[type])
      return read_exact(fmt.last).unpack1(fmt.first)
    end

    case type
    when :cstr then read_cstr
    when :blob then raise ParserError, 'not implemented'
    when :char then read_exact(1)
    when :bool then read_exact(1) == 'T'
    else raise Error, "unknown type (#{type})"
    end
  end

  # Read exactly +size+ bytes.
  def read_exact(size)
    buf = @io.read(size)
    if buf.nil? || buf.bytesize < size
      raise ParserError, 'unexpected end of data'
    end
    buf
  end

  # Read a NUL-terminated string (the terminator is consumed).
  def read_cstr
    bytes = []
    @io.each_byte do |b|
      break if b.zero?
      bytes << b
    end
    bytes.map(&:chr).join
  end

end
306
+
307
+ require_relative 'simple-data/version'
308
+ require_relative 'simple-data/compression'
@@ -0,0 +1,29 @@
1
+ # -*- encoding: utf-8 -*-
2
+
3
+ require_relative 'lib/simple-data/version'
4
+
5
# Gem packaging description
Gem::Specification.new do |s|
  s.name        = 'simple-data'
  s.version     = SimpleData::VERSION
  s.summary     = "Simple Data (CSV alternative)"
  s.description = <<~EOF

    An alternative to CSV format for storing data.
    Provides metadata and field description support, and has a
    reasonable hunger for disk space.

  EOF

  s.homepage    = 'https://gitlab.inria.fr/dalu/simple-data'
  s.license     = 'MIT'

  s.authors     = [ "Stéphane D'Alu" ]
  s.email       = [ 'stephane.dalu@insa-lyon.fr' ]

  s.files       = %w[ README.md simple-data.gemspec ] +
                  Dir['lib/**/*.rb']

  # The library uses String#unpack1 and a top-level `return`,
  # both of which need Ruby 2.4 or later
  s.required_ruby_version = '>= 2.4'

  s.add_dependency 'zstd-ruby'
  s.add_development_dependency 'yard', '~>0'
  s.add_development_dependency 'rake', '~>13'
end
metadata ADDED
@@ -0,0 +1,96 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: simple-data
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ platform: ruby
6
+ authors:
7
+ - Stéphane D'Alu
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2022-10-07 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: zstd-ruby
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - ">="
18
+ - !ruby/object:Gem::Version
19
+ version: '0'
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - ">="
25
+ - !ruby/object:Gem::Version
26
+ version: '0'
27
+ - !ruby/object:Gem::Dependency
28
+ name: yard
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - "~>"
32
+ - !ruby/object:Gem::Version
33
+ version: '0'
34
+ type: :development
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - "~>"
39
+ - !ruby/object:Gem::Version
40
+ version: '0'
41
+ - !ruby/object:Gem::Dependency
42
+ name: rake
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - "~>"
46
+ - !ruby/object:Gem::Version
47
+ version: '13'
48
+ type: :development
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - "~>"
53
+ - !ruby/object:Gem::Version
54
+ version: '13'
55
+ description: |2+
56
+
57
+ An alternative to CSV format for storing data.
58
+ Provides metadata and field description support, and has a
59
+ reasonable hunger for disk space.
60
+
61
+ email:
62
+ - stephane.dalu@insa-lyon.fr
63
+ executables: []
64
+ extensions: []
65
+ extra_rdoc_files: []
66
+ files:
67
+ - README.md
68
+ - lib/simple-data.rb
69
+ - lib/simple-data/compression.rb
70
+ - lib/simple-data/version.rb
71
+ - simple-data.gemspec
72
+ homepage: https://gitlab.inria.fr/dalu/simple-data
73
+ licenses:
74
+ - MIT
75
+ metadata: {}
76
+ post_install_message:
77
+ rdoc_options: []
78
+ require_paths:
79
+ - lib
80
+ required_ruby_version: !ruby/object:Gem::Requirement
81
+ requirements:
82
+ - - ">="
83
+ - !ruby/object:Gem::Version
84
+ version: '0'
85
+ required_rubygems_version: !ruby/object:Gem::Requirement
86
+ requirements:
87
+ - - ">="
88
+ - !ruby/object:Gem::Version
89
+ version: '0'
90
+ requirements: []
91
+ rubygems_version: 3.3.21
92
+ signing_key:
93
+ specification_version: 4
94
+ summary: Simple Data (CSV alternative)
95
+ test_files: []
96
+ ...