io-segmenter 0.7.2 → 0.7.5
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/io-segmenter.rb +4 -94
- data/lib/io-segmenter/core_ext/file.rb +2 -2
- data/lib/io-segmenter/core_ext/io.rb +2 -2
- data/lib/io-segmenter/core_ext/json.rb +6 -6
- data/lib/io-segmenter/parser.rb +78 -0
- data/lib/io-segmenter/reader.rb +27 -0
- data/lib/io-segmenter/writer.rb +24 -0
- metadata +5 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: eeada66a82cb120e8da0241a2847545e7579a2f297dd4e11647a15f118bb95e4
|
4
|
+
data.tar.gz: 128a87fe89da4ff96f55654aaf9da152b40dc3aca4d9be3d8ee76655bb4b76d6
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 281e984a75b8f827f7b631b19a26f6bb2522808e97a2727a07829e4d637afaf7949c659238b6311c1e85b3aa6c5a9e753b95b722eb8d5d905770935e2802e531
|
7
|
+
data.tar.gz: 9ecf8b025de77ca0e7ecfc870af16e0b57fd1b2ad8260a42967e8edea514b65e3315a608cfa350e68767b060f61f5b7acd379b20c7783b1db0a3c6b68b0c7b93
|
data/lib/io-segmenter.rb
CHANGED
@@ -1,97 +1,7 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
DEFAULT_READ_SIZE = 8192
|
8
|
-
|
9
|
-
attr_reader :buffer
|
10
|
-
|
11
|
-
def initialize(io, starting_char, ending_char, quote_char, escape_char, max_read_size=DEFAULT_READ_SIZE)
|
12
|
-
@io = io
|
13
|
-
@starting_char = starting_char
|
14
|
-
@ending_char = ending_char
|
15
|
-
@quote_char = quote_char
|
16
|
-
@escape_char = escape_char
|
17
|
-
@max_read_size = max_read_size
|
18
|
-
|
19
|
-
terms = [
|
20
|
-
@starting_char,
|
21
|
-
@ending_char,
|
22
|
-
@quote_char,
|
23
|
-
@escape_char
|
24
|
-
]
|
25
|
-
terms.compact!
|
26
|
-
terms.map! { |str| Regexp.escape(str) }
|
27
|
-
|
28
|
-
@search = Regexp.new('(:?' + terms.join('|') + ')')
|
29
|
-
@buffer = String.new
|
30
|
-
end
|
31
|
-
|
32
|
-
def each
|
33
|
-
until @io.eof?
|
34
|
-
unpack(@io.read(@max_read_size)) do |segment|
|
35
|
-
yield segment
|
36
|
-
end
|
37
|
-
end
|
38
|
-
end
|
39
|
-
|
40
|
-
def unpack(str)
|
41
|
-
@buffer << str
|
42
|
-
each_segment(@buffer) do |segment|
|
43
|
-
yield segment
|
44
|
-
end
|
45
|
-
end
|
46
|
-
|
47
|
-
def self.foreach(*args, &block)
|
48
|
-
new(*args).each(&block)
|
49
|
-
end
|
50
|
-
|
51
|
-
private
|
52
|
-
|
53
|
-
def each_segment(buffer)
|
54
|
-
return unless (start_offset = buffer.index(@starting_char))
|
55
|
-
|
56
|
-
brackets = 1
|
57
|
-
offset = start_offset
|
58
|
-
|
59
|
-
opened_quote = false
|
60
|
-
|
61
|
-
while (offset = buffer.index(@search, offset + 1))
|
62
|
-
case buffer[offset]
|
63
|
-
when @ending_char
|
64
|
-
next if opened_quote
|
65
|
-
brackets -= 1
|
66
|
-
when @starting_char
|
67
|
-
next if opened_quote
|
68
|
-
brackets += 1
|
69
|
-
when @quote_char
|
70
|
-
opened_quote = !opened_quote
|
71
|
-
next
|
72
|
-
when @escape_char
|
73
|
-
offset += @escape_char.size
|
74
|
-
next
|
75
|
-
else
|
76
|
-
if @ending_char == buffer[offset, @ending_char.size]
|
77
|
-
next if opened_quote
|
78
|
-
brackets -= 1
|
79
|
-
elsif @starting_char == buffer[offset, @starting_char.size]
|
80
|
-
next if opened_quote
|
81
|
-
brackets += 1
|
82
|
-
else
|
83
|
-
raise("unhandled offset #{offset}, at #{buffer[offset, 20]}...")
|
84
|
-
end
|
85
|
-
end
|
86
|
-
|
87
|
-
next unless brackets.zero?
|
88
|
-
|
89
|
-
len = (offset + @ending_char.size) - start_offset
|
90
|
-
yield buffer[start_offset, len]
|
91
|
-
buffer.slice!(0, offset + @ending_char.size)
|
92
|
-
return unless (start_offset = buffer.index(@starting_char))
|
93
|
-
offset = start_offset
|
94
|
-
brackets = 1
|
95
|
-
end
|
96
|
-
end
|
3
|
+
module IOSegmenter
|
4
|
+
autoload :Parser, 'io-segmenter/parser'
|
5
|
+
autoload :Reader, 'io-segmenter/reader'
|
6
|
+
autoload :Writer, 'io-segmenter/writer'
|
97
7
|
end
|
@@ -1,7 +1,7 @@
|
|
1
1
|
class File
|
2
|
-
def self.each_segment(file, *args, &block)
|
2
|
+
def self.each_segment(file, *args, max_read_size, &block)
|
3
3
|
handle = open(file, 'rb')
|
4
|
-
IOSegmenter.new(handle, *args).each(&block)
|
4
|
+
IOSegmenter::Reader.new(handle, IOSegmenter::Parser.new(*args), max_read_size).each(&block)
|
5
5
|
ensure
|
6
6
|
handle && handle.close
|
7
7
|
end
|
@@ -1,21 +1,21 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
3
|
module JSON
|
4
|
-
def self.each_object(io, max_read_size=
|
5
|
-
IOSegmenter.new(io, '{', '}', '"', '\\', max_read_size).each do |segement|
|
4
|
+
def self.each_object(io, max_read_size=nil, &block)
|
5
|
+
IOSegmenter::Reader.new(io, IOSegmenter::Parser.new('{', '}', '"', '\\'), max_read_size || IOSegmenter::Reader::DEFAULT_READ_SIZE).each do |segement|
|
6
6
|
yield parse(segement)
|
7
7
|
end
|
8
8
|
end
|
9
9
|
|
10
|
-
def self.each_string(io, max_read_size=
|
11
|
-
IOSegmenter.new(io, '"', '"', nil, '\\', max_read_size).each do |segement|
|
10
|
+
def self.each_string(io, max_read_size=nil)
|
11
|
+
IOSegmenter::Reader.new(io, IOSegmenter::Parser.new('"', '"', nil, '\\'), max_read_size || IOSegmenter::Reader::DEFAULT_READ_SIZE).each do |segement|
|
12
12
|
yield segement[1, segement.size-2]
|
13
13
|
end
|
14
14
|
end
|
15
15
|
|
16
|
-
def self.
|
16
|
+
def self.each_list(io, max_read_size=nil)
|
17
17
|
io.read(1)
|
18
|
-
IOSegmenter.new(io, '[', ']', '"', '\\', max_read_size).each do |segement|
|
18
|
+
IOSegmenter::Reader.new(io, IOSegmenter::Parser.new('[', ']', '"', '\\'), max_read_size || IOSegmenter::Reader::DEFAULT_READ_SIZE).each do |segement|
|
19
19
|
yield parse(segement)
|
20
20
|
end
|
21
21
|
end
|
@@ -0,0 +1,78 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module IOSegmenter
|
4
|
+
class Parser
|
5
|
+
attr_reader :buffer
|
6
|
+
|
7
|
+
def initialize(starting_char, ending_char, quote_char, escape_char)
|
8
|
+
@starting_char = starting_char
|
9
|
+
@ending_char = ending_char
|
10
|
+
@quote_char = quote_char
|
11
|
+
@escape_char = escape_char
|
12
|
+
|
13
|
+
terms = [
|
14
|
+
@starting_char,
|
15
|
+
@ending_char,
|
16
|
+
@quote_char,
|
17
|
+
@escape_char
|
18
|
+
]
|
19
|
+
terms.compact!
|
20
|
+
terms.map! { |str| Regexp.escape(str) }
|
21
|
+
|
22
|
+
@search = Regexp.new('(:?' + terms.join('|') + ')')
|
23
|
+
@buffer = String.new
|
24
|
+
end
|
25
|
+
|
26
|
+
def unpack(str)
|
27
|
+
@buffer << str
|
28
|
+
each_segment(@buffer) do |segment|
|
29
|
+
yield segment
|
30
|
+
end
|
31
|
+
end
|
32
|
+
|
33
|
+
def each_segment(buffer)
|
34
|
+
return unless (start_offset = buffer.index(@starting_char))
|
35
|
+
|
36
|
+
brackets = 1
|
37
|
+
offset = start_offset
|
38
|
+
|
39
|
+
opened_quote = false
|
40
|
+
|
41
|
+
while (offset = buffer.index(@search, offset + 1))
|
42
|
+
case buffer[offset]
|
43
|
+
when @ending_char
|
44
|
+
next if opened_quote
|
45
|
+
brackets -= 1
|
46
|
+
when @starting_char
|
47
|
+
next if opened_quote
|
48
|
+
brackets += 1
|
49
|
+
when @quote_char
|
50
|
+
opened_quote = !opened_quote
|
51
|
+
next
|
52
|
+
when @escape_char
|
53
|
+
offset += @escape_char.size
|
54
|
+
next
|
55
|
+
else
|
56
|
+
if @ending_char == buffer[offset, @ending_char.size]
|
57
|
+
next if opened_quote
|
58
|
+
brackets -= 1
|
59
|
+
elsif @starting_char == buffer[offset, @starting_char.size]
|
60
|
+
next if opened_quote
|
61
|
+
brackets += 1
|
62
|
+
else
|
63
|
+
raise("unhandled offset #{offset}, at #{buffer[offset, 20]}...")
|
64
|
+
end
|
65
|
+
end
|
66
|
+
|
67
|
+
next unless brackets.zero?
|
68
|
+
|
69
|
+
len = (offset + @ending_char.size) - start_offset
|
70
|
+
yield buffer[start_offset, len]
|
71
|
+
buffer.slice!(0, offset + @ending_char.size)
|
72
|
+
return unless (start_offset = buffer.index(@starting_char))
|
73
|
+
offset = start_offset
|
74
|
+
brackets = 1
|
75
|
+
end
|
76
|
+
end
|
77
|
+
end
|
78
|
+
end
|
@@ -0,0 +1,27 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module IOSegmenter
|
4
|
+
class Reader
|
5
|
+
include Enumerable
|
6
|
+
|
7
|
+
DEFAULT_READ_SIZE = 8192
|
8
|
+
|
9
|
+
def initialize(io, parser, max_read_size=DEFAULT_READ_SIZE)
|
10
|
+
@io = io
|
11
|
+
@parser = parser
|
12
|
+
@max_read_size = max_read_size
|
13
|
+
end
|
14
|
+
|
15
|
+
def each
|
16
|
+
until @io.eof?
|
17
|
+
@parser.unpack(@io.read(@max_read_size)) do |segment|
|
18
|
+
yield segment
|
19
|
+
end
|
20
|
+
end
|
21
|
+
end
|
22
|
+
|
23
|
+
def self.foreach(*args, &block)
|
24
|
+
new(*args).each(&block)
|
25
|
+
end
|
26
|
+
end
|
27
|
+
end
|
@@ -0,0 +1,24 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module IOSegmenter
|
4
|
+
# Used to add a separator after the first write call
|
5
|
+
class Writer
|
6
|
+
def self.write(io, header, footer, separator)
|
7
|
+
io.write(header) if header
|
8
|
+
yield new(io, separator)
|
9
|
+
io.write(footer) if footer
|
10
|
+
end
|
11
|
+
|
12
|
+
def initialize(io, separator)
|
13
|
+
@io = io
|
14
|
+
@separator = separator
|
15
|
+
@add_separator = false
|
16
|
+
end
|
17
|
+
|
18
|
+
def write(str)
|
19
|
+
@io.write(@separator) if @separator && @add_separator
|
20
|
+
@io.write(str)
|
21
|
+
@add_separator = true
|
22
|
+
end
|
23
|
+
end
|
24
|
+
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: io-segmenter
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.7.2
|
4
|
+
version: 0.7.5
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Doug Youch
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2018-09-
|
11
|
+
date: 2018-09-10 00:00:00.000000000 Z
|
12
12
|
dependencies: []
|
13
13
|
description: Used to iterate over segments of data in IO objects
|
14
14
|
email: dyouch5@yahoo.com
|
@@ -20,6 +20,9 @@ files:
|
|
20
20
|
- lib/io-segmenter/core_ext/file.rb
|
21
21
|
- lib/io-segmenter/core_ext/io.rb
|
22
22
|
- lib/io-segmenter/core_ext/json.rb
|
23
|
+
- lib/io-segmenter/parser.rb
|
24
|
+
- lib/io-segmenter/reader.rb
|
25
|
+
- lib/io-segmenter/writer.rb
|
23
26
|
homepage: https://github.com/dougyouch/io-segemnter
|
24
27
|
licenses: []
|
25
28
|
metadata: {}
|