ioblockreader 1.0.1.20130611 → 1.0.2.20130613
Sign up to get free protection for your applications and to get access to all the features.
- data/AUTHORS +1 -0
- data/ChangeLog +5 -0
- data/README.md +30 -0
- data/ReleaseInfo +1 -1
- data/lib/ioblockreader/datablock.rb +10 -6
- data/lib/ioblockreader/ioblockreader.rb +73 -2
- metadata +2 -2
data/AUTHORS
CHANGED
data/ChangeLog
CHANGED
data/README.md
CHANGED
@@ -34,6 +34,8 @@ end
|
|
34
34
|
|
35
35
|
### IOBlockReader.init(io, options = {})
|
36
36
|
|
37
|
+
Get an IOBlockReader instance on an IO.
|
38
|
+
|
37
39
|
Parameters:
|
38
40
|
* **io** ( _IO_ ): The IO object used to give the String interface
|
39
41
|
* **options** (<em>map< Symbol, Object ></em>): Additional options:
|
@@ -50,6 +52,8 @@ content = IOBlockReader.init(file, :block_size => 32768, :blocks_in_memory => 5)
|
|
50
52
|
|
51
53
|
### IOBlockReader#\[\](range)
|
52
54
|
|
55
|
+
Access a part of the data in the IO as a String.
|
56
|
+
|
53
57
|
Parameters:
|
54
58
|
* **range** ( _Fixnum_ or _Range_ ): Range to extract
|
55
59
|
|
@@ -64,6 +68,8 @@ substring = content[10..20]
|
|
64
68
|
|
65
69
|
### IOBlockReader#index(token, offset = 0, max_size_regexp = 32)
|
66
70
|
|
71
|
+
Search for a token or a list of tokens.
|
72
|
+
|
67
73
|
Parameters:
|
68
74
|
* **token** ( _String_ , _Regexp_ or <em>list< Object ></em>): Token to be found. Can be a list of tokens.
|
69
75
|
* **offset** ( _Fixnum_ ): Offset starting the search [optional = 0]
|
@@ -94,6 +100,30 @@ i, token_index = content.index( [ 'search string', 'another string' ] )
|
|
94
100
|
i, token_index = content.index( [ 'search string', /another f.....g string/ ], 20, 22)
|
95
101
|
```
|
96
102
|
|
103
|
+
### IOBlockReader#each_block(range = 0)
|
104
|
+
|
105
|
+
Iterate over blocks in the data.
|
106
|
+
|
107
|
+
Parameters:
|
108
|
+
* **range** ( _Range_ or _Fixnum_ ): The boundaries of the iteration, or the starting index [default = 0]
|
109
|
+
* _Block_ : Code called for each block encountered
|
110
|
+
* Parameters:
|
111
|
+
* **data** ( _String_ ): The data
|
112
|
+
|
113
|
+
Example:
|
114
|
+
```
|
115
|
+
# Iterate all over the IO
|
116
|
+
content.each_block do |data|
|
117
|
+
puts "Got a block of #{data.size} bytes"
|
118
|
+
end
|
119
|
+
|
120
|
+
# Iterate on just a part
|
121
|
+
content.each_block(10..50) do |data|
|
122
|
+
puts "Got a block of #{data.size} bytes"
|
123
|
+
end
|
124
|
+
|
125
|
+
```
|
126
|
+
|
97
127
|
## Contact
|
98
128
|
|
99
129
|
Want to contribute? Have any questions? [Contact Muriel!](muriel@x-aeon.com)
|
data/ReleaseInfo
CHANGED
data/lib/ioblockreader/datablock.rb
CHANGED
@@ -19,26 +19,30 @@ module IOBlockReader
|
|
19
19
|
attr_reader :data
|
20
20
|
|
21
21
|
# Constructor
|
22
|
-
def initialize
|
22
|
+
#
|
23
|
+
# Parameters::
|
24
|
+
# * *io* (_IO_): IO to read from
|
25
|
+
def initialize(io)
|
26
|
+
@io = io
|
23
27
|
@offset = nil
|
24
28
|
@last_access_time = nil
|
25
29
|
@data = ''
|
30
|
+
@data.force_encoding(@io.external_encoding) if (@data.respond_to?(:force_encoding))
|
26
31
|
end
|
27
32
|
|
28
33
|
# Fill the data block for a given IO
|
29
34
|
#
|
30
35
|
# Parameters::
|
31
|
-
# * *io* (_IO_): IO to read from
|
32
36
|
# * *offset* (_Fixnum_): Offset of this block in the IO
|
33
37
|
# * *size* (_Fixnum_): Size of the block to be read
|
34
|
-
def fill(io, offset, size)
|
38
|
+
def fill(offset, size)
|
35
39
|
@offset = offset
|
36
40
|
@last_access_time = @@access_time_sequence
|
37
41
|
@@access_time_sequence += 1
|
38
42
|
#puts "[IOBlockReader] - Read #{size} @#{@offset}"
|
39
|
-
io.seek(@offset)
|
40
|
-
io.read(size, @data)
|
41
|
-
@last_block = io.eof?
|
43
|
+
@io.seek(@offset)
|
44
|
+
@io.read(size, @data)
|
45
|
+
@last_block = @io.eof?
|
42
46
|
end
|
43
47
|
|
44
48
|
# Is this block the last of its IO stream?
|
data/lib/ioblockreader/ioblockreader.rb
CHANGED
@@ -199,6 +199,77 @@ module IOBlockReader
|
|
199
199
|
end
|
200
200
|
end
|
201
201
|
|
202
|
+
# Iterate over blocks in the data.
|
203
|
+
# ! Do not use negative integers in the range.
|
204
|
+
#
|
205
|
+
# Parameters::
|
206
|
+
# * *range* (_Range_ or _Fixnum_): The boundaries of the iteration, or the starting index [default = 0]
|
207
|
+
# * _Block_: Code called for each block encountered
|
208
|
+
# * Parameters::
|
209
|
+
# * *data* (_String_): The data
|
210
|
+
def each_block(range = 0)
|
211
|
+
#puts "[IOBlockReader] - each_block(#{range})"
|
212
|
+
# Parse parameters
|
213
|
+
begin_offset = range
|
214
|
+
end_offset = nil
|
215
|
+
if (range.is_a?(Range))
|
216
|
+
begin_offset = range.first
|
217
|
+
end_offset = range.last
|
218
|
+
end
|
219
|
+
|
220
|
+
current_block_index, begin_offset_in_first_block = begin_offset.divmod(@block_size)
|
221
|
+
end_offset_block_index, end_offset_in_last_block = ((end_offset == nil) ? [nil, nil] : end_offset.divmod(@block_size))
|
222
|
+
# Make sure first block is loaded
|
223
|
+
if ((current_block = @blocks[current_block_index]) == nil)
|
224
|
+
read_needed_blocks([current_block_index], current_block_index, current_block_index)
|
225
|
+
current_block = @blocks[current_block_index]
|
226
|
+
else
|
227
|
+
current_block.touch
|
228
|
+
end
|
229
|
+
if (current_block_index == end_offset_block_index)
|
230
|
+
# We have a Range in the same block
|
231
|
+
if ((begin_offset_in_first_block == 0) and
|
232
|
+
(end_offset_in_last_block == current_block.data.size-1))
|
233
|
+
yield(current_block.data)
|
234
|
+
else
|
235
|
+
yield(current_block.data[begin_offset_in_first_block..end_offset_in_last_block])
|
236
|
+
end
|
237
|
+
else
|
238
|
+
# We need to loop, but consider first block differently as it might be partially given
|
239
|
+
if (begin_offset_in_first_block == 0)
|
240
|
+
yield(current_block.data)
|
241
|
+
else
|
242
|
+
yield(current_block.data[begin_offset_in_first_block..-1])
|
243
|
+
end
|
244
|
+
if (!current_block.last_block?)
|
245
|
+
# Now loop on all subsequent blocks unless we get to the last one
|
246
|
+
finished = false
|
247
|
+
while (!finished)
|
248
|
+
# Read next block
|
249
|
+
current_block_index += 1
|
250
|
+
if ((current_block = @blocks[current_block_index]) == nil)
|
251
|
+
read_needed_blocks([current_block_index], current_block_index, current_block_index)
|
252
|
+
current_block = @blocks[current_block_index]
|
253
|
+
else
|
254
|
+
current_block.touch
|
255
|
+
end
|
256
|
+
if (end_offset_block_index == current_block_index)
|
257
|
+
# We arrived on the last block of the Range
|
258
|
+
if (end_offset_in_last_block == current_block.data.size-1)
|
259
|
+
yield(current_block.data)
|
260
|
+
else
|
261
|
+
yield(current_block.data[0..end_offset_in_last_block])
|
262
|
+
end
|
263
|
+
finished = true
|
264
|
+
else
|
265
|
+
yield(current_block.data)
|
266
|
+
finished = current_block.last_block?
|
267
|
+
end
|
268
|
+
end
|
269
|
+
end
|
270
|
+
end
|
271
|
+
end
|
272
|
+
|
202
273
|
private
|
203
274
|
|
204
275
|
# Set the new cache block
|
@@ -246,8 +317,8 @@ module IOBlockReader
|
|
246
317
|
indexes_needing_loading.each do |block_index|
|
247
318
|
# Have to load this block
|
248
319
|
block_to_fill = removed_blocks.pop
|
249
|
-
block_to_fill = DataBlock.new if (block_to_fill == nil)
|
250
|
-
block_to_fill.fill(@io, block_index * @block_size, @block_size)
|
320
|
+
block_to_fill = DataBlock.new(@io) if (block_to_fill == nil)
|
321
|
+
block_to_fill.fill(block_index * @block_size, @block_size)
|
251
322
|
@blocks[block_index] = block_to_fill
|
252
323
|
end
|
253
324
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: ioblockreader
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.0.1.20130611
|
4
|
+
version: 1.0.2.20130613
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2013-06-11 00:00:00.000000000 Z
|
12
|
+
date: 2013-06-13 00:00:00.000000000 Z
|
13
13
|
dependencies: []
|
14
14
|
description: Ruby library giving block-buffered and cached read over IO objects with
|
15
15
|
a String-like interface. Ideal to parse big files as Strings, limiting memory consumption.
|