ioblockreader 1.0.1.20130611 → 1.0.2.20130613
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/AUTHORS +1 -0
- data/ChangeLog +5 -0
- data/README.md +30 -0
- data/ReleaseInfo +1 -1
- data/lib/ioblockreader/datablock.rb +10 -6
- data/lib/ioblockreader/ioblockreader.rb +73 -2
- metadata +2 -2
data/AUTHORS
CHANGED
data/ChangeLog
CHANGED
data/README.md
CHANGED
@@ -34,6 +34,8 @@ end
|
|
34
34
|
|
35
35
|
### IOBlockReader.init(io, options = {})
|
36
36
|
|
37
|
+
Get an IOBlockReader instance on an IO.
|
38
|
+
|
37
39
|
Parameters:
|
38
40
|
* **io** ( _IO_ ): The IO object used to give the String interface
|
39
41
|
* **options** (<em>map< Symbol, Object ></em>): Additional options:
|
@@ -50,6 +52,8 @@ content = IOBlockReader.init(file, :block_size => 32768, :blocks_in_memory => 5)
|
|
50
52
|
|
51
53
|
### IOBlockReader#\[\](range)
|
52
54
|
|
55
|
+
Access a part of the data in the IO as a String.
|
56
|
+
|
53
57
|
Parameters:
|
54
58
|
* **range** ( _Fixnum_ or _Range_ ): Range to extract
|
55
59
|
|
@@ -64,6 +68,8 @@ substring = content[10..20]
|
|
64
68
|
|
65
69
|
### IOBlockReader#index(token, offset = 0, max_size_regexp = 32)
|
66
70
|
|
71
|
+
Search for a token or a list of tokens.
|
72
|
+
|
67
73
|
Parameters:
|
68
74
|
* **token** ( _String_ , _Regexp_ or <em>list< Object ></em>): Token to be found. Can be a list of tokens.
|
69
75
|
* **offset** ( _Fixnum_ ): Offset starting the search [optional = 0]
|
@@ -94,6 +100,30 @@ i, token_index = content.index( [ 'search string', 'another string' ] )
|
|
94
100
|
i, token_index = content.index( [ 'search string', /another f.....g string/ ], 20, 22)
|
95
101
|
```
|
96
102
|
|
103
|
+
### IOBlockReader#each_block(range = 0)
|
104
|
+
|
105
|
+
Iterate over blocks in the data.
|
106
|
+
|
107
|
+
Parameters:
|
108
|
+
* **range** ( _Range_ or _Fixnum_ ): The boundaries of the iteration, or the starting index [default = 0]
|
109
|
+
* _Block_ : Code called for each block encountered
|
110
|
+
* Parameters:
|
111
|
+
* **data** ( _String_ ): The data
|
112
|
+
|
113
|
+
Example:
|
114
|
+
```
|
115
|
+
# Iterate all over the IO
|
116
|
+
content.each_block do |data|
|
117
|
+
puts "Got a block of #{data.size} bytes"
|
118
|
+
end
|
119
|
+
|
120
|
+
# Iterate on just a part
|
121
|
+
content.each_block(10..50) do |data|
|
122
|
+
puts "Got a block of #{data.size} bytes"
|
123
|
+
end
|
124
|
+
|
125
|
+
```
|
126
|
+
|
97
127
|
## Contact
|
98
128
|
|
99
129
|
Want to contribute? Have any questions? [Contact Muriel!](muriel@x-aeon.com)
|
data/ReleaseInfo
CHANGED
data/lib/ioblockreader/datablock.rb
CHANGED
@@ -19,26 +19,30 @@ module IOBlockReader
|
|
19
19
|
attr_reader :data
|
20
20
|
|
21
21
|
# Constructor
|
22
|
-
def initialize
|
22
|
+
#
|
23
|
+
# Parameters::
|
24
|
+
# * *io* (_IO_): IO to read from
|
25
|
+
def initialize(io)
|
26
|
+
@io = io
|
23
27
|
@offset = nil
|
24
28
|
@last_access_time = nil
|
25
29
|
@data = ''
|
30
|
+
@data.force_encoding(@io.external_encoding) if (@data.respond_to?(:force_encoding))
|
26
31
|
end
|
27
32
|
|
28
33
|
# Fill the data block for a given IO
|
29
34
|
#
|
30
35
|
# Parameters::
|
31
|
-
# * *io* (_IO_): IO to read from
|
32
36
|
# * *offset* (_Fixnum_): Offset of this block in the IO
|
33
37
|
# * *size* (_Fixnum_): Size of the block to be read
|
34
|
-
def fill(io, offset, size)
|
38
|
+
def fill(offset, size)
|
35
39
|
@offset = offset
|
36
40
|
@last_access_time = @@access_time_sequence
|
37
41
|
@@access_time_sequence += 1
|
38
42
|
#puts "[IOBlockReader] - Read #{size} @#{@offset}"
|
39
|
-
io.seek(@offset)
|
40
|
-
io.read(size, @data)
|
41
|
-
@last_block = io.eof?
|
43
|
+
@io.seek(@offset)
|
44
|
+
@io.read(size, @data)
|
45
|
+
@last_block = @io.eof?
|
42
46
|
end
|
43
47
|
|
44
48
|
# Is this block the last of its IO stream?
|
data/lib/ioblockreader/ioblockreader.rb
CHANGED
@@ -199,6 +199,77 @@ module IOBlockReader
|
|
199
199
|
end
|
200
200
|
end
|
201
201
|
|
202
|
+
# Iterate over blocks in the data.
|
203
|
+
# ! Do not use negative integers in the range.
|
204
|
+
#
|
205
|
+
# Parameters::
|
206
|
+
# * *range* (_Range_ or _Fixnum_): The boundaries of the iteration, or the starting index [default = 0]
|
207
|
+
# * _Block_: Code called for each block encountered
|
208
|
+
# * Parameters::
|
209
|
+
# * *data* (_String_): The data
|
210
|
+
def each_block(range = 0)
|
211
|
+
#puts "[IOBlockReader] - each_block(#{range})"
|
212
|
+
# Parse parameters
|
213
|
+
begin_offset = range
|
214
|
+
end_offset = nil
|
215
|
+
if (range.is_a?(Range))
|
216
|
+
begin_offset = range.first
|
217
|
+
end_offset = range.last
|
218
|
+
end
|
219
|
+
|
220
|
+
current_block_index, begin_offset_in_first_block = begin_offset.divmod(@block_size)
|
221
|
+
end_offset_block_index, end_offset_in_last_block = ((end_offset == nil) ? [nil, nil] : end_offset.divmod(@block_size))
|
222
|
+
# Make sure first block is loaded
|
223
|
+
if ((current_block = @blocks[current_block_index]) == nil)
|
224
|
+
read_needed_blocks([current_block_index], current_block_index, current_block_index)
|
225
|
+
current_block = @blocks[current_block_index]
|
226
|
+
else
|
227
|
+
current_block.touch
|
228
|
+
end
|
229
|
+
if (current_block_index == end_offset_block_index)
|
230
|
+
# We have a Range in the same block
|
231
|
+
if ((begin_offset_in_first_block == 0) and
|
232
|
+
(end_offset_in_last_block == current_block.data.size-1))
|
233
|
+
yield(current_block.data)
|
234
|
+
else
|
235
|
+
yield(current_block.data[begin_offset_in_first_block..end_offset_in_last_block])
|
236
|
+
end
|
237
|
+
else
|
238
|
+
# We need to loop, but consider first block differently as it might be partially given
|
239
|
+
if (begin_offset_in_first_block == 0)
|
240
|
+
yield(current_block.data)
|
241
|
+
else
|
242
|
+
yield(current_block.data[begin_offset_in_first_block..-1])
|
243
|
+
end
|
244
|
+
if (!current_block.last_block?)
|
245
|
+
# Now loop on all subsequent blocks unless we get to the last one
|
246
|
+
finished = false
|
247
|
+
while (!finished)
|
248
|
+
# Read next block
|
249
|
+
current_block_index += 1
|
250
|
+
if ((current_block = @blocks[current_block_index]) == nil)
|
251
|
+
read_needed_blocks([current_block_index], current_block_index, current_block_index)
|
252
|
+
current_block = @blocks[current_block_index]
|
253
|
+
else
|
254
|
+
current_block.touch
|
255
|
+
end
|
256
|
+
if (end_offset_block_index == current_block_index)
|
257
|
+
# We arrived on the last block of the Range
|
258
|
+
if (end_offset_in_last_block == current_block.data.size-1)
|
259
|
+
yield(current_block.data)
|
260
|
+
else
|
261
|
+
yield(current_block.data[0..end_offset_in_last_block])
|
262
|
+
end
|
263
|
+
finished = true
|
264
|
+
else
|
265
|
+
yield(current_block.data)
|
266
|
+
finished = current_block.last_block?
|
267
|
+
end
|
268
|
+
end
|
269
|
+
end
|
270
|
+
end
|
271
|
+
end
|
272
|
+
|
202
273
|
private
|
203
274
|
|
204
275
|
# Set the new cache block
|
@@ -246,8 +317,8 @@ module IOBlockReader
|
|
246
317
|
indexes_needing_loading.each do |block_index|
|
247
318
|
# Have to load this block
|
248
319
|
block_to_fill = removed_blocks.pop
|
249
|
-
block_to_fill = DataBlock.new if (block_to_fill == nil)
|
250
|
-
block_to_fill.fill(@io, block_index * @block_size, @block_size)
|
320
|
+
block_to_fill = DataBlock.new(@io) if (block_to_fill == nil)
|
321
|
+
block_to_fill.fill(block_index * @block_size, @block_size)
|
251
322
|
@blocks[block_index] = block_to_fill
|
252
323
|
end
|
253
324
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: ioblockreader
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.0.1.20130611
|
4
|
+
version: 1.0.2.20130613
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2013-06-11 00:00:00.000000000 Z
|
12
|
+
date: 2013-06-13 00:00:00.000000000 Z
|
13
13
|
dependencies: []
|
14
14
|
description: Ruby library giving block-buffered and cached read over IO objects with
|
15
15
|
a String-like interface. Ideal to parse big files as Strings, limiting memory consumption.
|