filter_io 0.1.6 → 0.2.0

Sign up to get free protection for your applications and to get access to all the features.
data/.gitignore CHANGED
@@ -1,4 +1,4 @@
1
- *.gemspec
1
+ Gemfile.lock
2
2
  pkg
3
3
  coverage
4
4
  .bundle
data/Gemfile CHANGED
@@ -1,6 +1,2 @@
1
- source :gemcutter
2
-
3
- gem 'jeweler'
4
- gem 'activesupport', '>=2.3.9'
5
- gem 'i18n', '>=0.4.1'
6
- gem 'test-unit'
1
+ source 'https://rubygems.org/'
2
+ gemspec
data/README.markdown CHANGED
@@ -11,37 +11,41 @@
11
11
 
12
12
  You can install the gem by running:
13
13
 
14
- gem install filter_io
14
+ ``` sh
15
+ gem install filter_io
16
+ ```
15
17
 
16
18
  ### Example Usage
17
19
 
18
20
  #### A Simple Example: ROT-13
19
21
 
20
- io = FilterIO.new io do |data|
21
- data.tr "A-Za-z", "N-ZA-Mn-za-m"
22
- end
22
+ ``` ruby
23
+ io = FilterIO.new io do |data|
24
+ data.tr "A-Za-z", "N-ZA-Mn-za-m"
25
+ end
26
+ ```
23
27
 
24
28
  #### A Useful Example: Line Ending Normalisation
25
29
 
26
30
  A common usage of `filter_io` is to normalise line endings before parsing CSV data:
27
31
 
28
- # open source stream
29
- File.open(filename) do |io|
30
-
31
- # apply filter to stream
32
- io = FilterIO.new(io) do |data, state|
33
- # grab another chunk if the last character is a delimiter
34
- raise FilterIO::NeedMoreData if data =~ /[\r\n]\z/ && !state.eof?
35
- # normalise line endings to LF
36
- data.gsub /\r\n|\r|\n/, "\n"
37
- end
38
-
39
- # process resulting stream normally
40
- FasterCSV.parse(io) do |row|
41
- pp row
42
- end
43
-
44
- end
32
+ ``` ruby
33
+ # open source stream
34
+ File.open(filename) do |io|
35
+ # apply filter to stream
36
+ io = FilterIO.new(io) do |data, state|
37
+ # grab another chunk if the last character is a delimiter
38
+ raise FilterIO::NeedMoreData if data =~ /[\r\n]\z/ && !state.eof?
39
+ # normalise line endings to LF
40
+ data.gsub /\r\n|\r|\n/, "\n"
41
+ end
42
+
43
+ # process resulting stream normally
44
+ FasterCSV.parse(io) do |row|
45
+ pp row
46
+ end
47
+ end
48
+ ```
45
49
 
46
50
  ### Reference
47
51
 
@@ -66,16 +70,18 @@ If your block is unable to process the whole chunk of data immediately, it can r
66
70
 
67
71
  Here's an example which processes whole lines and prepends the line length to the beginning of each line.
68
72
 
69
- io = FilterIO.new io do |data, state|
70
- output = ''
71
- # grab complete lines until we hit EOF
72
- while data =~ /(.*)\n/ || (state.eof? && data =~ /(.+)/)
73
- output << "#{$1.size} #{$1}\n"
74
- data = $'
75
- end
76
- # `output` contains the processed lines, `data` contains any left over partial line
77
- [output, data]
78
- end
73
+ ``` ruby
74
+ io = FilterIO.new io do |data, state|
75
+ output = ''
76
+ # grab complete lines until we hit EOF
77
+ while data =~ /(.*)\n/ || (state.eof? && data =~ /(.+)/)
78
+ output << "#{$1.size} #{$1}\n"
79
+ data = $'
80
+ end
81
+ # `output` contains the processed lines, `data` contains any left over partial line
82
+ [output, data]
83
+ end
84
+ ```
79
85
 
80
86
  #### Block Size
81
87
 
data/Rakefile CHANGED
@@ -1,54 +1,6 @@
1
- require 'rubygems'
2
1
  require 'rake'
3
- require 'rake/testtask'
2
+ require 'bundler/gem_tasks'
3
+ require 'rspec/core/rake_task'
4
4
 
5
- desc 'Default: run unit tests.'
6
- task :default => :test
7
-
8
- desc 'Test the filter_io plugin.'
9
- Rake::TestTask.new(:test) do |t|
10
- t.libs << 'lib'
11
- t.libs << 'test'
12
- t.pattern = 'test/**/*_test.rb'
13
- t.verbose = true
14
- end
15
-
16
- task :test => :check_dependencies
17
-
18
- begin
19
- require 'jeweler'
20
- Jeweler::Tasks.new do |gem|
21
- gem.name = "filter_io"
22
- gem.summary = "Filter IO streams with a block. Ruby's FilterInputStream."
23
- gem.email = "jason@jasoncodes.com"
24
- gem.homepage = "http://github.com/jasoncodes/filter_io"
25
- gem.authors = ["Jason Weathered"]
26
- gem.has_rdoc = false
27
- gem.add_dependency 'activesupport'
28
- end
29
- Jeweler::GemcutterTasks.new
30
- rescue LoadError
31
- puts "Jeweler (or a dependency) not available. Install it with: gem install jeweler"
32
- end
33
-
34
- begin
35
- require 'rcov/rcovtask'
36
- Rcov::RcovTask.new do |t|
37
- t.libs << "test"
38
- t.rcov_opts = [
39
- "--exclude '^(?!lib)'"
40
- ]
41
- t.test_files = FileList[
42
- 'test/**/*_test.rb'
43
- ]
44
- t.output_dir = 'coverage'
45
- t.verbose = true
46
- end
47
- task :rcov do
48
- system "open coverage/index.html"
49
- end
50
- rescue LoadError
51
- task :rcov do
52
- raise "You must install the 'rcov' gem"
53
- end
54
- end
5
+ RSpec::Core::RakeTask.new(:spec)
6
+ task :default => :spec
data/filter_io.gemspec ADDED
@@ -0,0 +1,26 @@
1
+ # coding: utf-8
2
+ lib = File.expand_path('../lib', __FILE__)
3
+ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
4
+ require 'filter_io/version'
5
+
6
+ Gem::Specification.new do |spec|
7
+ spec.name = %q{filter_io}
8
+ spec.version = FilterIO::VERSION
9
+ spec.authors = ['Jason Weathered']
10
+ spec.email = ['jason@jasoncodes.com']
11
+ spec.summary = %q{Filter IO streams with a block. Ruby's FilterInputStream.}
12
+ spec.homepage = 'http://github.com/jasoncodes/filter_io'
13
+ spec.license = 'MIT'
14
+
15
+ spec.files = `git ls-files`.split($/)
16
+ spec.executables = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
17
+ spec.test_files = spec.files.grep(%r{^(test|spec|features)/})
18
+ spec.require_paths = ['lib']
19
+
20
+ spec.add_dependency 'activesupport', '>= 2.3.9'
21
+
22
+ spec.add_development_dependency 'bundler', '~> 1.3'
23
+ spec.add_development_dependency 'rake'
24
+ spec.add_development_dependency 'simplecov'
25
+ spec.add_development_dependency 'rspec', '~> 2.13'
26
+ end
@@ -0,0 +1,3 @@
1
+ class FilterIO
2
+ VERSION = '0.2.0'
3
+ end
data/lib/filter_io.rb CHANGED
@@ -4,12 +4,11 @@ require 'active_support/core_ext/array'
4
4
  require 'active_support/core_ext/hash'
5
5
 
6
6
  class FilterIO
7
-
8
7
  DEFAULT_BLOCK_SIZE = 1024
9
-
8
+
10
9
  class NeedMoreData < Exception
11
10
  end
12
-
11
+
13
12
  class BlockState
14
13
  attr_reader :bof, :eof
15
14
  def initialize(bof, eof)
@@ -19,7 +18,7 @@ class FilterIO
19
18
  alias_method :bof?, :bof
20
19
  alias_method :eof?, :eof
21
20
  end
22
-
21
+
23
22
  def initialize(io, options = nil, &block)
24
23
  @io = io
25
24
  @options = options || {}
@@ -29,75 +28,99 @@ class FilterIO
29
28
  @buffer_raw = empty_string_raw
30
29
  @options.assert_valid_keys :block_size
31
30
  end
32
-
31
+
33
32
  def pos
34
33
  @pos
35
34
  end
36
-
35
+
37
36
  def bof?
38
37
  @pos == 0
39
38
  end
40
-
39
+
41
40
  def eof?
42
41
  @buffer.empty? && source_eof?
43
42
  end
44
-
43
+
45
44
  def source_eof?
46
45
  @buffer_raw.empty? && @io.eof?
47
46
  end
48
-
47
+
49
48
  def close
50
49
  @io.close
51
50
  end
52
-
51
+
53
52
  def closed?
54
53
  @io.closed?
55
54
  end
56
-
57
- def readchar
58
- raise EOFError, 'end of file reached' if eof?
59
- if @io.respond_to? :external_encoding
60
- data = empty_string_raw
61
- begin
62
- data << read(1).force_encoding(@io.internal_encoding || @io.external_encoding)
63
- end until data.valid_encoding? or source_eof?
64
- data.encode! @io.internal_encoding if @io.internal_encoding
65
- data
55
+
56
+ def default_encoding
57
+ unless @default_encoding
58
+ c = @io.getc
59
+ @io.ungetc c
60
+ @default_encoding = c.encoding
61
+ end
62
+ @default_encoding
63
+ end
64
+
65
+ def internal_encoding
66
+ if @io.respond_to?(:internal_encoding)
67
+ @io.internal_encoding
66
68
  else
67
- read(1).ord
69
+ default_encoding
68
70
  end
69
71
  end
70
-
72
+
73
+ def external_encoding
74
+ if @io.respond_to?(:external_encoding)
75
+ @io.external_encoding
76
+ else
77
+ default_encoding
78
+ end
79
+ end
80
+
81
+ def readchar
82
+ raise EOFError, 'end of file reached' if eof?
83
+ data = empty_string_raw
84
+ begin
85
+ byte = read(1)
86
+ if internal_encoding || external_encoding
87
+ byte.force_encoding internal_encoding || external_encoding
88
+ end
89
+ data << byte
90
+ end until data.valid_encoding? or source_eof?
91
+ data.encode! internal_encoding if internal_encoding
92
+ data
93
+ end
94
+
71
95
  def getc
72
96
  readchar
73
97
  rescue EOFError
74
98
  nil
75
99
  end
76
-
100
+
77
101
  def read(length = nil)
78
-
79
102
  raise ArgumentError if length && length < 0
80
103
  return '' if length == 0
81
-
104
+
82
105
  # fill the buffer up to the fill level (or whole input if length is nil)
83
- while !source_eof? && (length.nil? || length > bytesize(@buffer))
106
+ while !source_eof? && (length.nil? || length > @buffer.bytesize)
84
107
  buffer_data @options[:block_size] || length
85
108
  end
86
-
109
+
87
110
  # we now have all the data in the buffer that we need (or can get if EOF)
88
111
  case
89
- when bytesize(@buffer) > 0
112
+ when @buffer.bytesize > 0
90
113
  # limit length to the buffer size if we were asked for it all or have ran out (EOF)
91
- read_length = if length.nil? or length > bytesize(@buffer)
92
- bytesize @buffer
114
+ read_length = if length.nil? or length > @buffer.bytesize
115
+ @buffer.bytesize
93
116
  else
94
117
  length
95
118
  end
96
119
  data = pop_bytes read_length
97
- @pos += bytesize(data)
98
- if length.nil? && @io.respond_to?(:external_encoding)
99
- data.force_encoding @io.external_encoding
100
- data.encode! @io.internal_encoding if @io.internal_encoding
120
+ @pos += data.bytesize
121
+ if length.nil?
122
+ data.force_encoding external_encoding if external_encoding
123
+ data.encode! internal_encoding if internal_encoding
101
124
  end
102
125
  data
103
126
  when source_eof?
@@ -106,15 +129,13 @@ class FilterIO
106
129
  else
107
130
  raise IOError, 'Read error'
108
131
  end
109
-
110
132
  end
111
-
133
+
112
134
  def rewind
113
135
  seek 0, IO::SEEK_SET
114
136
  end
115
-
137
+
116
138
  def seek(offset, whence = IO::SEEK_SET)
117
-
118
139
  new_pos = case whence
119
140
  when IO::SEEK_SET
120
141
  offset
@@ -125,7 +146,7 @@ class FilterIO
125
146
  else
126
147
  raise Errno::EINVAL
127
148
  end
128
-
149
+
129
150
  case new_pos
130
151
  when pos
131
152
  # noop
@@ -137,26 +158,25 @@ class FilterIO
137
158
  else
138
159
  raise Errno::EINVAL, 'Random seek not supported'
139
160
  end
140
-
161
+
141
162
  0
142
163
  end
143
-
164
+
144
165
  def ungetc(char)
145
- char = char.chr if char.respond_to? :chr
146
- @pos -= bytesize(char)
166
+ char = char.chr
167
+ @pos -= char.bytesize
147
168
  @pos = 0 if @pos < 0
148
169
  @buffer = char + @buffer
149
170
  end
150
-
171
+
151
172
  def gets(sep_string = $/)
152
-
153
173
  return nil if eof?
154
174
  return read if sep_string.nil?
155
-
175
+
156
176
  paragraph_mode = sep_string == ''
157
177
  sep_string = "\n\n" if paragraph_mode
158
178
  sep_string = sep_string.to_s unless sep_string.is_a? String
159
-
179
+
160
180
  if paragraph_mode
161
181
  # consume any leading newlines
162
182
  char = getc
@@ -167,12 +187,12 @@ class FilterIO
167
187
  return nil # nothing left except newlines, bail out
168
188
  end
169
189
  end
170
-
190
+
171
191
  # fill the buffer until it contains the separator sequence
172
192
  until source_eof? or @buffer.index(sep_string)
173
193
  buffer_data @options[:block_size]
174
194
  end
175
-
195
+
176
196
  # calculate how much of the buffer to return
177
197
  length = if idx = @buffer.index(sep_string)
178
198
  # we found the separator, include it in our output
@@ -181,18 +201,18 @@ class FilterIO
181
201
  # no separator found (must be EOF). return everything we've got
182
202
  length = @buffer.size
183
203
  end
184
-
204
+
185
205
  # increment the position and return the buffer fragment
186
206
  data = @buffer.slice!(0, length)
187
- @pos += bytesize(data)
188
-
207
+ @pos += data.bytesize
208
+
189
209
  data
190
210
  end
191
-
211
+
192
212
  def readline(sep_string = $/)
193
213
  gets(sep_string) or raise EOFError, 'end of file reached'
194
214
  end
195
-
215
+
196
216
  def each_line(sep_string = $/)
197
217
  unless block_given?
198
218
  klass = defined?(Enumerator) ? Enumerator : Enumerable::Enumerator
@@ -205,64 +225,55 @@ class FilterIO
205
225
  end
206
226
  alias :each :each_line
207
227
  alias :lines :each_line
208
-
228
+
209
229
  def readlines(sep_string = $/)
210
230
  lines = []
211
231
  each_line(sep_string) { |line| lines << line }
212
232
  lines
213
233
  end
214
-
234
+
215
235
  protected
216
-
236
+
217
237
  def empty_string
218
238
  str = String.new
219
- if @io.respond_to?(:internal_encoding)
220
- str.force_encoding @io.internal_encoding || @io.external_encoding
239
+ if internal_encoding || external_encoding
240
+ str.force_encoding internal_encoding || external_encoding
221
241
  end
222
242
  str
223
243
  end
224
-
244
+
225
245
  def empty_string_raw
226
246
  str = String.new
227
- if @io.respond_to?(:external_encoding)
228
- str.force_encoding @io.external_encoding
247
+ if external_encoding
248
+ str.force_encoding external_encoding
229
249
  end
230
250
  str
231
251
  end
232
-
233
- def bytesize(str)
234
- str.respond_to?(:bytesize) ? str.bytesize : str.size
235
- end
236
-
252
+
237
253
  def pop_bytes(count)
238
254
  data = begin
239
- if @io.respond_to?(:internal_encoding)
240
- @buffer.force_encoding 'ASCII-8BIT'
241
- end
255
+ org_encoding = @buffer.encoding
256
+ @buffer.force_encoding 'ASCII-8BIT'
242
257
  @buffer.slice!(0, count)
243
258
  ensure
244
- if @io.respond_to?(:internal_encoding)
245
- @buffer.force_encoding @io.internal_encoding || @io.external_encoding
246
- end
259
+ @buffer.force_encoding org_encoding
247
260
  end
248
261
  data
249
262
  end
250
-
263
+
251
264
  def buffer_data(block_size = nil)
252
-
253
265
  block_size ||= DEFAULT_BLOCK_SIZE
254
-
266
+
255
267
  data = unless @buffer_raw.empty?
256
- @buffer_raw.slice! 0, bytesize(@buffer_raw)
268
+ @buffer_raw.slice! 0, @buffer_raw.bytesize
257
269
  else
258
270
  @io.read(block_size) or return
259
271
  end
260
-
261
- initial_data_size = bytesize(data)
272
+
273
+ initial_data_size = data.bytesize
262
274
  begin
263
-
264
275
  data = process_data data, initial_data_size
265
-
276
+
266
277
  # if no processed data was returned and there is unprocessed data...
267
278
  if data.is_a?(Array) && data.size == 2 && data[0].size == 0 && data[1].size > 0
268
279
  # restore the unprocessed data into the temporary buffer
@@ -270,52 +281,45 @@ class FilterIO
270
281
  # and add some more data to the buffer
271
282
  raise NeedMoreData
272
283
  end
273
-
274
284
  rescue NeedMoreData => e
275
285
  raise EOFError, 'end of file reached' if @io.eof?
276
286
  data << @io.read(block_size)
277
287
  retry
278
288
  end
279
-
289
+
280
290
  data = [data] unless data.is_a? Array
281
291
  raise 'Block must have 1 or 2 values' unless data.size <= 2
282
- if @buffer.respond_to?(:encoding) && @buffer.encoding != data[0].encoding
292
+ if @buffer.encoding != data[0].encoding
283
293
  if [@buffer, data[0]].any? { |x| x.encoding.to_s == 'ASCII-8BIT' }
284
294
  data[0] = data[0].dup.force_encoding @buffer.encoding
285
295
  end
286
296
  end
287
297
  @buffer << data[0]
288
298
  if data[1]
289
- if @io.respond_to?(:internal_encoding) && @io.internal_encoding
290
- data[1].convert! @io.external_encoding
299
+ if internal_encoding
300
+ data[1].convert! external_encoding
291
301
  end
292
302
  @buffer_raw = data[1]
293
303
  end
294
-
295
304
  end
296
-
305
+
297
306
  def process_data(data, initial_data_size)
298
-
299
- if @io.respond_to? :external_encoding
300
- org_encoding = data.encoding
301
- data.force_encoding @io.external_encoding
302
- additional_data_size = bytesize(data) - initial_data_size
303
- unless data.valid_encoding? or source_eof? or additional_data_size >= 4
304
- data.force_encoding org_encoding
305
- raise NeedMoreData
306
- end
307
- data.encode! @io.internal_encoding if @io.internal_encoding
307
+ org_encoding = data.encoding
308
+ data.force_encoding external_encoding if external_encoding
309
+ additional_data_size = data.bytesize - initial_data_size
310
+ unless data.valid_encoding? or source_eof? or additional_data_size >= 4
311
+ data.force_encoding org_encoding
312
+ raise NeedMoreData
308
313
  end
309
-
314
+ data.encode! internal_encoding if internal_encoding
315
+
310
316
  if data && @block
311
- state = BlockState.new @io.pos == data.length, source_eof?
312
- args = [data.dup, state]
313
- args = args.first(@block.arity > 0 ? @block.arity : 1)
317
+ args = [data.dup]
318
+ args << BlockState.new(@io.pos == data.length, source_eof?) if @block.arity > 1
314
319
  data = @block.call(*args)
315
320
  raise IOError, 'Block returned nil' if data.nil?
316
321
  end
317
-
322
+
318
323
  data
319
324
  end
320
-
321
325
  end