filter_io 0.1.6 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/.gitignore CHANGED
@@ -1,4 +1,4 @@
1
- *.gemspec
1
+ Gemfile.lock
2
2
  pkg
3
3
  coverage
4
4
  .bundle
data/Gemfile CHANGED
@@ -1,6 +1,2 @@
1
- source :gemcutter
2
-
3
- gem 'jeweler'
4
- gem 'activesupport', '>=2.3.9'
5
- gem 'i18n', '>=0.4.1'
6
- gem 'test-unit'
1
+ source 'https://rubygems.org/'
2
+ gemspec
data/README.markdown CHANGED
@@ -11,37 +11,41 @@
11
11
 
12
12
  You can install the gem by running:
13
13
 
14
- gem install filter_io
14
+ ``` sh
15
+ gem install filter_io
16
+ ```
15
17
 
16
18
  ### Example Usage
17
19
 
18
20
  #### A Simple Example: ROT-13
19
21
 
20
- io = FilterIO.new io do |data|
21
- data.tr "A-Za-z", "N-ZA-Mn-za-m"
22
- end
22
+ ``` ruby
23
+ io = FilterIO.new io do |data|
24
+ data.tr "A-Za-z", "N-ZA-Mn-za-m"
25
+ end
26
+ ```
23
27
 
24
28
  #### A Useful Example: Line Ending Normalisation
25
29
 
26
30
  A common usage of `filter_io` is to normalise line endings before parsing CSV data:
27
31
 
28
- # open source stream
29
- File.open(filename) do |io|
30
-
31
- # apply filter to stream
32
- io = FilterIO.new(io) do |data, state|
33
- # grab another chunk if the last character is a delimiter
34
- raise FilterIO::NeedMoreData if data =~ /[\r\n]\z/ && !state.eof?
35
- # normalise line endings to LF
36
- data.gsub /\r\n|\r|\n/, "\n"
37
- end
38
-
39
- # process resulting stream normally
40
- FasterCSV.parse(io) do |row|
41
- pp row
42
- end
43
-
44
- end
32
+ ``` ruby
33
+ # open source stream
34
+ File.open(filename) do |io|
35
+ # apply filter to stream
36
+ io = FilterIO.new(io) do |data, state|
37
+ # grab another chunk if the last character is a delimiter
38
+ raise FilterIO::NeedMoreData if data =~ /[\r\n]\z/ && !state.eof?
39
+ # normalise line endings to LF
40
+ data.gsub /\r\n|\r|\n/, "\n"
41
+ end
42
+
43
+ # process resulting stream normally
44
+ FasterCSV.parse(io) do |row|
45
+ pp row
46
+ end
47
+ end
48
+ ```
45
49
 
46
50
  ### Reference
47
51
 
@@ -66,16 +70,18 @@ If your block is unable to process the whole chunk of data immediately, it can r
66
70
 
67
71
  Here's an example which processes whole lines and prepends the line length to the beginning of each line.
68
72
 
69
- io = FilterIO.new io do |data, state|
70
- output = ''
71
- # grab complete lines until we hit EOF
72
- while data =~ /(.*)\n/ || (state.eof? && data =~ /(.+)/)
73
- output << "#{$1.size} #{$1}\n"
74
- data = $'
75
- end
76
- # `output` contains the processed lines, `data` contains any left over partial line
77
- [output, data]
78
- end
73
+ ``` ruby
74
+ io = FilterIO.new io do |data, state|
75
+ output = ''
76
+ # grab complete lines until we hit EOF
77
+ while data =~ /(.*)\n/ || (state.eof? && data =~ /(.+)/)
78
+ output << "#{$1.size} #{$1}\n"
79
+ data = $'
80
+ end
81
+ # `output` contains the processed lines, `data` contains any left over partial line
82
+ [output, data]
83
+ end
84
+ ```
79
85
 
80
86
  #### Block Size
81
87
 
data/Rakefile CHANGED
@@ -1,54 +1,6 @@
1
- require 'rubygems'
2
1
  require 'rake'
3
- require 'rake/testtask'
2
+ require 'bundler/gem_tasks'
3
+ require 'rspec/core/rake_task'
4
4
 
5
- desc 'Default: run unit tests.'
6
- task :default => :test
7
-
8
- desc 'Test the filter_io plugin.'
9
- Rake::TestTask.new(:test) do |t|
10
- t.libs << 'lib'
11
- t.libs << 'test'
12
- t.pattern = 'test/**/*_test.rb'
13
- t.verbose = true
14
- end
15
-
16
- task :test => :check_dependencies
17
-
18
- begin
19
- require 'jeweler'
20
- Jeweler::Tasks.new do |gem|
21
- gem.name = "filter_io"
22
- gem.summary = "Filter IO streams with a block. Ruby's FilterInputStream."
23
- gem.email = "jason@jasoncodes.com"
24
- gem.homepage = "http://github.com/jasoncodes/filter_io"
25
- gem.authors = ["Jason Weathered"]
26
- gem.has_rdoc = false
27
- gem.add_dependency 'activesupport'
28
- end
29
- Jeweler::GemcutterTasks.new
30
- rescue LoadError
31
- puts "Jeweler (or a dependency) not available. Install it with: gem install jeweler"
32
- end
33
-
34
- begin
35
- require 'rcov/rcovtask'
36
- Rcov::RcovTask.new do |t|
37
- t.libs << "test"
38
- t.rcov_opts = [
39
- "--exclude '^(?!lib)'"
40
- ]
41
- t.test_files = FileList[
42
- 'test/**/*_test.rb'
43
- ]
44
- t.output_dir = 'coverage'
45
- t.verbose = true
46
- end
47
- task :rcov do
48
- system "open coverage/index.html"
49
- end
50
- rescue LoadError
51
- task :rcov do
52
- raise "You must install the 'rcov' gem"
53
- end
54
- end
5
+ RSpec::Core::RakeTask.new(:spec)
6
+ task :default => :spec
data/filter_io.gemspec ADDED
@@ -0,0 +1,26 @@
1
+ # coding: utf-8
2
+ lib = File.expand_path('../lib', __FILE__)
3
+ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
4
+ require 'filter_io/version'
5
+
6
+ Gem::Specification.new do |spec|
7
+ spec.name = %q{filter_io}
8
+ spec.version = FilterIO::VERSION
9
+ spec.authors = ['Jason Weathered']
10
+ spec.email = ['jason@jasoncodes.com']
11
+ spec.summary = %q{Filter IO streams with a block. Ruby's FilterInputStream.}
12
+ spec.homepage = 'http://github.com/jasoncodes/filter_io'
13
+ spec.license = 'MIT'
14
+
15
+ spec.files = `git ls-files`.split($/)
16
+ spec.executables = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
17
+ spec.test_files = spec.files.grep(%r{^(test|spec|features)/})
18
+ spec.require_paths = ['lib']
19
+
20
+ spec.add_dependency 'activesupport', '>= 2.3.9'
21
+
22
+ spec.add_development_dependency 'bundler', '~> 1.3'
23
+ spec.add_development_dependency 'rake'
24
+ spec.add_development_dependency 'simplecov'
25
+ spec.add_development_dependency 'rspec', '~> 2.13'
26
+ end
@@ -0,0 +1,3 @@
1
+ class FilterIO
2
+ VERSION = '0.2.0'
3
+ end
data/lib/filter_io.rb CHANGED
@@ -4,12 +4,11 @@ require 'active_support/core_ext/array'
4
4
  require 'active_support/core_ext/hash'
5
5
 
6
6
  class FilterIO
7
-
8
7
  DEFAULT_BLOCK_SIZE = 1024
9
-
8
+
10
9
  class NeedMoreData < Exception
11
10
  end
12
-
11
+
13
12
  class BlockState
14
13
  attr_reader :bof, :eof
15
14
  def initialize(bof, eof)
@@ -19,7 +18,7 @@ class FilterIO
19
18
  alias_method :bof?, :bof
20
19
  alias_method :eof?, :eof
21
20
  end
22
-
21
+
23
22
  def initialize(io, options = nil, &block)
24
23
  @io = io
25
24
  @options = options || {}
@@ -29,75 +28,99 @@ class FilterIO
29
28
  @buffer_raw = empty_string_raw
30
29
  @options.assert_valid_keys :block_size
31
30
  end
32
-
31
+
33
32
  def pos
34
33
  @pos
35
34
  end
36
-
35
+
37
36
  def bof?
38
37
  @pos == 0
39
38
  end
40
-
39
+
41
40
  def eof?
42
41
  @buffer.empty? && source_eof?
43
42
  end
44
-
43
+
45
44
  def source_eof?
46
45
  @buffer_raw.empty? && @io.eof?
47
46
  end
48
-
47
+
49
48
  def close
50
49
  @io.close
51
50
  end
52
-
51
+
53
52
  def closed?
54
53
  @io.closed?
55
54
  end
56
-
57
- def readchar
58
- raise EOFError, 'end of file reached' if eof?
59
- if @io.respond_to? :external_encoding
60
- data = empty_string_raw
61
- begin
62
- data << read(1).force_encoding(@io.internal_encoding || @io.external_encoding)
63
- end until data.valid_encoding? or source_eof?
64
- data.encode! @io.internal_encoding if @io.internal_encoding
65
- data
55
+
56
+ def default_encoding
57
+ unless @default_encoding
58
+ c = @io.getc
59
+ @io.ungetc c
60
+ @default_encoding = c.encoding
61
+ end
62
+ @default_encoding
63
+ end
64
+
65
+ def internal_encoding
66
+ if @io.respond_to?(:internal_encoding)
67
+ @io.internal_encoding
66
68
  else
67
- read(1).ord
69
+ default_encoding
68
70
  end
69
71
  end
70
-
72
+
73
+ def external_encoding
74
+ if @io.respond_to?(:external_encoding)
75
+ @io.external_encoding
76
+ else
77
+ default_encoding
78
+ end
79
+ end
80
+
81
+ def readchar
82
+ raise EOFError, 'end of file reached' if eof?
83
+ data = empty_string_raw
84
+ begin
85
+ byte = read(1)
86
+ if internal_encoding || external_encoding
87
+ byte.force_encoding internal_encoding || external_encoding
88
+ end
89
+ data << byte
90
+ end until data.valid_encoding? or source_eof?
91
+ data.encode! internal_encoding if internal_encoding
92
+ data
93
+ end
94
+
71
95
  def getc
72
96
  readchar
73
97
  rescue EOFError
74
98
  nil
75
99
  end
76
-
100
+
77
101
  def read(length = nil)
78
-
79
102
  raise ArgumentError if length && length < 0
80
103
  return '' if length == 0
81
-
104
+
82
105
  # fill the buffer up to the fill level (or whole input if length is nil)
83
- while !source_eof? && (length.nil? || length > bytesize(@buffer))
106
+ while !source_eof? && (length.nil? || length > @buffer.bytesize)
84
107
  buffer_data @options[:block_size] || length
85
108
  end
86
-
109
+
87
110
  # we now have all the data in the buffer that we need (or can get if EOF)
88
111
  case
89
- when bytesize(@buffer) > 0
112
+ when @buffer.bytesize > 0
90
113
  # limit length to the buffer size if we were asked for it all or have ran out (EOF)
91
- read_length = if length.nil? or length > bytesize(@buffer)
92
- bytesize @buffer
114
+ read_length = if length.nil? or length > @buffer.bytesize
115
+ @buffer.bytesize
93
116
  else
94
117
  length
95
118
  end
96
119
  data = pop_bytes read_length
97
- @pos += bytesize(data)
98
- if length.nil? && @io.respond_to?(:external_encoding)
99
- data.force_encoding @io.external_encoding
100
- data.encode! @io.internal_encoding if @io.internal_encoding
120
+ @pos += data.bytesize
121
+ if length.nil?
122
+ data.force_encoding external_encoding if external_encoding
123
+ data.encode! internal_encoding if internal_encoding
101
124
  end
102
125
  data
103
126
  when source_eof?
@@ -106,15 +129,13 @@ class FilterIO
106
129
  else
107
130
  raise IOError, 'Read error'
108
131
  end
109
-
110
132
  end
111
-
133
+
112
134
  def rewind
113
135
  seek 0, IO::SEEK_SET
114
136
  end
115
-
137
+
116
138
  def seek(offset, whence = IO::SEEK_SET)
117
-
118
139
  new_pos = case whence
119
140
  when IO::SEEK_SET
120
141
  offset
@@ -125,7 +146,7 @@ class FilterIO
125
146
  else
126
147
  raise Errno::EINVAL
127
148
  end
128
-
149
+
129
150
  case new_pos
130
151
  when pos
131
152
  # noop
@@ -137,26 +158,25 @@ class FilterIO
137
158
  else
138
159
  raise Errno::EINVAL, 'Random seek not supported'
139
160
  end
140
-
161
+
141
162
  0
142
163
  end
143
-
164
+
144
165
  def ungetc(char)
145
- char = char.chr if char.respond_to? :chr
146
- @pos -= bytesize(char)
166
+ char = char.chr
167
+ @pos -= char.bytesize
147
168
  @pos = 0 if @pos < 0
148
169
  @buffer = char + @buffer
149
170
  end
150
-
171
+
151
172
  def gets(sep_string = $/)
152
-
153
173
  return nil if eof?
154
174
  return read if sep_string.nil?
155
-
175
+
156
176
  paragraph_mode = sep_string == ''
157
177
  sep_string = "\n\n" if paragraph_mode
158
178
  sep_string = sep_string.to_s unless sep_string.is_a? String
159
-
179
+
160
180
  if paragraph_mode
161
181
  # consume any leading newlines
162
182
  char = getc
@@ -167,12 +187,12 @@ class FilterIO
167
187
  return nil # nothing left except newlines, bail out
168
188
  end
169
189
  end
170
-
190
+
171
191
  # fill the buffer until it contains the separator sequence
172
192
  until source_eof? or @buffer.index(sep_string)
173
193
  buffer_data @options[:block_size]
174
194
  end
175
-
195
+
176
196
  # calculate how much of the buffer to return
177
197
  length = if idx = @buffer.index(sep_string)
178
198
  # we found the separator, include it in our output
@@ -181,18 +201,18 @@ class FilterIO
181
201
  # no separator found (must be EOF). return everything we've got
182
202
  length = @buffer.size
183
203
  end
184
-
204
+
185
205
  # increment the position and return the buffer fragment
186
206
  data = @buffer.slice!(0, length)
187
- @pos += bytesize(data)
188
-
207
+ @pos += data.bytesize
208
+
189
209
  data
190
210
  end
191
-
211
+
192
212
  def readline(sep_string = $/)
193
213
  gets(sep_string) or raise EOFError, 'end of file reached'
194
214
  end
195
-
215
+
196
216
  def each_line(sep_string = $/)
197
217
  unless block_given?
198
218
  klass = defined?(Enumerator) ? Enumerator : Enumerable::Enumerator
@@ -205,64 +225,55 @@ class FilterIO
205
225
  end
206
226
  alias :each :each_line
207
227
  alias :lines :each_line
208
-
228
+
209
229
  def readlines(sep_string = $/)
210
230
  lines = []
211
231
  each_line(sep_string) { |line| lines << line }
212
232
  lines
213
233
  end
214
-
234
+
215
235
  protected
216
-
236
+
217
237
  def empty_string
218
238
  str = String.new
219
- if @io.respond_to?(:internal_encoding)
220
- str.force_encoding @io.internal_encoding || @io.external_encoding
239
+ if internal_encoding || external_encoding
240
+ str.force_encoding internal_encoding || external_encoding
221
241
  end
222
242
  str
223
243
  end
224
-
244
+
225
245
  def empty_string_raw
226
246
  str = String.new
227
- if @io.respond_to?(:external_encoding)
228
- str.force_encoding @io.external_encoding
247
+ if external_encoding
248
+ str.force_encoding external_encoding
229
249
  end
230
250
  str
231
251
  end
232
-
233
- def bytesize(str)
234
- str.respond_to?(:bytesize) ? str.bytesize : str.size
235
- end
236
-
252
+
237
253
  def pop_bytes(count)
238
254
  data = begin
239
- if @io.respond_to?(:internal_encoding)
240
- @buffer.force_encoding 'ASCII-8BIT'
241
- end
255
+ org_encoding = @buffer.encoding
256
+ @buffer.force_encoding 'ASCII-8BIT'
242
257
  @buffer.slice!(0, count)
243
258
  ensure
244
- if @io.respond_to?(:internal_encoding)
245
- @buffer.force_encoding @io.internal_encoding || @io.external_encoding
246
- end
259
+ @buffer.force_encoding org_encoding
247
260
  end
248
261
  data
249
262
  end
250
-
263
+
251
264
  def buffer_data(block_size = nil)
252
-
253
265
  block_size ||= DEFAULT_BLOCK_SIZE
254
-
266
+
255
267
  data = unless @buffer_raw.empty?
256
- @buffer_raw.slice! 0, bytesize(@buffer_raw)
268
+ @buffer_raw.slice! 0, @buffer_raw.bytesize
257
269
  else
258
270
  @io.read(block_size) or return
259
271
  end
260
-
261
- initial_data_size = bytesize(data)
272
+
273
+ initial_data_size = data.bytesize
262
274
  begin
263
-
264
275
  data = process_data data, initial_data_size
265
-
276
+
266
277
  # if no processed data was returned and there is unprocessed data...
267
278
  if data.is_a?(Array) && data.size == 2 && data[0].size == 0 && data[1].size > 0
268
279
  # restore the unprocessed data into the temporary buffer
@@ -270,52 +281,45 @@ class FilterIO
270
281
  # and add some more data to the buffer
271
282
  raise NeedMoreData
272
283
  end
273
-
274
284
  rescue NeedMoreData => e
275
285
  raise EOFError, 'end of file reached' if @io.eof?
276
286
  data << @io.read(block_size)
277
287
  retry
278
288
  end
279
-
289
+
280
290
  data = [data] unless data.is_a? Array
281
291
  raise 'Block must have 1 or 2 values' unless data.size <= 2
282
- if @buffer.respond_to?(:encoding) && @buffer.encoding != data[0].encoding
292
+ if @buffer.encoding != data[0].encoding
283
293
  if [@buffer, data[0]].any? { |x| x.encoding.to_s == 'ASCII-8BIT' }
284
294
  data[0] = data[0].dup.force_encoding @buffer.encoding
285
295
  end
286
296
  end
287
297
  @buffer << data[0]
288
298
  if data[1]
289
- if @io.respond_to?(:internal_encoding) && @io.internal_encoding
290
- data[1].convert! @io.external_encoding
299
+ if internal_encoding
300
+ data[1].convert! external_encoding
291
301
  end
292
302
  @buffer_raw = data[1]
293
303
  end
294
-
295
304
  end
296
-
305
+
297
306
  def process_data(data, initial_data_size)
298
-
299
- if @io.respond_to? :external_encoding
300
- org_encoding = data.encoding
301
- data.force_encoding @io.external_encoding
302
- additional_data_size = bytesize(data) - initial_data_size
303
- unless data.valid_encoding? or source_eof? or additional_data_size >= 4
304
- data.force_encoding org_encoding
305
- raise NeedMoreData
306
- end
307
- data.encode! @io.internal_encoding if @io.internal_encoding
307
+ org_encoding = data.encoding
308
+ data.force_encoding external_encoding if external_encoding
309
+ additional_data_size = data.bytesize - initial_data_size
310
+ unless data.valid_encoding? or source_eof? or additional_data_size >= 4
311
+ data.force_encoding org_encoding
312
+ raise NeedMoreData
308
313
  end
309
-
314
+ data.encode! internal_encoding if internal_encoding
315
+
310
316
  if data && @block
311
- state = BlockState.new @io.pos == data.length, source_eof?
312
- args = [data.dup, state]
313
- args = args.first(@block.arity > 0 ? @block.arity : 1)
317
+ args = [data.dup]
318
+ args << BlockState.new(@io.pos == data.length, source_eof?) if @block.arity > 1
314
319
  data = @block.call(*args)
315
320
  raise IOError, 'Block returned nil' if data.nil?
316
321
  end
317
-
322
+
318
323
  data
319
324
  end
320
-
321
325
  end