buftok 0.2.0 → 0.3.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: 67c39aeda72dd14dc738490d14c121acd8591050057f488517c0a92039187179
4
+ data.tar.gz: dd6f4e0460ac0c2c076d9d4d05e91bdd29a2667167872f2850127b3ed7f72118
5
+ SHA512:
6
+ metadata.gz: 9a2db2dffe2660fcb5ec89e813b8a953cdbdebd530184be5bf88f35dbf7bfd06dd15e5441b6e4316b6a23a83458d43df90212f7a30f9a2d25de5b49609ff6857
7
+ data.tar.gz: c8312db37a322e718163142e1246f909eca8e50f07bde78b706d46b327c035bf2c4d3e7ebb59d10b7cdeb5dbac29dcf7b9b8d1cad2ab5e7702f65ebfde315d79
data/LICENSE.txt ADDED
@@ -0,0 +1,21 @@
1
+ The MIT License (MIT)
2
+
3
+ Copyright (c) 2021 Tony Arcieri, Martin Emde, Erik Michaels-Ober
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in
13
+ all copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21
+ THE SOFTWARE.
data/README.md CHANGED
@@ -1,14 +1,10 @@
1
1
  # BufferedTokenizer
2
2
 
3
- [![Gem Version](https://badge.fury.io/rb/buftok.png)][gem]
4
- [![Build Status](https://travis-ci.org/sferik/buftok.png?branch=master)][travis]
5
- [![Dependency Status](https://gemnasium.com/sferik/buftok.png?travis)][gemnasium]
6
- [![Code Climate](https://codeclimate.com/github/sferik/buftok.png)][codeclimate]
3
+ [![Gem Version](http://img.shields.io/gem/v/buftok.svg)][gem]
4
+ [![Build Status](https://github.com/sferik/buftok/actions/workflows/ruby.yml/badge.svg)][build]
7
5
 
8
6
  [gem]: https://rubygems.org/gems/buftok
9
- [travis]: https://travis-ci.org/sferik/buftok
10
- [gemnasium]: https://gemnasium.com/sferik/buftok
11
- [codeclimate]: https://codeclimate.com/github/sferik/buftok
7
+ [build]: https://github.com/sferik/buftok/actions
12
8
 
13
9
  ###### Statefully split input data by a specifiable token
14
10
 
@@ -21,19 +17,18 @@ something like [EventMachine][].
21
17
  [EventMachine]: http://rubyeventmachine.com/
22
18
 
23
19
  ## Supported Ruby Versions
24
- This library aims to support and is [tested against][travis] the following Ruby
20
+ This library aims to support and is [tested against][build] the following Ruby
25
21
  implementations:
26
22
 
27
- * Ruby 1.8.7
28
- * Ruby 1.9.2
29
- * Ruby 1.9.3
30
- * Ruby 2.0.0
23
+ * Ruby 2.6
24
+ * Ruby 2.7
25
+ * Ruby 3.0
31
26
 
32
27
  If something doesn't work on one of these interpreters, it's a bug.
33
28
 
34
- This library may inadvertently work (or seem to work) on other Ruby
35
- implementations, however support will only be provided for the versions listed
36
- above.
29
+ This code will likely still work on older versions since it has not undergone
30
+ many changes since release. However, support will not be provided for
31
+ end-of-life Ruby versions.
37
32
 
38
33
  If you would like this library to support another Ruby version, you may
39
34
  volunteer to be a maintainer. Being a maintainer entails making sure all tests
@@ -43,6 +38,7 @@ fashion. If critical issues for a particular implementation exist at the time
43
38
  of a major release, support for that Ruby version may be dropped.
44
39
 
45
40
  ## Copyright
46
- Copyright (c) 2006-2013 Tony Arcieri, Martin Emde, Erik Michaels-Ober.
47
- Distributed under the [Ruby license][license].
48
- [license]: http://www.ruby-lang.org/en/LICENSE.txt
41
+ Copyright (c) 2006-2021 Tony Arcieri, Martin Emde, Erik Michaels-Ober.
42
+ Distributed under the [MIT license][license].
43
+
44
+ [license]: https://opensource.org/licenses/MIT
data/buftok.gemspec CHANGED
@@ -1,17 +1,19 @@
1
1
  Gem::Specification.new do |spec|
2
- spec.add_development_dependency 'bundler', '~> 1.0'
2
+ spec.version = "0.3.0"
3
+
3
4
  spec.authors = ["Tony Arcieri", "Martin Emde", "Erik Michaels-Ober"]
4
- spec.description = %q{BufferedTokenizer extracts token delimited entities from a sequence of arbitrary inputs}
5
- spec.email = "sferik@gmail.com"
6
- spec.files = %w(CONTRIBUTING.md Gemfile LICENSE.md README.md Rakefile buftok.gemspec)
7
- spec.files += Dir.glob("lib/**/*.rb")
8
- spec.files += Dir.glob("test/**/*.rb")
9
- spec.test_files = spec.files.grep(%r{^test/})
5
+ spec.summary = %q{BufferedTokenizer extracts token delimited entities from a sequence of string inputs}
6
+ spec.description = spec.summary
7
+ spec.email = ["sferik@gmail.com", "martin.emde@gmail.com"]
8
+ spec.files = %w(CONTRIBUTING.md LICENSE.txt README.md buftok.gemspec) + Dir["lib/**/*.rb"]
10
9
  spec.homepage = "https://github.com/sferik/buftok"
11
- spec.licenses = ['MIT']
10
+ spec.licenses = ["MIT"]
12
11
  spec.name = "buftok"
13
12
  spec.require_paths = ["lib"]
14
- spec.required_rubygems_version = '>= 1.3.5'
15
- spec.summary = spec.description
16
- spec.version = "0.2.0"
13
+ spec.required_rubygems_version = ">= 1.3.5"
14
+
15
+ spec.add_development_dependency "bundler", ">= 1.17"
16
+ spec.add_development_dependency "rake", "~> 10.0"
17
+ spec.add_development_dependency "rdoc"
18
+ spec.add_development_dependency "test-unit"
17
19
  end
data/lib/buftok.rb CHANGED
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+ #
1
3
  # BufferedTokenizer takes a delimiter upon instantiation, or acts line-based
2
4
  # by default. It allows input to be spoon-fed from some outside source which
3
5
  # receives arbitrary length datagrams which may-or-may-not contain the token
@@ -15,10 +17,18 @@ class BufferedTokenizer
15
17
  def initialize(delimiter = $/)
16
18
  @delimiter = delimiter
17
19
  @input = []
18
- @tail = ''
20
+ @tail = String.new
19
21
  @trim = @delimiter.length - 1
20
22
  end
21
23
 
24
+ # Determine the size of the internal buffer.
25
+ #
26
+ # Size is not cached and is determined every time this method is called
27
+ # in order to optimize throughput for extract.
28
+ def size
29
+ @tail.length + @input.inject(0) { |total, input| total + input.length }
30
+ end
31
+
22
32
  # Extract takes an arbitrary string of input data and returns an array of
23
33
  # tokenized entities, provided there were any available to extract. This
24
34
  # makes for easy processing of datagrams using a pattern like:
@@ -53,7 +63,10 @@ class BufferedTokenizer
53
63
  @input << @tail
54
64
  buffer = @input.join
55
65
  @input.clear
56
- @tail = "" # @tail.clear is slightly faster, but not supported on 1.8.7
66
+ @tail = String.new # @tail.clear is slightly faster, but not supported on 1.8.7
57
67
  buffer
58
68
  end
59
69
  end
70
+
71
+ # The expected constant for a gem named buftok
72
+ Buftok = BufferedTokenizer
metadata CHANGED
@@ -1,75 +1,109 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: buftok
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.0
5
- prerelease:
4
+ version: 0.3.0
6
5
  platform: ruby
7
6
  authors:
8
7
  - Tony Arcieri
9
8
  - Martin Emde
10
9
  - Erik Michaels-Ober
11
- autorequire:
10
+ autorequire:
12
11
  bindir: bin
13
12
  cert_chain: []
14
- date: 2013-11-22 00:00:00.000000000 Z
13
+ date: 2021-03-25 00:00:00.000000000 Z
15
14
  dependencies:
16
15
  - !ruby/object:Gem::Dependency
17
16
  name: bundler
18
17
  requirement: !ruby/object:Gem::Requirement
19
- none: false
20
18
  requirements:
21
- - - ~>
19
+ - - ">="
22
20
  - !ruby/object:Gem::Version
23
- version: '1.0'
21
+ version: '1.17'
24
22
  type: :development
25
23
  prerelease: false
26
24
  version_requirements: !ruby/object:Gem::Requirement
27
- none: false
28
25
  requirements:
29
- - - ~>
26
+ - - ">="
30
27
  - !ruby/object:Gem::Version
31
- version: '1.0'
28
+ version: '1.17'
29
+ - !ruby/object:Gem::Dependency
30
+ name: rake
31
+ requirement: !ruby/object:Gem::Requirement
32
+ requirements:
33
+ - - "~>"
34
+ - !ruby/object:Gem::Version
35
+ version: '10.0'
36
+ type: :development
37
+ prerelease: false
38
+ version_requirements: !ruby/object:Gem::Requirement
39
+ requirements:
40
+ - - "~>"
41
+ - !ruby/object:Gem::Version
42
+ version: '10.0'
43
+ - !ruby/object:Gem::Dependency
44
+ name: rdoc
45
+ requirement: !ruby/object:Gem::Requirement
46
+ requirements:
47
+ - - ">="
48
+ - !ruby/object:Gem::Version
49
+ version: '0'
50
+ type: :development
51
+ prerelease: false
52
+ version_requirements: !ruby/object:Gem::Requirement
53
+ requirements:
54
+ - - ">="
55
+ - !ruby/object:Gem::Version
56
+ version: '0'
57
+ - !ruby/object:Gem::Dependency
58
+ name: test-unit
59
+ requirement: !ruby/object:Gem::Requirement
60
+ requirements:
61
+ - - ">="
62
+ - !ruby/object:Gem::Version
63
+ version: '0'
64
+ type: :development
65
+ prerelease: false
66
+ version_requirements: !ruby/object:Gem::Requirement
67
+ requirements:
68
+ - - ">="
69
+ - !ruby/object:Gem::Version
70
+ version: '0'
32
71
  description: BufferedTokenizer extracts token delimited entities from a sequence of
33
- arbitrary inputs
34
- email: sferik@gmail.com
72
+ string inputs
73
+ email:
74
+ - sferik@gmail.com
75
+ - martin.emde@gmail.com
35
76
  executables: []
36
77
  extensions: []
37
78
  extra_rdoc_files: []
38
79
  files:
39
80
  - CONTRIBUTING.md
40
- - Gemfile
41
- - LICENSE.md
81
+ - LICENSE.txt
42
82
  - README.md
43
- - Rakefile
44
83
  - buftok.gemspec
45
84
  - lib/buftok.rb
46
- - test/test_buftok.rb
47
85
  homepage: https://github.com/sferik/buftok
48
86
  licenses:
49
87
  - MIT
50
- post_install_message:
88
+ metadata: {}
89
+ post_install_message:
51
90
  rdoc_options: []
52
91
  require_paths:
53
92
  - lib
54
93
  required_ruby_version: !ruby/object:Gem::Requirement
55
- none: false
56
94
  requirements:
57
- - - ! '>='
95
+ - - ">="
58
96
  - !ruby/object:Gem::Version
59
97
  version: '0'
60
98
  required_rubygems_version: !ruby/object:Gem::Requirement
61
- none: false
62
99
  requirements:
63
- - - ! '>='
100
+ - - ">="
64
101
  - !ruby/object:Gem::Version
65
102
  version: 1.3.5
66
103
  requirements: []
67
- rubyforge_project:
68
- rubygems_version: 1.8.23
69
- signing_key:
70
- specification_version: 3
71
- summary: BufferedTokenizer extracts token delimited entities from a sequence of arbitrary
104
+ rubygems_version: 3.2.3
105
+ signing_key:
106
+ specification_version: 4
107
+ summary: BufferedTokenizer extracts token delimited entities from a sequence of string
72
108
  inputs
73
- test_files:
74
- - test/test_buftok.rb
75
- has_rdoc:
109
+ test_files: []
data/Gemfile DELETED
@@ -1,6 +0,0 @@
1
- source 'https://rubygems.org'
2
-
3
- gem 'rake'
4
- gem 'rdoc'
5
-
6
- gemspec
data/LICENSE.md DELETED
@@ -1,56 +0,0 @@
1
- Ruby is copyrighted free software by Yukihiro Matsumoto <matz@netlab.jp>.
2
- You can redistribute it and/or modify it under either the terms of the
3
- 2-clause BSDL (see the file BSDL), or the conditions below:
4
-
5
- 1. You may make and give away verbatim copies of the source form of the
6
- software without restriction, provided that you duplicate all of the
7
- original copyright notices and associated disclaimers.
8
-
9
- 2. You may modify your copy of the software in any way, provided that
10
- you do at least ONE of the following:
11
-
12
- a) place your modifications in the Public Domain or otherwise
13
- make them Freely Available, such as by posting said
14
- modifications to Usenet or an equivalent medium, or by allowing
15
- the author to include your modifications in the software.
16
-
17
- b) use the modified software only within your corporation or
18
- organization.
19
-
20
- c) give non-standard binaries non-standard names, with
21
- instructions on where to get the original software distribution.
22
-
23
- d) make other distribution arrangements with the author.
24
-
25
- 3. You may distribute the software in object code or binary form,
26
- provided that you do at least ONE of the following:
27
-
28
- a) distribute the binaries and library files of the software,
29
- together with instructions (in the manual page or equivalent)
30
- on where to get the original distribution.
31
-
32
- b) accompany the distribution with the machine-readable source of
33
- the software.
34
-
35
- c) give non-standard binaries non-standard names, with
36
- instructions on where to get the original software distribution.
37
-
38
- d) make other distribution arrangements with the author.
39
-
40
- 4. You may modify and include the part of the software into any other
41
- software (possibly commercial). But some files in the distribution
42
- are not written by the author, so that they are not under these terms.
43
-
44
- For the list of those files and their copying conditions, see the
45
- file LEGAL.
46
-
47
- 5. The scripts and library files supplied as input to or produced as
48
- output from the software do not automatically fall under the
49
- copyright of the software, but belong to whomever generated them,
50
- and may be sold commercially, and may be aggregated with this
51
- software.
52
-
53
- 6. THIS SOFTWARE IS PROVIDED "AS IS" AND WITHOUT ANY EXPRESS OR
54
- IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED
55
- WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
56
- PURPOSE.
data/Rakefile DELETED
@@ -1,66 +0,0 @@
1
- require 'bundler'
2
- require 'rdoc/task'
3
- require 'rake/testtask'
4
-
5
- task :default => :test
6
-
7
- Bundler::GemHelper.install_tasks
8
-
9
- RDoc::Task.new do |task|
10
- task.rdoc_dir = 'doc'
11
- task.title = 'BufferedTokenizer'
12
- task.rdoc_files.include('lib/**/*.rb')
13
- end
14
-
15
- Rake::TestTask.new :test do |t|
16
- t.libs << 'lib'
17
- t.test_files = FileList['test/**/*.rb']
18
- end
19
-
20
- desc "Benchmark the current implementation"
21
- task :bench do
22
- require 'benchmark'
23
- require File.expand_path('lib/buftok', File.dirname(__FILE__))
24
-
25
- n = 50000
26
- delimiter = "\n\n"
27
-
28
- frequency1 = 1000
29
- puts "generating #{n} strings, with #{delimiter.inspect} every #{frequency1} strings..."
30
- data1 = (0...n).map do |i|
31
- (((i % frequency1 == 1) ? "\n" : "") +
32
- ("s" * i) +
33
- ((i % frequency1 == 0) ? "\n" : "")).freeze
34
- end
35
-
36
- frequency2 = 10
37
- puts "generating #{n} strings, with #{delimiter.inspect} every #{frequency2} strings..."
38
- data2 = (0...n).map do |i|
39
- (((i % frequency2 == 1) ? "\n" : "") +
40
- ("s" * i) +
41
- ((i % frequency2 == 0) ? "\n" : "")).freeze
42
- end
43
-
44
- Benchmark.bmbm do |x|
45
- x.report("1 char, freq: #{frequency1}") do
46
- bt1 = BufferedTokenizer.new
47
- n.times { |i| bt1.extract(data1[i]) }
48
- end
49
-
50
- x.report("2 char, freq: #{frequency1}") do
51
- bt2 = BufferedTokenizer.new(delimiter)
52
- n.times { |i| bt2.extract(data1[i]) }
53
- end
54
-
55
- x.report("1 char, freq: #{frequency2}") do
56
- bt3 = BufferedTokenizer.new
57
- n.times { |i| bt3.extract(data2[i]) }
58
- end
59
-
60
- x.report("2 char, freq: #{frequency2}") do
61
- bt4 = BufferedTokenizer.new(delimiter)
62
- n.times { |i| bt4.extract(data2[i]) }
63
- end
64
-
65
- end
66
- end
data/test/test_buftok.rb DELETED
@@ -1,27 +0,0 @@
1
- require 'test/unit'
2
- require 'buftok'
3
-
4
- class TestBuftok < Test::Unit::TestCase
5
- def test_buftok
6
- tokenizer = BufferedTokenizer.new
7
- assert_equal %w[foo], tokenizer.extract("foo\nbar".freeze)
8
- assert_equal %w[barbaz qux], tokenizer.extract("baz\nqux\nquu".freeze)
9
- assert_equal 'quu', tokenizer.flush
10
- assert_equal '', tokenizer.flush
11
- end
12
-
13
- def test_delimiter
14
- tokenizer = BufferedTokenizer.new('<>')
15
- assert_equal ['', "foo\n"], tokenizer.extract("<>foo\n<>".freeze)
16
- assert_equal %w[bar], tokenizer.extract('bar<>baz'.freeze)
17
- assert_equal 'baz', tokenizer.flush
18
- end
19
-
20
- def test_split_delimiter
21
- tokenizer = BufferedTokenizer.new('<>'.freeze)
22
- assert_equal [], tokenizer.extract('foo<'.freeze)
23
- assert_equal %w[foo], tokenizer.extract('>bar<'.freeze)
24
- assert_equal %w[bar<baz qux], tokenizer.extract('baz<>qux<>'.freeze)
25
- assert_equal '', tokenizer.flush
26
- end
27
- end