buftok 0.2.0 → 0.3.0

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the differences between the package versions as they appear in their respective public registries.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: 67c39aeda72dd14dc738490d14c121acd8591050057f488517c0a92039187179
4
+ data.tar.gz: dd6f4e0460ac0c2c076d9d4d05e91bdd29a2667167872f2850127b3ed7f72118
5
+ SHA512:
6
+ metadata.gz: 9a2db2dffe2660fcb5ec89e813b8a953cdbdebd530184be5bf88f35dbf7bfd06dd15e5441b6e4316b6a23a83458d43df90212f7a30f9a2d25de5b49609ff6857
7
+ data.tar.gz: c8312db37a322e718163142e1246f909eca8e50f07bde78b706d46b327c035bf2c4d3e7ebb59d10b7cdeb5dbac29dcf7b9b8d1cad2ab5e7702f65ebfde315d79
data/LICENSE.txt ADDED
@@ -0,0 +1,21 @@
1
+ The MIT License (MIT)
2
+
3
+ Copyright (c) 2021 Tony Arcieri, Martin Emde, Erik Michaels-Ober
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in
13
+ all copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21
+ THE SOFTWARE.
data/README.md CHANGED
@@ -1,14 +1,10 @@
1
1
  # BufferedTokenizer
2
2
 
3
- [![Gem Version](https://badge.fury.io/rb/buftok.png)][gem]
4
- [![Build Status](https://travis-ci.org/sferik/buftok.png?branch=master)][travis]
5
- [![Dependency Status](https://gemnasium.com/sferik/buftok.png?travis)][gemnasium]
6
- [![Code Climate](https://codeclimate.com/github/sferik/buftok.png)][codeclimate]
3
+ [![Gem Version](http://img.shields.io/gem/v/buftok.svg)][gem]
4
+ [![Build Status](https://github.com/sferik/buftok/actions/workflows/ruby.yml/badge.svg)][build]
7
5
 
8
6
  [gem]: https://rubygems.org/gems/buftok
9
- [travis]: https://travis-ci.org/sferik/buftok
10
- [gemnasium]: https://gemnasium.com/sferik/buftok
11
- [codeclimate]: https://codeclimate.com/github/sferik/buftok
7
+ [build]: https://github.com/sferik/buftok/actions
12
8
 
13
9
  ###### Statefully split input data by a specifiable token
14
10
 
@@ -21,19 +17,18 @@ something like [EventMachine][].
21
17
  [EventMachine]: http://rubyeventmachine.com/
22
18
 
23
19
  ## Supported Ruby Versions
24
- This library aims to support and is [tested against][travis] the following Ruby
20
+ This library aims to support and is [tested against][build] the following Ruby
25
21
  implementations:
26
22
 
27
- * Ruby 1.8.7
28
- * Ruby 1.9.2
29
- * Ruby 1.9.3
30
- * Ruby 2.0.0
23
+ * Ruby 2.6
24
+ * Ruby 2.7
25
+ * Ruby 3.0
31
26
 
32
27
  If something doesn't work on one of these interpreters, it's a bug.
33
28
 
34
- This library may inadvertently work (or seem to work) on other Ruby
35
- implementations, however support will only be provided for the versions listed
36
- above.
29
+ This code will likely still work on older versions since it has not undergone
30
+ many changes since release. However, support will not be provided for
31
+ end-of-life ruby versions.
37
32
 
38
33
  If you would like this library to support another Ruby version, you may
39
34
  volunteer to be a maintainer. Being a maintainer entails making sure all tests
@@ -43,6 +38,7 @@ fashion. If critical issues for a particular implementation exist at the time
43
38
  of a major release, support for that Ruby version may be dropped.
44
39
 
45
40
  ## Copyright
46
- Copyright (c) 2006-2013 Tony Arcieri, Martin Emde, Erik Michaels-Ober.
47
- Distributed under the [Ruby license][license].
48
- [license]: http://www.ruby-lang.org/en/LICENSE.txt
41
+ Copyright (c) 2006-2021 Tony Arcieri, Martin Emde, Erik Michaels-Ober.
42
+ Distributed under the [MIT license][license].
43
+
44
+ [license]: https://opensource.org/licenses/MIT
data/buftok.gemspec CHANGED
@@ -1,17 +1,19 @@
1
1
  Gem::Specification.new do |spec|
2
- spec.add_development_dependency 'bundler', '~> 1.0'
2
+ spec.version = "0.3.0"
3
+
3
4
  spec.authors = ["Tony Arcieri", "Martin Emde", "Erik Michaels-Ober"]
4
- spec.description = %q{BufferedTokenizer extracts token delimited entities from a sequence of arbitrary inputs}
5
- spec.email = "sferik@gmail.com"
6
- spec.files = %w(CONTRIBUTING.md Gemfile LICENSE.md README.md Rakefile buftok.gemspec)
7
- spec.files += Dir.glob("lib/**/*.rb")
8
- spec.files += Dir.glob("test/**/*.rb")
9
- spec.test_files = spec.files.grep(%r{^test/})
5
+ spec.summary = %q{BufferedTokenizer extracts token delimited entities from a sequence of string inputs}
6
+ spec.description = spec.summary
7
+ spec.email = ["sferik@gmail.com", "martin.emde@gmail.com"]
8
+ spec.files = %w(CONTRIBUTING.md LICENSE.txt README.md buftok.gemspec) + Dir["lib/**/*.rb"]
10
9
  spec.homepage = "https://github.com/sferik/buftok"
11
- spec.licenses = ['MIT']
10
+ spec.licenses = ["MIT"]
12
11
  spec.name = "buftok"
13
12
  spec.require_paths = ["lib"]
14
- spec.required_rubygems_version = '>= 1.3.5'
15
- spec.summary = spec.description
16
- spec.version = "0.2.0"
13
+ spec.required_rubygems_version = ">= 1.3.5"
14
+
15
+ spec.add_development_dependency "bundler", ">= 1.17"
16
+ spec.add_development_dependency "rake", "~> 10.0"
17
+ spec.add_development_dependency "rdoc"
18
+ spec.add_development_dependency "test-unit"
17
19
  end
data/lib/buftok.rb CHANGED
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+ #
1
3
  # BufferedTokenizer takes a delimiter upon instantiation, or acts line-based
2
4
  # by default. It allows input to be spoon-fed from some outside source which
3
5
  # receives arbitrary length datagrams which may-or-may-not contain the token
@@ -15,10 +17,18 @@ class BufferedTokenizer
15
17
  def initialize(delimiter = $/)
16
18
  @delimiter = delimiter
17
19
  @input = []
18
- @tail = ''
20
+ @tail = String.new
19
21
  @trim = @delimiter.length - 1
20
22
  end
21
23
 
24
+ # Determine the size of the internal buffer.
25
+ #
26
+ # Size is not cached and is determined every time this method is called
27
+ # in order to optimize throughput for extract.
28
+ def size
29
+ @tail.length + @input.inject(0) { |total, input| total + input.length }
30
+ end
31
+
22
32
  # Extract takes an arbitrary string of input data and returns an array of
23
33
  # tokenized entities, provided there were any available to extract. This
24
34
  # makes for easy processing of datagrams using a pattern like:
@@ -53,7 +63,10 @@ class BufferedTokenizer
53
63
  @input << @tail
54
64
  buffer = @input.join
55
65
  @input.clear
56
- @tail = "" # @tail.clear is slightly faster, but not supported on 1.8.7
66
+ @tail = String.new # @tail.clear is slightly faster, but not supported on 1.8.7
57
67
  buffer
58
68
  end
59
69
  end
70
+
71
+ # The expected constant for a gem named buftok
72
+ Buftok = BufferedTokenizer
metadata CHANGED
@@ -1,75 +1,109 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: buftok
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.0
5
- prerelease:
4
+ version: 0.3.0
6
5
  platform: ruby
7
6
  authors:
8
7
  - Tony Arcieri
9
8
  - Martin Emde
10
9
  - Erik Michaels-Ober
11
- autorequire:
10
+ autorequire:
12
11
  bindir: bin
13
12
  cert_chain: []
14
- date: 2013-11-22 00:00:00.000000000 Z
13
+ date: 2021-03-25 00:00:00.000000000 Z
15
14
  dependencies:
16
15
  - !ruby/object:Gem::Dependency
17
16
  name: bundler
18
17
  requirement: !ruby/object:Gem::Requirement
19
- none: false
20
18
  requirements:
21
- - - ~>
19
+ - - ">="
22
20
  - !ruby/object:Gem::Version
23
- version: '1.0'
21
+ version: '1.17'
24
22
  type: :development
25
23
  prerelease: false
26
24
  version_requirements: !ruby/object:Gem::Requirement
27
- none: false
28
25
  requirements:
29
- - - ~>
26
+ - - ">="
30
27
  - !ruby/object:Gem::Version
31
- version: '1.0'
28
+ version: '1.17'
29
+ - !ruby/object:Gem::Dependency
30
+ name: rake
31
+ requirement: !ruby/object:Gem::Requirement
32
+ requirements:
33
+ - - "~>"
34
+ - !ruby/object:Gem::Version
35
+ version: '10.0'
36
+ type: :development
37
+ prerelease: false
38
+ version_requirements: !ruby/object:Gem::Requirement
39
+ requirements:
40
+ - - "~>"
41
+ - !ruby/object:Gem::Version
42
+ version: '10.0'
43
+ - !ruby/object:Gem::Dependency
44
+ name: rdoc
45
+ requirement: !ruby/object:Gem::Requirement
46
+ requirements:
47
+ - - ">="
48
+ - !ruby/object:Gem::Version
49
+ version: '0'
50
+ type: :development
51
+ prerelease: false
52
+ version_requirements: !ruby/object:Gem::Requirement
53
+ requirements:
54
+ - - ">="
55
+ - !ruby/object:Gem::Version
56
+ version: '0'
57
+ - !ruby/object:Gem::Dependency
58
+ name: test-unit
59
+ requirement: !ruby/object:Gem::Requirement
60
+ requirements:
61
+ - - ">="
62
+ - !ruby/object:Gem::Version
63
+ version: '0'
64
+ type: :development
65
+ prerelease: false
66
+ version_requirements: !ruby/object:Gem::Requirement
67
+ requirements:
68
+ - - ">="
69
+ - !ruby/object:Gem::Version
70
+ version: '0'
32
71
  description: BufferedTokenizer extracts token delimited entities from a sequence of
33
- arbitrary inputs
34
- email: sferik@gmail.com
72
+ string inputs
73
+ email:
74
+ - sferik@gmail.com
75
+ - martin.emde@gmail.com
35
76
  executables: []
36
77
  extensions: []
37
78
  extra_rdoc_files: []
38
79
  files:
39
80
  - CONTRIBUTING.md
40
- - Gemfile
41
- - LICENSE.md
81
+ - LICENSE.txt
42
82
  - README.md
43
- - Rakefile
44
83
  - buftok.gemspec
45
84
  - lib/buftok.rb
46
- - test/test_buftok.rb
47
85
  homepage: https://github.com/sferik/buftok
48
86
  licenses:
49
87
  - MIT
50
- post_install_message:
88
+ metadata: {}
89
+ post_install_message:
51
90
  rdoc_options: []
52
91
  require_paths:
53
92
  - lib
54
93
  required_ruby_version: !ruby/object:Gem::Requirement
55
- none: false
56
94
  requirements:
57
- - - ! '>='
95
+ - - ">="
58
96
  - !ruby/object:Gem::Version
59
97
  version: '0'
60
98
  required_rubygems_version: !ruby/object:Gem::Requirement
61
- none: false
62
99
  requirements:
63
- - - ! '>='
100
+ - - ">="
64
101
  - !ruby/object:Gem::Version
65
102
  version: 1.3.5
66
103
  requirements: []
67
- rubyforge_project:
68
- rubygems_version: 1.8.23
69
- signing_key:
70
- specification_version: 3
71
- summary: BufferedTokenizer extracts token delimited entities from a sequence of arbitrary
104
+ rubygems_version: 3.2.3
105
+ signing_key:
106
+ specification_version: 4
107
+ summary: BufferedTokenizer extracts token delimited entities from a sequence of string
72
108
  inputs
73
- test_files:
74
- - test/test_buftok.rb
75
- has_rdoc:
109
+ test_files: []
data/Gemfile DELETED
@@ -1,6 +0,0 @@
1
- source 'https://rubygems.org'
2
-
3
- gem 'rake'
4
- gem 'rdoc'
5
-
6
- gemspec
data/LICENSE.md DELETED
@@ -1,56 +0,0 @@
1
- Ruby is copyrighted free software by Yukihiro Matsumoto <matz@netlab.jp>.
2
- You can redistribute it and/or modify it under either the terms of the
3
- 2-clause BSDL (see the file BSDL), or the conditions below:
4
-
5
- 1. You may make and give away verbatim copies of the source form of the
6
- software without restriction, provided that you duplicate all of the
7
- original copyright notices and associated disclaimers.
8
-
9
- 2. You may modify your copy of the software in any way, provided that
10
- you do at least ONE of the following:
11
-
12
- a) place your modifications in the Public Domain or otherwise
13
- make them Freely Available, such as by posting said
14
- modifications to Usenet or an equivalent medium, or by allowing
15
- the author to include your modifications in the software.
16
-
17
- b) use the modified software only within your corporation or
18
- organization.
19
-
20
- c) give non-standard binaries non-standard names, with
21
- instructions on where to get the original software distribution.
22
-
23
- d) make other distribution arrangements with the author.
24
-
25
- 3. You may distribute the software in object code or binary form,
26
- provided that you do at least ONE of the following:
27
-
28
- a) distribute the binaries and library files of the software,
29
- together with instructions (in the manual page or equivalent)
30
- on where to get the original distribution.
31
-
32
- b) accompany the distribution with the machine-readable source of
33
- the software.
34
-
35
- c) give non-standard binaries non-standard names, with
36
- instructions on where to get the original software distribution.
37
-
38
- d) make other distribution arrangements with the author.
39
-
40
- 4. You may modify and include the part of the software into any other
41
- software (possibly commercial). But some files in the distribution
42
- are not written by the author, so that they are not under these terms.
43
-
44
- For the list of those files and their copying conditions, see the
45
- file LEGAL.
46
-
47
- 5. The scripts and library files supplied as input to or produced as
48
- output from the software do not automatically fall under the
49
- copyright of the software, but belong to whomever generated them,
50
- and may be sold commercially, and may be aggregated with this
51
- software.
52
-
53
- 6. THIS SOFTWARE IS PROVIDED "AS IS" AND WITHOUT ANY EXPRESS OR
54
- IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED
55
- WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
56
- PURPOSE.
data/Rakefile DELETED
@@ -1,66 +0,0 @@
1
- require 'bundler'
2
- require 'rdoc/task'
3
- require 'rake/testtask'
4
-
5
- task :default => :test
6
-
7
- Bundler::GemHelper.install_tasks
8
-
9
- RDoc::Task.new do |task|
10
- task.rdoc_dir = 'doc'
11
- task.title = 'BufferedTokenizer'
12
- task.rdoc_files.include('lib/**/*.rb')
13
- end
14
-
15
- Rake::TestTask.new :test do |t|
16
- t.libs << 'lib'
17
- t.test_files = FileList['test/**/*.rb']
18
- end
19
-
20
- desc "Benchmark the current implementation"
21
- task :bench do
22
- require 'benchmark'
23
- require File.expand_path('lib/buftok', File.dirname(__FILE__))
24
-
25
- n = 50000
26
- delimiter = "\n\n"
27
-
28
- frequency1 = 1000
29
- puts "generating #{n} strings, with #{delimiter.inspect} every #{frequency1} strings..."
30
- data1 = (0...n).map do |i|
31
- (((i % frequency1 == 1) ? "\n" : "") +
32
- ("s" * i) +
33
- ((i % frequency1 == 0) ? "\n" : "")).freeze
34
- end
35
-
36
- frequency2 = 10
37
- puts "generating #{n} strings, with #{delimiter.inspect} every #{frequency2} strings..."
38
- data2 = (0...n).map do |i|
39
- (((i % frequency2 == 1) ? "\n" : "") +
40
- ("s" * i) +
41
- ((i % frequency2 == 0) ? "\n" : "")).freeze
42
- end
43
-
44
- Benchmark.bmbm do |x|
45
- x.report("1 char, freq: #{frequency1}") do
46
- bt1 = BufferedTokenizer.new
47
- n.times { |i| bt1.extract(data1[i]) }
48
- end
49
-
50
- x.report("2 char, freq: #{frequency1}") do
51
- bt2 = BufferedTokenizer.new(delimiter)
52
- n.times { |i| bt2.extract(data1[i]) }
53
- end
54
-
55
- x.report("1 char, freq: #{frequency2}") do
56
- bt3 = BufferedTokenizer.new
57
- n.times { |i| bt3.extract(data2[i]) }
58
- end
59
-
60
- x.report("2 char, freq: #{frequency2}") do
61
- bt4 = BufferedTokenizer.new(delimiter)
62
- n.times { |i| bt4.extract(data2[i]) }
63
- end
64
-
65
- end
66
- end
data/test/test_buftok.rb DELETED
@@ -1,27 +0,0 @@
1
- require 'test/unit'
2
- require 'buftok'
3
-
4
- class TestBuftok < Test::Unit::TestCase
5
- def test_buftok
6
- tokenizer = BufferedTokenizer.new
7
- assert_equal %w[foo], tokenizer.extract("foo\nbar".freeze)
8
- assert_equal %w[barbaz qux], tokenizer.extract("baz\nqux\nquu".freeze)
9
- assert_equal 'quu', tokenizer.flush
10
- assert_equal '', tokenizer.flush
11
- end
12
-
13
- def test_delimiter
14
- tokenizer = BufferedTokenizer.new('<>')
15
- assert_equal ['', "foo\n"], tokenizer.extract("<>foo\n<>".freeze)
16
- assert_equal %w[bar], tokenizer.extract('bar<>baz'.freeze)
17
- assert_equal 'baz', tokenizer.flush
18
- end
19
-
20
- def test_split_delimiter
21
- tokenizer = BufferedTokenizer.new('<>'.freeze)
22
- assert_equal [], tokenizer.extract('foo<'.freeze)
23
- assert_equal %w[foo], tokenizer.extract('>bar<'.freeze)
24
- assert_equal %w[bar<baz qux], tokenizer.extract('baz<>qux<>'.freeze)
25
- assert_equal '', tokenizer.flush
26
- end
27
- end