buftok 0.2.0 → 0.3.0
This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/LICENSE.txt +21 -0
- data/README.md +14 -18
- data/buftok.gemspec +13 -11
- data/lib/buftok.rb +15 -2
- metadata +63 -29
- data/Gemfile +0 -6
- data/LICENSE.md +0 -56
- data/Rakefile +0 -66
- data/test/test_buftok.rb +0 -27
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: 67c39aeda72dd14dc738490d14c121acd8591050057f488517c0a92039187179
|
4
|
+
data.tar.gz: dd6f4e0460ac0c2c076d9d4d05e91bdd29a2667167872f2850127b3ed7f72118
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 9a2db2dffe2660fcb5ec89e813b8a953cdbdebd530184be5bf88f35dbf7bfd06dd15e5441b6e4316b6a23a83458d43df90212f7a30f9a2d25de5b49609ff6857
|
7
|
+
data.tar.gz: c8312db37a322e718163142e1246f909eca8e50f07bde78b706d46b327c035bf2c4d3e7ebb59d10b7cdeb5dbac29dcf7b9b8d1cad2ab5e7702f65ebfde315d79
|
data/LICENSE.txt
ADDED
@@ -0,0 +1,21 @@
|
|
1
|
+
The MIT License (MIT)
|
2
|
+
|
3
|
+
Copyright (c) 2021 Tony Arcieri, Martin Emde, Erik Michaels-Ober
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
7
|
+
in the Software without restriction, including without limitation the rights
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
10
|
+
furnished to do so, subject to the following conditions:
|
11
|
+
|
12
|
+
The above copyright notice and this permission notice shall be included in
|
13
|
+
all copies or substantial portions of the Software.
|
14
|
+
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
21
|
+
THE SOFTWARE.
|
data/README.md
CHANGED
@@ -1,14 +1,10 @@
|
|
1
1
|
# BufferedTokenizer
|
2
2
|
|
3
|
-
[][gemnasium]
|
6
|
-
[][codeclimate]
|
3
|
+
[][gem]
|
4
|
+
[][build]
|
7
5
|
|
8
6
|
[gem]: https://rubygems.org/gems/buftok
|
9
|
-
[
|
10
|
-
[gemnasium]: https://gemnasium.com/sferik/buftok
|
11
|
-
[codeclimate]: https://codeclimate.com/github/sferik/buftok
|
7
|
+
[build]: https://github.com/sferik/buftok/actions
|
12
8
|
|
13
9
|
###### Statefully split input data by a specifiable token
|
14
10
|
|
@@ -21,19 +17,18 @@ something like [EventMachine][].
|
|
21
17
|
[EventMachine]: http://rubyeventmachine.com/
|
22
18
|
|
23
19
|
## Supported Ruby Versions
|
24
|
-
This library aims to support and is [tested against][
|
20
|
+
This library aims to support and is [tested against][build] the following Ruby
|
25
21
|
implementations:
|
26
22
|
|
27
|
-
* Ruby
|
28
|
-
* Ruby
|
29
|
-
* Ruby
|
30
|
-
* Ruby 2.0.0
|
23
|
+
* Ruby 2.6
|
24
|
+
* Ruby 2.7
|
25
|
+
* Ruby 3.0
|
31
26
|
|
32
27
|
If something doesn't work on one of these interpreters, it's a bug.
|
33
28
|
|
34
|
-
This
|
35
|
-
|
36
|
-
|
29
|
+
This code will likely still work on older versions since it has not undergone
|
30
|
+
many changes since release. However, support will not be provided for
|
31
|
+
end-of-life ruby versions.
|
37
32
|
|
38
33
|
If you would like this library to support another Ruby version, you may
|
39
34
|
volunteer to be a maintainer. Being a maintainer entails making sure all tests
|
@@ -43,6 +38,7 @@ fashion. If critical issues for a particular implementation exist at the time
|
|
43
38
|
of a major release, support for that Ruby version may be dropped.
|
44
39
|
|
45
40
|
## Copyright
|
46
|
-
Copyright (c) 2006-
|
47
|
-
Distributed under the [
|
48
|
-
|
41
|
+
Copyright (c) 2006-2021 Tony Arcieri, Martin Emde, Erik Michaels-Ober.
|
42
|
+
Distributed under the [MIT license][license].
|
43
|
+
|
44
|
+
[license]: https://opensource.org/licenses/MIT
|
data/buftok.gemspec
CHANGED
@@ -1,17 +1,19 @@
|
|
1
1
|
Gem::Specification.new do |spec|
|
2
|
-
spec.
|
2
|
+
spec.version = "0.3.0"
|
3
|
+
|
3
4
|
spec.authors = ["Tony Arcieri", "Martin Emde", "Erik Michaels-Ober"]
|
4
|
-
spec.
|
5
|
-
spec.
|
6
|
-
spec.
|
7
|
-
spec.files
|
8
|
-
spec.files += Dir.glob("test/**/*.rb")
|
9
|
-
spec.test_files = spec.files.grep(%r{^test/})
|
5
|
+
spec.summary = %q{BufferedTokenizer extracts token delimited entities from a sequence of string inputs}
|
6
|
+
spec.description = spec.summary
|
7
|
+
spec.email = ["sferik@gmail.com", "martin.emde@gmail.com"]
|
8
|
+
spec.files = %w(CONTRIBUTING.md LICENSE.txt README.md buftok.gemspec) + Dir["lib/**/*.rb"]
|
10
9
|
spec.homepage = "https://github.com/sferik/buftok"
|
11
|
-
spec.licenses = [
|
10
|
+
spec.licenses = ["MIT"]
|
12
11
|
spec.name = "buftok"
|
13
12
|
spec.require_paths = ["lib"]
|
14
|
-
spec.required_rubygems_version =
|
15
|
-
|
16
|
-
spec.
|
13
|
+
spec.required_rubygems_version = ">= 1.3.5"
|
14
|
+
|
15
|
+
spec.add_development_dependency "bundler", ">= 1.17"
|
16
|
+
spec.add_development_dependency "rake", "~> 10.0"
|
17
|
+
spec.add_development_dependency "rdoc"
|
18
|
+
spec.add_development_dependency "test-unit"
|
17
19
|
end
|
data/lib/buftok.rb
CHANGED
@@ -1,3 +1,5 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
#
|
1
3
|
# BufferedTokenizer takes a delimiter upon instantiation, or acts line-based
|
2
4
|
# by default. It allows input to be spoon-fed from some outside source which
|
3
5
|
# receives arbitrary length datagrams which may-or-may-not contain the token
|
@@ -15,10 +17,18 @@ class BufferedTokenizer
|
|
15
17
|
def initialize(delimiter = $/)
|
16
18
|
@delimiter = delimiter
|
17
19
|
@input = []
|
18
|
-
@tail =
|
20
|
+
@tail = String.new
|
19
21
|
@trim = @delimiter.length - 1
|
20
22
|
end
|
21
23
|
|
24
|
+
# Determine the size of the internal buffer.
|
25
|
+
#
|
26
|
+
# Size is not cached and is determined every time this method is called
|
27
|
+
# in order to optimize throughput for extract.
|
28
|
+
def size
|
29
|
+
@tail.length + @input.inject(0) { |total, input| total + input.length }
|
30
|
+
end
|
31
|
+
|
22
32
|
# Extract takes an arbitrary string of input data and returns an array of
|
23
33
|
# tokenized entities, provided there were any available to extract. This
|
24
34
|
# makes for easy processing of datagrams using a pattern like:
|
@@ -53,7 +63,10 @@ class BufferedTokenizer
|
|
53
63
|
@input << @tail
|
54
64
|
buffer = @input.join
|
55
65
|
@input.clear
|
56
|
-
@tail =
|
66
|
+
@tail = String.new # @tail.clear is slightly faster, but not supported on 1.8.7
|
57
67
|
buffer
|
58
68
|
end
|
59
69
|
end
|
70
|
+
|
71
|
+
# The expected constant for a gem named buftok
|
72
|
+
Buftok = BufferedTokenizer
|
metadata
CHANGED
@@ -1,75 +1,109 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: buftok
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
5
|
-
prerelease:
|
4
|
+
version: 0.3.0
|
6
5
|
platform: ruby
|
7
6
|
authors:
|
8
7
|
- Tony Arcieri
|
9
8
|
- Martin Emde
|
10
9
|
- Erik Michaels-Ober
|
11
|
-
autorequire:
|
10
|
+
autorequire:
|
12
11
|
bindir: bin
|
13
12
|
cert_chain: []
|
14
|
-
date:
|
13
|
+
date: 2021-03-25 00:00:00.000000000 Z
|
15
14
|
dependencies:
|
16
15
|
- !ruby/object:Gem::Dependency
|
17
16
|
name: bundler
|
18
17
|
requirement: !ruby/object:Gem::Requirement
|
19
|
-
none: false
|
20
18
|
requirements:
|
21
|
-
- -
|
19
|
+
- - ">="
|
22
20
|
- !ruby/object:Gem::Version
|
23
|
-
version: '1.
|
21
|
+
version: '1.17'
|
24
22
|
type: :development
|
25
23
|
prerelease: false
|
26
24
|
version_requirements: !ruby/object:Gem::Requirement
|
27
|
-
none: false
|
28
25
|
requirements:
|
29
|
-
- -
|
26
|
+
- - ">="
|
30
27
|
- !ruby/object:Gem::Version
|
31
|
-
version: '1.
|
28
|
+
version: '1.17'
|
29
|
+
- !ruby/object:Gem::Dependency
|
30
|
+
name: rake
|
31
|
+
requirement: !ruby/object:Gem::Requirement
|
32
|
+
requirements:
|
33
|
+
- - "~>"
|
34
|
+
- !ruby/object:Gem::Version
|
35
|
+
version: '10.0'
|
36
|
+
type: :development
|
37
|
+
prerelease: false
|
38
|
+
version_requirements: !ruby/object:Gem::Requirement
|
39
|
+
requirements:
|
40
|
+
- - "~>"
|
41
|
+
- !ruby/object:Gem::Version
|
42
|
+
version: '10.0'
|
43
|
+
- !ruby/object:Gem::Dependency
|
44
|
+
name: rdoc
|
45
|
+
requirement: !ruby/object:Gem::Requirement
|
46
|
+
requirements:
|
47
|
+
- - ">="
|
48
|
+
- !ruby/object:Gem::Version
|
49
|
+
version: '0'
|
50
|
+
type: :development
|
51
|
+
prerelease: false
|
52
|
+
version_requirements: !ruby/object:Gem::Requirement
|
53
|
+
requirements:
|
54
|
+
- - ">="
|
55
|
+
- !ruby/object:Gem::Version
|
56
|
+
version: '0'
|
57
|
+
- !ruby/object:Gem::Dependency
|
58
|
+
name: test-unit
|
59
|
+
requirement: !ruby/object:Gem::Requirement
|
60
|
+
requirements:
|
61
|
+
- - ">="
|
62
|
+
- !ruby/object:Gem::Version
|
63
|
+
version: '0'
|
64
|
+
type: :development
|
65
|
+
prerelease: false
|
66
|
+
version_requirements: !ruby/object:Gem::Requirement
|
67
|
+
requirements:
|
68
|
+
- - ">="
|
69
|
+
- !ruby/object:Gem::Version
|
70
|
+
version: '0'
|
32
71
|
description: BufferedTokenizer extracts token delimited entities from a sequence of
|
33
|
-
|
34
|
-
email:
|
72
|
+
string inputs
|
73
|
+
email:
|
74
|
+
- sferik@gmail.com
|
75
|
+
- martin.emde@gmail.com
|
35
76
|
executables: []
|
36
77
|
extensions: []
|
37
78
|
extra_rdoc_files: []
|
38
79
|
files:
|
39
80
|
- CONTRIBUTING.md
|
40
|
-
-
|
41
|
-
- LICENSE.md
|
81
|
+
- LICENSE.txt
|
42
82
|
- README.md
|
43
|
-
- Rakefile
|
44
83
|
- buftok.gemspec
|
45
84
|
- lib/buftok.rb
|
46
|
-
- test/test_buftok.rb
|
47
85
|
homepage: https://github.com/sferik/buftok
|
48
86
|
licenses:
|
49
87
|
- MIT
|
50
|
-
|
88
|
+
metadata: {}
|
89
|
+
post_install_message:
|
51
90
|
rdoc_options: []
|
52
91
|
require_paths:
|
53
92
|
- lib
|
54
93
|
required_ruby_version: !ruby/object:Gem::Requirement
|
55
|
-
none: false
|
56
94
|
requirements:
|
57
|
-
- -
|
95
|
+
- - ">="
|
58
96
|
- !ruby/object:Gem::Version
|
59
97
|
version: '0'
|
60
98
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
61
|
-
none: false
|
62
99
|
requirements:
|
63
|
-
- -
|
100
|
+
- - ">="
|
64
101
|
- !ruby/object:Gem::Version
|
65
102
|
version: 1.3.5
|
66
103
|
requirements: []
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
summary: BufferedTokenizer extracts token delimited entities from a sequence of arbitrary
|
104
|
+
rubygems_version: 3.2.3
|
105
|
+
signing_key:
|
106
|
+
specification_version: 4
|
107
|
+
summary: BufferedTokenizer extracts token delimited entities from a sequence of string
|
72
108
|
inputs
|
73
|
-
test_files:
|
74
|
-
- test/test_buftok.rb
|
75
|
-
has_rdoc:
|
109
|
+
test_files: []
|
data/Gemfile
DELETED
data/LICENSE.md
DELETED
@@ -1,56 +0,0 @@
|
|
1
|
-
Ruby is copyrighted free software by Yukihiro Matsumoto <matz@netlab.jp>.
|
2
|
-
You can redistribute it and/or modify it under either the terms of the
|
3
|
-
2-clause BSDL (see the file BSDL), or the conditions below:
|
4
|
-
|
5
|
-
1. You may make and give away verbatim copies of the source form of the
|
6
|
-
software without restriction, provided that you duplicate all of the
|
7
|
-
original copyright notices and associated disclaimers.
|
8
|
-
|
9
|
-
2. You may modify your copy of the software in any way, provided that
|
10
|
-
you do at least ONE of the following:
|
11
|
-
|
12
|
-
a) place your modifications in the Public Domain or otherwise
|
13
|
-
make them Freely Available, such as by posting said
|
14
|
-
modifications to Usenet or an equivalent medium, or by allowing
|
15
|
-
the author to include your modifications in the software.
|
16
|
-
|
17
|
-
b) use the modified software only within your corporation or
|
18
|
-
organization.
|
19
|
-
|
20
|
-
c) give non-standard binaries non-standard names, with
|
21
|
-
instructions on where to get the original software distribution.
|
22
|
-
|
23
|
-
d) make other distribution arrangements with the author.
|
24
|
-
|
25
|
-
3. You may distribute the software in object code or binary form,
|
26
|
-
provided that you do at least ONE of the following:
|
27
|
-
|
28
|
-
a) distribute the binaries and library files of the software,
|
29
|
-
together with instructions (in the manual page or equivalent)
|
30
|
-
on where to get the original distribution.
|
31
|
-
|
32
|
-
b) accompany the distribution with the machine-readable source of
|
33
|
-
the software.
|
34
|
-
|
35
|
-
c) give non-standard binaries non-standard names, with
|
36
|
-
instructions on where to get the original software distribution.
|
37
|
-
|
38
|
-
d) make other distribution arrangements with the author.
|
39
|
-
|
40
|
-
4. You may modify and include the part of the software into any other
|
41
|
-
software (possibly commercial). But some files in the distribution
|
42
|
-
are not written by the author, so that they are not under these terms.
|
43
|
-
|
44
|
-
For the list of those files and their copying conditions, see the
|
45
|
-
file LEGAL.
|
46
|
-
|
47
|
-
5. The scripts and library files supplied as input to or produced as
|
48
|
-
output from the software do not automatically fall under the
|
49
|
-
copyright of the software, but belong to whomever generated them,
|
50
|
-
and may be sold commercially, and may be aggregated with this
|
51
|
-
software.
|
52
|
-
|
53
|
-
6. THIS SOFTWARE IS PROVIDED "AS IS" AND WITHOUT ANY EXPRESS OR
|
54
|
-
IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED
|
55
|
-
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
56
|
-
PURPOSE.
|
data/Rakefile
DELETED
@@ -1,66 +0,0 @@
|
|
1
|
-
require 'bundler'
|
2
|
-
require 'rdoc/task'
|
3
|
-
require 'rake/testtask'
|
4
|
-
|
5
|
-
task :default => :test
|
6
|
-
|
7
|
-
Bundler::GemHelper.install_tasks
|
8
|
-
|
9
|
-
RDoc::Task.new do |task|
|
10
|
-
task.rdoc_dir = 'doc'
|
11
|
-
task.title = 'BufferedTokenizer'
|
12
|
-
task.rdoc_files.include('lib/**/*.rb')
|
13
|
-
end
|
14
|
-
|
15
|
-
Rake::TestTask.new :test do |t|
|
16
|
-
t.libs << 'lib'
|
17
|
-
t.test_files = FileList['test/**/*.rb']
|
18
|
-
end
|
19
|
-
|
20
|
-
desc "Benchmark the current implementation"
|
21
|
-
task :bench do
|
22
|
-
require 'benchmark'
|
23
|
-
require File.expand_path('lib/buftok', File.dirname(__FILE__))
|
24
|
-
|
25
|
-
n = 50000
|
26
|
-
delimiter = "\n\n"
|
27
|
-
|
28
|
-
frequency1 = 1000
|
29
|
-
puts "generating #{n} strings, with #{delimiter.inspect} every #{frequency1} strings..."
|
30
|
-
data1 = (0...n).map do |i|
|
31
|
-
(((i % frequency1 == 1) ? "\n" : "") +
|
32
|
-
("s" * i) +
|
33
|
-
((i % frequency1 == 0) ? "\n" : "")).freeze
|
34
|
-
end
|
35
|
-
|
36
|
-
frequency2 = 10
|
37
|
-
puts "generating #{n} strings, with #{delimiter.inspect} every #{frequency2} strings..."
|
38
|
-
data2 = (0...n).map do |i|
|
39
|
-
(((i % frequency2 == 1) ? "\n" : "") +
|
40
|
-
("s" * i) +
|
41
|
-
((i % frequency2 == 0) ? "\n" : "")).freeze
|
42
|
-
end
|
43
|
-
|
44
|
-
Benchmark.bmbm do |x|
|
45
|
-
x.report("1 char, freq: #{frequency1}") do
|
46
|
-
bt1 = BufferedTokenizer.new
|
47
|
-
n.times { |i| bt1.extract(data1[i]) }
|
48
|
-
end
|
49
|
-
|
50
|
-
x.report("2 char, freq: #{frequency1}") do
|
51
|
-
bt2 = BufferedTokenizer.new(delimiter)
|
52
|
-
n.times { |i| bt2.extract(data1[i]) }
|
53
|
-
end
|
54
|
-
|
55
|
-
x.report("1 char, freq: #{frequency2}") do
|
56
|
-
bt3 = BufferedTokenizer.new
|
57
|
-
n.times { |i| bt3.extract(data2[i]) }
|
58
|
-
end
|
59
|
-
|
60
|
-
x.report("2 char, freq: #{frequency2}") do
|
61
|
-
bt4 = BufferedTokenizer.new(delimiter)
|
62
|
-
n.times { |i| bt4.extract(data2[i]) }
|
63
|
-
end
|
64
|
-
|
65
|
-
end
|
66
|
-
end
|
data/test/test_buftok.rb
DELETED
@@ -1,27 +0,0 @@
|
|
1
|
-
require 'test/unit'
|
2
|
-
require 'buftok'
|
3
|
-
|
4
|
-
class TestBuftok < Test::Unit::TestCase
|
5
|
-
def test_buftok
|
6
|
-
tokenizer = BufferedTokenizer.new
|
7
|
-
assert_equal %w[foo], tokenizer.extract("foo\nbar".freeze)
|
8
|
-
assert_equal %w[barbaz qux], tokenizer.extract("baz\nqux\nquu".freeze)
|
9
|
-
assert_equal 'quu', tokenizer.flush
|
10
|
-
assert_equal '', tokenizer.flush
|
11
|
-
end
|
12
|
-
|
13
|
-
def test_delimiter
|
14
|
-
tokenizer = BufferedTokenizer.new('<>')
|
15
|
-
assert_equal ['', "foo\n"], tokenizer.extract("<>foo\n<>".freeze)
|
16
|
-
assert_equal %w[bar], tokenizer.extract('bar<>baz'.freeze)
|
17
|
-
assert_equal 'baz', tokenizer.flush
|
18
|
-
end
|
19
|
-
|
20
|
-
def test_split_delimiter
|
21
|
-
tokenizer = BufferedTokenizer.new('<>'.freeze)
|
22
|
-
assert_equal [], tokenizer.extract('foo<'.freeze)
|
23
|
-
assert_equal %w[foo], tokenizer.extract('>bar<'.freeze)
|
24
|
-
assert_equal %w[bar<baz qux], tokenizer.extract('baz<>qux<>'.freeze)
|
25
|
-
assert_equal '', tokenizer.flush
|
26
|
-
end
|
27
|
-
end
|