buftok 0.2.0 → 0.3.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/LICENSE.txt +21 -0
- data/README.md +14 -18
- data/buftok.gemspec +13 -11
- data/lib/buftok.rb +15 -2
- metadata +63 -29
- data/Gemfile +0 -6
- data/LICENSE.md +0 -56
- data/Rakefile +0 -66
- data/test/test_buftok.rb +0 -27
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: 67c39aeda72dd14dc738490d14c121acd8591050057f488517c0a92039187179
|
4
|
+
data.tar.gz: dd6f4e0460ac0c2c076d9d4d05e91bdd29a2667167872f2850127b3ed7f72118
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 9a2db2dffe2660fcb5ec89e813b8a953cdbdebd530184be5bf88f35dbf7bfd06dd15e5441b6e4316b6a23a83458d43df90212f7a30f9a2d25de5b49609ff6857
|
7
|
+
data.tar.gz: c8312db37a322e718163142e1246f909eca8e50f07bde78b706d46b327c035bf2c4d3e7ebb59d10b7cdeb5dbac29dcf7b9b8d1cad2ab5e7702f65ebfde315d79
|
data/LICENSE.txt
ADDED
@@ -0,0 +1,21 @@
|
|
1
|
+
The MIT License (MIT)
|
2
|
+
|
3
|
+
Copyright (c) 2021 Tony Arcieri, Martin Emde, Erik Michaels-Ober
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
7
|
+
in the Software without restriction, including without limitation the rights
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
10
|
+
furnished to do so, subject to the following conditions:
|
11
|
+
|
12
|
+
The above copyright notice and this permission notice shall be included in
|
13
|
+
all copies or substantial portions of the Software.
|
14
|
+
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
21
|
+
THE SOFTWARE.
|
data/README.md
CHANGED
@@ -1,14 +1,10 @@
|
|
1
1
|
# BufferedTokenizer
|
2
2
|
|
3
|
-
[![Gem Version](
|
4
|
-
[![Build Status](https://
|
5
|
-
[![Dependency Status](https://gemnasium.com/sferik/buftok.png?travis)][gemnasium]
|
6
|
-
[![Code Climate](https://codeclimate.com/github/sferik/buftok.png)][codeclimate]
|
3
|
+
[![Gem Version](http://img.shields.io/gem/v/buftok.svg)][gem]
|
4
|
+
[![Build Status](https://github.com/sferik/buftok/actions/workflows/ruby.yml/badge.svg)][build]
|
7
5
|
|
8
6
|
[gem]: https://rubygems.org/gems/buftok
|
9
|
-
[
|
10
|
-
[gemnasium]: https://gemnasium.com/sferik/buftok
|
11
|
-
[codeclimate]: https://codeclimate.com/github/sferik/buftok
|
7
|
+
[build]: https://github.com/sferik/buftok/actions
|
12
8
|
|
13
9
|
###### Statefully split input data by a specifiable token
|
14
10
|
|
@@ -21,19 +17,18 @@ something like [EventMachine][].
|
|
21
17
|
[EventMachine]: http://rubyeventmachine.com/
|
22
18
|
|
23
19
|
## Supported Ruby Versions
|
24
|
-
This library aims to support and is [tested against][
|
20
|
+
This library aims to support and is [tested against][build] the following Ruby
|
25
21
|
implementations:
|
26
22
|
|
27
|
-
* Ruby
|
28
|
-
* Ruby
|
29
|
-
* Ruby
|
30
|
-
* Ruby 2.0.0
|
23
|
+
* Ruby 2.6
|
24
|
+
* Ruby 2.7
|
25
|
+
* Ruby 3.0
|
31
26
|
|
32
27
|
If something doesn't work on one of these interpreters, it's a bug.
|
33
28
|
|
34
|
-
This
|
35
|
-
|
36
|
-
|
29
|
+
This code will likely still work on older versions since it has not undergone
|
30
|
+
many changes since release. However, support will not be provided for
|
31
|
+
end-of-life Ruby versions.
|
37
32
|
|
38
33
|
If you would like this library to support another Ruby version, you may
|
39
34
|
volunteer to be a maintainer. Being a maintainer entails making sure all tests
|
@@ -43,6 +38,7 @@ fashion. If critical issues for a particular implementation exist at the time
|
|
43
38
|
of a major release, support for that Ruby version may be dropped.
|
44
39
|
|
45
40
|
## Copyright
|
46
|
-
Copyright (c) 2006-
|
47
|
-
Distributed under the [
|
48
|
-
|
41
|
+
Copyright (c) 2006-2021 Tony Arcieri, Martin Emde, Erik Michaels-Ober.
|
42
|
+
Distributed under the [MIT license][license].
|
43
|
+
|
44
|
+
[license]: https://opensource.org/licenses/MIT
|
data/buftok.gemspec
CHANGED
@@ -1,17 +1,19 @@
|
|
1
1
|
Gem::Specification.new do |spec|
|
2
|
-
spec.
|
2
|
+
spec.version = "0.3.0"
|
3
|
+
|
3
4
|
spec.authors = ["Tony Arcieri", "Martin Emde", "Erik Michaels-Ober"]
|
4
|
-
spec.
|
5
|
-
spec.
|
6
|
-
spec.
|
7
|
-
spec.files
|
8
|
-
spec.files += Dir.glob("test/**/*.rb")
|
9
|
-
spec.test_files = spec.files.grep(%r{^test/})
|
5
|
+
spec.summary = %q{BufferedTokenizer extracts token delimited entities from a sequence of string inputs}
|
6
|
+
spec.description = spec.summary
|
7
|
+
spec.email = ["sferik@gmail.com", "martin.emde@gmail.com"]
|
8
|
+
spec.files = %w(CONTRIBUTING.md LICENSE.txt README.md buftok.gemspec) + Dir["lib/**/*.rb"]
|
10
9
|
spec.homepage = "https://github.com/sferik/buftok"
|
11
|
-
spec.licenses = [
|
10
|
+
spec.licenses = ["MIT"]
|
12
11
|
spec.name = "buftok"
|
13
12
|
spec.require_paths = ["lib"]
|
14
|
-
spec.required_rubygems_version =
|
15
|
-
|
16
|
-
spec.
|
13
|
+
spec.required_rubygems_version = ">= 1.3.5"
|
14
|
+
|
15
|
+
spec.add_development_dependency "bundler", ">= 1.17"
|
16
|
+
spec.add_development_dependency "rake", "~> 10.0"
|
17
|
+
spec.add_development_dependency "rdoc"
|
18
|
+
spec.add_development_dependency "test-unit"
|
17
19
|
end
|
data/lib/buftok.rb
CHANGED
@@ -1,3 +1,5 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
#
|
1
3
|
# BufferedTokenizer takes a delimiter upon instantiation, or acts line-based
|
2
4
|
# by default. It allows input to be spoon-fed from some outside source which
|
3
5
|
# receives arbitrary length datagrams which may-or-may-not contain the token
|
@@ -15,10 +17,18 @@ class BufferedTokenizer
|
|
15
17
|
def initialize(delimiter = $/)
|
16
18
|
@delimiter = delimiter
|
17
19
|
@input = []
|
18
|
-
@tail =
|
20
|
+
@tail = String.new
|
19
21
|
@trim = @delimiter.length - 1
|
20
22
|
end
|
21
23
|
|
24
|
+
# Determine the size of the internal buffer.
|
25
|
+
#
|
26
|
+
# Size is not cached and is determined every time this method is called
|
27
|
+
# in order to optimize throughput for extract.
|
28
|
+
def size
|
29
|
+
@tail.length + @input.inject(0) { |total, input| total + input.length }
|
30
|
+
end
|
31
|
+
|
22
32
|
# Extract takes an arbitrary string of input data and returns an array of
|
23
33
|
# tokenized entities, provided there were any available to extract. This
|
24
34
|
# makes for easy processing of datagrams using a pattern like:
|
@@ -53,7 +63,10 @@ class BufferedTokenizer
|
|
53
63
|
@input << @tail
|
54
64
|
buffer = @input.join
|
55
65
|
@input.clear
|
56
|
-
@tail =
|
66
|
+
@tail = String.new # @tail.clear is slightly faster, but not supported on 1.8.7
|
57
67
|
buffer
|
58
68
|
end
|
59
69
|
end
|
70
|
+
|
71
|
+
# The expected constant for a gem named buftok
|
72
|
+
Buftok = BufferedTokenizer
|
metadata
CHANGED
@@ -1,75 +1,109 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: buftok
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
5
|
-
prerelease:
|
4
|
+
version: 0.3.0
|
6
5
|
platform: ruby
|
7
6
|
authors:
|
8
7
|
- Tony Arcieri
|
9
8
|
- Martin Emde
|
10
9
|
- Erik Michaels-Ober
|
11
|
-
autorequire:
|
10
|
+
autorequire:
|
12
11
|
bindir: bin
|
13
12
|
cert_chain: []
|
14
|
-
date:
|
13
|
+
date: 2021-03-25 00:00:00.000000000 Z
|
15
14
|
dependencies:
|
16
15
|
- !ruby/object:Gem::Dependency
|
17
16
|
name: bundler
|
18
17
|
requirement: !ruby/object:Gem::Requirement
|
19
|
-
none: false
|
20
18
|
requirements:
|
21
|
-
- -
|
19
|
+
- - ">="
|
22
20
|
- !ruby/object:Gem::Version
|
23
|
-
version: '1.
|
21
|
+
version: '1.17'
|
24
22
|
type: :development
|
25
23
|
prerelease: false
|
26
24
|
version_requirements: !ruby/object:Gem::Requirement
|
27
|
-
none: false
|
28
25
|
requirements:
|
29
|
-
- -
|
26
|
+
- - ">="
|
30
27
|
- !ruby/object:Gem::Version
|
31
|
-
version: '1.
|
28
|
+
version: '1.17'
|
29
|
+
- !ruby/object:Gem::Dependency
|
30
|
+
name: rake
|
31
|
+
requirement: !ruby/object:Gem::Requirement
|
32
|
+
requirements:
|
33
|
+
- - "~>"
|
34
|
+
- !ruby/object:Gem::Version
|
35
|
+
version: '10.0'
|
36
|
+
type: :development
|
37
|
+
prerelease: false
|
38
|
+
version_requirements: !ruby/object:Gem::Requirement
|
39
|
+
requirements:
|
40
|
+
- - "~>"
|
41
|
+
- !ruby/object:Gem::Version
|
42
|
+
version: '10.0'
|
43
|
+
- !ruby/object:Gem::Dependency
|
44
|
+
name: rdoc
|
45
|
+
requirement: !ruby/object:Gem::Requirement
|
46
|
+
requirements:
|
47
|
+
- - ">="
|
48
|
+
- !ruby/object:Gem::Version
|
49
|
+
version: '0'
|
50
|
+
type: :development
|
51
|
+
prerelease: false
|
52
|
+
version_requirements: !ruby/object:Gem::Requirement
|
53
|
+
requirements:
|
54
|
+
- - ">="
|
55
|
+
- !ruby/object:Gem::Version
|
56
|
+
version: '0'
|
57
|
+
- !ruby/object:Gem::Dependency
|
58
|
+
name: test-unit
|
59
|
+
requirement: !ruby/object:Gem::Requirement
|
60
|
+
requirements:
|
61
|
+
- - ">="
|
62
|
+
- !ruby/object:Gem::Version
|
63
|
+
version: '0'
|
64
|
+
type: :development
|
65
|
+
prerelease: false
|
66
|
+
version_requirements: !ruby/object:Gem::Requirement
|
67
|
+
requirements:
|
68
|
+
- - ">="
|
69
|
+
- !ruby/object:Gem::Version
|
70
|
+
version: '0'
|
32
71
|
description: BufferedTokenizer extracts token delimited entities from a sequence of
|
33
|
-
|
34
|
-
email:
|
72
|
+
string inputs
|
73
|
+
email:
|
74
|
+
- sferik@gmail.com
|
75
|
+
- martin.emde@gmail.com
|
35
76
|
executables: []
|
36
77
|
extensions: []
|
37
78
|
extra_rdoc_files: []
|
38
79
|
files:
|
39
80
|
- CONTRIBUTING.md
|
40
|
-
-
|
41
|
-
- LICENSE.md
|
81
|
+
- LICENSE.txt
|
42
82
|
- README.md
|
43
|
-
- Rakefile
|
44
83
|
- buftok.gemspec
|
45
84
|
- lib/buftok.rb
|
46
|
-
- test/test_buftok.rb
|
47
85
|
homepage: https://github.com/sferik/buftok
|
48
86
|
licenses:
|
49
87
|
- MIT
|
50
|
-
|
88
|
+
metadata: {}
|
89
|
+
post_install_message:
|
51
90
|
rdoc_options: []
|
52
91
|
require_paths:
|
53
92
|
- lib
|
54
93
|
required_ruby_version: !ruby/object:Gem::Requirement
|
55
|
-
none: false
|
56
94
|
requirements:
|
57
|
-
- -
|
95
|
+
- - ">="
|
58
96
|
- !ruby/object:Gem::Version
|
59
97
|
version: '0'
|
60
98
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
61
|
-
none: false
|
62
99
|
requirements:
|
63
|
-
- -
|
100
|
+
- - ">="
|
64
101
|
- !ruby/object:Gem::Version
|
65
102
|
version: 1.3.5
|
66
103
|
requirements: []
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
summary: BufferedTokenizer extracts token delimited entities from a sequence of arbitrary
|
104
|
+
rubygems_version: 3.2.3
|
105
|
+
signing_key:
|
106
|
+
specification_version: 4
|
107
|
+
summary: BufferedTokenizer extracts token delimited entities from a sequence of string
|
72
108
|
inputs
|
73
|
-
test_files:
|
74
|
-
- test/test_buftok.rb
|
75
|
-
has_rdoc:
|
109
|
+
test_files: []
|
data/Gemfile
DELETED
data/LICENSE.md
DELETED
@@ -1,56 +0,0 @@
|
|
1
|
-
Ruby is copyrighted free software by Yukihiro Matsumoto <matz@netlab.jp>.
|
2
|
-
You can redistribute it and/or modify it under either the terms of the
|
3
|
-
2-clause BSDL (see the file BSDL), or the conditions below:
|
4
|
-
|
5
|
-
1. You may make and give away verbatim copies of the source form of the
|
6
|
-
software without restriction, provided that you duplicate all of the
|
7
|
-
original copyright notices and associated disclaimers.
|
8
|
-
|
9
|
-
2. You may modify your copy of the software in any way, provided that
|
10
|
-
you do at least ONE of the following:
|
11
|
-
|
12
|
-
a) place your modifications in the Public Domain or otherwise
|
13
|
-
make them Freely Available, such as by posting said
|
14
|
-
modifications to Usenet or an equivalent medium, or by allowing
|
15
|
-
the author to include your modifications in the software.
|
16
|
-
|
17
|
-
b) use the modified software only within your corporation or
|
18
|
-
organization.
|
19
|
-
|
20
|
-
c) give non-standard binaries non-standard names, with
|
21
|
-
instructions on where to get the original software distribution.
|
22
|
-
|
23
|
-
d) make other distribution arrangements with the author.
|
24
|
-
|
25
|
-
3. You may distribute the software in object code or binary form,
|
26
|
-
provided that you do at least ONE of the following:
|
27
|
-
|
28
|
-
a) distribute the binaries and library files of the software,
|
29
|
-
together with instructions (in the manual page or equivalent)
|
30
|
-
on where to get the original distribution.
|
31
|
-
|
32
|
-
b) accompany the distribution with the machine-readable source of
|
33
|
-
the software.
|
34
|
-
|
35
|
-
c) give non-standard binaries non-standard names, with
|
36
|
-
instructions on where to get the original software distribution.
|
37
|
-
|
38
|
-
d) make other distribution arrangements with the author.
|
39
|
-
|
40
|
-
4. You may modify and include the part of the software into any other
|
41
|
-
software (possibly commercial). But some files in the distribution
|
42
|
-
are not written by the author, so that they are not under these terms.
|
43
|
-
|
44
|
-
For the list of those files and their copying conditions, see the
|
45
|
-
file LEGAL.
|
46
|
-
|
47
|
-
5. The scripts and library files supplied as input to or produced as
|
48
|
-
output from the software do not automatically fall under the
|
49
|
-
copyright of the software, but belong to whomever generated them,
|
50
|
-
and may be sold commercially, and may be aggregated with this
|
51
|
-
software.
|
52
|
-
|
53
|
-
6. THIS SOFTWARE IS PROVIDED "AS IS" AND WITHOUT ANY EXPRESS OR
|
54
|
-
IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED
|
55
|
-
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
56
|
-
PURPOSE.
|
data/Rakefile
DELETED
@@ -1,66 +0,0 @@
|
|
1
|
-
require 'bundler'
|
2
|
-
require 'rdoc/task'
|
3
|
-
require 'rake/testtask'
|
4
|
-
|
5
|
-
task :default => :test
|
6
|
-
|
7
|
-
Bundler::GemHelper.install_tasks
|
8
|
-
|
9
|
-
RDoc::Task.new do |task|
|
10
|
-
task.rdoc_dir = 'doc'
|
11
|
-
task.title = 'BufferedTokenizer'
|
12
|
-
task.rdoc_files.include('lib/**/*.rb')
|
13
|
-
end
|
14
|
-
|
15
|
-
Rake::TestTask.new :test do |t|
|
16
|
-
t.libs << 'lib'
|
17
|
-
t.test_files = FileList['test/**/*.rb']
|
18
|
-
end
|
19
|
-
|
20
|
-
desc "Benchmark the current implementation"
|
21
|
-
task :bench do
|
22
|
-
require 'benchmark'
|
23
|
-
require File.expand_path('lib/buftok', File.dirname(__FILE__))
|
24
|
-
|
25
|
-
n = 50000
|
26
|
-
delimiter = "\n\n"
|
27
|
-
|
28
|
-
frequency1 = 1000
|
29
|
-
puts "generating #{n} strings, with #{delimiter.inspect} every #{frequency1} strings..."
|
30
|
-
data1 = (0...n).map do |i|
|
31
|
-
(((i % frequency1 == 1) ? "\n" : "") +
|
32
|
-
("s" * i) +
|
33
|
-
((i % frequency1 == 0) ? "\n" : "")).freeze
|
34
|
-
end
|
35
|
-
|
36
|
-
frequency2 = 10
|
37
|
-
puts "generating #{n} strings, with #{delimiter.inspect} every #{frequency2} strings..."
|
38
|
-
data2 = (0...n).map do |i|
|
39
|
-
(((i % frequency2 == 1) ? "\n" : "") +
|
40
|
-
("s" * i) +
|
41
|
-
((i % frequency2 == 0) ? "\n" : "")).freeze
|
42
|
-
end
|
43
|
-
|
44
|
-
Benchmark.bmbm do |x|
|
45
|
-
x.report("1 char, freq: #{frequency1}") do
|
46
|
-
bt1 = BufferedTokenizer.new
|
47
|
-
n.times { |i| bt1.extract(data1[i]) }
|
48
|
-
end
|
49
|
-
|
50
|
-
x.report("2 char, freq: #{frequency1}") do
|
51
|
-
bt2 = BufferedTokenizer.new(delimiter)
|
52
|
-
n.times { |i| bt2.extract(data1[i]) }
|
53
|
-
end
|
54
|
-
|
55
|
-
x.report("1 char, freq: #{frequency2}") do
|
56
|
-
bt3 = BufferedTokenizer.new
|
57
|
-
n.times { |i| bt3.extract(data2[i]) }
|
58
|
-
end
|
59
|
-
|
60
|
-
x.report("2 char, freq: #{frequency2}") do
|
61
|
-
bt4 = BufferedTokenizer.new(delimiter)
|
62
|
-
n.times { |i| bt4.extract(data2[i]) }
|
63
|
-
end
|
64
|
-
|
65
|
-
end
|
66
|
-
end
|
data/test/test_buftok.rb
DELETED
@@ -1,27 +0,0 @@
|
|
1
|
-
require 'test/unit'
|
2
|
-
require 'buftok'
|
3
|
-
|
4
|
-
class TestBuftok < Test::Unit::TestCase
|
5
|
-
def test_buftok
|
6
|
-
tokenizer = BufferedTokenizer.new
|
7
|
-
assert_equal %w[foo], tokenizer.extract("foo\nbar".freeze)
|
8
|
-
assert_equal %w[barbaz qux], tokenizer.extract("baz\nqux\nquu".freeze)
|
9
|
-
assert_equal 'quu', tokenizer.flush
|
10
|
-
assert_equal '', tokenizer.flush
|
11
|
-
end
|
12
|
-
|
13
|
-
def test_delimiter
|
14
|
-
tokenizer = BufferedTokenizer.new('<>')
|
15
|
-
assert_equal ['', "foo\n"], tokenizer.extract("<>foo\n<>".freeze)
|
16
|
-
assert_equal %w[bar], tokenizer.extract('bar<>baz'.freeze)
|
17
|
-
assert_equal 'baz', tokenizer.flush
|
18
|
-
end
|
19
|
-
|
20
|
-
def test_split_delimiter
|
21
|
-
tokenizer = BufferedTokenizer.new('<>'.freeze)
|
22
|
-
assert_equal [], tokenizer.extract('foo<'.freeze)
|
23
|
-
assert_equal %w[foo], tokenizer.extract('>bar<'.freeze)
|
24
|
-
assert_equal %w[bar<baz qux], tokenizer.extract('baz<>qux<>'.freeze)
|
25
|
-
assert_equal '', tokenizer.flush
|
26
|
-
end
|
27
|
-
end
|