worte 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.rspec +2 -0
- data/Gemfile +5 -0
- data/Gemfile.lock +35 -0
- data/LICENSE +22 -0
- data/README.md +83 -0
- data/Rakefile +27 -0
- data/lib/worte/filter/markdown.rb +12 -0
- data/lib/worte/filter/url.rb +13 -0
- data/lib/worte/printer/colorized.rb +11 -0
- data/lib/worte/printer/simple.rb +35 -0
- data/lib/worte/token.rb +14 -0
- data/lib/worte/tokenizer.rb +63 -0
- data/lib/worte/version.rb +3 -0
- data/lib/worte/worte.rb +30 -0
- data/lib/worte.rb +9 -0
- data/spec/colorized_printer_spec.rb +13 -0
- data/spec/simple_printer_spec.rb +40 -0
- data/spec/spec_helper.rb +101 -0
- data/spec/tokenizer_spec.rb +50 -0
- data/spec/url_filter_spec.rb +9 -0
- data/worte.gemspec +23 -0
- metadata +85 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: d5d0ecb6b4947b4cc77c25c2c591f6752f6adf02
|
4
|
+
data.tar.gz: 665ef1e8b37121de5ebe170e04b018be3a673486
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 76c11f99ddaa9e8ef5f94d7dbf1a0e77ff66fdefd98cff4216ceda67e10d595e1846a5f989a9bc7e96ceb3150ec34f7746f9e781b895b4779059a9e634c31988
|
7
|
+
data.tar.gz: 50b940b66d00a3d1ac27d9c0d1ed69875149b36a036f4ef1b9777eaefddc3c26b259d55c6443bff852d82442220db04c090126ceab4720c978e34ef80c0f9473
|
data/.rspec
ADDED
data/Gemfile
ADDED
data/Gemfile.lock
ADDED
@@ -0,0 +1,35 @@
|
|
1
|
+
PATH
|
2
|
+
remote: .
|
3
|
+
specs:
|
4
|
+
worte (0.0.1)
|
5
|
+
ffi-aspell (~> 1.1.0)
|
6
|
+
|
7
|
+
GEM
|
8
|
+
remote: https://rubygems.org/
|
9
|
+
specs:
|
10
|
+
diff-lcs (1.2.5)
|
11
|
+
ffi (1.9.6)
|
12
|
+
ffi-aspell (1.1.0)
|
13
|
+
ffi
|
14
|
+
rake (10.4.2)
|
15
|
+
rspec (3.3.0)
|
16
|
+
rspec-core (~> 3.3.0)
|
17
|
+
rspec-expectations (~> 3.3.0)
|
18
|
+
rspec-mocks (~> 3.3.0)
|
19
|
+
rspec-core (3.3.2)
|
20
|
+
rspec-support (~> 3.3.0)
|
21
|
+
rspec-expectations (3.3.1)
|
22
|
+
diff-lcs (>= 1.2.0, < 2.0)
|
23
|
+
rspec-support (~> 3.3.0)
|
24
|
+
rspec-mocks (3.3.2)
|
25
|
+
diff-lcs (>= 1.2.0, < 2.0)
|
26
|
+
rspec-support (~> 3.3.0)
|
27
|
+
rspec-support (3.3.0)
|
28
|
+
|
29
|
+
PLATFORMS
|
30
|
+
ruby
|
31
|
+
|
32
|
+
DEPENDENCIES
|
33
|
+
rake (~> 10.4.2)
|
34
|
+
rspec (~> 3.3.0)
|
35
|
+
worte!
|
data/LICENSE
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
Copyright (c) 2015 Jonas Oberschweiber
|
2
|
+
|
3
|
+
MIT License
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
6
|
+
a copy of this software and associated documentation files (the
|
7
|
+
"Software"), to deal in the Software without restriction, including
|
8
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
9
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
10
|
+
permit persons to whom the Software is furnished to do so, subject to
|
11
|
+
the following conditions:
|
12
|
+
|
13
|
+
The above copyright notice and this permission notice shall be
|
14
|
+
included in all copies or substantial portions of the Software.
|
15
|
+
|
16
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
17
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
18
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
19
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
20
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
21
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
22
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/README.md
ADDED
@@ -0,0 +1,83 @@
|
|
1
|
+
# worte - A simple Ruby spell checker
|
2
|
+
|
3
|
+
[![Build Status](https://travis-ci.org/jonasoberschweiber/worte.svg?branch=master)](https://travis-ci.org/jonasoberschweiber/worte)
|
4
|
+
|
5
|
+
Worte is a simple spell checker based on [ffi-aspell](https://github.com/YorickPeterse/ffi-aspell).
|
6
|
+
It tokenizes strings, spell checks the tokens and can then print out the lines
|
7
|
+
of the original text containing spelling mistakes.
|
8
|
+
|
9
|
+
## Installation
|
10
|
+
|
11
|
+
Just install it using `gem install worte` or add it to your Gemfile. Please note
|
12
|
+
that worte depends on ffi-aspell, which requires an installation of aspell.
|
13
|
+
|
14
|
+
## Usage
|
15
|
+
|
16
|
+
Spell checking is done by `Worte::Worte`. Create a new instance, passing it the
|
17
|
+
language you want to check:
|
18
|
+
|
19
|
+
```ruby
|
20
|
+
worte = Worte::Worte.new('en')
|
21
|
+
```
|
22
|
+
|
23
|
+
You can also pass in some options, most of which will be passed through to
|
24
|
+
ffi-aspell. The most useful one might be `:personal`, which you can use to tell
|
25
|
+
aspell the path to your personal word list. The word list has to be in a
|
26
|
+
[specific format.](http://aspell.net/man-html/Format-of-the-Personal-and-Replacement-Dictionaries.html)
|
27
|
+
|
28
|
+
The `:filter` and `:filters` options can be used to specify one or more filters.
|
29
|
+
See below for details.
|
30
|
+
|
31
|
+
Use the `check` method to spell check some text and get back a list of `Token`
|
32
|
+
objects. The most important attributes of each token are `word`, `position` and
|
33
|
+
`correct`.
|
34
|
+
|
35
|
+
You can use the token list with one of the built-in printers to highlight
|
36
|
+
spelling mistakes in the original text, or use the information contained in the
|
37
|
+
token objects to do your own highlighting.
|
38
|
+
|
39
|
+
### Filters
|
40
|
+
|
41
|
+
Filters are used to filter out some parts of the original text before spell
|
42
|
+
checking, e.g. URLs or formatting. They are simple Ruby classes that implement
|
43
|
+
a `filter` method, which just takes a string and returns a string. A filter must
|
44
|
+
replace the filtered out bits by space characters. Otherwise the positions in
|
45
|
+
the tokens returned by `check` won't match the original text.
|
46
|
+
|
47
|
+
Worte ships with two filters. The first is `Worte::Filter::URL`, which simply
|
48
|
+
strips all URLs from the text. The other one is `Worte::Filter::Markdown`, which
|
49
|
+
filters out Markdown formatting. The Markdown filter is still very much a work
|
50
|
+
in progress!
|
51
|
+
|
52
|
+
### Printing
|
53
|
+
|
54
|
+
There are two printers included with worte: `Worte::Printer::Simple` and
|
55
|
+
`Worte::Printer::Colorized`. The simple printer prints out all lines containing
|
56
|
+
at least one spelling mistake and marks the mistake in the line below. Like
|
57
|
+
this:
|
58
|
+
|
59
|
+
```
|
60
|
+
1: A sipmle spelling mistake.
|
61
|
+
     ^^^^^^
|
62
|
+
```
|
63
|
+
|
64
|
+
The colorized variant prints out the markings in red.
|
65
|
+
|
66
|
+
## Example
|
67
|
+
|
68
|
+
```ruby
|
69
|
+
require 'worte'
|
70
|
+
|
71
|
+
text = "A sipmle spelling mistake"
|
72
|
+
|
73
|
+
tokens = Worte::Worte.new('en').check(text)
|
74
|
+
Worte::Printer::Simple.new.print(text, tokens)
|
75
|
+
|
76
|
+
# Output:
|
77
|
+
# 1: A sipmle spelling mistake
|
78
|
+
#      ^^^^^^
|
79
|
+
```
|
80
|
+
|
81
|
+
## License
|
82
|
+
|
83
|
+
Worte is licensed under the MIT license. See the `LICENSE` file for details.
|
data/Rakefile
ADDED
@@ -0,0 +1,27 @@
|
|
1
|
+
# Build, release and test tasks for the worte gem.
# These are shamelessly copied from https://github.com/jnunemaker/flipper

$LOAD_PATH.push File.expand_path("../lib", __FILE__)
require "worte/version"

desc 'Build gem into the pkg directory'
task :build do
  FileUtils.rm_rf('pkg')
  # Use `sh` (as the release task does) so that a failing `gem build` aborts
  # the task. A bare `system` call only returns false, which would let
  # :release go on to tag and push even though no gem was produced.
  sh "gem build worte.gemspec"
  FileUtils.mkdir_p('pkg')
  FileUtils.mv(Dir['*.gem'], 'pkg')
end

desc 'Tags version, pushes to remote, and pushes gem'
task :release => :build do
  # Tag the current commit with the gem version, publish branch and tag,
  # then push every gem file that :build moved into pkg/.
  sh 'git', 'tag', "v#{Worte::VERSION}"
  sh "git push origin master"
  sh "git push origin v#{Worte::VERSION}"
  sh "ls pkg/*.gem | xargs -n 1 gem push"
end

require "rspec/core/rake_task"
RSpec::Core::RakeTask.new(:spec) do |t|
  t.rspec_opts = %w(--color)
end

# Running plain `rake` executes the spec suite.
task :default => :spec
|
@@ -0,0 +1,12 @@
|
|
1
|
+
module Worte
  module Filter
    # Blanks out Markdown formatting before spell checking.
    #
    # Every character that is removed is replaced by a space, so the result
    # always has the same length as the input and the positions of the
    # remaining words are unchanged.
    class Markdown
      # An inline link or image, e.g. "[text](target)" or "![alt](src)".
      # The bracket part may itself contain one image ("[![alt](src)](target)").
      # Neither part may span a newline, so matches never cross lines, and
      # each link on a line is matched on its own instead of greedily
      # swallowing the prose between the first "[" and the last ")".
      LINK = /!?\[(?:!\[[^\]\n]*\]\([^)\n]*\)|[^\]\n])*\]\([^)\n]*\)/

      # Returns a copy of +str+ with Markdown formatting replaced by spaces.
      #
      # str - the String to filter.
      #
      # Handles heading markers ("#" at the start of a line), "**bold**" and
      # "*italic*" delimiters, and inline links/images (blanked completely,
      # including the link text).
      def filter(str)
        # Leading "#" heading markers, together with their indentation.
        str = str.gsub(/^([ \t]*[#]+)/) { |c| ' ' * c.length }
        # Each "**" delimiter is two characters wide, so pad with two spaces
        # per side to keep the text at its original column.
        str = str.gsub(/\*\*(.*?)\*\*/) { |c| (' ' * 2) + c[2..-3] + (' ' * 2) }
        # Single "*" delimiters are one character wide.
        str = str.gsub(/\*(.*?)\*/) { |c| ' ' + c[1..-2] + ' ' }
        str.gsub(LINK) { |c| ' ' * c.length }
      end
    end
  end
end
|
@@ -0,0 +1,35 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
module Worte
  module Printer
    # Prints every line of a text that contains at least one incorrect token,
    # followed by a line that marks the incorrect words.
    class Simple
      # Writes the report for +text+ to +where+.
      #
      # text   - the original String that was checked.
      # tokens - token objects responding to +position+ ([line, column]),
      #          +word+ and +correct+.
      # where  - an IO-like object responding to +print+ and +puts+
      #          (defaults to STDOUT).
      def print(text, tokens, where=STDOUT)
        text.split("\n").each_with_index do |line, index|
          on_this_line = tokens.select { |tok| tok.position[0] == index }
          # Do not print lines without errors
          next if on_this_line.all?(&:correct)

          gutter = "#{index + 1}: "
          where.print gutter
          where.puts line

          # The marker line starts with a blank gutter of the same width.
          where.print ' ' * gutter.length
          cursor = 0
          on_this_line.reject(&:correct).each do |tok|
            column = tok.position[1]
            unless cursor == column
              # Pad from the end of the previous marker up to this word.
              where.print(' ' * (column - cursor))
              cursor = column
            end
            where.print error_marker(tok.word.length)
            cursor += tok.word.length
          end
          where.puts
        end
      end

      # Returns the String used to underline a word of the given +length+.
      def error_marker(length)
        '^' * length
      end
    end
  end
end
|
data/lib/worte/token.rb
ADDED
@@ -0,0 +1,63 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
# Inspired by tokenizer (https://github.com/arbox/tokenizer)
|
4
|
+
|
5
|
+
# A run of blank characters; a character matching this ends the current word.
FS = Regexp.new('[[:blank:]]+')

# Punctuation characters that end the current word and never become part of
# a token.
BREAK = ['(', ')', '[', ']', '<', '>', '!', '?', ',', '.', ';', ':', '.', '"', "'"]

module Worte
  # Splits a text into word tokens that remember where they were found.
  class Tokenizer
    # Tokenizes +str+ line by line.
    #
    # Returns an Array of Token objects. Each token carries its word and a
    # [line, column] position (both zero-based). The last token of every line
    # has its +newline+ flag set; a line without any word gets an empty
    # placeholder token at column 0 to carry that flag.
    def tokenize(str)
      reset

      str.split("\n").each_with_index do |line, row|
        start_line

        line.each_char.with_index do |char, column|
          if FS.match(char) || BREAK.include?(char)
            # A separator ends whatever word is currently buffered.
            add_token_from_buffer(row)
          else
            @start_word = column if @start_word == -1
            @buf_word += char
          end
        end

        # Flush a word that runs up to the end of the line.
        add_token_from_buffer(row)

        unless @last_token
          # Add a dummy token that will get the newline flag
          @last_token = Token.new('', [row, 0])
          @tokens << @last_token
        end

        @last_token.newline = true
      end

      @tokens
    end

    private

    # Clears all state left over from a previous #tokenize call.
    def reset
      @tokens = []
      start_line
    end

    # Clears the per-line state: the word buffer, its start column and the
    # most recently emitted token.
    def start_line
      @start_word = -1
      @buf_word = ''
      @last_token = nil
    end

    # Emits the buffered word (if any) as a token on +line+ and empties the
    # buffer.
    def add_token_from_buffer(line)
      return if @buf_word.empty?

      @last_token = Token.new(@buf_word, [line, @start_word])
      @tokens << @last_token
      @buf_word = ''
      @start_word = -1
    end
  end
end
|
data/lib/worte/worte.rb
ADDED
@@ -0,0 +1,30 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
require 'ffi/aspell'
|
4
|
+
|
5
|
+
module Worte
  # Spell checker front end: filters a text, tokenizes it and asks aspell
  # (through ffi-aspell) whether each token is spelled correctly.
  class Worte
    # language - the language passed to FFI::Aspell::Speller.open.
    # options  - a Hash. +:filters+ (an Array of filter objects) and
    #            +:filter+ (a single filter object) are consumed here; every
    #            other entry is handed to FFI::Aspell::Speller.open.
    #
    # The caller's Hash and +:filters+ Array are left untouched.
    def initialize(language, options={})
      @language = language

      # Work on copies: deleting keys from (or appending to) the caller's
      # objects would silently drop the filters when the same options hash
      # is reused for a second checker.
      options = options.dup
      @filters = (options.delete(:filters) || []).dup
      single_filter = options.delete(:filter)
      @filters << single_filter if single_filter

      @options = options
    end

    # Spell checks +str+.
    #
    # The text is first passed through each configured filter in order, then
    # tokenized; each token's +correct+ attribute is set from the speller's
    # verdict on its word.
    #
    # Returns the Array of tokens.
    def check(str)
      filtered = @filters.inject(str) { |text, filter| filter.filter(text) }
      tokens = Tokenizer.new.tokenize(filtered)
      FFI::Aspell::Speller.open(@language, @options) do |speller|
        tokens.each do |token|
          token.correct = speller.correct?(token.word)
        end
      end
      tokens
    end
  end
end
|
data/lib/worte.rb
ADDED
@@ -0,0 +1,13 @@
|
|
1
|
+
require 'spec_helper'

describe Worte::Printer::Colorized do
  it "marks incorrect tokens with red marker" do
    pr = Worte::Printer::Colorized.new
    o = StringIO.new
    sample = "This is a simple test"
    tokens = Worte::Tokenizer.new.tokenize(sample)
    # Token 3 is "simple", which starts at column 10 of the sample.
    tokens[3].correct = false
    pr.print(sample, tokens, o)
    # NOTE(review): assumes Colorized keeps the Simple printer's layout, i.e.
    # a 3-column gutter (the width of "1: ") followed by 10 columns of
    # padding before the marker; confirm against colorized.rb.
    marker_prefix = ' ' * 13
    expect(o.string.split("\n")[1]).to start_with(marker_prefix + "\e[31m^^^^^^\e[0m")
  end
end
|
@@ -0,0 +1,40 @@
|
|
1
|
+
require 'spec_helper'

describe Worte::Printer::Simple do
  # Seven lines of filler text; freshly tokenized, every token counts as
  # correct (see the tokenizer spec).
  TEXT = <<-EOF
Lorem ipsum dolor sit amet, consectetur adipiscing elit. Aenean vitae mi neque. Sed tempus libero placerat tellus
scelerisque condimentum. Nunc augue orci, aliquam sit amet lacus sit amet, volutpat molestie tellus. In eu egestas erat, et
fringilla orci. Suspendisse est ipsum, scelerisque varius porta a, ornare non dolor. Sed placerat ex risus. Donec posuere
fringilla ornare. Aliquam suscipit gravida ex sit amet rhoncus. Vivamus fringilla dui ut dui fermentum, eu volutpat justo
vulputate. Sed ut semper ipsum. Pellentesque varius tempus congue. Duis ullamcorper gravida velit non rhoncus. Nulla
facilisi. In vel placerat mauris, semper venenatis metus. Sed efficitur, metus eu efficitur placerat, lacus diam pharetra
lectus, a accumsan ante mi ut neque. Integer vitae posuere nisl.
  EOF

  it "skips lines without incorrect tokens" do
    pr = Worte::Printer::Simple.new
    o = StringIO.new
    tokens = Worte::Tokenizer.new.tokenize(TEXT)
    pr.print(TEXT, tokens, o)
    expect(o.string.length).to eq(0)
  end

  it "prints line numbers" do
    pr = Worte::Printer::Simple.new
    o = StringIO.new
    tokens = Worte::Tokenizer.new.tokenize(TEXT)
    # Token 40 is "ipsum" on the third line of TEXT.
    tokens[40].correct = false
    pr.print(TEXT, tokens, o)
    expect(o.string).to start_with('3: ')
  end

  it "marks incorrect words" do
    pr = Worte::Printer::Simple.new
    o = StringIO.new
    sample = "This is a simple test"
    tokens = Worte::Tokenizer.new.tokenize(sample)
    # Token 3 is "simple", which starts at column 10 of the sample.
    tokens[3].correct = false
    pr.print(sample, tokens, o)
    # The marker line starts with a gutter as wide as "1: " (3 columns),
    # followed by 10 columns of padding up to the misspelled word.
    marker_prefix = ' ' * 13
    expect(o.string.split("\n")[1]).to start_with(marker_prefix + '^^^^^^')
  end
end
|
data/spec/spec_helper.rb
ADDED
@@ -0,0 +1,101 @@
|
|
1
|
+
# This file was generated by the `rspec --init` command. Conventionally, all
# specs live under a `spec` directory, which RSpec adds to the `$LOAD_PATH`.
# The generated `.rspec` file contains `--require spec_helper` which will cause
# this file to always be loaded, without a need to explicitly require it in any
# files.
#
# Given that it is always loaded, you are encouraged to keep this file as
# light-weight as possible. Requiring heavyweight dependencies from this file
# will add to the boot time of your test suite on EVERY test run, even for an
# individual file that may not need all of that loaded. Instead, consider making
# a separate helper file that requires the additional dependencies and performs
# the additional setup, and require it from the spec files that actually need
# it.
#
# The `.rspec` file also contains a few flags that are not defaults but that
# users commonly want.
#
# See http://rubydoc.info/gems/rspec-core/RSpec/Core/Configuration

# Put the gem's own lib directory first on the load path so that the specs
# load the code from this checkout.
$LOAD_PATH.unshift(File.expand_path('../../lib', __FILE__))

require 'worte'

RSpec.configure do |config|
  # rspec-expectations config goes here. You can use an alternate
  # assertion/expectation library such as wrong or the stdlib/minitest
  # assertions if you prefer.
  config.expect_with :rspec do |expectations|
    # This option will default to `true` in RSpec 4. It makes the `description`
    # and `failure_message` of custom matchers include text for helper methods
    # defined using `chain`, e.g.:
    #     be_bigger_than(2).and_smaller_than(4).description
    #     # => "be bigger than 2 and smaller than 4"
    # ...rather than:
    #     # => "be bigger than 2"
    expectations.include_chain_clauses_in_custom_matcher_descriptions = true
  end

  # rspec-mocks config goes here. You can use an alternate test double
  # library (such as bogus or mocha) by changing the `mock_with` option here.
  config.mock_with :rspec do |mocks|
    # Prevents you from mocking or stubbing a method that does not exist on
    # a real object. This is generally recommended, and will default to
    # `true` in RSpec 4.
    mocks.verify_partial_doubles = true
  end

# The settings below are suggested to provide a good initial experience
# with RSpec, but feel free to customize to your heart's content.
=begin
  # These two settings work together to allow you to limit a spec run
  # to individual examples or groups you care about by tagging them with
  # `:focus` metadata. When nothing is tagged with `:focus`, all examples
  # get run.
  config.filter_run :focus
  config.run_all_when_everything_filtered = true

  # Allows RSpec to persist some state between runs in order to support
  # the `--only-failures` and `--next-failure` CLI options. We recommend
  # you configure your source control system to ignore this file.
  config.example_status_persistence_file_path = "spec/examples.txt"

  # Limits the available syntax to the non-monkey patched syntax that is
  # recommended. For more details, see:
  #   - http://myronmars.to/n/dev-blog/2012/06/rspecs-new-expectation-syntax
  #   - http://www.teaisaweso.me/blog/2013/05/27/rspecs-new-message-expectation-syntax/
  #   - http://myronmars.to/n/dev-blog/2014/05/notable-changes-in-rspec-3#new__config_option_to_disable_rspeccore_monkey_patching
  config.disable_monkey_patching!

  # This setting enables warnings. It's recommended, but in some cases may
  # be too noisy due to issues in dependencies.
  config.warnings = true

  # Many RSpec users commonly either run the entire suite or an individual
  # file, and it's useful to allow more verbose output when running an
  # individual spec file.
  if config.files_to_run.one?
    # Use the documentation formatter for detailed output,
    # unless a formatter has already been configured
    # (e.g. via a command-line flag).
    config.default_formatter = 'doc'
  end

  # Print the 10 slowest examples and example groups at the
  # end of the spec run, to help surface which specs are running
  # particularly slow.
  config.profile_examples = 10

  # Run specs in random order to surface order dependencies. If you find an
  # order dependency and want to debug it, you can fix the order by providing
  # the seed, which is printed after each run.
  #     --seed 1234
  config.order = :random

  # Seed global randomization in this process using the `--seed` CLI option.
  # Setting this allows you to use `--seed` to deterministically reproduce
  # test failures related to randomization by passing the same `--seed` value
  # as the one that triggered the failure.
  Kernel.srand config.seed
=end
end
|
@@ -0,0 +1,50 @@
|
|
1
|
+
require 'spec_helper'

describe Worte::Tokenizer do
  # A fresh tokenizer for every example.
  let(:tokenizer) { Worte::Tokenizer.new }

  it "sets newline flag on last token in line" do
    first_token = tokenizer.tokenize("test\n").first
    expect(first_token.newline).to be(true)
  end

  it "sets all tokens correct after tokenization" do
    result = tokenizer.tokenize("test test2 test3")
    expect(result.all?(&:correct)).to be(true)
  end

  it "removes parens and brackets" do
    # Wrap each word in a different bracket pair: (test1) [test2] and an
    # angle-bracketed test3.
    pairs = [%w[( )], %w([ ]), %w(< >)]
    input = pairs.each_with_index.map { |(left, right), i| "#{left}test#{i + 1}#{right}" }.join(' ')

    result = tokenizer.tokenize(input)
    expect(result.length).to eq(3)
    %w(test1 test2 test3).each_with_index do |expected, i|
      expect(result[i].word).to eq(expected)
    end
  end

  it "breaks on punctuation" do
    result = tokenizer.tokenize("Ich, ging! Nach. Hause? Nicht: so; schnell")
    expect(result.length).to eq(7)
    %w(Ich ging Nach Hause Nicht so schnell).each_with_index do |expected, i|
      expect(result[i].word).to eq(expected)
    end
  end

  it "breaks on quotes" do
    result = tokenizer.tokenize("'Hello' sweet \"World\"")
    expect(result.length).to eq(3)
    %w(Hello sweet World).each_with_index do |expected, i|
      expect(result[i].word).to eq(expected)
    end
  end

  it "stores position in token" do
    result = tokenizer.tokenize("a bcd \n efg ")
    # Positions are [line, column] pairs, both zero-based.
    [[0, 0], [0, 2], [1, 1]].each_with_index do |expected, i|
      expect(result[i].position).to eq(expected)
    end
  end
end
|
data/worte.gemspec
ADDED
@@ -0,0 +1,23 @@
|
|
1
|
+
# Gem specification for worte. The file lists are taken from git, so the gem
# has to be built from inside a git checkout.
require File.expand_path('../lib/worte/version', __FILE__)

# Repository files that are not shipped with the gem.
ignored_files = [".travis.yml", ".gitignore"]
ignored_files.uniq!

ignored_test_files = []

tracked_files = `git ls-files`.split("\n")
tracked_test_files = `git ls-files -- {test,spec,features}/*`.split("\n")

Gem::Specification.new do |gem|
  gem.name          = "worte"
  gem.version       = Worte::VERSION
  gem.authors       = ["Jonas Oberschweiber"]
  gem.email         = ["jonas@oberschweiber.com"]
  gem.summary       = "Simple spell checker based on ffi-aspell"
  gem.description   = "A simple spell checker based on ffi-aspell. Tokenizes texts, checks them and prints them out."
  gem.homepage      = "https://github.com/jonasoberschweiber/worte"
  gem.license       = "MIT"

  gem.files         = tracked_files - ignored_files + ["lib/worte/version.rb"]
  gem.test_files    = tracked_test_files - ignored_test_files
  gem.require_paths = ["lib"]

  gem.add_runtime_dependency "ffi-aspell", ["~> 1.1.0"]
end
|
metadata
ADDED
@@ -0,0 +1,85 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: worte
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.0.1
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Jonas Oberschweiber
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
date: 2015-10-15 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: ffi-aspell
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - "~>"
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: 1.1.0
|
20
|
+
type: :runtime
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - "~>"
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: 1.1.0
|
27
|
+
description: A simple spell checker based on ffi-aspell. Tokenizes texts, checks them
|
28
|
+
and prints them out.
|
29
|
+
email:
|
30
|
+
- jonas@oberschweiber.com
|
31
|
+
executables: []
|
32
|
+
extensions: []
|
33
|
+
extra_rdoc_files: []
|
34
|
+
files:
|
35
|
+
- ".rspec"
|
36
|
+
- Gemfile
|
37
|
+
- Gemfile.lock
|
38
|
+
- LICENSE
|
39
|
+
- README.md
|
40
|
+
- Rakefile
|
41
|
+
- lib/worte.rb
|
42
|
+
- lib/worte/filter/markdown.rb
|
43
|
+
- lib/worte/filter/url.rb
|
44
|
+
- lib/worte/printer/colorized.rb
|
45
|
+
- lib/worte/printer/simple.rb
|
46
|
+
- lib/worte/token.rb
|
47
|
+
- lib/worte/tokenizer.rb
|
48
|
+
- lib/worte/version.rb
|
49
|
+
- lib/worte/worte.rb
|
50
|
+
- spec/colorized_printer_spec.rb
|
51
|
+
- spec/simple_printer_spec.rb
|
52
|
+
- spec/spec_helper.rb
|
53
|
+
- spec/tokenizer_spec.rb
|
54
|
+
- spec/url_filter_spec.rb
|
55
|
+
- worte.gemspec
|
56
|
+
homepage: https://github.com/jonasoberschweiber/worte
|
57
|
+
licenses:
|
58
|
+
- MIT
|
59
|
+
metadata: {}
|
60
|
+
post_install_message:
|
61
|
+
rdoc_options: []
|
62
|
+
require_paths:
|
63
|
+
- lib
|
64
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
65
|
+
requirements:
|
66
|
+
- - ">="
|
67
|
+
- !ruby/object:Gem::Version
|
68
|
+
version: '0'
|
69
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
70
|
+
requirements:
|
71
|
+
- - ">="
|
72
|
+
- !ruby/object:Gem::Version
|
73
|
+
version: '0'
|
74
|
+
requirements: []
|
75
|
+
rubyforge_project:
|
76
|
+
rubygems_version: 2.2.2
|
77
|
+
signing_key:
|
78
|
+
specification_version: 4
|
79
|
+
summary: Simple spell checker based on ffi-aspell
|
80
|
+
test_files:
|
81
|
+
- spec/colorized_printer_spec.rb
|
82
|
+
- spec/simple_printer_spec.rb
|
83
|
+
- spec/spec_helper.rb
|
84
|
+
- spec/tokenizer_spec.rb
|
85
|
+
- spec/url_filter_spec.rb
|