lexical_units 0.0.9 → 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.ruby-version +1 -1
- data/.travis.yml +3 -0
- data/Gemfile +2 -1
- data/README.md +2 -1
- data/lib/lexical_units/sentences.rb +8 -8
- data/lib/lexical_units/version.rb +1 -1
- data/lib/lexical_units/words.rb +7 -12
- data/lib/lexical_units/words_without_digits.rb +6 -6
- data/spec/lexical_units/sentences_spec.rb +12 -6
- data/spec/lexical_units/string_spec.rb +4 -4
- data/spec/lexical_units/syllables_spec.rb +1 -1
- data/spec/lexical_units/words_and_sentences_spec.rb +2 -2
- data/spec/lexical_units/words_spec.rb +19 -19
- data/spec/lexical_units/words_without_digits_spec.rb +2 -2
- data/spec/spec_helper.rb +2 -1
- metadata +14 -14
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 0ace74edd4ee847a562b6be2d9c995e47ae98f11
|
4
|
+
data.tar.gz: 2263aa046d3ea8a247f5fac9352b43a000c35b35
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 5a48795c224d60c44641462f21eb23ecccca52f85c05365c46bd382914167fb7039267702b2d82a4827d58fa19bb7042e4d4f94b3332041dc01e3d49d92bfb14
|
7
|
+
data.tar.gz: 31f3ece39c3062d320822a70172088c3a5e72cf223a0b615ba8cc16f1ddd3967fec603366e81681ca7d551ee226834f305a7fb21cf2ad34eb7705591ff2b8421
|
data/.ruby-version
CHANGED
@@ -1 +1 @@
|
|
1
|
-
2.
|
1
|
+
2.1.2
|
data/.travis.yml
CHANGED
data/Gemfile
CHANGED
data/README.md
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
# LexicalUnits [![Gem Version](https://badge.fury.io/rb/lexical_units.png)](http://badge.fury.io/rb/lexical_units) [![Build Status](https://travis-ci.org/fractalsoft/lexical_units.png)](https://travis-ci.org/fractalsoft/lexical_units) [![Dependency Status](https://gemnasium.com/fractalsoft/lexical_units.png)](https://gemnasium.com/fractalsoft/lexical_units) [![Coverage Status](https://coveralls.io/repos/fractalsoft/lexical_units/badge.png)](https://coveralls.io/r/fractalsoft/lexical_units) [![Stories in Ready](https://badge.waffle.io/fractalsoft/lexical_units.png)](http://waffle.io/fractalsoft/lexical_units)
|
1
|
+
# LexicalUnits [![Gem Version](https://badge.fury.io/rb/lexical_units.png)](http://badge.fury.io/rb/lexical_units) [![Build Status](https://travis-ci.org/fractalsoft/lexical_units.png)](https://travis-ci.org/fractalsoft/lexical_units) [![Dependency Status](https://gemnasium.com/fractalsoft/lexical_units.png)](https://gemnasium.com/fractalsoft/lexical_units) [![Coverage Status](https://coveralls.io/repos/fractalsoft/lexical_units/badge.png)](https://coveralls.io/r/fractalsoft/lexical_units) [![Code Climate](https://codeclimate.com/repos/52823cc489af7e26300298a5/badges/3783c14343942947903f/gpa.png)](https://codeclimate.com/repos/52823cc489af7e26300298a5/feed) [![Stories in Ready](https://badge.waffle.io/fractalsoft/lexical_units.png)](http://waffle.io/fractalsoft/lexical_units)
|
2
2
|
|
3
3
|
[![endorse](https://api.coderwall.com/torrocus/endorsecount.png)](https://coderwall.com/torrocus)
|
4
4
|
|
@@ -23,6 +23,7 @@ Or install it yourself as:
|
|
23
23
|
```ruby
|
24
24
|
LexicalUnits::words(text)
|
25
25
|
LexicalUnits::sentences(text)
|
26
|
+
LexicalUnits::words_and_sentences(text)
|
26
27
|
LexicalUnits::words_without_digits(text)
|
27
28
|
```
|
28
29
|
|
@@ -8,16 +8,16 @@ module LexicalUnits
|
|
8
8
|
# self.sentences('Lorem! Ipsum dolor?') #=> ['Lorem!', 'Ipsum dolor?']
|
9
9
|
def self.sentences(text)
|
10
10
|
separators = LexicalUnits.sentence_separators
|
11
|
-
regexp = Regexp.new("[^#{separators}]+[#{separators}]{
|
11
|
+
regexp = Regexp.new("[^#{separators}]+[#{separators}]{0,3}")
|
12
12
|
text.scan(regexp).map(&:strip)
|
13
13
|
end
|
14
14
|
|
15
15
|
private
|
16
16
|
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
end
|
17
|
+
def self.sentence_separators
|
18
|
+
[
|
19
|
+
'\.', '\?', '\!',
|
20
|
+
'‽'
|
21
|
+
].join
|
22
|
+
end
|
23
|
+
end
|
data/lib/lexical_units/words.rb
CHANGED
@@ -13,16 +13,11 @@ module LexicalUnits
|
|
13
13
|
|
14
14
|
private
|
15
15
|
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
"\»", "\«",
|
24
|
-
'\@', '\#',
|
25
|
-
'\+'
|
26
|
-
].join
|
27
|
-
end
|
16
|
+
def self.separators
|
17
|
+
[
|
18
|
+
'\,', '\:', '\;', '\.', '\?', '\!', '\/',
|
19
|
+
'\(', '\)', '\[', '\]', '\>', '\<', '\{', '\}',
|
20
|
+
'\|', '\~', "\¿", "\¡", '\=', '\"', "\»", "\«", '\@', '\#', '\+'
|
21
|
+
].join
|
22
|
+
end
|
28
23
|
end
|
@@ -12,10 +12,10 @@ module LexicalUnits
|
|
12
12
|
|
13
13
|
private
|
14
14
|
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
15
|
+
def self.numeric?(value)
|
16
|
+
return true if value =~ /^\d+$/
|
17
|
+
true if Float(value)
|
18
|
+
rescue
|
19
|
+
false
|
20
|
+
end
|
21
21
|
end
|
@@ -4,8 +4,8 @@ require 'spec_helper'
|
|
4
4
|
describe LexicalUnits do
|
5
5
|
context '.sentences' do
|
6
6
|
it 'splits text into sentences' do
|
7
|
-
text = %q
|
8
|
-
Fusce ut lacinia lorem. Nullam a sem quam. Duis faucibus tortor in.
|
7
|
+
text = %q(Lorem ipsum dolor sit amet. Consectetur adipiscing elit.
|
8
|
+
Fusce ut lacinia lorem. Nullam a sem quam. Duis faucibus tortor in.)
|
9
9
|
array = [
|
10
10
|
'Lorem ipsum dolor sit amet.',
|
11
11
|
'Consectetur adipiscing elit.',
|
@@ -14,7 +14,7 @@ describe LexicalUnits do
|
|
14
14
|
'Duis faucibus tortor in.'
|
15
15
|
]
|
16
16
|
|
17
|
-
subject.sentences(text).
|
17
|
+
expect(subject.sentences(text)).to eq(array)
|
18
18
|
end
|
19
19
|
|
20
20
|
it 'splits text with question mark and exclamation mark into sentences' do
|
@@ -25,7 +25,7 @@ describe LexicalUnits do
|
|
25
25
|
'Consectetur adipiscing elit.'
|
26
26
|
]
|
27
27
|
|
28
|
-
subject.sentences(text).
|
28
|
+
expect(subject.sentences(text)).to eq(array)
|
29
29
|
end
|
30
30
|
|
31
31
|
it 'splits text with ellipsis into sentences' do
|
@@ -35,7 +35,7 @@ describe LexicalUnits do
|
|
35
35
|
'Consectetur adipiscing elit.'
|
36
36
|
]
|
37
37
|
|
38
|
-
subject.sentences(text).
|
38
|
+
expect(subject.sentences(text)).to eq(array)
|
39
39
|
end
|
40
40
|
|
41
41
|
it 'splits text with interrobangs into sentences' do
|
@@ -47,8 +47,14 @@ describe LexicalUnits do
|
|
47
47
|
'Really?'
|
48
48
|
]
|
49
49
|
|
50
|
-
subject.sentences(text).
|
50
|
+
expect(subject.sentences(text)).to eq(array)
|
51
51
|
end
|
52
52
|
|
53
|
+
it 'splits text into sentence without end-dot' do
|
54
|
+
text = 'Lorem ipsum dolor'
|
55
|
+
array = [text]
|
56
|
+
|
57
|
+
expect(subject.sentences(text)).to eq(array)
|
58
|
+
end
|
53
59
|
end
|
54
60
|
end
|
@@ -12,7 +12,7 @@ describe LexicalUnits::String do
|
|
12
12
|
array = %w(Lorem ipsum dolor sit amet)
|
13
13
|
string = array.join(' ')
|
14
14
|
|
15
|
-
string.words.
|
15
|
+
expect(string.words).to eq(array)
|
16
16
|
end
|
17
17
|
end
|
18
18
|
|
@@ -21,7 +21,7 @@ describe LexicalUnits::String do
|
|
21
21
|
array = ['Lorem ipsum!', 'Dolor sit?', 'Amet.']
|
22
22
|
string = array.join
|
23
23
|
|
24
|
-
string.sentences.
|
24
|
+
expect(string.sentences).to eq(array)
|
25
25
|
end
|
26
26
|
end
|
27
27
|
|
@@ -32,7 +32,7 @@ describe LexicalUnits::String do
|
|
32
32
|
sentence.join(' ')
|
33
33
|
end.join('. ') + '.'
|
34
34
|
|
35
|
-
string.words_and_sentences.
|
35
|
+
expect(string.words_and_sentences).to eq(array)
|
36
36
|
end
|
37
37
|
end
|
38
38
|
|
@@ -41,7 +41,7 @@ describe LexicalUnits::String do
|
|
41
41
|
array = %w(Lorem ipsum dolor sit amet)
|
42
42
|
string = 'Lorem 1 ipsum 23 dolor 456 sit 7890 amet'
|
43
43
|
|
44
|
-
string.words_without_digits.
|
44
|
+
expect(string.words_without_digits).to eq(array)
|
45
45
|
end
|
46
46
|
end
|
47
47
|
end
|
@@ -10,118 +10,118 @@ describe LexicalUnits do
|
|
10
10
|
it 'splits text with whitespaces into words' do
|
11
11
|
text = 'Lorem ipsum dolor sit amet'
|
12
12
|
|
13
|
-
subject.words(text).
|
13
|
+
expect(subject.words(text)).to eq(@array)
|
14
14
|
end
|
15
15
|
|
16
16
|
it 'splits text with comma, colon and semicolon into words' do
|
17
17
|
text = 'Lorem ipsum,dolor:sit;amet'
|
18
18
|
|
19
|
-
subject.words(text).
|
19
|
+
expect(subject.words(text)).to eq(@array)
|
20
20
|
end
|
21
21
|
|
22
22
|
it 'splits text with dot, question mark and exclamation mark into words' do
|
23
23
|
text = 'Lorem ipsum.dolor?sit!amet'
|
24
24
|
|
25
|
-
subject.words(text).
|
25
|
+
expect(subject.words(text)).to eq(@array)
|
26
26
|
end
|
27
27
|
|
28
28
|
it 'splits other text with whitespaces, comma and dot into words' do
|
29
29
|
text = 'Lorem ipsum dolor sit amet, consectetur adipiscing elit.'
|
30
30
|
array = %w(Lorem ipsum dolor sit amet consectetur adipiscing elit)
|
31
31
|
|
32
|
-
subject.words(text).
|
32
|
+
expect(subject.words(text)).to eq(array)
|
33
33
|
end
|
34
34
|
|
35
35
|
it 'no splits text with hyphen into words' do
|
36
36
|
text = 'Lorem ipsum dolor-sit amet'
|
37
37
|
array = %w(Lorem ipsum dolor-sit amet)
|
38
38
|
|
39
|
-
subject.words(text).
|
39
|
+
expect(subject.words(text)).to eq(array)
|
40
40
|
end
|
41
41
|
|
42
42
|
it 'splits text with slash into words' do
|
43
43
|
text = 'Lorem ipsum dolor sit/amet'
|
44
44
|
|
45
|
-
subject.words(text).
|
45
|
+
expect(subject.words(text)).to eq(@array)
|
46
46
|
end
|
47
47
|
|
48
48
|
it 'splits text with round brackets into words' do
|
49
49
|
text = 'Lorem ipsum(dolor sit)amet'
|
50
50
|
|
51
|
-
subject.words(text).
|
51
|
+
expect(subject.words(text)).to eq(@array)
|
52
52
|
end
|
53
53
|
|
54
54
|
it 'splits text with square brackets into words' do
|
55
55
|
text = 'Lorem ipsum dolor[sit]amet'
|
56
56
|
|
57
|
-
subject.words(text).
|
57
|
+
expect(subject.words(text)).to eq(@array)
|
58
58
|
end
|
59
59
|
|
60
60
|
it 'splits text with pointy brackets into words' do
|
61
61
|
text = 'Lorem<ipsum dolor sit>amet'
|
62
62
|
|
63
|
-
subject.words(text).
|
63
|
+
expect(subject.words(text)).to eq(@array)
|
64
64
|
end
|
65
65
|
|
66
66
|
it 'splits text with braces into words' do
|
67
67
|
text = 'Lorem ipsum{dolor}sit amet'
|
68
68
|
|
69
|
-
subject.words(text).
|
69
|
+
expect(subject.words(text)).to eq(@array)
|
70
70
|
end
|
71
71
|
|
72
72
|
it 'splits text with vertical bar into words' do
|
73
73
|
text = 'Lorem ipsum|dolor sit amet'
|
74
74
|
|
75
|
-
subject.words(text).
|
75
|
+
expect(subject.words(text)).to eq(@array)
|
76
76
|
end
|
77
77
|
|
78
78
|
it 'splits text with tilde into words' do
|
79
79
|
text = 'Lorem ipsum dolor~sit amet'
|
80
80
|
|
81
|
-
subject.words(text).
|
81
|
+
expect(subject.words(text)).to eq(@array)
|
82
82
|
end
|
83
83
|
|
84
84
|
# Spanish
|
85
85
|
it 'splits text with inverted question and exclamation marks into words' do
|
86
86
|
text = 'Lorem¿ipsum?dolor¡sit!amet'
|
87
87
|
|
88
|
-
subject.words(text).
|
88
|
+
expect(subject.words(text)).to eq(@array)
|
89
89
|
end
|
90
90
|
|
91
91
|
it 'splits text with equals sign into words' do
|
92
92
|
text = 'Lorem ipsum=dolor sit amet'
|
93
93
|
|
94
|
-
subject.words(text).
|
94
|
+
expect(subject.words(text)).to eq(@array)
|
95
95
|
end
|
96
96
|
|
97
97
|
it 'splits text with typewriter double quotes into words' do
|
98
98
|
text = %Q(Lorem"ipsum dolor"sit amet)
|
99
99
|
|
100
|
-
subject.words(text).
|
100
|
+
expect(subject.words(text)).to eq(@array)
|
101
101
|
end
|
102
102
|
|
103
103
|
it 'split text with non-English quotation marks into words' do
|
104
104
|
text = %Q(Lorem»ipsum dolor«sit amet)
|
105
105
|
|
106
|
-
subject.words(text).
|
106
|
+
expect(subject.words(text)).to eq(@array)
|
107
107
|
end
|
108
108
|
|
109
109
|
it "split text with 'at sign' (@) into words" do
|
110
110
|
text = %Q(Lorem@ipsum dolor sit amet)
|
111
111
|
|
112
|
-
subject.words(text).
|
112
|
+
expect(subject.words(text)).to eq(@array)
|
113
113
|
end
|
114
114
|
|
115
115
|
it "split text with 'number sign (#) into words" do
|
116
116
|
text = %Q(Lorem ipsum#dolor sit amet)
|
117
117
|
|
118
|
-
subject.words(text).
|
118
|
+
expect(subject.words(text)).to eq(@array)
|
119
119
|
end
|
120
120
|
|
121
121
|
it 'split text with plus (+) into words' do
|
122
122
|
text = %Q(Lorem+ipsum dolor+sit amet)
|
123
123
|
|
124
|
-
subject.words(text).
|
124
|
+
expect(subject.words(text)).to eq(@array)
|
125
125
|
end
|
126
126
|
end
|
127
127
|
end
|
@@ -9,8 +9,8 @@ describe LexicalUnits do
|
|
9
9
|
].each do |hash|
|
10
10
|
text, array = hash.values
|
11
11
|
it 'splits text into words without digits' do
|
12
|
-
subject.words_without_digits(text).
|
12
|
+
expect(subject.words_without_digits(text)).to eq(array)
|
13
13
|
end
|
14
14
|
end
|
15
15
|
end
|
16
|
-
end
|
16
|
+
end
|
data/spec/spec_helper.rb
CHANGED
@@ -1,9 +1,10 @@
|
|
1
1
|
require 'lexical_units'
|
2
2
|
require 'coveralls'
|
3
3
|
Coveralls.wear!
|
4
|
+
require 'codeclimate-test-reporter'
|
5
|
+
CodeClimate::TestReporter.start
|
4
6
|
|
5
7
|
RSpec.configure do |config|
|
6
|
-
config.treat_symbols_as_metadata_keys_with_true_values = true
|
7
8
|
config.run_all_when_everything_filtered = true
|
8
9
|
config.filter_run :focus
|
9
10
|
end
|
metadata
CHANGED
@@ -1,41 +1,41 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: lexical_units
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0
|
4
|
+
version: 0.1.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Aleksander Malaszkiewicz
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2014-06-26 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
15
15
|
requirement: !ruby/object:Gem::Requirement
|
16
16
|
requirements:
|
17
|
-
- - ~>
|
17
|
+
- - "~>"
|
18
18
|
- !ruby/object:Gem::Version
|
19
19
|
version: '1.3'
|
20
20
|
type: :development
|
21
21
|
prerelease: false
|
22
22
|
version_requirements: !ruby/object:Gem::Requirement
|
23
23
|
requirements:
|
24
|
-
- - ~>
|
24
|
+
- - "~>"
|
25
25
|
- !ruby/object:Gem::Version
|
26
26
|
version: '1.3'
|
27
27
|
- !ruby/object:Gem::Dependency
|
28
28
|
name: rake
|
29
29
|
requirement: !ruby/object:Gem::Requirement
|
30
30
|
requirements:
|
31
|
-
- -
|
31
|
+
- - ">="
|
32
32
|
- !ruby/object:Gem::Version
|
33
33
|
version: '0'
|
34
34
|
type: :development
|
35
35
|
prerelease: false
|
36
36
|
version_requirements: !ruby/object:Gem::Requirement
|
37
37
|
requirements:
|
38
|
-
- -
|
38
|
+
- - ">="
|
39
39
|
- !ruby/object:Gem::Version
|
40
40
|
version: '0'
|
41
41
|
description:
|
@@ -45,11 +45,11 @@ executables: []
|
|
45
45
|
extensions: []
|
46
46
|
extra_rdoc_files: []
|
47
47
|
files:
|
48
|
-
- .gitignore
|
49
|
-
- .rspec
|
50
|
-
- .ruby-gemset
|
51
|
-
- .ruby-version
|
52
|
-
- .travis.yml
|
48
|
+
- ".gitignore"
|
49
|
+
- ".rspec"
|
50
|
+
- ".ruby-gemset"
|
51
|
+
- ".ruby-version"
|
52
|
+
- ".travis.yml"
|
53
53
|
- CHANGELOG.md
|
54
54
|
- Gemfile
|
55
55
|
- Guardfile
|
@@ -82,17 +82,17 @@ require_paths:
|
|
82
82
|
- lib
|
83
83
|
required_ruby_version: !ruby/object:Gem::Requirement
|
84
84
|
requirements:
|
85
|
-
- -
|
85
|
+
- - ">="
|
86
86
|
- !ruby/object:Gem::Version
|
87
87
|
version: '0'
|
88
88
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
89
89
|
requirements:
|
90
|
-
- -
|
90
|
+
- - ">="
|
91
91
|
- !ruby/object:Gem::Version
|
92
92
|
version: '0'
|
93
93
|
requirements: []
|
94
94
|
rubyforge_project:
|
95
|
-
rubygems_version: 2.
|
95
|
+
rubygems_version: 2.2.2
|
96
96
|
signing_key:
|
97
97
|
specification_version: 4
|
98
98
|
summary: Split text into lexical units
|