lexical_units 0.0.9 → 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.ruby-version +1 -1
- data/.travis.yml +3 -0
- data/Gemfile +2 -1
- data/README.md +2 -1
- data/lib/lexical_units/sentences.rb +8 -8
- data/lib/lexical_units/version.rb +1 -1
- data/lib/lexical_units/words.rb +7 -12
- data/lib/lexical_units/words_without_digits.rb +6 -6
- data/spec/lexical_units/sentences_spec.rb +12 -6
- data/spec/lexical_units/string_spec.rb +4 -4
- data/spec/lexical_units/syllables_spec.rb +1 -1
- data/spec/lexical_units/words_and_sentences_spec.rb +2 -2
- data/spec/lexical_units/words_spec.rb +19 -19
- data/spec/lexical_units/words_without_digits_spec.rb +2 -2
- data/spec/spec_helper.rb +2 -1
- metadata +14 -14
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 0ace74edd4ee847a562b6be2d9c995e47ae98f11
|
4
|
+
data.tar.gz: 2263aa046d3ea8a247f5fac9352b43a000c35b35
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 5a48795c224d60c44641462f21eb23ecccca52f85c05365c46bd382914167fb7039267702b2d82a4827d58fa19bb7042e4d4f94b3332041dc01e3d49d92bfb14
|
7
|
+
data.tar.gz: 31f3ece39c3062d320822a70172088c3a5e72cf223a0b615ba8cc16f1ddd3967fec603366e81681ca7d551ee226834f305a7fb21cf2ad34eb7705591ff2b8421
|
data/.ruby-version
CHANGED
@@ -1 +1 @@
|
|
1
|
-
2.
|
1
|
+
2.1.2
|
data/.travis.yml
CHANGED
data/Gemfile
CHANGED
data/README.md
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
# LexicalUnits [](http://badge.fury.io/rb/lexical_units) [](https://travis-ci.org/fractalsoft/lexical_units) [](https://gemnasium.com/fractalsoft/lexical_units) [](https://coveralls.io/r/fractalsoft/lexical_units) [](http://waffle.io/fractalsoft/lexical_units)
|
1
|
+
# LexicalUnits [](http://badge.fury.io/rb/lexical_units) [](https://travis-ci.org/fractalsoft/lexical_units) [](https://gemnasium.com/fractalsoft/lexical_units) [](https://coveralls.io/r/fractalsoft/lexical_units) [](https://codeclimate.com/repos/52823cc489af7e26300298a5/feed) [](http://waffle.io/fractalsoft/lexical_units)
|
2
2
|
|
3
3
|
[](https://coderwall.com/torrocus)
|
4
4
|
|
@@ -23,6 +23,7 @@ Or install it yourself as:
|
|
23
23
|
```ruby
|
24
24
|
LexicalUnits::words(text)
|
25
25
|
LexicalUnits::sentences(text)
|
26
|
+
LexicalUnits::words_and_sentences(text)
|
26
27
|
LexicalUnits::words_without_digits(text)
|
27
28
|
```
|
28
29
|
|
data/lib/lexical_units/sentences.rb
CHANGED
@@ -8,16 +8,16 @@ module LexicalUnits
|
|
8
8
|
# self.sentences('Lorem! Ipsum dolor?') #=> ['Lorem!', 'Ipsum dolor?']
|
9
9
|
def self.sentences(text)
|
10
10
|
separators = LexicalUnits.sentence_separators
|
11
|
-
regexp = Regexp.new("[^#{separators}]+[#{separators}]{
|
11
|
+
regexp = Regexp.new("[^#{separators}]+[#{separators}]{0,3}")
|
12
12
|
text.scan(regexp).map(&:strip)
|
13
13
|
end
|
14
14
|
|
15
15
|
private
|
16
16
|
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
end
|
17
|
+
def self.sentence_separators
|
18
|
+
[
|
19
|
+
'\.', '\?', '\!',
|
20
|
+
'‽'
|
21
|
+
].join
|
22
|
+
end
|
23
|
+
end
|
data/lib/lexical_units/words.rb
CHANGED
@@ -13,16 +13,11 @@ module LexicalUnits
|
|
13
13
|
|
14
14
|
private
|
15
15
|
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
"\»", "\«",
|
24
|
-
'\@', '\#',
|
25
|
-
'\+'
|
26
|
-
].join
|
27
|
-
end
|
16
|
+
def self.separators
|
17
|
+
[
|
18
|
+
'\,', '\:', '\;', '\.', '\?', '\!', '\/',
|
19
|
+
'\(', '\)', '\[', '\]', '\>', '\<', '\{', '\}',
|
20
|
+
'\|', '\~', "\¿", "\¡", '\=', '\"', "\»", "\«", '\@', '\#', '\+'
|
21
|
+
].join
|
22
|
+
end
|
28
23
|
end
|
data/lib/lexical_units/words_without_digits.rb
CHANGED
@@ -12,10 +12,10 @@ module LexicalUnits
|
|
12
12
|
|
13
13
|
private
|
14
14
|
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
15
|
+
def self.numeric?(value)
|
16
|
+
return true if value =~ /^\d+$/
|
17
|
+
true if Float(value)
|
18
|
+
rescue
|
19
|
+
false
|
20
|
+
end
|
21
21
|
end
|
data/spec/lexical_units/sentences_spec.rb
CHANGED
@@ -4,8 +4,8 @@ require 'spec_helper'
|
|
4
4
|
describe LexicalUnits do
|
5
5
|
context '.sentences' do
|
6
6
|
it 'splits text into sentences' do
|
7
|
-
text = %q
|
8
|
-
Fusce ut lacinia lorem. Nullam a sem quam. Duis faucibus tortor in.
|
7
|
+
text = %q(Lorem ipsum dolor sit amet. Consectetur adipiscing elit.
|
8
|
+
Fusce ut lacinia lorem. Nullam a sem quam. Duis faucibus tortor in.)
|
9
9
|
array = [
|
10
10
|
'Lorem ipsum dolor sit amet.',
|
11
11
|
'Consectetur adipiscing elit.',
|
@@ -14,7 +14,7 @@ describe LexicalUnits do
|
|
14
14
|
'Duis faucibus tortor in.'
|
15
15
|
]
|
16
16
|
|
17
|
-
subject.sentences(text).
|
17
|
+
expect(subject.sentences(text)).to eq(array)
|
18
18
|
end
|
19
19
|
|
20
20
|
it 'splits text with question mark and exclamation mark into sentences' do
|
@@ -25,7 +25,7 @@ describe LexicalUnits do
|
|
25
25
|
'Consectetur adipiscing elit.'
|
26
26
|
]
|
27
27
|
|
28
|
-
subject.sentences(text).
|
28
|
+
expect(subject.sentences(text)).to eq(array)
|
29
29
|
end
|
30
30
|
|
31
31
|
it 'splits text with ellipsis into sentences' do
|
@@ -35,7 +35,7 @@ describe LexicalUnits do
|
|
35
35
|
'Consectetur adipiscing elit.'
|
36
36
|
]
|
37
37
|
|
38
|
-
subject.sentences(text).
|
38
|
+
expect(subject.sentences(text)).to eq(array)
|
39
39
|
end
|
40
40
|
|
41
41
|
it 'splits text with interrobangs into sentences' do
|
@@ -47,8 +47,14 @@ describe LexicalUnits do
|
|
47
47
|
'Really?'
|
48
48
|
]
|
49
49
|
|
50
|
-
subject.sentences(text).
|
50
|
+
expect(subject.sentences(text)).to eq(array)
|
51
51
|
end
|
52
52
|
|
53
|
+
it 'splits text into sentence without end-dot' do
|
54
|
+
text = 'Lorem ipsum dolor'
|
55
|
+
array = [text]
|
56
|
+
|
57
|
+
expect(subject.sentences(text)).to eq(array)
|
58
|
+
end
|
53
59
|
end
|
54
60
|
end
|
data/spec/lexical_units/string_spec.rb
CHANGED
@@ -12,7 +12,7 @@ describe LexicalUnits::String do
|
|
12
12
|
array = %w(Lorem ipsum dolor sit amet)
|
13
13
|
string = array.join(' ')
|
14
14
|
|
15
|
-
string.words.
|
15
|
+
expect(string.words).to eq(array)
|
16
16
|
end
|
17
17
|
end
|
18
18
|
|
@@ -21,7 +21,7 @@ describe LexicalUnits::String do
|
|
21
21
|
array = ['Lorem ipsum!', 'Dolor sit?', 'Amet.']
|
22
22
|
string = array.join
|
23
23
|
|
24
|
-
string.sentences.
|
24
|
+
expect(string.sentences).to eq(array)
|
25
25
|
end
|
26
26
|
end
|
27
27
|
|
@@ -32,7 +32,7 @@ describe LexicalUnits::String do
|
|
32
32
|
sentence.join(' ')
|
33
33
|
end.join('. ') + '.'
|
34
34
|
|
35
|
-
string.words_and_sentences.
|
35
|
+
expect(string.words_and_sentences).to eq(array)
|
36
36
|
end
|
37
37
|
end
|
38
38
|
|
@@ -41,7 +41,7 @@ describe LexicalUnits::String do
|
|
41
41
|
array = %w(Lorem ipsum dolor sit amet)
|
42
42
|
string = 'Lorem 1 ipsum 23 dolor 456 sit 7890 amet'
|
43
43
|
|
44
|
-
string.words_without_digits.
|
44
|
+
expect(string.words_without_digits).to eq(array)
|
45
45
|
end
|
46
46
|
end
|
47
47
|
end
|
data/spec/lexical_units/words_spec.rb
CHANGED
@@ -10,118 +10,118 @@ describe LexicalUnits do
|
|
10
10
|
it 'splits text with whitespaces into words' do
|
11
11
|
text = 'Lorem ipsum dolor sit amet'
|
12
12
|
|
13
|
-
subject.words(text).
|
13
|
+
expect(subject.words(text)).to eq(@array)
|
14
14
|
end
|
15
15
|
|
16
16
|
it 'splits text with comma, colon and semicolon into words' do
|
17
17
|
text = 'Lorem ipsum,dolor:sit;amet'
|
18
18
|
|
19
|
-
subject.words(text).
|
19
|
+
expect(subject.words(text)).to eq(@array)
|
20
20
|
end
|
21
21
|
|
22
22
|
it 'splits text with dot, question mark and exclamation mark into words' do
|
23
23
|
text = 'Lorem ipsum.dolor?sit!amet'
|
24
24
|
|
25
|
-
subject.words(text).
|
25
|
+
expect(subject.words(text)).to eq(@array)
|
26
26
|
end
|
27
27
|
|
28
28
|
it 'splits other text with whitespaces, comma and dot into words' do
|
29
29
|
text = 'Lorem ipsum dolor sit amet, consectetur adipiscing elit.'
|
30
30
|
array = %w(Lorem ipsum dolor sit amet consectetur adipiscing elit)
|
31
31
|
|
32
|
-
subject.words(text).
|
32
|
+
expect(subject.words(text)).to eq(array)
|
33
33
|
end
|
34
34
|
|
35
35
|
it 'no splits text with hyphen into words' do
|
36
36
|
text = 'Lorem ipsum dolor-sit amet'
|
37
37
|
array = %w(Lorem ipsum dolor-sit amet)
|
38
38
|
|
39
|
-
subject.words(text).
|
39
|
+
expect(subject.words(text)).to eq(array)
|
40
40
|
end
|
41
41
|
|
42
42
|
it 'splits text with slash into words' do
|
43
43
|
text = 'Lorem ipsum dolor sit/amet'
|
44
44
|
|
45
|
-
subject.words(text).
|
45
|
+
expect(subject.words(text)).to eq(@array)
|
46
46
|
end
|
47
47
|
|
48
48
|
it 'splits text with round brackets into words' do
|
49
49
|
text = 'Lorem ipsum(dolor sit)amet'
|
50
50
|
|
51
|
-
subject.words(text).
|
51
|
+
expect(subject.words(text)).to eq(@array)
|
52
52
|
end
|
53
53
|
|
54
54
|
it 'splits text with square brackets into words' do
|
55
55
|
text = 'Lorem ipsum dolor[sit]amet'
|
56
56
|
|
57
|
-
subject.words(text).
|
57
|
+
expect(subject.words(text)).to eq(@array)
|
58
58
|
end
|
59
59
|
|
60
60
|
it 'splits text with pointy brackets into words' do
|
61
61
|
text = 'Lorem<ipsum dolor sit>amet'
|
62
62
|
|
63
|
-
subject.words(text).
|
63
|
+
expect(subject.words(text)).to eq(@array)
|
64
64
|
end
|
65
65
|
|
66
66
|
it 'splits text with braces into words' do
|
67
67
|
text = 'Lorem ipsum{dolor}sit amet'
|
68
68
|
|
69
|
-
subject.words(text).
|
69
|
+
expect(subject.words(text)).to eq(@array)
|
70
70
|
end
|
71
71
|
|
72
72
|
it 'splits text with vertical bar into words' do
|
73
73
|
text = 'Lorem ipsum|dolor sit amet'
|
74
74
|
|
75
|
-
subject.words(text).
|
75
|
+
expect(subject.words(text)).to eq(@array)
|
76
76
|
end
|
77
77
|
|
78
78
|
it 'splits text with tilde into words' do
|
79
79
|
text = 'Lorem ipsum dolor~sit amet'
|
80
80
|
|
81
|
-
subject.words(text).
|
81
|
+
expect(subject.words(text)).to eq(@array)
|
82
82
|
end
|
83
83
|
|
84
84
|
# Spanish
|
85
85
|
it 'splits text with inverted question and exclamation marks into words' do
|
86
86
|
text = 'Lorem¿ipsum?dolor¡sit!amet'
|
87
87
|
|
88
|
-
subject.words(text).
|
88
|
+
expect(subject.words(text)).to eq(@array)
|
89
89
|
end
|
90
90
|
|
91
91
|
it 'splits text with equals sign into words' do
|
92
92
|
text = 'Lorem ipsum=dolor sit amet'
|
93
93
|
|
94
|
-
subject.words(text).
|
94
|
+
expect(subject.words(text)).to eq(@array)
|
95
95
|
end
|
96
96
|
|
97
97
|
it 'splits text with typewriter double quotes into words' do
|
98
98
|
text = %Q(Lorem"ipsum dolor"sit amet)
|
99
99
|
|
100
|
-
subject.words(text).
|
100
|
+
expect(subject.words(text)).to eq(@array)
|
101
101
|
end
|
102
102
|
|
103
103
|
it 'split text with non-English quotation marks into words' do
|
104
104
|
text = %Q(Lorem»ipsum dolor«sit amet)
|
105
105
|
|
106
|
-
subject.words(text).
|
106
|
+
expect(subject.words(text)).to eq(@array)
|
107
107
|
end
|
108
108
|
|
109
109
|
it "split text with 'at sign' (@) into words" do
|
110
110
|
text = %Q(Lorem@ipsum dolor sit amet)
|
111
111
|
|
112
|
-
subject.words(text).
|
112
|
+
expect(subject.words(text)).to eq(@array)
|
113
113
|
end
|
114
114
|
|
115
115
|
it "split text with 'number sign (#) into words" do
|
116
116
|
text = %Q(Lorem ipsum#dolor sit amet)
|
117
117
|
|
118
|
-
subject.words(text).
|
118
|
+
expect(subject.words(text)).to eq(@array)
|
119
119
|
end
|
120
120
|
|
121
121
|
it 'split text with plus (+) into words' do
|
122
122
|
text = %Q(Lorem+ipsum dolor+sit amet)
|
123
123
|
|
124
|
-
subject.words(text).
|
124
|
+
expect(subject.words(text)).to eq(@array)
|
125
125
|
end
|
126
126
|
end
|
127
127
|
end
|
data/spec/lexical_units/words_without_digits_spec.rb
CHANGED
@@ -9,8 +9,8 @@ describe LexicalUnits do
|
|
9
9
|
].each do |hash|
|
10
10
|
text, array = hash.values
|
11
11
|
it 'splits text into words without digits' do
|
12
|
-
subject.words_without_digits(text).
|
12
|
+
expect(subject.words_without_digits(text)).to eq(array)
|
13
13
|
end
|
14
14
|
end
|
15
15
|
end
|
16
|
-
end
|
16
|
+
end
|
data/spec/spec_helper.rb
CHANGED
@@ -1,9 +1,10 @@
|
|
1
1
|
require 'lexical_units'
|
2
2
|
require 'coveralls'
|
3
3
|
Coveralls.wear!
|
4
|
+
require 'codeclimate-test-reporter'
|
5
|
+
CodeClimate::TestReporter.start
|
4
6
|
|
5
7
|
RSpec.configure do |config|
|
6
|
-
config.treat_symbols_as_metadata_keys_with_true_values = true
|
7
8
|
config.run_all_when_everything_filtered = true
|
8
9
|
config.filter_run :focus
|
9
10
|
end
|
metadata
CHANGED
@@ -1,41 +1,41 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: lexical_units
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0
|
4
|
+
version: 0.1.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Aleksander Malaszkiewicz
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2014-06-26 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
15
15
|
requirement: !ruby/object:Gem::Requirement
|
16
16
|
requirements:
|
17
|
-
- - ~>
|
17
|
+
- - "~>"
|
18
18
|
- !ruby/object:Gem::Version
|
19
19
|
version: '1.3'
|
20
20
|
type: :development
|
21
21
|
prerelease: false
|
22
22
|
version_requirements: !ruby/object:Gem::Requirement
|
23
23
|
requirements:
|
24
|
-
- - ~>
|
24
|
+
- - "~>"
|
25
25
|
- !ruby/object:Gem::Version
|
26
26
|
version: '1.3'
|
27
27
|
- !ruby/object:Gem::Dependency
|
28
28
|
name: rake
|
29
29
|
requirement: !ruby/object:Gem::Requirement
|
30
30
|
requirements:
|
31
|
-
- -
|
31
|
+
- - ">="
|
32
32
|
- !ruby/object:Gem::Version
|
33
33
|
version: '0'
|
34
34
|
type: :development
|
35
35
|
prerelease: false
|
36
36
|
version_requirements: !ruby/object:Gem::Requirement
|
37
37
|
requirements:
|
38
|
-
- -
|
38
|
+
- - ">="
|
39
39
|
- !ruby/object:Gem::Version
|
40
40
|
version: '0'
|
41
41
|
description:
|
@@ -45,11 +45,11 @@ executables: []
|
|
45
45
|
extensions: []
|
46
46
|
extra_rdoc_files: []
|
47
47
|
files:
|
48
|
-
- .gitignore
|
49
|
-
- .rspec
|
50
|
-
- .ruby-gemset
|
51
|
-
- .ruby-version
|
52
|
-
- .travis.yml
|
48
|
+
- ".gitignore"
|
49
|
+
- ".rspec"
|
50
|
+
- ".ruby-gemset"
|
51
|
+
- ".ruby-version"
|
52
|
+
- ".travis.yml"
|
53
53
|
- CHANGELOG.md
|
54
54
|
- Gemfile
|
55
55
|
- Guardfile
|
@@ -82,17 +82,17 @@ require_paths:
|
|
82
82
|
- lib
|
83
83
|
required_ruby_version: !ruby/object:Gem::Requirement
|
84
84
|
requirements:
|
85
|
-
- -
|
85
|
+
- - ">="
|
86
86
|
- !ruby/object:Gem::Version
|
87
87
|
version: '0'
|
88
88
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
89
89
|
requirements:
|
90
|
-
- -
|
90
|
+
- - ">="
|
91
91
|
- !ruby/object:Gem::Version
|
92
92
|
version: '0'
|
93
93
|
requirements: []
|
94
94
|
rubyforge_project:
|
95
|
-
rubygems_version: 2.
|
95
|
+
rubygems_version: 2.2.2
|
96
96
|
signing_key:
|
97
97
|
specification_version: 4
|
98
98
|
summary: Split text into lexical units
|