lexical_units 0.0.3 → 0.0.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +4 -0
- data/README.md +1 -0
- data/lib/lexical_units.rb +1 -0
- data/lib/lexical_units/syllables.rb +5 -0
- data/lib/lexical_units/version.rb +1 -1
- data/lib/lexical_units/words.rb +3 -1
- data/spec/lexical_units/sentences_spec.rb +5 -7
- data/spec/lexical_units/syllables_spec.rb +8 -0
- data/spec/lexical_units/words_spec.rb +34 -21
- metadata +5 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: fb575468ab234bcb093fc2afef714783d6aaf1e6
|
4
|
+
data.tar.gz: 90dc07a58c80092a35eec789d753be608e9af013
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: a40c9845569546f2014908aa585bd68ffd1207232aac5eb6870ce435acafb5fb64342721eccb5187ecfc34f9c8bc042f643cf4fc6fce08af17782fe892af5485
|
7
|
+
data.tar.gz: 7d2eaf43d6faa80ec1fe3da6bc6d0f40e22554e2e8e7416906a88636566f1cf4ba6faf09b48234cc23dee1ed1c6f61aa47d3b9f599460aa9862a4af858fbbb7d
|
data/CHANGELOG.md
CHANGED
data/README.md
CHANGED
@@ -1,4 +1,5 @@
|
|
1
1
|
# LexicalUnits [](http://badge.fury.io/rb/lexical_units) [](https://travis-ci.org/fractalsoft/lexical_units) [](https://gemnasium.com/fractalsoft/lexical_units) [](https://coveralls.io/r/fractalsoft/lexical_units)
|
2
|
+
[](https://coderwall.com/torrocus)
|
2
3
|
|
3
4
|
Lexical unit is a single word, a part of a word, or a chain of words that forms the basic elements of a language's lexicon.
|
4
5
|
|
data/lib/lexical_units.rb
CHANGED
data/lib/lexical_units/words.rb
CHANGED
data/spec/lexical_units/sentences_spec.rb
CHANGED
@@ -3,8 +3,6 @@ require 'spec_helper'
|
|
3
3
|
|
4
4
|
describe LexicalUnits do
|
5
5
|
context ".sentences" do
|
6
|
-
let(:klass) { LexicalUnits }
|
7
|
-
|
8
6
|
it "splits text into sentences" do
|
9
7
|
text = %q{Lorem ipsum dolor sit amet, consectetur adipiscing elit.
|
10
8
|
Fusce ut lacinia lorem. Nullam a sem quam. Duis faucibus tortor in.}
|
@@ -15,7 +13,7 @@ describe LexicalUnits do
|
|
15
13
|
"Duis faucibus tortor in."
|
16
14
|
]
|
17
15
|
|
18
|
-
|
16
|
+
subject.sentences(text).should eq(array)
|
19
17
|
end
|
20
18
|
|
21
19
|
it "splits text with question mark and exclamation mark into sentences" do
|
@@ -26,7 +24,7 @@ describe LexicalUnits do
|
|
26
24
|
"Consectetur adipiscing elit."
|
27
25
|
]
|
28
26
|
|
29
|
-
|
27
|
+
subject.sentences(text).should eq(array)
|
30
28
|
end
|
31
29
|
|
32
30
|
it "splits text with ellipsis into sentences" do
|
@@ -36,7 +34,7 @@ describe LexicalUnits do
|
|
36
34
|
"Consectetur adipiscing elit."
|
37
35
|
]
|
38
36
|
|
39
|
-
|
37
|
+
subject.sentences(text).should eq(array)
|
40
38
|
end
|
41
39
|
|
42
40
|
it "splits text with interrobangs into sentences" do
|
@@ -48,8 +46,8 @@ describe LexicalUnits do
|
|
48
46
|
"Really?"
|
49
47
|
]
|
50
48
|
|
51
|
-
|
49
|
+
subject.sentences(text).should eq(array)
|
52
50
|
end
|
53
51
|
|
54
52
|
end
|
55
|
-
end
|
53
|
+
end
|
data/spec/lexical_units/words_spec.rb
CHANGED
@@ -3,76 +3,89 @@ require 'spec_helper'
|
|
3
3
|
|
4
4
|
describe LexicalUnits do
|
5
5
|
context ".words" do
|
6
|
-
|
6
|
+
before do
|
7
|
+
@array = %w(Lorem ipsum dolor sit amet)
|
8
|
+
end
|
7
9
|
|
8
10
|
it "splits text with whitespaces into words" do
|
9
11
|
text = "Lorem ipsum dolor sit amet"
|
10
|
-
array = %w(Lorem ipsum dolor sit amet)
|
11
12
|
|
12
|
-
|
13
|
+
subject.words(text).should eq(@array)
|
13
14
|
end
|
14
15
|
|
15
16
|
it "splits text with comma, colon and semicolon into words" do
|
16
17
|
text = "Lorem ipsum,dolor:sit;amet"
|
17
|
-
array = %w(Lorem ipsum dolor sit amet)
|
18
18
|
|
19
|
-
|
19
|
+
subject.words(text).should eq(@array)
|
20
20
|
end
|
21
21
|
|
22
22
|
it "splits text with dot, question mark and exclamation mark into words" do
|
23
23
|
text = "Lorem ipsum.dolor?sit!amet"
|
24
|
-
array = %w(Lorem ipsum dolor sit amet)
|
25
24
|
|
26
|
-
|
25
|
+
subject.words(text).should eq(@array)
|
27
26
|
end
|
28
27
|
|
29
28
|
it "splits other text with whitespaces, comma and dot into words" do
|
30
29
|
text = "Lorem ipsum dolor sit amet, consectetur adipiscing elit."
|
31
30
|
array = %w(Lorem ipsum dolor sit amet consectetur adipiscing elit)
|
32
31
|
|
33
|
-
|
32
|
+
subject.words(text).should eq(array)
|
34
33
|
end
|
35
34
|
|
36
35
|
it "splits text with hyphen into words" do
|
37
36
|
text = "Lorem ipsum dolor-sit amet"
|
38
|
-
array = %w(Lorem ipsum dolor sit amet)
|
39
37
|
|
40
|
-
|
38
|
+
subject.words(text).should eq(@array)
|
41
39
|
end
|
42
40
|
|
43
41
|
it "splits text with slash into words" do
|
44
42
|
text = "Lorem ipsum dolor sit/amet"
|
45
|
-
array = %w(Lorem ipsum dolor sit amet)
|
46
43
|
|
47
|
-
|
44
|
+
subject.words(text).should eq(@array)
|
48
45
|
end
|
49
46
|
|
50
47
|
it "splits text with round brackets into words" do
|
51
48
|
text = "Lorem ipsum(dolor sit)amet"
|
52
|
-
array = %w(Lorem ipsum dolor sit amet)
|
53
49
|
|
54
|
-
|
50
|
+
subject.words(text).should eq(@array)
|
55
51
|
end
|
56
52
|
|
57
53
|
it "splits text with square brackets into words" do
|
58
54
|
text = "Lorem ipsum dolor[sit]amet"
|
59
|
-
array = %w(Lorem ipsum dolor sit amet)
|
60
55
|
|
61
|
-
|
56
|
+
subject.words(text).should eq(@array)
|
62
57
|
end
|
63
58
|
|
64
59
|
it "splits text with pointy brackets into words" do
|
65
60
|
text = "Lorem<ipsum dolor sit>amet"
|
66
|
-
array = %w(Lorem ipsum dolor sit amet)
|
67
61
|
|
68
|
-
|
62
|
+
subject.words(text).should eq(@array)
|
69
63
|
end
|
70
64
|
|
71
65
|
it "splits text with braces into words" do
|
72
66
|
text = "Lorem ipsum{dolor}sit amet"
|
73
|
-
array = %w(Lorem ipsum dolor sit amet)
|
74
67
|
|
75
|
-
|
68
|
+
subject.words(text).should eq(@array)
|
69
|
+
end
|
70
|
+
|
71
|
+
it "splits text with vertical bar into words" do
|
72
|
+
text = "Lorem ipsum|dolor sit amet"
|
73
|
+
|
74
|
+
subject.words(text).should eq(@array)
|
76
75
|
end
|
76
|
+
|
77
|
+
it "splits text with tilde into words" do
|
78
|
+
text = "Lorem ipsum dolor~sit amet"
|
79
|
+
|
80
|
+
subject.words(text).should eq(@array)
|
81
|
+
end
|
82
|
+
|
83
|
+
# Spanish
|
84
|
+
it "splits text with inverted question and exclamation marks into words" do
|
85
|
+
text = "Lorem¿ipsum?dolor¡sit!amet"
|
86
|
+
|
87
|
+
subject.words(text).should eq(@array)
|
88
|
+
end
|
89
|
+
|
77
90
|
end
|
78
|
-
end
|
91
|
+
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: lexical_units
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.3
|
4
|
+
version: 0.0.4
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Aleksander Malaszkiewicz
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2013-07-
|
11
|
+
date: 2013-07-11 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -60,10 +60,12 @@ files:
|
|
60
60
|
- lib/lexical_units.rb
|
61
61
|
- lib/lexical_units/sentences.rb
|
62
62
|
- lib/lexical_units/string.rb
|
63
|
+
- lib/lexical_units/syllables.rb
|
63
64
|
- lib/lexical_units/version.rb
|
64
65
|
- lib/lexical_units/words.rb
|
65
66
|
- spec/lexical_units/sentences_spec.rb
|
66
67
|
- spec/lexical_units/string_spec.rb
|
68
|
+
- spec/lexical_units/syllables_spec.rb
|
67
69
|
- spec/lexical_units/words_spec.rb
|
68
70
|
- spec/spec_helper.rb
|
69
71
|
homepage: ''
|
@@ -93,5 +95,6 @@ summary: Split text into lexical units
|
|
93
95
|
test_files:
|
94
96
|
- spec/lexical_units/sentences_spec.rb
|
95
97
|
- spec/lexical_units/string_spec.rb
|
98
|
+
- spec/lexical_units/syllables_spec.rb
|
96
99
|
- spec/lexical_units/words_spec.rb
|
97
100
|
- spec/spec_helper.rb
|