lexical_units 0.0.1 → 0.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +7 -0
- data/README.md +8 -0
- data/lib/lexical_units.rb +2 -0
- data/lib/lexical_units/sentences.rb +19 -0
- data/lib/lexical_units/string.rb +11 -0
- data/lib/lexical_units/version.rb +1 -1
- data/spec/lexical_units/sentences_spec.rb +55 -0
- data/spec/lexical_units/string_spec.rb +25 -0
- metadata +9 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 35f8950c1907e3f92afb2a3dc64ee3506779d87c
|
4
|
+
data.tar.gz: 3b08b4af5ada8ee6abaad85621b7f0cfd5850df7
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 7d610db7578359ee8724beea9f019b4b62e514ece052fa4d6d5b417af1069c58818387329c18b97145761522fc52ab613f20ea8e5d079b097baadad7102a83fe
|
7
|
+
data.tar.gz: eb0fd0fca6c27dbcb4169a97f4e18969f06f204b613866cee58a0365102603c131643b711bf9c42d86d06d29e50aceed173b0fee785081c073ed0b1db8dabaf9
|
data/CHANGELOG.md
ADDED
data/README.md
CHANGED
@@ -20,6 +20,14 @@ Or install it yourself as:
|
|
20
20
|
|
21
21
|
```ruby
|
22
22
|
LexicalUnits::words(text)
|
23
|
+
LexicalUnits::sentences(text)
|
24
|
+
```
|
25
|
+
|
26
|
+
You can include the methods in the String class:
|
27
|
+
```ruby
|
28
|
+
class String
|
29
|
+
include LexicalUnits::String
|
30
|
+
end
|
23
31
|
```
|
24
32
|
|
25
33
|
## Contributing
|
data/lib/lexical_units.rb
CHANGED
data/lib/lexical_units/sentences.rb
ADDED
@@ -0,0 +1,19 @@
|
|
1
|
+
module LexicalUnits
|
2
|
+
# Split text into sentences
|
3
|
+
#
|
4
|
+
# self.sentences("Lorem, ipsum. Dolor?") #=> ["Lorem, ipsum.", "Dolor?"]
|
5
|
+
# self.sentences("Lorem! Ipsum dolor?") #=> ["Lorem!", "Ipsum dolor?"]
|
6
|
+
def self.sentences(text)
|
7
|
+
separators = LexicalUnits::sentence_separators
|
8
|
+
regexp = Regexp.new("[^#{separators}]+[#{separators}]{1,3}")
|
9
|
+
text.scan(regexp).map(&:strip)
|
10
|
+
end
|
11
|
+
|
12
|
+
private
|
13
|
+
def self.sentence_separators
|
14
|
+
[
|
15
|
+
'\.', '\?', '\!',
|
16
|
+
'‽'
|
17
|
+
].join
|
18
|
+
end
|
19
|
+
end
|
data/spec/lexical_units/sentences_spec.rb
ADDED
@@ -0,0 +1,55 @@
|
|
1
|
+
#encoding: utf-8
|
2
|
+
require 'spec_helper'
|
3
|
+
|
4
|
+
describe LexicalUnits do
|
5
|
+
context ".sentences" do
|
6
|
+
let(:klass) { LexicalUnits }
|
7
|
+
|
8
|
+
it "splits text into sentences" do
|
9
|
+
text = %q{Lorem ipsum dolor sit amet, consectetur adipiscing elit.
|
10
|
+
Fusce ut lacinia lorem. Nullam a sem quam. Duis faucibus tortor in.}
|
11
|
+
array = [
|
12
|
+
"Lorem ipsum dolor sit amet, consectetur adipiscing elit.",
|
13
|
+
"Fusce ut lacinia lorem.",
|
14
|
+
"Nullam a sem quam.",
|
15
|
+
"Duis faucibus tortor in."
|
16
|
+
]
|
17
|
+
|
18
|
+
klass::sentences(text).should eq(array)
|
19
|
+
end
|
20
|
+
|
21
|
+
it "splits text with question mark and exclamation mark into sentences" do
|
22
|
+
text = "Lorem ipsum dolor! Sit amet? Consectetur adipiscing elit."
|
23
|
+
array = [
|
24
|
+
"Lorem ipsum dolor!",
|
25
|
+
"Sit amet?",
|
26
|
+
"Consectetur adipiscing elit."
|
27
|
+
]
|
28
|
+
|
29
|
+
klass::sentences(text).should eq(array)
|
30
|
+
end
|
31
|
+
|
32
|
+
it "splits text with ellipsis into sentences" do
|
33
|
+
text = "Lorem ipsum dolor, sit amet... Consectetur adipiscing elit."
|
34
|
+
array = [
|
35
|
+
"Lorem ipsum dolor, sit amet...",
|
36
|
+
"Consectetur adipiscing elit."
|
37
|
+
]
|
38
|
+
|
39
|
+
klass::sentences(text).should eq(array)
|
40
|
+
end
|
41
|
+
|
42
|
+
it "splits text with interrobangs into sentences" do
|
43
|
+
text = "Say what‽ She's pregnant‽ Who is the father‽‽‽ Really?"
|
44
|
+
array = [
|
45
|
+
"Say what‽",
|
46
|
+
"She's pregnant‽",
|
47
|
+
"Who is the father‽‽‽",
|
48
|
+
"Really?"
|
49
|
+
]
|
50
|
+
|
51
|
+
klass::sentences(text).should eq(array)
|
52
|
+
end
|
53
|
+
|
54
|
+
end
|
55
|
+
end
|
data/spec/lexical_units/string_spec.rb
ADDED
@@ -0,0 +1,25 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
describe LexicalUnits::String do
|
4
|
+
class String
|
5
|
+
include LexicalUnits::String
|
6
|
+
end
|
7
|
+
|
8
|
+
context "#words" do
|
9
|
+
it "splits String into words" do
|
10
|
+
array = %w(Lorem ipsum dolor sit amet)
|
11
|
+
string = array.join(' ')
|
12
|
+
|
13
|
+
string.words.should eq(array)
|
14
|
+
end
|
15
|
+
end
|
16
|
+
|
17
|
+
context "#sentences" do
|
18
|
+
it "splits String into sentences" do
|
19
|
+
array = ["Lorem ipsum!", "Dolor sit?", "Amet."]
|
20
|
+
string = array.join
|
21
|
+
|
22
|
+
string.sentences.should eq(array)
|
23
|
+
end
|
24
|
+
end
|
25
|
+
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: lexical_units
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.1
|
4
|
+
version: 0.0.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Aleksander Malaszkiewicz
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2013-07-
|
11
|
+
date: 2013-07-07 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -50,6 +50,7 @@ files:
|
|
50
50
|
- .ruby-gemset
|
51
51
|
- .ruby-version
|
52
52
|
- .travis.yml
|
53
|
+
- CHANGELOG.md
|
53
54
|
- Gemfile
|
54
55
|
- Guardfile
|
55
56
|
- LICENSE.txt
|
@@ -57,8 +58,12 @@ files:
|
|
57
58
|
- Rakefile
|
58
59
|
- lexical_units.gemspec
|
59
60
|
- lib/lexical_units.rb
|
61
|
+
- lib/lexical_units/sentences.rb
|
62
|
+
- lib/lexical_units/string.rb
|
60
63
|
- lib/lexical_units/version.rb
|
61
64
|
- lib/lexical_units/words.rb
|
65
|
+
- spec/lexical_units/sentences_spec.rb
|
66
|
+
- spec/lexical_units/string_spec.rb
|
62
67
|
- spec/lexical_units/words_spec.rb
|
63
68
|
- spec/spec_helper.rb
|
64
69
|
homepage: ''
|
@@ -86,5 +91,7 @@ signing_key:
|
|
86
91
|
specification_version: 4
|
87
92
|
summary: Split text into lexical units
|
88
93
|
test_files:
|
94
|
+
- spec/lexical_units/sentences_spec.rb
|
95
|
+
- spec/lexical_units/string_spec.rb
|
89
96
|
- spec/lexical_units/words_spec.rb
|
90
97
|
- spec/spec_helper.rb
|