lexical_units 0.0.8 → 0.0.9
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +4 -0
- data/lib/lexical_units/sentences.rb +2 -2
- data/lib/lexical_units/string.rb +4 -0
- data/lib/lexical_units/version.rb +1 -1
- data/lib/lexical_units/words.rb +2 -2
- data/lib/lexical_units/words_and_sentences.rb +17 -0
- data/lib/lexical_units/words_without_digits.rb +2 -2
- data/lib/lexical_units.rb +1 -0
- data/spec/lexical_units/string_spec.rb +11 -0
- data/spec/lexical_units/words_and_sentences_spec.rb +17 -0
- metadata +5 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 9e7452aeb319ad29ed2bd345360a1908293e1bc9
|
4
|
+
data.tar.gz: 1763722a67cca06bac3dcb0aa5021ddbf8b2e5b3
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 995dce0a37c0bcf20f60f3e9378e7fb3e5577dc4dc29919878f1c81dc01dc080346e683bd73fe900d7e844893210ee9c5fe603ecdd42461c7d580d5f9b4ed4c2
|
7
|
+
data.tar.gz: ca5991b36cc8dd1fffe0dd87ea4f352311f4b62f55542937851e477f935c879aca9e81ddc5ee2aef87e57a5074b2fd0cc22428920c5a80462a3a37b6316d8c62
|
data/CHANGELOG.md
CHANGED
@@ -4,8 +4,8 @@
|
|
4
4
|
module LexicalUnits
|
5
5
|
# Split text into sentences
|
6
6
|
#
|
7
|
-
# self.words(
|
8
|
-
# self.words(
|
7
|
+
# self.sentences('Lorem, ipsum. Dolor?') #=> ['Lorem, ipsum.', 'Dolor?']
|
8
|
+
# self.sentences('Lorem! Ipsum dolor?') #=> ['Lorem!', 'Ipsum dolor?']
|
9
9
|
def self.sentences(text)
|
10
10
|
separators = LexicalUnits.sentence_separators
|
11
11
|
regexp = Regexp.new("[^#{separators}]+[#{separators}]{1,3}")
|
data/lib/lexical_units/string.rb
CHANGED
data/lib/lexical_units/words.rb
CHANGED
@@ -4,8 +4,8 @@
|
|
4
4
|
module LexicalUnits
|
5
5
|
# Split text into words
|
6
6
|
#
|
7
|
-
# self.words(
|
8
|
-
# self.words(
|
7
|
+
# self.words('Lorem ipsum dolor sit') #=> ['Lorem','ipsum', 'dolor', 'sit']
|
8
|
+
# self.words('Lorem, ipsum. Dolor?') #=> ['Lorem', 'ipsum', 'Dolor']
|
9
9
|
def self.words(text)
|
10
10
|
regexp = Regexp.new("[#{LexicalUnits.separators}]")
|
11
11
|
text.gsub(regexp, ' ').split(' ')
|
@@ -0,0 +1,17 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
# Part of split into sentences and words
|
4
|
+
module LexicalUnits
|
5
|
+
# Split text into sentences and each into words
|
6
|
+
#
|
7
|
+
# self.words_and_sentences('Lorem, ipsum. Dolor?') #=>
|
8
|
+
# [
|
9
|
+
# ['Lorem', 'ipsum'],
|
10
|
+
# ['Dolor']
|
11
|
+
# ]
|
12
|
+
def self.words_and_sentences(text)
|
13
|
+
LexicalUnits.sentences(text).map do |sentence|
|
14
|
+
LexicalUnits.words(sentence)
|
15
|
+
end
|
16
|
+
end
|
17
|
+
end
|
@@ -4,8 +4,8 @@
|
|
4
4
|
module LexicalUnits
|
5
5
|
# Split text into words without digits
|
6
6
|
#
|
7
|
-
# self.words(
|
8
|
-
# self.words(
|
7
|
+
# self.words_without_digits('Lorem 0 ipsum') #=> ['Lorem', 'ipsum']
|
8
|
+
# self.words_without_digits('Lorem ipsum 100') #=> ['Lorem', 'ipsum']
|
9
9
|
def self.words_without_digits(text)
|
10
10
|
LexicalUnits.words(text).delete_if { |word| numeric?(word) }
|
11
11
|
end
|
data/lib/lexical_units.rb
CHANGED
@@ -25,6 +25,17 @@ describe LexicalUnits::String do
|
|
25
25
|
end
|
26
26
|
end
|
27
27
|
|
28
|
+
context '#words_and_sentences' do
|
29
|
+
it 'splits String into words and sentences' do
|
30
|
+
array = [%w(Lorem ipsum), %w(Dolor sit), %w(Amet)]
|
31
|
+
string = array.map do |sentence|
|
32
|
+
sentence.join(' ')
|
33
|
+
end.join('. ') + '.'
|
34
|
+
|
35
|
+
string.words_and_sentences.should eq(array)
|
36
|
+
end
|
37
|
+
end
|
38
|
+
|
28
39
|
context '#words_without_digits' do
|
29
40
|
it 'splits String into words (no ditigs)' do
|
30
41
|
array = %w(Lorem ipsum dolor sit amet)
|
@@ -0,0 +1,17 @@
|
|
1
|
+
# coding: utf-8
|
2
|
+
require 'spec_helper'
|
3
|
+
|
4
|
+
describe LexicalUnits do
|
5
|
+
context '.words_and_sentences' do
|
6
|
+
it 'splits text into sentences and each into words' do
|
7
|
+
text = 'Lorem ipsum dolor! Sit amet? Consectetur adipiscing elit.'
|
8
|
+
array = [
|
9
|
+
%w(Lorem ipsum dolor),
|
10
|
+
%w(Sit amet),
|
11
|
+
%w(Consectetur adipiscing elit)
|
12
|
+
]
|
13
|
+
|
14
|
+
subject.words_and_sentences(text).should eq(array)
|
15
|
+
end
|
16
|
+
end
|
17
|
+
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: lexical_units
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.
|
4
|
+
version: 0.0.9
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Aleksander Malaszkiewicz
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2013-
|
11
|
+
date: 2013-11-01 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -63,10 +63,12 @@ files:
|
|
63
63
|
- lib/lexical_units/syllables.rb
|
64
64
|
- lib/lexical_units/version.rb
|
65
65
|
- lib/lexical_units/words.rb
|
66
|
+
- lib/lexical_units/words_and_sentences.rb
|
66
67
|
- lib/lexical_units/words_without_digits.rb
|
67
68
|
- spec/lexical_units/sentences_spec.rb
|
68
69
|
- spec/lexical_units/string_spec.rb
|
69
70
|
- spec/lexical_units/syllables_spec.rb
|
71
|
+
- spec/lexical_units/words_and_sentences_spec.rb
|
70
72
|
- spec/lexical_units/words_spec.rb
|
71
73
|
- spec/lexical_units/words_without_digits_spec.rb
|
72
74
|
- spec/spec_helper.rb
|
@@ -98,6 +100,7 @@ test_files:
|
|
98
100
|
- spec/lexical_units/sentences_spec.rb
|
99
101
|
- spec/lexical_units/string_spec.rb
|
100
102
|
- spec/lexical_units/syllables_spec.rb
|
103
|
+
- spec/lexical_units/words_and_sentences_spec.rb
|
101
104
|
- spec/lexical_units/words_spec.rb
|
102
105
|
- spec/lexical_units/words_without_digits_spec.rb
|
103
106
|
- spec/spec_helper.rb
|