lexical_units 0.0.8 → 0.0.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +4 -0
- data/lib/lexical_units/sentences.rb +2 -2
- data/lib/lexical_units/string.rb +4 -0
- data/lib/lexical_units/version.rb +1 -1
- data/lib/lexical_units/words.rb +2 -2
- data/lib/lexical_units/words_and_sentences.rb +17 -0
- data/lib/lexical_units/words_without_digits.rb +2 -2
- data/lib/lexical_units.rb +1 -0
- data/spec/lexical_units/string_spec.rb +11 -0
- data/spec/lexical_units/words_and_sentences_spec.rb +17 -0
- metadata +5 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 9e7452aeb319ad29ed2bd345360a1908293e1bc9
|
4
|
+
data.tar.gz: 1763722a67cca06bac3dcb0aa5021ddbf8b2e5b3
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 995dce0a37c0bcf20f60f3e9378e7fb3e5577dc4dc29919878f1c81dc01dc080346e683bd73fe900d7e844893210ee9c5fe603ecdd42461c7d580d5f9b4ed4c2
|
7
|
+
data.tar.gz: ca5991b36cc8dd1fffe0dd87ea4f352311f4b62f55542937851e477f935c879aca9e81ddc5ee2aef87e57a5074b2fd0cc22428920c5a80462a3a37b6316d8c62
|
data/CHANGELOG.md
CHANGED
data/lib/lexical_units/sentences.rb
CHANGED
@@ -4,8 +4,8 @@
|
|
4
4
|
module LexicalUnits
|
5
5
|
# Split text into sentences
|
6
6
|
#
|
7
|
-
# self.words(
|
8
|
-
# self.words(
|
7
|
+
# self.words('Lorem, ipsum. Dolor?') #=> ['Lorem, ipsum.', 'Dolor?']
|
8
|
+
# self.words('Lorem! Ipsum dolor?') #=> ['Lorem!', 'Ipsum dolor?']
|
9
9
|
def self.sentences(text)
|
10
10
|
separators = LexicalUnits.sentence_separators
|
11
11
|
regexp = Regexp.new("[^#{separators}]+[#{separators}]{1,3}")
|
data/lib/lexical_units/string.rb
CHANGED
data/lib/lexical_units/words.rb
CHANGED
@@ -4,8 +4,8 @@
|
|
4
4
|
module LexicalUnits
|
5
5
|
# Split text into words
|
6
6
|
#
|
7
|
-
# self.words(
|
8
|
-
# self.words(
|
7
|
+
# self.words('Lorem ipsum dolor sit') #=> ['Lorem','ipsum', 'dolor', 'sit']
|
8
|
+
# self.words('Lorem, ipsum. Dolor?') #=> ['Lorem', 'ipsum', 'Dolor']
|
9
9
|
def self.words(text)
|
10
10
|
regexp = Regexp.new("[#{LexicalUnits.separators}]")
|
11
11
|
text.gsub(regexp, ' ').split(' ')
|
data/lib/lexical_units/words_and_sentences.rb
ADDED
@@ -0,0 +1,17 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
# Part of split into sentences and words
|
4
|
+
module LexicalUnits
|
5
|
+
# Split text into sentences and each into words
|
6
|
+
#
|
7
|
+
# self.words_and_sentences('Lorem, ipsum. Dolor?') #=>
|
8
|
+
# [
|
9
|
+
# ['Lorem', 'ipsum'],
|
10
|
+
# ['Dolor']
|
11
|
+
# ]
|
12
|
+
def self.words_and_sentences(text)
|
13
|
+
LexicalUnits.sentences(text).map do |sentence|
|
14
|
+
LexicalUnits.words(sentence)
|
15
|
+
end
|
16
|
+
end
|
17
|
+
end
|
data/lib/lexical_units/words_without_digits.rb
CHANGED
@@ -4,8 +4,8 @@
|
|
4
4
|
module LexicalUnits
|
5
5
|
# Split text into words without digits
|
6
6
|
#
|
7
|
-
# self.words(
|
8
|
-
# self.words(
|
7
|
+
# self.words('Lorem 0 ipsum') #=> ['Lorem', 'ipsum']
|
8
|
+
# self.words('Lorem ipsum 100') #=> ['Lorem', 'ipsum']
|
9
9
|
def self.words_without_digits(text)
|
10
10
|
LexicalUnits.words(text).delete_if { |word| numeric?(word) }
|
11
11
|
end
|
data/lib/lexical_units.rb
CHANGED
data/spec/lexical_units/string_spec.rb
CHANGED
@@ -25,6 +25,17 @@ describe LexicalUnits::String do
|
|
25
25
|
end
|
26
26
|
end
|
27
27
|
|
28
|
+
context '#words_and_sentences' do
|
29
|
+
it 'splits String into words and sentences' do
|
30
|
+
array = [%w(Lorem ipsum), %w(Dolor sit), %w(Amet)]
|
31
|
+
string = array.map do |sentence|
|
32
|
+
sentence.join(' ')
|
33
|
+
end.join('. ') + '.'
|
34
|
+
|
35
|
+
string.words_and_sentences.should eq(array)
|
36
|
+
end
|
37
|
+
end
|
38
|
+
|
28
39
|
context '#words_without_digits' do
|
29
40
|
it 'splits String into words (no ditigs)' do
|
30
41
|
array = %w(Lorem ipsum dolor sit amet)
|
data/spec/lexical_units/words_and_sentences_spec.rb
ADDED
@@ -0,0 +1,17 @@
|
|
1
|
+
# coding: utf-8
|
2
|
+
require 'spec_helper'
|
3
|
+
|
4
|
+
describe LexicalUnits do
|
5
|
+
context '.words_and_sentences' do
|
6
|
+
it 'splits text into sentences and each into words' do
|
7
|
+
text = 'Lorem ipsum dolor! Sit amet? Consectetur adipiscing elit.'
|
8
|
+
array = [
|
9
|
+
%w(Lorem ipsum dolor),
|
10
|
+
%w(Sit amet),
|
11
|
+
%w(Consectetur adipiscing elit)
|
12
|
+
]
|
13
|
+
|
14
|
+
subject.words_and_sentences(text).should eq(array)
|
15
|
+
end
|
16
|
+
end
|
17
|
+
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: lexical_units
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.8
|
4
|
+
version: 0.0.9
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Aleksander Malaszkiewicz
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2013-
|
11
|
+
date: 2013-11-01 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -63,10 +63,12 @@ files:
|
|
63
63
|
- lib/lexical_units/syllables.rb
|
64
64
|
- lib/lexical_units/version.rb
|
65
65
|
- lib/lexical_units/words.rb
|
66
|
+
- lib/lexical_units/words_and_sentences.rb
|
66
67
|
- lib/lexical_units/words_without_digits.rb
|
67
68
|
- spec/lexical_units/sentences_spec.rb
|
68
69
|
- spec/lexical_units/string_spec.rb
|
69
70
|
- spec/lexical_units/syllables_spec.rb
|
71
|
+
- spec/lexical_units/words_and_sentences_spec.rb
|
70
72
|
- spec/lexical_units/words_spec.rb
|
71
73
|
- spec/lexical_units/words_without_digits_spec.rb
|
72
74
|
- spec/spec_helper.rb
|
@@ -98,6 +100,7 @@ test_files:
|
|
98
100
|
- spec/lexical_units/sentences_spec.rb
|
99
101
|
- spec/lexical_units/string_spec.rb
|
100
102
|
- spec/lexical_units/syllables_spec.rb
|
103
|
+
- spec/lexical_units/words_and_sentences_spec.rb
|
101
104
|
- spec/lexical_units/words_spec.rb
|
102
105
|
- spec/lexical_units/words_without_digits_spec.rb
|
103
106
|
- spec/spec_helper.rb
|