lexical_units 0.0.8 → 0.0.9

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: fedd5fd50283e6f7f2fb2881a8c46326cdeffb7b
4
- data.tar.gz: ec83c68109e1568fdd8b7b1b2e4628ea28038b2c
3
+ metadata.gz: 9e7452aeb319ad29ed2bd345360a1908293e1bc9
4
+ data.tar.gz: 1763722a67cca06bac3dcb0aa5021ddbf8b2e5b3
5
5
  SHA512:
6
- metadata.gz: b92570fd96372f27851e2dd3c9d771d534ae99f5a49847ba9289dc61f6c716a446ff3148d33bb409c0a38a6953856ee9f71683648d12c4d1da56fd5960de44a1
7
- data.tar.gz: f749de922a2cd228733abc2de4690ffd9853e49bbba5533fe46625f41230147e0c78d5db11155b2b40be33287d4a854d597bc34e10747c5105554d337271cacc
6
+ metadata.gz: 995dce0a37c0bcf20f60f3e9378e7fb3e5577dc4dc29919878f1c81dc01dc080346e683bd73fe900d7e844893210ee9c5fe603ecdd42461c7d580d5f9b4ed4c2
7
+ data.tar.gz: ca5991b36cc8dd1fffe0dd87ea4f352311f4b62f55542937851e477f935c879aca9e81ddc5ee2aef87e57a5074b2fd0cc22428920c5a80462a3a37b6316d8c62
data/CHANGELOG.md CHANGED
@@ -29,3 +29,7 @@
29
29
  ## v0.0.8
30
30
 
31
31
  * code clean up
32
+
33
+ ## v0.0.9
34
+
35
+ * added splitting text into sentences and each sentence into words
@@ -4,8 +4,8 @@
4
4
  module LexicalUnits
5
5
  # Split text into sentences
6
6
  #
7
- # self.words("Lorem, ipsum. Dolor?") #=> ["Lorem, ipsum.", "Dolor?"]
8
- # self.words("Lorem! Ipsum dolor?") #=> ["Lorem!", "Ipsum dolor?"]
7
+ # self.sentences('Lorem, ipsum. Dolor?') #=> ['Lorem, ipsum.', 'Dolor?']
8
+ # self.sentences('Lorem! Ipsum dolor?') #=> ['Lorem!', 'Ipsum dolor?']
9
9
  def self.sentences(text)
10
10
  separators = LexicalUnits.sentence_separators
11
11
  regexp = Regexp.new("[^#{separators}]+[#{separators}]{1,3}")
@@ -10,6 +10,10 @@ module LexicalUnits
10
10
  LexicalUnits.sentences(self)
11
11
  end
12
12
 
13
+ def words_and_sentences
14
+ LexicalUnits.words_and_sentences(self)
15
+ end
16
+
13
17
  def words_without_digits
14
18
  LexicalUnits.words_without_digits(self)
15
19
  end
@@ -1,4 +1,4 @@
1
1
  # Gem version
2
2
  module LexicalUnits
3
- VERSION = '0.0.8'
3
+ VERSION = '0.0.9'
4
4
  end
@@ -4,8 +4,8 @@
4
4
  module LexicalUnits
5
5
  # Split text into words
6
6
  #
7
- # self.words("Lorem ipsum dolor sit") #=> ["Lorem","ipsum", "dolor", "sit"]
8
- # self.words("Lorem, ipsum. Dolor?") #=> ["Lorem", "ipsum", "Dolor"]
7
+ # self.words('Lorem ipsum dolor sit') #=> ['Lorem', 'ipsum', 'dolor', 'sit']
8
+ # self.words('Lorem, ipsum. Dolor?') #=> ['Lorem', 'ipsum', 'Dolor']
9
9
  def self.words(text)
10
10
  regexp = Regexp.new("[#{LexicalUnits.separators}]")
11
11
  text.gsub(regexp, ' ').split(' ')
@@ -0,0 +1,17 @@
1
+ # encoding: utf-8
2
+
3
+ # Splitting text into sentences and each sentence into words
4
+ module LexicalUnits
5
+ # Split text into sentences and each sentence into words
6
+ #
7
+ # self.words_and_sentences('Lorem, ipsum. Dolor?') #=>
8
+ # [
9
+ # ['Lorem', 'ipsum'],
10
+ # ['Dolor']
11
+ # ]
12
+ def self.words_and_sentences(text)
13
+ LexicalUnits.sentences(text).map do |sentence|
14
+ LexicalUnits.words(sentence)
15
+ end
16
+ end
17
+ end
@@ -4,8 +4,8 @@
4
4
  module LexicalUnits
5
5
  # Split text into words without digits
6
6
  #
7
- # self.words("Lorem 0 ipsum") #=> ["Lorem", "ipsum"]
8
- # self.words("Lorem ipsum 100") #=> ["Lorem", "ipsum"]
7
+ # self.words_without_digits('Lorem 0 ipsum') #=> ['Lorem', 'ipsum']
8
+ # self.words_without_digits('Lorem ipsum 100') #=> ['Lorem', 'ipsum']
9
9
  def self.words_without_digits(text)
10
10
  LexicalUnits.words(text).delete_if { |word| numeric?(word) }
11
11
  end
data/lib/lexical_units.rb CHANGED
@@ -1,6 +1,7 @@
1
1
  require 'lexical_units/words'
2
2
  require 'lexical_units/sentences'
3
3
  require 'lexical_units/syllables'
4
+ require 'lexical_units/words_and_sentences'
4
5
  require 'lexical_units/words_without_digits'
5
6
  require 'lexical_units/string'
6
7
  require 'lexical_units/version'
@@ -25,6 +25,17 @@ describe LexicalUnits::String do
25
25
  end
26
26
  end
27
27
 
28
+ context '#words_and_sentences' do
29
+ it 'splits String into words and sentences' do
30
+ array = [%w(Lorem ipsum), %w(Dolor sit), %w(Amet)]
31
+ string = array.map do |sentence|
32
+ sentence.join(' ')
33
+ end.join('. ') + '.'
34
+
35
+ string.words_and_sentences.should eq(array)
36
+ end
37
+ end
38
+
28
39
  context '#words_without_digits' do
29
40
  it 'splits String into words (no ditigs)' do
30
41
  array = %w(Lorem ipsum dolor sit amet)
@@ -0,0 +1,17 @@
1
+ # coding: utf-8
2
+ require 'spec_helper'
3
+
4
+ describe LexicalUnits do
5
+ context '.words_and_sentences' do
6
+ it 'splits text into sentences and each into words' do
7
+ text = 'Lorem ipsum dolor! Sit amet? Consectetur adipiscing elit.'
8
+ array = [
9
+ %w(Lorem ipsum dolor),
10
+ %w(Sit amet),
11
+ %w(Consectetur adipiscing elit)
12
+ ]
13
+
14
+ subject.words_and_sentences(text).should eq(array)
15
+ end
16
+ end
17
+ end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: lexical_units
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.8
4
+ version: 0.0.9
5
5
  platform: ruby
6
6
  authors:
7
7
  - Aleksander Malaszkiewicz
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2013-08-15 00:00:00.000000000 Z
11
+ date: 2013-11-01 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -63,10 +63,12 @@ files:
63
63
  - lib/lexical_units/syllables.rb
64
64
  - lib/lexical_units/version.rb
65
65
  - lib/lexical_units/words.rb
66
+ - lib/lexical_units/words_and_sentences.rb
66
67
  - lib/lexical_units/words_without_digits.rb
67
68
  - spec/lexical_units/sentences_spec.rb
68
69
  - spec/lexical_units/string_spec.rb
69
70
  - spec/lexical_units/syllables_spec.rb
71
+ - spec/lexical_units/words_and_sentences_spec.rb
70
72
  - spec/lexical_units/words_spec.rb
71
73
  - spec/lexical_units/words_without_digits_spec.rb
72
74
  - spec/spec_helper.rb
@@ -98,6 +100,7 @@ test_files:
98
100
  - spec/lexical_units/sentences_spec.rb
99
101
  - spec/lexical_units/string_spec.rb
100
102
  - spec/lexical_units/syllables_spec.rb
103
+ - spec/lexical_units/words_and_sentences_spec.rb
101
104
  - spec/lexical_units/words_spec.rb
102
105
  - spec/lexical_units/words_without_digits_spec.rb
103
106
  - spec/spec_helper.rb