lexical_units 0.0.8 → 0.0.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: fedd5fd50283e6f7f2fb2881a8c46326cdeffb7b
4
- data.tar.gz: ec83c68109e1568fdd8b7b1b2e4628ea28038b2c
3
+ metadata.gz: 9e7452aeb319ad29ed2bd345360a1908293e1bc9
4
+ data.tar.gz: 1763722a67cca06bac3dcb0aa5021ddbf8b2e5b3
5
5
  SHA512:
6
- metadata.gz: b92570fd96372f27851e2dd3c9d771d534ae99f5a49847ba9289dc61f6c716a446ff3148d33bb409c0a38a6953856ee9f71683648d12c4d1da56fd5960de44a1
7
- data.tar.gz: f749de922a2cd228733abc2de4690ffd9853e49bbba5533fe46625f41230147e0c78d5db11155b2b40be33287d4a854d597bc34e10747c5105554d337271cacc
6
+ metadata.gz: 995dce0a37c0bcf20f60f3e9378e7fb3e5577dc4dc29919878f1c81dc01dc080346e683bd73fe900d7e844893210ee9c5fe603ecdd42461c7d580d5f9b4ed4c2
7
+ data.tar.gz: ca5991b36cc8dd1fffe0dd87ea4f352311f4b62f55542937851e477f935c879aca9e81ddc5ee2aef87e57a5074b2fd0cc22428920c5a80462a3a37b6316d8c62
data/CHANGELOG.md CHANGED
@@ -29,3 +29,7 @@
29
29
  ## v0.0.8
30
30
 
31
31
  * code clean up
32
+
33
+ ## v0.0.9
34
+
35
+ * added split into sentences and each into words
@@ -4,8 +4,8 @@
4
4
  module LexicalUnits
5
5
  # Split text into sentences
6
6
  #
7
- # self.words("Lorem, ipsum. Dolor?") #=> ["Lorem, ipsum.", "Dolor?"]
8
- # self.words("Lorem! Ipsum dolor?") #=> ["Lorem!", "Ipsum dolor?"]
7
+ # self.sentences('Lorem, ipsum. Dolor?') #=> ['Lorem, ipsum.', 'Dolor?']
8
+ # self.sentences('Lorem! Ipsum dolor?') #=> ['Lorem!', 'Ipsum dolor?']
9
9
  def self.sentences(text)
10
10
  separators = LexicalUnits.sentence_separators
11
11
  regexp = Regexp.new("[^#{separators}]+[#{separators}]{1,3}")
@@ -10,6 +10,10 @@ module LexicalUnits
10
10
  LexicalUnits.sentences(self)
11
11
  end
12
12
 
13
+ def words_and_sentences
14
+ LexicalUnits.words_and_sentences(self)
15
+ end
16
+
13
17
  def words_without_digits
14
18
  LexicalUnits.words_without_digits(self)
15
19
  end
@@ -1,4 +1,4 @@
1
1
  # Gem version
2
2
  module LexicalUnits
3
- VERSION = '0.0.8'
3
+ VERSION = '0.0.9'
4
4
  end
@@ -4,8 +4,8 @@
4
4
  module LexicalUnits
5
5
  # Split text into words
6
6
  #
7
- # self.words("Lorem ipsum dolor sit") #=> ["Lorem","ipsum", "dolor", "sit"]
8
- # self.words("Lorem, ipsum. Dolor?") #=> ["Lorem", "ipsum", "Dolor"]
7
+ # self.words('Lorem ipsum dolor sit') #=> ['Lorem', 'ipsum', 'dolor', 'sit']
8
+ # self.words('Lorem, ipsum. Dolor?') #=> ['Lorem', 'ipsum', 'Dolor']
9
9
  def self.words(text)
10
10
  regexp = Regexp.new("[#{LexicalUnits.separators}]")
11
11
  text.gsub(regexp, ' ').split(' ')
@@ -0,0 +1,17 @@
1
+ # encoding: utf-8
2
+
3
+ # Part of split into sentences and words
4
+ module LexicalUnits
5
+ # Split text into sentences and each into words
6
+ #
7
+ # self.words_and_sentences('Lorem, ipsum. Dolor?') #=>
8
+ # [
9
+ # ['Lorem', 'ipsum'],
10
+ # ['Dolor']
11
+ # ]
12
+ def self.words_and_sentences(text)
13
+ LexicalUnits.sentences(text).map do |sentence|
14
+ LexicalUnits.words(sentence)
15
+ end
16
+ end
17
+ end
@@ -4,8 +4,8 @@
4
4
  module LexicalUnits
5
5
  # Split text into words without digits
6
6
  #
7
- # self.words("Lorem 0 ipsum") #=> ["Lorem", "ipsum"]
8
- # self.words("Lorem ipsum 100") #=> ["Lorem", "ipsum"]
7
+ # self.words_without_digits('Lorem 0 ipsum') #=> ['Lorem', 'ipsum']
8
+ # self.words_without_digits('Lorem ipsum 100') #=> ['Lorem', 'ipsum']
9
9
  def self.words_without_digits(text)
10
10
  LexicalUnits.words(text).delete_if { |word| numeric?(word) }
11
11
  end
data/lib/lexical_units.rb CHANGED
@@ -1,6 +1,7 @@
1
1
  require 'lexical_units/words'
2
2
  require 'lexical_units/sentences'
3
3
  require 'lexical_units/syllables'
4
+ require 'lexical_units/words_and_sentences'
4
5
  require 'lexical_units/words_without_digits'
5
6
  require 'lexical_units/string'
6
7
  require 'lexical_units/version'
@@ -25,6 +25,17 @@ describe LexicalUnits::String do
25
25
  end
26
26
  end
27
27
 
28
+ context '#words_and_sentences' do
29
+ it 'splits String into words and sentences' do
30
+ array = [%w(Lorem ipsum), %w(Dolor sit), %w(Amet)]
31
+ string = array.map do |sentence|
32
+ sentence.join(' ')
33
+ end.join('. ') + '.'
34
+
35
+ string.words_and_sentences.should eq(array)
36
+ end
37
+ end
38
+
28
39
  context '#words_without_digits' do
29
40
  it 'splits String into words (no ditigs)' do
30
41
  array = %w(Lorem ipsum dolor sit amet)
@@ -0,0 +1,17 @@
1
+ # coding: utf-8
2
+ require 'spec_helper'
3
+
4
+ describe LexicalUnits do
5
+ context '.words_and_sentences' do
6
+ it 'splits text into sentences and each into words' do
7
+ text = 'Lorem ipsum dolor! Sit amet? Consectetur adipiscing elit.'
8
+ array = [
9
+ %w(Lorem ipsum dolor),
10
+ %w(Sit amet),
11
+ %w(Consectetur adipiscing elit)
12
+ ]
13
+
14
+ subject.words_and_sentences(text).should eq(array)
15
+ end
16
+ end
17
+ end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: lexical_units
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.8
4
+ version: 0.0.9
5
5
  platform: ruby
6
6
  authors:
7
7
  - Aleksander Malaszkiewicz
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2013-08-15 00:00:00.000000000 Z
11
+ date: 2013-11-01 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -63,10 +63,12 @@ files:
63
63
  - lib/lexical_units/syllables.rb
64
64
  - lib/lexical_units/version.rb
65
65
  - lib/lexical_units/words.rb
66
+ - lib/lexical_units/words_and_sentences.rb
66
67
  - lib/lexical_units/words_without_digits.rb
67
68
  - spec/lexical_units/sentences_spec.rb
68
69
  - spec/lexical_units/string_spec.rb
69
70
  - spec/lexical_units/syllables_spec.rb
71
+ - spec/lexical_units/words_and_sentences_spec.rb
70
72
  - spec/lexical_units/words_spec.rb
71
73
  - spec/lexical_units/words_without_digits_spec.rb
72
74
  - spec/spec_helper.rb
@@ -98,6 +100,7 @@ test_files:
98
100
  - spec/lexical_units/sentences_spec.rb
99
101
  - spec/lexical_units/string_spec.rb
100
102
  - spec/lexical_units/syllables_spec.rb
103
+ - spec/lexical_units/words_and_sentences_spec.rb
101
104
  - spec/lexical_units/words_spec.rb
102
105
  - spec/lexical_units/words_without_digits_spec.rb
103
106
  - spec/spec_helper.rb