lexical_units 0.0.1 → 0.0.2

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: f525e4f5314f36a83f70fe377e9fab926bc837ff
4
- data.tar.gz: 166743d0dd6094a56146f2c0de941e573c151505
3
+ metadata.gz: 35f8950c1907e3f92afb2a3dc64ee3506779d87c
4
+ data.tar.gz: 3b08b4af5ada8ee6abaad85621b7f0cfd5850df7
5
5
  SHA512:
6
- metadata.gz: 2005b725f605df1857f138d24d7febb224c887dfb5ba00626a735eba275a4073a68fb823ba12023699aad1ebbb8ce8452838d6bcb37521c03c277dd0eebb0f61
7
- data.tar.gz: 9bffe65d097bd7580044f8f6cd8469663b29fb18f08b8a6da19f70afc7173afa61ba79aa5c6524bcf2d263df6b9d44b59acc1d050c7efe29fc9965d3aba74f06
6
+ metadata.gz: 7d610db7578359ee8724beea9f019b4b62e514ece052fa4d6d5b417af1069c58818387329c18b97145761522fc52ab613f20ea8e5d079b097baadad7102a83fe
7
+ data.tar.gz: eb0fd0fca6c27dbcb4169a97f4e18969f06f204b613866cee58a0365102603c131643b711bf9c42d86d06d29e50aceed173b0fee785081c073ed0b1db8dabaf9
data/CHANGELOG.md ADDED
@@ -0,0 +1,7 @@
1
+ ## v0.0.1
2
+
3
+ * initial release
4
+
5
+ ## v0.0.2
6
+
7
+ * added split into sentences
data/README.md CHANGED
@@ -20,6 +20,14 @@ Or install it yourself as:
20
20
 
21
21
  ```ruby
22
22
  LexicalUnits::words(text)
23
+ LexicalUnits::sentences(text)
24
+ ```
25
+
26
+ You can also mix these methods into the String class:
27
+ ```ruby
28
+ class String
29
+ include LexicalUnits::String
30
+ end
23
31
  ```
24
32
 
25
33
  ## Contributing
data/lib/lexical_units.rb CHANGED
@@ -1,4 +1,6 @@
1
1
  require "lexical_units/words"
2
+ require "lexical_units/sentences"
3
+ require "lexical_units/string"
2
4
  require "lexical_units/version"
3
5
 
4
6
  module LexicalUnits
@@ -0,0 +1,19 @@
1
module LexicalUnits
  # Split text into sentences.
  #
  # A sentence is a run of non-terminator characters followed by one to three
  # terminators (".", "?", "!" or the interrobang U+203D). Trailing text that
  # has no terminator is returned as a final sentence instead of being
  # silently dropped. Every sentence is stripped of surrounding whitespace.
  #
  #   LexicalUnits.sentences("Lorem, ipsum. Dolor?") #=> ["Lorem, ipsum.", "Dolor?"]
  #   LexicalUnits.sentences("Lorem! Ipsum dolor?")  #=> ["Lorem!", "Ipsum dolor?"]
  #   LexicalUnits.sentences("Lorem. Ipsum dolor")   #=> ["Lorem.", "Ipsum dolor"]
  #
  # @param text [String] text to split
  # @return [Array<String>] the sentences, in order of appearance
  def self.sentences(text)
    separators = sentence_separators
    # `\z` lets the last, unterminated fragment match as well.
    regexp = Regexp.new("[^#{separators}]+(?:[#{separators}]{1,3}|\\z)")
    # Whitespace-only leftovers (e.g. a trailing newline) strip down to ""
    # and are discarded.
    text.scan(regexp).map(&:strip).reject(&:empty?)
  end

  # Sentence terminators, escaped for use inside a regexp character class.
  # The interrobang is written as an escape so the file does not depend on
  # its source encoding.
  #
  # @return [String]
  def self.sentence_separators
    [
      '\.', '\?', '\!',
      "\u203D"
    ].join
  end
  # A bare `private` does not apply to `def self.` methods; this does.
  private_class_method :sentence_separators
end
@@ -0,0 +1,11 @@
1
module LexicalUnits
  # Optional mixin exposing the module-level splitters as instance methods.
  # Each method passes the receiver to the matching LexicalUnits function.
  module String
    # @return the result of LexicalUnits.words called with the receiver
    def words
      LexicalUnits.words(self)
    end

    # @return the result of LexicalUnits.sentences called with the receiver
    def sentences
      LexicalUnits.sentences(self)
    end
  end
end
@@ -1,3 +1,3 @@
1
1
  module LexicalUnits
2
- VERSION = "0.0.1"
2
+ VERSION = "0.0.2"
3
3
  end
@@ -0,0 +1,55 @@
1
# encoding: utf-8
require 'spec_helper'

# Specs for LexicalUnits.sentences: each example feeds a text to the splitter
# and compares the result with the expected list of sentences.
describe LexicalUnits do
  context ".sentences" do
    it "splits text into sentences" do
      # The input spans two lines; the line break must not leak into the output.
      text = "Lorem ipsum dolor sit amet, consectetur adipiscing elit.\n" \
             "Fusce ut lacinia lorem. Nullam a sem quam. Duis faucibus tortor in."
      expected = [
        "Lorem ipsum dolor sit amet, consectetur adipiscing elit.",
        "Fusce ut lacinia lorem.",
        "Nullam a sem quam.",
        "Duis faucibus tortor in."
      ]

      LexicalUnits.sentences(text).should eq(expected)
    end

    it "splits text with question mark and exclamation mark into sentences" do
      text = "Lorem ipsum dolor! Sit amet? Consectetur adipiscing elit."
      expected = [
        "Lorem ipsum dolor!",
        "Sit amet?",
        "Consectetur adipiscing elit."
      ]

      LexicalUnits.sentences(text).should eq(expected)
    end

    it "splits text with ellipsis into sentences" do
      text = "Lorem ipsum dolor, sit amet... Consectetur adipiscing elit."
      expected = [
        "Lorem ipsum dolor, sit amet...",
        "Consectetur adipiscing elit."
      ]

      LexicalUnits.sentences(text).should eq(expected)
    end

    it "splits text with interrobangs into sentences" do
      text = "Say what‽ She's pregnant‽ Who is the father‽‽‽ Really?"
      expected = [
        "Say what‽",
        "She's pregnant‽",
        "Who is the father‽‽‽",
        "Really?"
      ]

      LexicalUnits.sentences(text).should eq(expected)
    end
  end
end
@@ -0,0 +1,25 @@
1
require 'spec_helper'

# Specs for the String mixin: once included into the core String class,
# #words and #sentences are callable directly on a string.
describe LexicalUnits::String do
  # Reopen the core class so the examples below can call the mixed-in methods.
  class String
    include LexicalUnits::String
  end

  context "#words" do
    it "splits String into words" do
      expected = %w(Lorem ipsum dolor sit amet)

      expected.join(' ').words.should eq(expected)
    end
  end

  context "#sentences" do
    it "splits String into sentences" do
      expected = ["Lorem ipsum!", "Dolor sit?", "Amet."]

      # Joined with no separator: sentences directly abut each other.
      expected.join.sentences.should eq(expected)
    end
  end
end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: lexical_units
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.1
4
+ version: 0.0.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Aleksander Malaszkiewicz
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2013-07-06 00:00:00.000000000 Z
11
+ date: 2013-07-07 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -50,6 +50,7 @@ files:
50
50
  - .ruby-gemset
51
51
  - .ruby-version
52
52
  - .travis.yml
53
+ - CHANGELOG.md
53
54
  - Gemfile
54
55
  - Guardfile
55
56
  - LICENSE.txt
@@ -57,8 +58,12 @@ files:
57
58
  - Rakefile
58
59
  - lexical_units.gemspec
59
60
  - lib/lexical_units.rb
61
+ - lib/lexical_units/sentences.rb
62
+ - lib/lexical_units/string.rb
60
63
  - lib/lexical_units/version.rb
61
64
  - lib/lexical_units/words.rb
65
+ - spec/lexical_units/sentences_spec.rb
66
+ - spec/lexical_units/string_spec.rb
62
67
  - spec/lexical_units/words_spec.rb
63
68
  - spec/spec_helper.rb
64
69
  homepage: ''
@@ -86,5 +91,7 @@ signing_key:
86
91
  specification_version: 4
87
92
  summary: Split text into lexical units
88
93
  test_files:
94
+ - spec/lexical_units/sentences_spec.rb
95
+ - spec/lexical_units/string_spec.rb
89
96
  - spec/lexical_units/words_spec.rb
90
97
  - spec/spec_helper.rb