lexical_units 0.0.1 → 0.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: f525e4f5314f36a83f70fe377e9fab926bc837ff
4
- data.tar.gz: 166743d0dd6094a56146f2c0de941e573c151505
3
+ metadata.gz: 35f8950c1907e3f92afb2a3dc64ee3506779d87c
4
+ data.tar.gz: 3b08b4af5ada8ee6abaad85621b7f0cfd5850df7
5
5
  SHA512:
6
- metadata.gz: 2005b725f605df1857f138d24d7febb224c887dfb5ba00626a735eba275a4073a68fb823ba12023699aad1ebbb8ce8452838d6bcb37521c03c277dd0eebb0f61
7
- data.tar.gz: 9bffe65d097bd7580044f8f6cd8469663b29fb18f08b8a6da19f70afc7173afa61ba79aa5c6524bcf2d263df6b9d44b59acc1d050c7efe29fc9965d3aba74f06
6
+ metadata.gz: 7d610db7578359ee8724beea9f019b4b62e514ece052fa4d6d5b417af1069c58818387329c18b97145761522fc52ab613f20ea8e5d079b097baadad7102a83fe
7
+ data.tar.gz: eb0fd0fca6c27dbcb4169a97f4e18969f06f204b613866cee58a0365102603c131643b711bf9c42d86d06d29e50aceed173b0fee785081c073ed0b1db8dabaf9
data/CHANGELOG.md ADDED
@@ -0,0 +1,7 @@
1
+ ## v0.0.1
2
+
3
+ * initial release
4
+
5
+ ## v0.0.2
6
+
7
+ * added splitting text into sentences
data/README.md CHANGED
@@ -20,6 +20,14 @@ Or install it yourself as:
20
20
 
21
21
  ```ruby
22
22
  LexicalUnits::words(text)
23
+ LexicalUnits::sentences(text)
24
+ ```
25
+
26
+ You can also mix these methods into the String class:
27
+ ```ruby
28
+ class String
29
+ include LexicalUnits::String
30
+ end
23
31
  ```
24
32
 
25
33
  ## Contributing
data/lib/lexical_units.rb CHANGED
@@ -1,4 +1,6 @@
1
1
  require "lexical_units/words"
2
+ require "lexical_units/sentences"
3
+ require "lexical_units/string"
2
4
  require "lexical_units/version"
3
5
 
4
6
  module LexicalUnits
@@ -0,0 +1,19 @@
1
+ module LexicalUnits
2
+ # Split text into sentences, each ending in one to three terminators (. ? ! ‽)
3
+ # Text after the last terminator is not matched, so it is left out of the result.
4
+ # self.sentences("Lorem, ipsum. Dolor?") #=> ["Lorem, ipsum.", "Dolor?"]
5
+ # self.sentences("Lorem! Ipsum dolor?") #=> ["Lorem!", "Ipsum dolor?"]
6
+ def self.sentences(text)
7
+ separators = LexicalUnits::sentence_separators
8
+ regexp = Regexp.new("[^#{separators}]+[#{separators}]{1,3}")
9
+ text.scan(regexp).map(&:strip)
10
+ end
11
+
12
+ private # NOTE(review): bare `private` does not apply to `def self.` methods; private_class_method would be needed to hide the helper below
13
+ def self.sentence_separators # terminator characters (regexp-escaped), joined for use inside a character class
14
+ [
15
+ '\.', '\?', '\!',
16
+ '‽'
17
+ ].join
18
+ end
19
+ end
@@ -0,0 +1,11 @@
1
+ module LexicalUnits
2
+ module String # mixin: instance-method wrappers that pass self to the LexicalUnits module methods
3
+ def words
4
+ LexicalUnits::words(self)
5
+ end
6
+
7
+ def sentences
8
+ LexicalUnits::sentences(self)
9
+ end
10
+ end
11
+ end
@@ -1,3 +1,3 @@
1
1
  module LexicalUnits
2
- VERSION = "0.0.1"
2
+ VERSION = "0.0.2"
3
3
  end
@@ -0,0 +1,55 @@
1
+ #encoding: utf-8
2
+ require 'spec_helper'
3
+
4
+ describe LexicalUnits do
5
+ context ".sentences" do
6
+ let(:klass) { LexicalUnits }
7
+
8
+ it "splits text into sentences" do
9
+ text = %q{Lorem ipsum dolor sit amet, consectetur adipiscing elit.
10
+ Fusce ut lacinia lorem. Nullam a sem quam. Duis faucibus tortor in.}
11
+ array = [
12
+ "Lorem ipsum dolor sit amet, consectetur adipiscing elit.",
13
+ "Fusce ut lacinia lorem.",
14
+ "Nullam a sem quam.",
15
+ "Duis faucibus tortor in."
16
+ ]
17
+
18
+ klass::sentences(text).should eq(array)
19
+ end
20
+
21
+ it "splits text with question mark and exclamation mark into sentences" do
22
+ text = "Lorem ipsum dolor! Sit amet? Consectetur adipiscing elit."
23
+ array = [
24
+ "Lorem ipsum dolor!",
25
+ "Sit amet?",
26
+ "Consectetur adipiscing elit."
27
+ ]
28
+
29
+ klass::sentences(text).should eq(array)
30
+ end
31
+
32
+ it "splits text with ellipsis into sentences" do # three consecutive dots must stay attached to their sentence
33
+ text = "Lorem ipsum dolor, sit amet... Consectetur adipiscing elit."
34
+ array = [
35
+ "Lorem ipsum dolor, sit amet...",
36
+ "Consectetur adipiscing elit."
37
+ ]
38
+
39
+ klass::sentences(text).should eq(array)
40
+ end
41
+
42
+ it "splits text with interrobangs into sentences" do # covers the non-ASCII terminator, alone and repeated three times
43
+ text = "Say what‽ She's pregnant‽ Who is the father‽‽‽ Really?"
44
+ array = [
45
+ "Say what‽",
46
+ "She's pregnant‽",
47
+ "Who is the father‽‽‽",
48
+ "Really?"
49
+ ]
50
+
51
+ klass::sentences(text).should eq(array)
52
+ end
53
+
54
+ end
55
+ end
@@ -0,0 +1,25 @@
1
+ require 'spec_helper'
2
+
3
+ describe LexicalUnits::String do
4
+ class String # NOTE(review): presumably reopens the core String class, so the mixin stays included for the rest of the spec run - confirm
5
+ include LexicalUnits::String
6
+ end
7
+
8
+ context "#words" do
9
+ it "splits String into words" do
10
+ array = %w(Lorem ipsum dolor sit amet)
11
+ string = array.join(' ')
12
+
13
+ string.words.should eq(array)
14
+ end
15
+ end
16
+
17
+ context "#sentences" do
18
+ it "splits String into sentences" do
19
+ array = ["Lorem ipsum!", "Dolor sit?", "Amet."]
20
+ string = array.join # joined with no separator, so sentences must be split even without whitespace between them
21
+
22
+ string.sentences.should eq(array)
23
+ end
24
+ end
25
+ end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: lexical_units
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.1
4
+ version: 0.0.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Aleksander Malaszkiewicz
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2013-07-06 00:00:00.000000000 Z
11
+ date: 2013-07-07 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -50,6 +50,7 @@ files:
50
50
  - .ruby-gemset
51
51
  - .ruby-version
52
52
  - .travis.yml
53
+ - CHANGELOG.md
53
54
  - Gemfile
54
55
  - Guardfile
55
56
  - LICENSE.txt
@@ -57,8 +58,12 @@ files:
57
58
  - Rakefile
58
59
  - lexical_units.gemspec
59
60
  - lib/lexical_units.rb
61
+ - lib/lexical_units/sentences.rb
62
+ - lib/lexical_units/string.rb
60
63
  - lib/lexical_units/version.rb
61
64
  - lib/lexical_units/words.rb
65
+ - spec/lexical_units/sentences_spec.rb
66
+ - spec/lexical_units/string_spec.rb
62
67
  - spec/lexical_units/words_spec.rb
63
68
  - spec/spec_helper.rb
64
69
  homepage: ''
@@ -86,5 +91,7 @@ signing_key:
86
91
  specification_version: 4
87
92
  summary: Split text into lexical units
88
93
  test_files:
94
+ - spec/lexical_units/sentences_spec.rb
95
+ - spec/lexical_units/string_spec.rb
89
96
  - spec/lexical_units/words_spec.rb
90
97
  - spec/spec_helper.rb