lexical_units 0.0.7 → 0.0.8

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: e0900a1a365540816cacaf4ea41ae0ce6d70a37f
4
- data.tar.gz: 77c84fbc21845351e53eb83fd912afc389700e34
3
+ metadata.gz: fedd5fd50283e6f7f2fb2881a8c46326cdeffb7b
4
+ data.tar.gz: ec83c68109e1568fdd8b7b1b2e4628ea28038b2c
5
5
  SHA512:
6
- metadata.gz: 2ccaaba2910f859a987252cb9dfe38b0812462657077382fcb377e4c05d59259df9b565d56977e3699d0c6427b12da8e9d3bf1390948bd1e10fcb5cd9ca05639
7
- data.tar.gz: 2a3bec215bdf473fe76287090aafe31f25d75ca481c0515c5c90011cd81444a9e957b32b3cfdd1a61a86046b89884a53549da05fc264534badbe652034c54ac9
6
+ metadata.gz: b92570fd96372f27851e2dd3c9d771d534ae99f5a49847ba9289dc61f6c716a446ff3148d33bb409c0a38a6953856ee9f71683648d12c4d1da56fd5960de44a1
7
+ data.tar.gz: f749de922a2cd228733abc2de4690ffd9853e49bbba5533fe46625f41230147e0c78d5db11155b2b40be33287d4a854d597bc34e10747c5105554d337271cacc
data/CHANGELOG.md CHANGED
@@ -25,3 +25,7 @@
25
25
  ## v0.0.7
26
26
 
27
27
  * added split into words without digits
28
+
29
+ ## v0.0.8
30
+
31
+ * code clean up
data/README.md CHANGED
@@ -1,4 +1,5 @@
1
- # LexicalUnits [![Gem Version](https://badge.fury.io/rb/lexical_units.png)](http://badge.fury.io/rb/lexical_units) [![Build Status](https://travis-ci.org/fractalsoft/lexical_units.png)](https://travis-ci.org/fractalsoft/lexical_units) [![Dependency Status](https://gemnasium.com/fractalsoft/lexical_units.png)](https://gemnasium.com/fractalsoft/lexical_units) [![Coverage Status](https://coveralls.io/repos/fractalsoft/lexical_units/badge.png)](https://coveralls.io/r/fractalsoft/lexical_units)
1
+ # LexicalUnits [![Gem Version](https://badge.fury.io/rb/lexical_units.png)](http://badge.fury.io/rb/lexical_units) [![Build Status](https://travis-ci.org/fractalsoft/lexical_units.png)](https://travis-ci.org/fractalsoft/lexical_units) [![Dependency Status](https://gemnasium.com/fractalsoft/lexical_units.png)](https://gemnasium.com/fractalsoft/lexical_units) [![Coverage Status](https://coveralls.io/repos/fractalsoft/lexical_units/badge.png)](https://coveralls.io/r/fractalsoft/lexical_units) [![Stories in Ready](https://badge.waffle.io/fractalsoft/lexical_units.png)](http://waffle.io/fractalsoft/lexical_units)
2
+
2
3
  [![endorse](https://api.coderwall.com/torrocus/endorsecount.png)](https://coderwall.com/torrocus)
3
4
 
4
5
  Lexical unit is a single word, a part of a word, or a chain of words that forms the basic elements of a language's lexicon.
@@ -22,6 +23,7 @@ Or install it yourself as:
22
23
  ```ruby
23
24
  LexicalUnits::words(text)
24
25
  LexicalUnits::sentences(text)
26
+ LexicalUnits::words_without_digits(text)
25
27
  ```
26
28
 
27
29
  You can include methods into String class:
@@ -9,7 +9,7 @@ Gem::Specification.new do |spec|
9
9
  spec.authors = ["Aleksander Malaszkiewicz"]
10
10
  spec.email = ["info@fractalsoft.org"]
11
11
  spec.summary = %q{Split text into lexical units}
12
- spec.homepage = ""
12
+ spec.homepage = "https://github.com/fractalsoft/lexical_units"
13
13
  spec.license = "MIT"
14
14
 
15
15
  spec.files = `git ls-files`.split($/)
@@ -1,20 +1,23 @@
1
1
  # encoding: utf-8
2
+
3
+ # Part of split into sentences
2
4
  module LexicalUnits
3
5
  # Split text into sentences
4
6
  #
5
7
  # self.sentences("Lorem, ipsum. Dolor?") #=> ["Lorem, ipsum.", "Dolor?"]
6
8
  # self.sentences("Lorem! Ipsum dolor?") #=> ["Lorem!", "Ipsum dolor?"]
7
9
  def self.sentences(text)
8
- separators = LexicalUnits::sentence_separators
10
+ separators = LexicalUnits.sentence_separators
9
11
  regexp = Regexp.new("[^#{separators}]+[#{separators}]{1,3}")
10
12
  text.scan(regexp).map(&:strip)
11
13
  end
12
14
 
13
15
  private
16
+
14
17
  def self.sentence_separators
15
18
  [
16
19
  '\.', '\?', '\!',
17
- ""
20
+ ''
18
21
  ].join
19
22
  end
20
23
  end
@@ -1,12 +1,17 @@
1
1
  # encoding: utf-8
2
2
  module LexicalUnits
3
+ # Use lexical units inside String class
3
4
  module String
4
5
  def words
5
- LexicalUnits::words(self)
6
+ LexicalUnits.words(self)
6
7
  end
7
8
 
8
9
  def sentences
9
- LexicalUnits::sentences(self)
10
+ LexicalUnits.sentences(self)
11
+ end
12
+
13
+ def words_without_digits
14
+ LexicalUnits.words_without_digits(self)
10
15
  end
11
16
  end
12
17
  end
@@ -1,4 +1,6 @@
1
1
  # encoding: utf-8
2
+
3
+ #
2
4
  module LexicalUnits
3
5
  def self.syllables(text)
4
6
  end
@@ -1,3 +1,4 @@
1
+ # Gem version
1
2
  module LexicalUnits
2
- VERSION = "0.0.7"
3
+ VERSION = '0.0.8'
3
4
  end
@@ -1,27 +1,28 @@
1
1
  # encoding: utf-8
2
+
3
+ # Part of split into words
2
4
  module LexicalUnits
3
5
  # Split text into words
4
6
  #
5
- # self.words("Lorem ipsum dolor sit") #=> ["Lorem", "ipsum", "dolor", "sit"]
7
+ # self.words("Lorem ipsum dolor sit") #=> ["Lorem","ipsum", "dolor", "sit"]
6
8
  # self.words("Lorem, ipsum. Dolor?") #=> ["Lorem", "ipsum", "Dolor"]
7
9
  def self.words(text)
8
- regexp = Regexp.new("[#{LexicalUnits::separators}]")
9
- text.gsub(regexp, " ").split(" ")
10
+ regexp = Regexp.new("[#{LexicalUnits.separators}]")
11
+ text.gsub(regexp, ' ').split(' ')
10
12
  end
11
13
 
12
14
  private
15
+
13
16
  def self.separators
14
17
  [
15
- '\,', '\:', '\;',
16
- '\.', '\?', '\!',
17
- '\/',
18
- '\(', '\)',
19
- '\[', '\]',
20
- '\>', '\<',
21
- '\{', '\}',
18
+ '\,', '\:', '\;', '\.', '\?', '\!', '\/',
19
+ '\(', '\)', '\[', '\]', '\>', '\<', '\{', '\}',
22
20
  '\|', '\~',
23
21
  "\¿", "\¡",
24
- '\=', '\"'
22
+ '\=', '\"',
23
+ "\»", "\«",
24
+ '\@', '\#',
25
+ '\+'
25
26
  ].join
26
27
  end
27
28
  end
@@ -1,16 +1,21 @@
1
1
  # encoding: utf-8
2
+
3
+ # Part of split into words, but excluding digits
2
4
  module LexicalUnits
3
5
  # Split text into words without digits
4
6
  #
5
7
  # self.words_without_digits("Lorem 0 ipsum") #=> ["Lorem", "ipsum"]
6
8
  # self.words_without_digits("Lorem ipsum 100") #=> ["Lorem", "ipsum"]
7
9
  def self.words_without_digits(text)
8
- LexicalUnits::words(text).delete_if { |word| numeric?(word) }
10
+ LexicalUnits.words(text).delete_if { |word| numeric?(word) }
9
11
  end
10
12
 
11
13
  private
14
+
12
15
  def self.numeric?(value)
13
16
  return true if value =~ /^\d+$/
14
- true if Float(value) rescue false
17
+ true if Float(value)
18
+ rescue
19
+ false
15
20
  end
16
21
  end
data/lib/lexical_units.rb CHANGED
@@ -1,9 +1,6 @@
1
- require "lexical_units/words"
2
- require "lexical_units/sentences"
3
- require "lexical_units/syllables"
4
- require "lexical_units/words_without_digits"
5
- require "lexical_units/string"
6
- require "lexical_units/version"
7
-
8
- module LexicalUnits
9
- end
1
+ require 'lexical_units/words'
2
+ require 'lexical_units/sentences'
3
+ require 'lexical_units/syllables'
4
+ require 'lexical_units/words_without_digits'
5
+ require 'lexical_units/string'
6
+ require 'lexical_units/version'
@@ -2,48 +2,49 @@
2
2
  require 'spec_helper'
3
3
 
4
4
  describe LexicalUnits do
5
- context ".sentences" do
6
- it "splits text into sentences" do
7
- text = %q{Lorem ipsum dolor sit amet, consectetur adipiscing elit.
5
+ context '.sentences' do
6
+ it 'splits text into sentences' do
7
+ text = %q{Lorem ipsum dolor sit amet. Consectetur adipiscing elit.
8
8
  Fusce ut lacinia lorem. Nullam a sem quam. Duis faucibus tortor in.}
9
9
  array = [
10
- "Lorem ipsum dolor sit amet, consectetur adipiscing elit.",
11
- "Fusce ut lacinia lorem.",
12
- "Nullam a sem quam.",
13
- "Duis faucibus tortor in."
10
+ 'Lorem ipsum dolor sit amet.',
11
+ 'Consectetur adipiscing elit.',
12
+ 'Fusce ut lacinia lorem.',
13
+ 'Nullam a sem quam.',
14
+ 'Duis faucibus tortor in.'
14
15
  ]
15
16
 
16
17
  subject.sentences(text).should eq(array)
17
18
  end
18
19
 
19
- it "splits text with question mark and exclamation mark into sentences" do
20
- text = "Lorem ipsum dolor! Sit amet? Consectetur adipiscing elit."
20
+ it 'splits text with question mark and exclamation mark into sentences' do
21
+ text = 'Lorem ipsum dolor! Sit amet? Consectetur adipiscing elit.'
21
22
  array = [
22
- "Lorem ipsum dolor!",
23
- "Sit amet?",
24
- "Consectetur adipiscing elit."
23
+ 'Lorem ipsum dolor!',
24
+ 'Sit amet?',
25
+ 'Consectetur adipiscing elit.'
25
26
  ]
26
27
 
27
28
  subject.sentences(text).should eq(array)
28
29
  end
29
30
 
30
- it "splits text with ellipsis into sentences" do
31
- text = "Lorem ipsum dolor, sit amet... Consectetur adipiscing elit."
31
+ it 'splits text with ellipsis into sentences' do
32
+ text = 'Lorem ipsum dolor, sit amet... Consectetur adipiscing elit.'
32
33
  array = [
33
- "Lorem ipsum dolor, sit amet...",
34
- "Consectetur adipiscing elit."
34
+ 'Lorem ipsum dolor, sit amet...',
35
+ 'Consectetur adipiscing elit.'
35
36
  ]
36
37
 
37
38
  subject.sentences(text).should eq(array)
38
39
  end
39
40
 
40
- it "splits text with interrobangs into sentences" do
41
+ it 'splits text with interrobangs into sentences' do
41
42
  text = "Say what‽ She's pregnant‽ Who is the father‽‽‽ Really?"
42
43
  array = [
43
- "Say what‽",
44
+ 'Say what‽',
44
45
  "She's pregnant‽",
45
- "Who is the father‽‽‽",
46
- "Really?"
46
+ 'Who is the father‽‽‽',
47
+ 'Really?'
47
48
  ]
48
49
 
49
50
  subject.sentences(text).should eq(array)
@@ -2,12 +2,13 @@
2
2
  require 'spec_helper'
3
3
 
4
4
  describe LexicalUnits::String do
5
+ # Testing class
5
6
  class String
6
7
  include LexicalUnits::String
7
8
  end
8
9
 
9
- context "#words" do
10
- it "splits String into words" do
10
+ context '#words' do
11
+ it 'splits String into words' do
11
12
  array = %w(Lorem ipsum dolor sit amet)
12
13
  string = array.join(' ')
13
14
 
@@ -15,12 +16,21 @@ describe LexicalUnits::String do
15
16
  end
16
17
  end
17
18
 
18
- context "#sentences" do
19
- it "splits String into sentences" do
20
- array = ["Lorem ipsum!", "Dolor sit?", "Amet."]
19
+ context '#sentences' do
20
+ it 'splits String into sentences' do
21
+ array = ['Lorem ipsum!', 'Dolor sit?', 'Amet.']
21
22
  string = array.join
22
23
 
23
24
  string.sentences.should eq(array)
24
25
  end
25
26
  end
27
+
28
+ context '#words_without_digits' do
29
+ it 'splits String into words (no ditigs)' do
30
+ array = %w(Lorem ipsum dolor sit amet)
31
+ string = 'Lorem 1 ipsum 23 dolor 456 sit 7890 amet'
32
+
33
+ string.words_without_digits.should eq(array)
34
+ end
35
+ end
26
36
  end
@@ -2,7 +2,6 @@
2
2
  require 'spec_helper'
3
3
 
4
4
  describe LexicalUnits do
5
- context ".syllables" do
6
- let(:klass) { LexicalUnits }
5
+ context '.syllables' do
7
6
  end
8
7
  end
@@ -2,102 +2,126 @@
2
2
  require 'spec_helper'
3
3
 
4
4
  describe LexicalUnits do
5
- context ".words" do
5
+ context '.words' do
6
6
  before do
7
7
  @array = %w(Lorem ipsum dolor sit amet)
8
8
  end
9
9
 
10
- it "splits text with whitespaces into words" do
11
- text = "Lorem ipsum dolor sit amet"
10
+ it 'splits text with whitespaces into words' do
11
+ text = 'Lorem ipsum dolor sit amet'
12
12
 
13
13
  subject.words(text).should eq(@array)
14
14
  end
15
15
 
16
- it "splits text with comma, colon and semicolon into words" do
17
- text = "Lorem ipsum,dolor:sit;amet"
16
+ it 'splits text with comma, colon and semicolon into words' do
17
+ text = 'Lorem ipsum,dolor:sit;amet'
18
18
 
19
19
  subject.words(text).should eq(@array)
20
20
  end
21
21
 
22
- it "splits text with dot, question mark and exclamation mark into words" do
23
- text = "Lorem ipsum.dolor?sit!amet"
22
+ it 'splits text with dot, question mark and exclamation mark into words' do
23
+ text = 'Lorem ipsum.dolor?sit!amet'
24
24
 
25
25
  subject.words(text).should eq(@array)
26
26
  end
27
27
 
28
- it "splits other text with whitespaces, comma and dot into words" do
29
- text = "Lorem ipsum dolor sit amet, consectetur adipiscing elit."
28
+ it 'splits other text with whitespaces, comma and dot into words' do
29
+ text = 'Lorem ipsum dolor sit amet, consectetur adipiscing elit.'
30
30
  array = %w(Lorem ipsum dolor sit amet consectetur adipiscing elit)
31
31
 
32
32
  subject.words(text).should eq(array)
33
33
  end
34
34
 
35
- it "no splits text with hyphen into words" do
36
- text = "Lorem ipsum dolor-sit amet"
35
+ it 'no splits text with hyphen into words' do
36
+ text = 'Lorem ipsum dolor-sit amet'
37
37
  array = %w(Lorem ipsum dolor-sit amet)
38
38
 
39
39
  subject.words(text).should eq(array)
40
40
  end
41
41
 
42
- it "splits text with slash into words" do
43
- text = "Lorem ipsum dolor sit/amet"
42
+ it 'splits text with slash into words' do
43
+ text = 'Lorem ipsum dolor sit/amet'
44
44
 
45
45
  subject.words(text).should eq(@array)
46
46
  end
47
47
 
48
- it "splits text with round brackets into words" do
49
- text = "Lorem ipsum(dolor sit)amet"
48
+ it 'splits text with round brackets into words' do
49
+ text = 'Lorem ipsum(dolor sit)amet'
50
50
 
51
51
  subject.words(text).should eq(@array)
52
52
  end
53
53
 
54
- it "splits text with square brackets into words" do
55
- text = "Lorem ipsum dolor[sit]amet"
54
+ it 'splits text with square brackets into words' do
55
+ text = 'Lorem ipsum dolor[sit]amet'
56
56
 
57
57
  subject.words(text).should eq(@array)
58
58
  end
59
59
 
60
- it "splits text with pointy brackets into words" do
61
- text = "Lorem<ipsum dolor sit>amet"
60
+ it 'splits text with pointy brackets into words' do
61
+ text = 'Lorem<ipsum dolor sit>amet'
62
62
 
63
63
  subject.words(text).should eq(@array)
64
64
  end
65
65
 
66
- it "splits text with braces into words" do
67
- text = "Lorem ipsum{dolor}sit amet"
66
+ it 'splits text with braces into words' do
67
+ text = 'Lorem ipsum{dolor}sit amet'
68
68
 
69
69
  subject.words(text).should eq(@array)
70
70
  end
71
71
 
72
- it "splits text with vertical bar into words" do
73
- text = "Lorem ipsum|dolor sit amet"
72
+ it 'splits text with vertical bar into words' do
73
+ text = 'Lorem ipsum|dolor sit amet'
74
74
 
75
75
  subject.words(text).should eq(@array)
76
76
  end
77
77
 
78
- it "splits text with tilde into words" do
79
- text = "Lorem ipsum dolor~sit amet"
78
+ it 'splits text with tilde into words' do
79
+ text = 'Lorem ipsum dolor~sit amet'
80
80
 
81
81
  subject.words(text).should eq(@array)
82
82
  end
83
83
 
84
84
  # Spanish
85
- it "splits text with inverted question and exclamation marks into words" do
86
- text = "Lorem¿ipsum?dolor¡sit!amet"
85
+ it 'splits text with inverted question and exclamation marks into words' do
86
+ text = 'Lorem¿ipsum?dolor¡sit!amet'
87
87
 
88
88
  subject.words(text).should eq(@array)
89
89
  end
90
90
 
91
- it "splits text with equals sign into words" do
92
- text = "Lorem ipsum=dolor sit amet"
91
+ it 'splits text with equals sign into words' do
92
+ text = 'Lorem ipsum=dolor sit amet'
93
93
 
94
94
  subject.words(text).should eq(@array)
95
95
  end
96
96
 
97
- it "splits text with typewriter double quotes into words" do
97
+ it 'splits text with typewriter double quotes into words' do
98
98
  text = %Q(Lorem"ipsum dolor"sit amet)
99
99
 
100
100
  subject.words(text).should eq(@array)
101
101
  end
102
+
103
+ it 'split text with non-English quotation marks into words' do
104
+ text = %Q(Lorem»ipsum dolor«sit amet)
105
+
106
+ subject.words(text).should eq(@array)
107
+ end
108
+
109
+ it "split text with 'at sign' (@) into words" do
110
+ text = %Q(Lorem@ipsum dolor sit amet)
111
+
112
+ subject.words(text).should eq(@array)
113
+ end
114
+
115
+ it "split text with 'number sign (#) into words" do
116
+ text = %Q(Lorem ipsum#dolor sit amet)
117
+
118
+ subject.words(text).should eq(@array)
119
+ end
120
+
121
+ it 'split text with plus (+) into words' do
122
+ text = %Q(Lorem+ipsum dolor+sit amet)
123
+
124
+ subject.words(text).should eq(@array)
125
+ end
102
126
  end
103
127
  end
@@ -2,13 +2,13 @@
2
2
  require 'spec_helper'
3
3
 
4
4
  describe LexicalUnits do
5
- context ".words_without_digits" do
5
+ context '.words_without_digits' do
6
6
  [
7
- {text: "Lorem ipsum 12345", array: %w(Lorem ipsum)},
8
- {text: "dolor 98765 sit amet.", array: %w(dolor sit amet)}
7
+ { text: 'Lorem ipsum 12345', array: %w(Lorem ipsum) },
8
+ { text: 'dolor 98765 sit amet.', array: %w(dolor sit amet) }
9
9
  ].each do |hash|
10
10
  text, array = hash.values
11
- it "splits text into words without digits" do
11
+ it 'splits text into words without digits' do
12
12
  subject.words_without_digits(text).should eq(array)
13
13
  end
14
14
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: lexical_units
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.7
4
+ version: 0.0.8
5
5
  platform: ruby
6
6
  authors:
7
7
  - Aleksander Malaszkiewicz
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2013-07-16 00:00:00.000000000 Z
11
+ date: 2013-08-15 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -70,7 +70,7 @@ files:
70
70
  - spec/lexical_units/words_spec.rb
71
71
  - spec/lexical_units/words_without_digits_spec.rb
72
72
  - spec/spec_helper.rb
73
- homepage: ''
73
+ homepage: https://github.com/fractalsoft/lexical_units
74
74
  licenses:
75
75
  - MIT
76
76
  metadata: {}