lexical_units 0.0.7 → 0.0.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: e0900a1a365540816cacaf4ea41ae0ce6d70a37f
4
- data.tar.gz: 77c84fbc21845351e53eb83fd912afc389700e34
3
+ metadata.gz: fedd5fd50283e6f7f2fb2881a8c46326cdeffb7b
4
+ data.tar.gz: ec83c68109e1568fdd8b7b1b2e4628ea28038b2c
5
5
  SHA512:
6
- metadata.gz: 2ccaaba2910f859a987252cb9dfe38b0812462657077382fcb377e4c05d59259df9b565d56977e3699d0c6427b12da8e9d3bf1390948bd1e10fcb5cd9ca05639
7
- data.tar.gz: 2a3bec215bdf473fe76287090aafe31f25d75ca481c0515c5c90011cd81444a9e957b32b3cfdd1a61a86046b89884a53549da05fc264534badbe652034c54ac9
6
+ metadata.gz: b92570fd96372f27851e2dd3c9d771d534ae99f5a49847ba9289dc61f6c716a446ff3148d33bb409c0a38a6953856ee9f71683648d12c4d1da56fd5960de44a1
7
+ data.tar.gz: f749de922a2cd228733abc2de4690ffd9853e49bbba5533fe46625f41230147e0c78d5db11155b2b40be33287d4a854d597bc34e10747c5105554d337271cacc
data/CHANGELOG.md CHANGED
@@ -25,3 +25,7 @@
25
25
  ## v0.0.7
26
26
 
27
27
  * added split into words without digits
28
+
29
+ ## v0.0.8
30
+
31
+ * code clean up
data/README.md CHANGED
@@ -1,4 +1,5 @@
1
- # LexicalUnits [![Gem Version](https://badge.fury.io/rb/lexical_units.png)](http://badge.fury.io/rb/lexical_units) [![Build Status](https://travis-ci.org/fractalsoft/lexical_units.png)](https://travis-ci.org/fractalsoft/lexical_units) [![Dependency Status](https://gemnasium.com/fractalsoft/lexical_units.png)](https://gemnasium.com/fractalsoft/lexical_units) [![Coverage Status](https://coveralls.io/repos/fractalsoft/lexical_units/badge.png)](https://coveralls.io/r/fractalsoft/lexical_units)
1
+ # LexicalUnits [![Gem Version](https://badge.fury.io/rb/lexical_units.png)](http://badge.fury.io/rb/lexical_units) [![Build Status](https://travis-ci.org/fractalsoft/lexical_units.png)](https://travis-ci.org/fractalsoft/lexical_units) [![Dependency Status](https://gemnasium.com/fractalsoft/lexical_units.png)](https://gemnasium.com/fractalsoft/lexical_units) [![Coverage Status](https://coveralls.io/repos/fractalsoft/lexical_units/badge.png)](https://coveralls.io/r/fractalsoft/lexical_units) [![Stories in Ready](https://badge.waffle.io/fractalsoft/lexical_units.png)](http://waffle.io/fractalsoft/lexical_units)
2
+
2
3
  [![endorse](https://api.coderwall.com/torrocus/endorsecount.png)](https://coderwall.com/torrocus)
3
4
 
4
5
  Lexical unit is a single word, a part of a word, or a chain of words that forms the basic elements of a language's lexicon.
@@ -22,6 +23,7 @@ Or install it yourself as:
22
23
  ```ruby
23
24
  LexicalUnits::words(text)
24
25
  LexicalUnits::sentences(text)
26
+ LexicalUnits::words_without_digits(text)
25
27
  ```
26
28
 
27
29
  You can include methods into String class:
@@ -9,7 +9,7 @@ Gem::Specification.new do |spec|
9
9
  spec.authors = ["Aleksander Malaszkiewicz"]
10
10
  spec.email = ["info@fractalsoft.org"]
11
11
  spec.summary = %q{Split text into lexical units}
12
- spec.homepage = ""
12
+ spec.homepage = "https://github.com/fractalsoft/lexical_units"
13
13
  spec.license = "MIT"
14
14
 
15
15
  spec.files = `git ls-files`.split($/)
@@ -1,20 +1,23 @@
1
1
  # encoding: utf-8
2
+
3
+ # Part of split into sentences
2
4
  module LexicalUnits
3
5
  # Split text into sentences
4
6
  #
5
7
  # self.sentences("Lorem, ipsum. Dolor?") #=> ["Lorem, ipsum.", "Dolor?"]
6
8
  # self.sentences("Lorem! Ipsum dolor?") #=> ["Lorem!", "Ipsum dolor?"]
7
9
  def self.sentences(text)
8
- separators = LexicalUnits::sentence_separators
10
+ separators = LexicalUnits.sentence_separators
9
11
  regexp = Regexp.new("[^#{separators}]+[#{separators}]{1,3}")
10
12
  text.scan(regexp).map(&:strip)
11
13
  end
12
14
 
13
15
  private
16
+
14
17
  def self.sentence_separators
15
18
  [
16
19
  '\.', '\?', '\!',
17
- ""
20
+ ''
18
21
  ].join
19
22
  end
20
23
  end
@@ -1,12 +1,17 @@
1
1
  # encoding: utf-8
2
2
  module LexicalUnits
3
+ # Use lexical units inside String class
3
4
  module String
4
5
  def words
5
- LexicalUnits::words(self)
6
+ LexicalUnits.words(self)
6
7
  end
7
8
 
8
9
  def sentences
9
- LexicalUnits::sentences(self)
10
+ LexicalUnits.sentences(self)
11
+ end
12
+
13
+ def words_without_digits
14
+ LexicalUnits.words_without_digits(self)
10
15
  end
11
16
  end
12
17
  end
@@ -1,4 +1,6 @@
1
1
  # encoding: utf-8
2
+
3
+ # Part of split into syllables (not implemented yet)
2
4
  module LexicalUnits
3
5
  def self.syllables(text)
4
6
  end
@@ -1,3 +1,4 @@
1
+ # Gem version
1
2
  module LexicalUnits
2
- VERSION = "0.0.7"
3
+ VERSION = '0.0.8'
3
4
  end
@@ -1,27 +1,28 @@
1
1
  # encoding: utf-8
2
+
3
+ # Part of split into words
2
4
  module LexicalUnits
3
5
  # Split text into words
4
6
  #
5
- # self.words("Lorem ipsum dolor sit") #=> ["Lorem", "ipsum", "dolor", "sit"]
7
+ # self.words("Lorem ipsum dolor sit") #=> ["Lorem", "ipsum", "dolor", "sit"]
6
8
  # self.words("Lorem, ipsum. Dolor?") #=> ["Lorem", "ipsum", "Dolor"]
7
9
  def self.words(text)
8
- regexp = Regexp.new("[#{LexicalUnits::separators}]")
9
- text.gsub(regexp, " ").split(" ")
10
+ regexp = Regexp.new("[#{LexicalUnits.separators}]")
11
+ text.gsub(regexp, ' ').split(' ')
10
12
  end
11
13
 
12
14
  private
15
+
13
16
  def self.separators
14
17
  [
15
- '\,', '\:', '\;',
16
- '\.', '\?', '\!',
17
- '\/',
18
- '\(', '\)',
19
- '\[', '\]',
20
- '\>', '\<',
21
- '\{', '\}',
18
+ '\,', '\:', '\;', '\.', '\?', '\!', '\/',
19
+ '\(', '\)', '\[', '\]', '\>', '\<', '\{', '\}',
22
20
  '\|', '\~',
23
21
  "\¿", "\¡",
24
- '\=', '\"'
22
+ '\=', '\"',
23
+ "\»", "\«",
24
+ '\@', '\#',
25
+ '\+'
25
26
  ].join
26
27
  end
27
28
  end
@@ -1,16 +1,21 @@
1
1
  # encoding: utf-8
2
+
3
+ # Part of split into words, but excluding digits
2
4
  module LexicalUnits
3
5
  # Split text into words without digits
4
6
  #
5
7
  # self.words_without_digits("Lorem 0 ipsum") #=> ["Lorem", "ipsum"]
6
8
  # self.words_without_digits("Lorem ipsum 100") #=> ["Lorem", "ipsum"]
7
9
  def self.words_without_digits(text)
8
- LexicalUnits::words(text).delete_if { |word| numeric?(word) }
10
+ LexicalUnits.words(text).delete_if { |word| numeric?(word) }
9
11
  end
10
12
 
11
13
  private
14
+
12
15
  def self.numeric?(value)
13
16
  return true if value =~ /^\d+$/
14
- true if Float(value) rescue false
17
+ true if Float(value)
18
+ rescue
19
+ false
15
20
  end
16
21
  end
data/lib/lexical_units.rb CHANGED
@@ -1,9 +1,6 @@
1
- require "lexical_units/words"
2
- require "lexical_units/sentences"
3
- require "lexical_units/syllables"
4
- require "lexical_units/words_without_digits"
5
- require "lexical_units/string"
6
- require "lexical_units/version"
7
-
8
- module LexicalUnits
9
- end
1
+ require 'lexical_units/words'
2
+ require 'lexical_units/sentences'
3
+ require 'lexical_units/syllables'
4
+ require 'lexical_units/words_without_digits'
5
+ require 'lexical_units/string'
6
+ require 'lexical_units/version'
@@ -2,48 +2,49 @@
2
2
  require 'spec_helper'
3
3
 
4
4
  describe LexicalUnits do
5
- context ".sentences" do
6
- it "splits text into sentences" do
7
- text = %q{Lorem ipsum dolor sit amet, consectetur adipiscing elit.
5
+ context '.sentences' do
6
+ it 'splits text into sentences' do
7
+ text = %q{Lorem ipsum dolor sit amet. Consectetur adipiscing elit.
8
8
  Fusce ut lacinia lorem. Nullam a sem quam. Duis faucibus tortor in.}
9
9
  array = [
10
- "Lorem ipsum dolor sit amet, consectetur adipiscing elit.",
11
- "Fusce ut lacinia lorem.",
12
- "Nullam a sem quam.",
13
- "Duis faucibus tortor in."
10
+ 'Lorem ipsum dolor sit amet.',
11
+ 'Consectetur adipiscing elit.',
12
+ 'Fusce ut lacinia lorem.',
13
+ 'Nullam a sem quam.',
14
+ 'Duis faucibus tortor in.'
14
15
  ]
15
16
 
16
17
  subject.sentences(text).should eq(array)
17
18
  end
18
19
 
19
- it "splits text with question mark and exclamation mark into sentences" do
20
- text = "Lorem ipsum dolor! Sit amet? Consectetur adipiscing elit."
20
+ it 'splits text with question mark and exclamation mark into sentences' do
21
+ text = 'Lorem ipsum dolor! Sit amet? Consectetur adipiscing elit.'
21
22
  array = [
22
- "Lorem ipsum dolor!",
23
- "Sit amet?",
24
- "Consectetur adipiscing elit."
23
+ 'Lorem ipsum dolor!',
24
+ 'Sit amet?',
25
+ 'Consectetur adipiscing elit.'
25
26
  ]
26
27
 
27
28
  subject.sentences(text).should eq(array)
28
29
  end
29
30
 
30
- it "splits text with ellipsis into sentences" do
31
- text = "Lorem ipsum dolor, sit amet... Consectetur adipiscing elit."
31
+ it 'splits text with ellipsis into sentences' do
32
+ text = 'Lorem ipsum dolor, sit amet... Consectetur adipiscing elit.'
32
33
  array = [
33
- "Lorem ipsum dolor, sit amet...",
34
- "Consectetur adipiscing elit."
34
+ 'Lorem ipsum dolor, sit amet...',
35
+ 'Consectetur adipiscing elit.'
35
36
  ]
36
37
 
37
38
  subject.sentences(text).should eq(array)
38
39
  end
39
40
 
40
- it "splits text with interrobangs into sentences" do
41
+ it 'splits text with interrobangs into sentences' do
41
42
  text = "Say what‽ She's pregnant‽ Who is the father‽‽‽ Really?"
42
43
  array = [
43
- "Say what‽",
44
+ 'Say what‽',
44
45
  "She's pregnant‽",
45
- "Who is the father‽‽‽",
46
- "Really?"
46
+ 'Who is the father‽‽‽',
47
+ 'Really?'
47
48
  ]
48
49
 
49
50
  subject.sentences(text).should eq(array)
@@ -2,12 +2,13 @@
2
2
  require 'spec_helper'
3
3
 
4
4
  describe LexicalUnits::String do
5
+ # Reopen String to include LexicalUnits::String for these specs
5
6
  class String
6
7
  include LexicalUnits::String
7
8
  end
8
9
 
9
- context "#words" do
10
- it "splits String into words" do
10
+ context '#words' do
11
+ it 'splits String into words' do
11
12
  array = %w(Lorem ipsum dolor sit amet)
12
13
  string = array.join(' ')
13
14
 
@@ -15,12 +16,21 @@ describe LexicalUnits::String do
15
16
  end
16
17
  end
17
18
 
18
- context "#sentences" do
19
- it "splits String into sentences" do
20
- array = ["Lorem ipsum!", "Dolor sit?", "Amet."]
19
+ context '#sentences' do
20
+ it 'splits String into sentences' do
21
+ array = ['Lorem ipsum!', 'Dolor sit?', 'Amet.']
21
22
  string = array.join
22
23
 
23
24
  string.sentences.should eq(array)
24
25
  end
25
26
  end
27
+
28
+ context '#words_without_digits' do
29
+ it 'splits String into words (no ditigs)' do
30
+ array = %w(Lorem ipsum dolor sit amet)
31
+ string = 'Lorem 1 ipsum 23 dolor 456 sit 7890 amet'
32
+
33
+ string.words_without_digits.should eq(array)
34
+ end
35
+ end
26
36
  end
@@ -2,7 +2,6 @@
2
2
  require 'spec_helper'
3
3
 
4
4
  describe LexicalUnits do
5
- context ".syllables" do
6
- let(:klass) { LexicalUnits }
5
+ context '.syllables' do
7
6
  end
8
7
  end
@@ -2,102 +2,126 @@
2
2
  require 'spec_helper'
3
3
 
4
4
  describe LexicalUnits do
5
- context ".words" do
5
+ context '.words' do
6
6
  before do
7
7
  @array = %w(Lorem ipsum dolor sit amet)
8
8
  end
9
9
 
10
- it "splits text with whitespaces into words" do
11
- text = "Lorem ipsum dolor sit amet"
10
+ it 'splits text with whitespaces into words' do
11
+ text = 'Lorem ipsum dolor sit amet'
12
12
 
13
13
  subject.words(text).should eq(@array)
14
14
  end
15
15
 
16
- it "splits text with comma, colon and semicolon into words" do
17
- text = "Lorem ipsum,dolor:sit;amet"
16
+ it 'splits text with comma, colon and semicolon into words' do
17
+ text = 'Lorem ipsum,dolor:sit;amet'
18
18
 
19
19
  subject.words(text).should eq(@array)
20
20
  end
21
21
 
22
- it "splits text with dot, question mark and exclamation mark into words" do
23
- text = "Lorem ipsum.dolor?sit!amet"
22
+ it 'splits text with dot, question mark and exclamation mark into words' do
23
+ text = 'Lorem ipsum.dolor?sit!amet'
24
24
 
25
25
  subject.words(text).should eq(@array)
26
26
  end
27
27
 
28
- it "splits other text with whitespaces, comma and dot into words" do
29
- text = "Lorem ipsum dolor sit amet, consectetur adipiscing elit."
28
+ it 'splits other text with whitespaces, comma and dot into words' do
29
+ text = 'Lorem ipsum dolor sit amet, consectetur adipiscing elit.'
30
30
  array = %w(Lorem ipsum dolor sit amet consectetur adipiscing elit)
31
31
 
32
32
  subject.words(text).should eq(array)
33
33
  end
34
34
 
35
- it "no splits text with hyphen into words" do
36
- text = "Lorem ipsum dolor-sit amet"
35
+ it 'no splits text with hyphen into words' do
36
+ text = 'Lorem ipsum dolor-sit amet'
37
37
  array = %w(Lorem ipsum dolor-sit amet)
38
38
 
39
39
  subject.words(text).should eq(array)
40
40
  end
41
41
 
42
- it "splits text with slash into words" do
43
- text = "Lorem ipsum dolor sit/amet"
42
+ it 'splits text with slash into words' do
43
+ text = 'Lorem ipsum dolor sit/amet'
44
44
 
45
45
  subject.words(text).should eq(@array)
46
46
  end
47
47
 
48
- it "splits text with round brackets into words" do
49
- text = "Lorem ipsum(dolor sit)amet"
48
+ it 'splits text with round brackets into words' do
49
+ text = 'Lorem ipsum(dolor sit)amet'
50
50
 
51
51
  subject.words(text).should eq(@array)
52
52
  end
53
53
 
54
- it "splits text with square brackets into words" do
55
- text = "Lorem ipsum dolor[sit]amet"
54
+ it 'splits text with square brackets into words' do
55
+ text = 'Lorem ipsum dolor[sit]amet'
56
56
 
57
57
  subject.words(text).should eq(@array)
58
58
  end
59
59
 
60
- it "splits text with pointy brackets into words" do
61
- text = "Lorem<ipsum dolor sit>amet"
60
+ it 'splits text with pointy brackets into words' do
61
+ text = 'Lorem<ipsum dolor sit>amet'
62
62
 
63
63
  subject.words(text).should eq(@array)
64
64
  end
65
65
 
66
- it "splits text with braces into words" do
67
- text = "Lorem ipsum{dolor}sit amet"
66
+ it 'splits text with braces into words' do
67
+ text = 'Lorem ipsum{dolor}sit amet'
68
68
 
69
69
  subject.words(text).should eq(@array)
70
70
  end
71
71
 
72
- it "splits text with vertical bar into words" do
73
- text = "Lorem ipsum|dolor sit amet"
72
+ it 'splits text with vertical bar into words' do
73
+ text = 'Lorem ipsum|dolor sit amet'
74
74
 
75
75
  subject.words(text).should eq(@array)
76
76
  end
77
77
 
78
- it "splits text with tilde into words" do
79
- text = "Lorem ipsum dolor~sit amet"
78
+ it 'splits text with tilde into words' do
79
+ text = 'Lorem ipsum dolor~sit amet'
80
80
 
81
81
  subject.words(text).should eq(@array)
82
82
  end
83
83
 
84
84
  # Spanish
85
- it "splits text with inverted question and exclamation marks into words" do
86
- text = "Lorem¿ipsum?dolor¡sit!amet"
85
+ it 'splits text with inverted question and exclamation marks into words' do
86
+ text = 'Lorem¿ipsum?dolor¡sit!amet'
87
87
 
88
88
  subject.words(text).should eq(@array)
89
89
  end
90
90
 
91
- it "splits text with equals sign into words" do
92
- text = "Lorem ipsum=dolor sit amet"
91
+ it 'splits text with equals sign into words' do
92
+ text = 'Lorem ipsum=dolor sit amet'
93
93
 
94
94
  subject.words(text).should eq(@array)
95
95
  end
96
96
 
97
- it "splits text with typewriter double quotes into words" do
97
+ it 'splits text with typewriter double quotes into words' do
98
98
  text = %Q(Lorem"ipsum dolor"sit amet)
99
99
 
100
100
  subject.words(text).should eq(@array)
101
101
  end
102
+
103
+ it 'split text with non-English quotation marks into words' do
104
+ text = %Q(Lorem»ipsum dolor«sit amet)
105
+
106
+ subject.words(text).should eq(@array)
107
+ end
108
+
109
+ it "split text with 'at sign' (@) into words" do
110
+ text = %Q(Lorem@ipsum dolor sit amet)
111
+
112
+ subject.words(text).should eq(@array)
113
+ end
114
+
115
+ it "split text with 'number sign (#) into words" do
116
+ text = %Q(Lorem ipsum#dolor sit amet)
117
+
118
+ subject.words(text).should eq(@array)
119
+ end
120
+
121
+ it 'split text with plus (+) into words' do
122
+ text = %Q(Lorem+ipsum dolor+sit amet)
123
+
124
+ subject.words(text).should eq(@array)
125
+ end
102
126
  end
103
127
  end
@@ -2,13 +2,13 @@
2
2
  require 'spec_helper'
3
3
 
4
4
  describe LexicalUnits do
5
- context ".words_without_digits" do
5
+ context '.words_without_digits' do
6
6
  [
7
- {text: "Lorem ipsum 12345", array: %w(Lorem ipsum)},
8
- {text: "dolor 98765 sit amet.", array: %w(dolor sit amet)}
7
+ { text: 'Lorem ipsum 12345', array: %w(Lorem ipsum) },
8
+ { text: 'dolor 98765 sit amet.', array: %w(dolor sit amet) }
9
9
  ].each do |hash|
10
10
  text, array = hash.values
11
- it "splits text into words without digits" do
11
+ it 'splits text into words without digits' do
12
12
  subject.words_without_digits(text).should eq(array)
13
13
  end
14
14
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: lexical_units
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.7
4
+ version: 0.0.8
5
5
  platform: ruby
6
6
  authors:
7
7
  - Aleksander Malaszkiewicz
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2013-07-16 00:00:00.000000000 Z
11
+ date: 2013-08-15 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -70,7 +70,7 @@ files:
70
70
  - spec/lexical_units/words_spec.rb
71
71
  - spec/lexical_units/words_without_digits_spec.rb
72
72
  - spec/spec_helper.rb
73
- homepage: ''
73
+ homepage: https://github.com/fractalsoft/lexical_units
74
74
  licenses:
75
75
  - MIT
76
76
  metadata: {}