rundown 0.0.1 → 0.0.2

Sign up to get free protection for your applications and to get access to all the features.
data/Gemfile CHANGED
@@ -1,3 +1,5 @@
1
1
  source 'https://rubygems.org'
2
2
 
3
3
  gemspec
4
+
5
+ gem 'nickel', github: 'modsognir/nickel'
data/README.md CHANGED
@@ -1,7 +1,47 @@
1
1
  Rundown
2
2
  =======
3
3
 
4
- [![Build Status](https://travis-ci.org/modsognir/rundown.png)](https://travis-ci.org/modsognir/rundown)
4
+ [![Gem Version](https://badge.fury.io/rb/rundown.png)](http://badge.fury.io/rb/rundown) [![Build Status](https://travis-ci.org/modsognir/rundown.png)](https://travis-ci.org/modsognir/rundown)
5
+
6
+ Rundown is a simple Natural Language Processor built with Ruby, inspired by [Knwl.js](https://github.com/loadfive/Knwl.js). Rundown scans through text, user data, or just about anything for likely data of interest: phone numbers, dates, locations, emails, and times, as well as likelihood of spam and overall emotion.
7
+
8
+ ## Installation
9
+
10
+ Add this line to your application's Gemfile:
11
+
12
+ gem 'rundown'
13
+
14
+ And then execute:
15
+
16
+ $ bundle
17
+
18
+ Or install it yourself as:
19
+
20
+ $ gem install rundown
21
+
22
+ ## Usage
23
+
24
+ rd = Rundown.parse("I'll see you on the 18th, give me a ring on 07912 345 678. - Jerertt, me@example.com")
25
+
26
+ rd.emails
27
+ => ["me@example.com"]
28
+
29
+ rd.phones
30
+ => ["07912 345 678"]
31
+
32
+ rd.sentiment
33
+ => -0.5333
34
+
35
+ rd.dates
36
+ => [#<Date: 2013-12-18>]
37
+
38
+ ## Contributing
39
+
40
+ 1. Fork it
41
+ 2. Create your feature branch (`git checkout -b my-new-feature`)
42
+ 3. Commit your changes (`git commit -am 'Add some feature'`)
43
+ 4. Push to the branch (`git push origin my-new-feature`)
44
+ 5. Create new Pull Request
5
45
 
6
46
  ### Known Issues
7
47
 
data/lib/rundown.rb CHANGED
@@ -4,13 +4,17 @@ require 'phony'
4
4
  require 'nickel'
5
5
  require 'sentiment_parser'
6
6
 
7
+ require 'rundown/parser'
7
8
  require 'rundown/processor'
8
- require 'rundown/words'
9
- require 'rundown/email_processor'
10
- require 'rundown/date_processor'
11
- require 'rundown/phone_processor'
12
- require 'rundown/sentiment_processor'
9
+ require 'rundown/processors/email'
10
+ require 'rundown/processors/dates'
11
+ require 'rundown/processors/phone'
12
+ require 'rundown/processors/sentiment'
13
13
 
14
14
  module Rundown
15
15
  module_function
16
+
17
+ def parse(text)
18
+ Parser.new(text)
19
+ end
16
20
  end
@@ -0,0 +1,34 @@
1
+ module Rundown
2
+ class Parser
3
+ attr_accessor :text
4
+
5
+ def initialize(text)
6
+ @text = text
7
+ end
8
+
9
+ # def processors
10
+ # [
11
+ # ,
12
+ # Processors::Email,
13
+ # Processors::Phone,
14
+ # Processors::Sentiment
15
+ # ]
16
+ # end
17
+
18
+ def dates
19
+ @dates ||= Processors::Dates.new(text).process
20
+ end
21
+
22
+ def emails
23
+ @email ||= Processors::Email.new(text).process
24
+ end
25
+
26
+ def phones
27
+ @phone ||= Processors::Phone.new(text).process
28
+ end
29
+
30
+ def sentiment
31
+ @sentiment ||= Processors::Sentiment.new(text).process
32
+ end
33
+ end
34
+ end
@@ -3,7 +3,7 @@ module Rundown
3
3
  attr_accessor :words
4
4
 
5
5
  def initialize(words)
6
- @words = Array(words)
6
+ @words = words.to_s.split(/\s/)
7
7
  end
8
8
  end
9
9
  end
@@ -0,0 +1,17 @@
1
+ module Rundown
2
+ module Processors
3
+ class Dates < Rundown::Processor
4
+ attr_accessor :text, :parser
5
+
6
+ def initialize(words, parser=Nickel)
7
+ @text = words
8
+ @parser = parser
9
+ end
10
+
11
+ def process
12
+ parser.parse(text).occurrences
13
+ end
14
+
15
+ end
16
+ end
17
+ end
@@ -0,0 +1,29 @@
1
+ module Rundown
2
+ module Processors
3
+ class Email < Rundown::Processor
4
+ REGEX = /\A[^@\s]+@([^@\s]+\.)+[^@\s]+\z/
5
+
6
+ def cleanup_words
7
+ words = @words.map { |word|
8
+ word.gsub!(/\(|\)/, "")
9
+ }
10
+ end
11
+
12
+ def process
13
+ cleanup_words
14
+ words.select { |word|
15
+ Array(word.match(REGEX))[0]
16
+ }.reject(&:empty?).map {|word|
17
+ word.split('@')
18
+ }.reject { |words|
19
+ words.size < 2
20
+ }.select { |words|
21
+ x = Array(words.last).last.to_s.split('.').last
22
+ x.length <= 4 && !x.match(/\d+/)
23
+ }.map { |words|
24
+ words.join("@")
25
+ }
26
+ end
27
+ end
28
+ end
29
+ end
@@ -0,0 +1,20 @@
1
+ module Rundown
2
+ module Processors
3
+ class Phone < Rundown::Processor
4
+ attr_accessor :validator, :text
5
+
6
+ def initialize(words, validator=Phony)
7
+ @text = words
8
+ @validator = validator
9
+ end
10
+
11
+ def plausible?(number)
12
+ validator.plausible?(number)
13
+ end
14
+
15
+ def process
16
+ text.scan(/(\+?(\(|\)|[0-9]|\s|-|\.){4,20})/).select {|e| plausible?(e) }
17
+ end
18
+ end
19
+ end
20
+ end
@@ -0,0 +1,16 @@
1
+ module Rundown
2
+ module Processors
3
+ class Sentiment < Rundown::Processor
4
+ attr_accessor :parser
5
+
6
+ def initialize(words, parser=SentimentParser)
7
+ @parser = parser
8
+ super(words)
9
+ end
10
+
11
+ def process
12
+ parser.parse(words.join(' '))
13
+ end
14
+ end
15
+ end
16
+ end
@@ -1,3 +1,3 @@
1
1
  module Rundown
2
- VERSION = '0.0.1'
2
+ VERSION = '0.0.2'
3
3
  end
data/rundown.gemspec CHANGED
@@ -8,8 +8,8 @@ Gem::Specification.new do |spec|
8
8
  spec.version = Rundown::VERSION
9
9
  spec.authors = ["Jared Fraser"]
10
10
  spec.email = ["dev@jsf.io"]
11
- spec.description = %q{knows things}
12
- spec.summary = %q{knows things}
11
+ spec.description = %q{Extracts dates, phone numbers, sentiment and other items from naturally worded text.}
12
+ spec.summary = %q{Natural Language Processor}
13
13
  spec.homepage = ""
14
14
  spec.license = "MIT"
15
15
 
@@ -0,0 +1,18 @@
1
+ require 'spec_helper'
2
+
3
+ module Rundown
4
+ module Processors
5
+ describe Dates do
6
+ describe 'month' do
7
+ {
8
+ "I will see you on 12/15/2013" => Date.new(2013, 12, 15),
9
+ " the 28th of december." => Date.new(Time.now.year, 12, 28)
10
+ }.each do |input, expected|
11
+ it "parse '#{input}' to '#{expected}'" do
12
+ expect(Rundown::Processors::Dates.new(input).process.first.start_date.to_date).to eql(expected)
13
+ end
14
+ end
15
+ end
16
+ end
17
+ end
18
+ end
@@ -0,0 +1,9 @@
1
+ require 'spec_helper'
2
+
3
+ module Rundown
4
+ module Processors
5
+ describe Email do
6
+
7
+ end
8
+ end
9
+ end
@@ -0,0 +1,9 @@
1
+ require 'spec_helper'
2
+
3
+ module Rundown
4
+ module Processors
5
+ describe Phone do
6
+
7
+ end
8
+ end
9
+ end
@@ -0,0 +1,9 @@
1
+ require 'spec_helper'
2
+
3
+ module Rundown
4
+ module Processors
5
+ describe Sentiment do
6
+
7
+ end
8
+ end
9
+ end
data/spec/rundown_spec.rb CHANGED
@@ -5,24 +5,41 @@ describe Rundown do
5
5
  "I'm sorry, I'm extremely busy right now. I just looked at the clock, and it's 12:54 AM, I've still got a lot of work to do. Don't worry about the event tomorrow, it's been moved ahead a week, the 28th of december. Remember though, you've got to call to get a ticket soon, their # is 1-212-323-1239. Their website says it costs $23 per person.
6
6
  If you've got enough time, they have some more information on their website, http://theevent.com.
7
7
  Regards,
8
- David (david32@gmail.com)".split(/\s/)
8
+ David (david32@gmail.com)"
9
9
  }
10
10
 
11
11
  describe 'emails' do
12
- it { expect(Rundown::EmailProcessor.new(subject).process).to eql ["david32@gmail.com"]}
12
+ it { expect(Rundown::Processors::Email.new(subject).process).to eql ["david32@gmail.com"]}
13
13
  end
14
14
 
15
15
  describe 'dates' do
16
16
  # FIXME: remove dependence on start date knowledge
17
- it { expect(Rundown::DateProcessor.new(subject).process.map(&:start_date).map(&:date)).to include "20131228"}
17
+ it { expect(Rundown::Processors::Dates.new(subject).process.map(&:start_date).map(&:date)).to include "20131228"}
18
18
  end
19
19
 
20
20
  describe 'phones' do
21
- it { pending; expect(Rundown::PhoneProcessor.new(subject).process).to eql ["212-323-1239"]}
21
+ it { pending; expect(Rundown::Processors::Phone.new(subject).process).to eql ["212-323-1239"]}
22
22
  end
23
23
 
24
24
  describe 'sentiment' do
25
- it { expect(Rundown::SentimentProcessor.new(subject).process.floor).to eql(2) }
25
+ it { expect(Rundown::Processors::Sentiment.new(subject).process.floor).to eql(2) }
26
+ end
27
+
28
+ context 'basic string' do
29
+ subject { Rundown.parse("I'll see you on the 18th, give me a ring on 07912 345 678. - Jerertt, me@example.com") }
30
+
31
+ it 'extracts emails' do
32
+ expect(subject.emails).to eql(["me@example.com"])
33
+ end
34
+
35
+ it 'extracts phone numbers' do
36
+ pending
37
+ expect(subject.phones).to eql(["07912 345 678"])
38
+ end
39
+
40
+ it 'extracts sentiment' do
41
+ expect(subject.sentiment).to eql(0.791666666667)
42
+ end
26
43
  end
27
44
 
28
45
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: rundown
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.1
4
+ version: 0.0.2
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -107,7 +107,8 @@ dependencies:
107
107
  - - '>='
108
108
  - !ruby/object:Gem::Version
109
109
  version: '0'
110
- description: knows things
110
+ description: Extracts dates, phone numbers, sentiment and other items from naturally
111
+ worded text.
111
112
  email:
112
113
  - dev@jsf.io
113
114
  executables: []
@@ -121,14 +122,19 @@ files:
121
122
  - README.md
122
123
  - Rakefile
123
124
  - lib/rundown.rb
124
- - lib/rundown/date_processor.rb
125
- - lib/rundown/email_processor.rb
126
- - lib/rundown/phone_processor.rb
125
+ - lib/rundown/parser.rb
127
126
  - lib/rundown/processor.rb
128
- - lib/rundown/sentiment_processor.rb
127
+ - lib/rundown/processors/dates.rb
128
+ - lib/rundown/processors/email.rb
129
+ - lib/rundown/processors/phone.rb
130
+ - lib/rundown/processors/sentiment.rb
129
131
  - lib/rundown/version.rb
130
132
  - lib/rundown/words.rb
131
133
  - rundown.gemspec
134
+ - spec/processors/dates_spec.rb
135
+ - spec/processors/email_spec.rb
136
+ - spec/processors/phone_spec.rb
137
+ - spec/processors/sentiment_spec.rb
132
138
  - spec/rundown_spec.rb
133
139
  - spec/spec_helper.rb
134
140
  homepage: ''
@@ -146,7 +152,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
146
152
  version: '0'
147
153
  segments:
148
154
  - 0
149
- hash: -2506835466625535810
155
+ hash: -3027207295298503492
150
156
  required_rubygems_version: !ruby/object:Gem::Requirement
151
157
  none: false
152
158
  requirements:
@@ -155,13 +161,17 @@ required_rubygems_version: !ruby/object:Gem::Requirement
155
161
  version: '0'
156
162
  segments:
157
163
  - 0
158
- hash: -2506835466625535810
164
+ hash: -3027207295298503492
159
165
  requirements: []
160
166
  rubyforge_project:
161
167
  rubygems_version: 1.8.25
162
168
  signing_key:
163
169
  specification_version: 3
164
- summary: knows things
170
+ summary: Natural Language Processor
165
171
  test_files:
172
+ - spec/processors/dates_spec.rb
173
+ - spec/processors/email_spec.rb
174
+ - spec/processors/phone_spec.rb
175
+ - spec/processors/sentiment_spec.rb
166
176
  - spec/rundown_spec.rb
167
177
  - spec/spec_helper.rb
@@ -1,15 +0,0 @@
1
- module Rundown
2
- class DateProcessor < Processor
3
- attr_accessor :text, :parser
4
-
5
- def initialize(words, parser=Nickel)
6
- @text = words.join(' ')
7
- @parser = parser
8
- end
9
-
10
- def process
11
- parser.parse(text).occurrences
12
- end
13
-
14
- end
15
- end
@@ -1,27 +0,0 @@
1
- module Rundown
2
- class EmailProcessor < Processor
3
- REGEX = /\A[^@\s]+@([^@\s]+\.)+[^@\s]+\z/
4
-
5
- def cleanup_words
6
- words = @words.map { |word|
7
- word.gsub!(/\(|\)/, "")
8
- }
9
- end
10
-
11
- def process
12
- cleanup_words
13
- words.select { |word|
14
- Array(word.match(REGEX))[0]
15
- }.reject(&:empty?).map {|word|
16
- word.split('@')
17
- }.reject { |words|
18
- words.size < 2
19
- }.select { |words|
20
- x = Array(words.last).last.to_s.split('.').last
21
- x.length <= 4 && !x.match(/\d+/)
22
- }.map { |words|
23
- words.join("@")
24
- }
25
- end
26
- end
27
- end
@@ -1,18 +0,0 @@
1
- module Rundown
2
- class PhoneProcessor < Processor
3
- attr_accessor :validator, :text
4
-
5
- def initialize(words, validator=Phony)
6
- @text = words.join(' ')
7
- @validator = validator
8
- end
9
-
10
- def plausible?(number)
11
- validator.plausible?(number)
12
- end
13
-
14
- def process
15
- text.scan(/(\+?(\(|\)|[0-9]|\s|-|\.){4,20})/).select {|e| plausible?(e) }
16
- end
17
- end
18
- end
@@ -1,14 +0,0 @@
1
- module Rundown
2
- class SentimentProcessor < Processor
3
- attr_accessor :parser
4
-
5
- def initialize(words, parser=SentimentParser)
6
- @parser = parser
7
- super(words)
8
- end
9
-
10
- def process
11
- parser.parse(words.join(' '))
12
- end
13
- end
14
- end