rundown 0.0.1 → 0.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/Gemfile CHANGED
@@ -1,3 +1,5 @@
1
1
  source 'https://rubygems.org'
2
2
 
3
3
  gemspec
4
+
5
+ gem 'nickel', github: 'modsognir/nickel'
data/README.md CHANGED
@@ -1,7 +1,47 @@
1
1
  Rundown
2
2
  =======
3
3
 
4
- [![Build Status](https://travis-ci.org/modsognir/rundown.png)](https://travis-ci.org/modsognir/rundown)
4
+ [![Gem Version](https://badge.fury.io/rb/rundown.png)](http://badge.fury.io/rb/rundown) [![Build Status](https://travis-ci.org/modsognir/rundown.png)](https://travis-ci.org/modsognir/rundown)
5
+
6
+ Rundown is a simple Natural Language Processor built with Ruby, inspired by [Knwl.js](https://github.com/loadfive/Knwl.js). Rundown scans through text, user data, or just about anything for likely data of interest: phone numbers, dates, locations, emails, and times, as well as the likelihood of spam and the overall emotion.
7
+
8
+ ## Installation
9
+
10
+ Add this line to your application's Gemfile:
11
+
12
+ gem 'rundown'
13
+
14
+ And then execute:
15
+
16
+ $ bundle
17
+
18
+ Or install it yourself as:
19
+
20
+ $ gem install rundown
21
+
22
+ ## Usage
23
+
24
+ rd = Rundown.parse("I'll see you on the 18th, give me a ring on 07912 345 678. - Jerertt, me@example.com")
25
+
26
+ rd.emails
27
+ => ["me@example.com"]
28
+
29
+ rd.phones
30
+ => ["07912 345 678"]
31
+
32
+ rd.sentiment
33
+ => -0.5333
34
+
35
+ rd.dates
36
+ => [#<Date: 2013-12-18>]
37
+
38
+ ## Contributing
39
+
40
+ 1. Fork it
41
+ 2. Create your feature branch (`git checkout -b my-new-feature`)
42
+ 3. Commit your changes (`git commit -am 'Add some feature'`)
43
+ 4. Push to the branch (`git push origin my-new-feature`)
44
+ 5. Create new Pull Request
5
45
 
6
46
  ### Known Issues
7
47
 
data/lib/rundown.rb CHANGED
@@ -4,13 +4,17 @@ require 'phony'
4
4
  require 'nickel'
5
5
  require 'sentiment_parser'
6
6
 
7
+ require 'rundown/parser'
7
8
  require 'rundown/processor'
8
- require 'rundown/words'
9
- require 'rundown/email_processor'
10
- require 'rundown/date_processor'
11
- require 'rundown/phone_processor'
12
- require 'rundown/sentiment_processor'
9
+ require 'rundown/processors/email'
10
+ require 'rundown/processors/dates'
11
+ require 'rundown/processors/phone'
12
+ require 'rundown/processors/sentiment'
13
13
 
14
14
  module Rundown
15
15
  module_function
16
+
17
+ def parse(text)
18
+ Parser.new(text)
19
+ end
16
20
  end
@@ -0,0 +1,34 @@
1
module Rundown
  # Runs the individual processors over one piece of text on demand and
  # caches each result, so repeated calls do not re-process the text.
  class Parser
    # @return [String] the text handed to the processors
    attr_reader :text

    # @param text [String] free-form text to analyse
    def initialize(text)
      @text = text
    end

    # Replaces the text and drops every cached result, so the next call to
    # #dates, #emails, #phones or #sentiment processes the new text instead
    # of returning a value computed from the old one.
    #
    # @param value [String] the new text
    def text=(value)
      @dates = @emails = @phones = @sentiment = nil
      @text = value
    end

    # @return the result of Processors::Dates#process for #text (memoized)
    def dates
      @dates ||= Processors::Dates.new(text).process
    end

    # @return the result of Processors::Email#process for #text (memoized)
    def emails
      @emails ||= Processors::Email.new(text).process
    end

    # @return the result of Processors::Phone#process for #text (memoized)
    def phones
      @phones ||= Processors::Phone.new(text).process
    end

    # @return the result of Processors::Sentiment#process for #text (memoized)
    def sentiment
      @sentiment ||= Processors::Sentiment.new(text).process
    end
  end
end
@@ -3,7 +3,7 @@ module Rundown
3
3
  attr_accessor :words
4
4
 
5
5
  def initialize(words)
6
- @words = Array(words)
6
+ @words = words.to_s.split(/\s/)
7
7
  end
8
8
  end
9
9
  end
@@ -0,0 +1,17 @@
1
module Rundown
  module Processors
    # Date extraction: passes the raw text to a parser object (Nickel unless
    # another one is injected) and returns the occurrences it reports.
    class Dates < Rundown::Processor
      attr_accessor :text
      attr_accessor :parser

      # @param words [String] text to scan; stored unchanged as #text
      # @param parser [#parse] object whose +parse(text)+ result responds to
      #   +occurrences+
      def initialize(words, parser=Nickel)
        @parser = parser
        @text = words
      end

      # @return the +occurrences+ of the parser's result for #text
      def process
        parsed = parser.parse(text)
        parsed.occurrences
      end
    end
  end
end
@@ -0,0 +1,29 @@
1
module Rundown
  module Processors
    # Picks e-mail addresses out of the words held by the processor.
    class Email < Rundown::Processor
      # Exactly one "@", no whitespace, and at least one dot after the "@".
      REGEX = /\A[^@\s]+@([^@\s]+\.)+[^@\s]+\z/

      # Longest top-level domain that is still accepted.
      MAX_TLD_LENGTH = 4

      # Removes parentheses from every word, so "(me@example.com)" becomes
      # "me@example.com".
      #
      # Builds new strings instead of relying on +gsub!+, which returns nil
      # when nothing was replaced and mutates the stored strings in place.
      #
      # @return [Array<String>] the cleaned words, also stored as the
      #   processor's word list
      def cleanup_words
        @words = @words.map { |word| word.delete('()') }
      end

      # @return [Array<String>] every word that matches REGEX and whose
      #   top-level domain has at most MAX_TLD_LENGTH characters and no digit
      def process
        cleanup_words
        words.select { |word| word =~ REGEX && plausible_tld?(word) }
      end

      private

      # True when the part after the last dot of the domain looks like a
      # top-level domain. A domain made only of dots has no such part and is
      # rejected instead of raising on nil.
      def plausible_tld?(address)
        tld = address.split('@').last.to_s.split('.').last
        !tld.nil? && tld.length <= MAX_TLD_LENGTH && tld !~ /\d/
      end
    end
  end
end
@@ -0,0 +1,20 @@
1
module Rundown
  module Processors
    # Phone number extraction: collects number-like runs from the text and
    # keeps the ones the validator accepts.
    class Phone < Rundown::Processor
      # Optional "+", then 4 to 20 digits, brackets, whitespace, dashes or
      # dots. It deliberately has no capture groups: with groups,
      # String#scan returns one array per match instead of the matched
      # string, and the validator would be handed arrays.
      CANDIDATE = /\+?[()0-9\s.\-]{4,20}/

      # Separator characters that may surround a matched candidate.
      EDGE_SEPARATORS = /\A[\s.\-]+|[\s.\-]+\z/

      attr_accessor :validator, :text

      # @param words [String] text to scan; stored unchanged as #text
      # @param validator [#plausible?] object deciding whether a candidate
      #   string is a phone number (Phony unless another one is injected)
      def initialize(words, validator=Phony)
        @text = words
        @validator = validator
      end

      # @param number [String] candidate phone number
      # @return whatever the validator's +plausible?+ returns for +number+
      def plausible?(number)
        validator.plausible?(number)
      end

      # @return [Array<String>] candidates accepted by the validator, with
      #   surrounding whitespace, dots and dashes removed
      def process
        candidates = text.to_s.scan(CANDIDATE).map { |raw| tidy(raw) }
        # A run of separators alone (e.g. "....") is never a phone number.
        candidates.select { |number| number =~ /\d/ && plausible?(number) }
      end

      private

      # Strips the separators the pattern picks up around a number, such as
      # the space before it or the full stop ending the sentence.
      def tidy(raw)
        raw.gsub(EDGE_SEPARATORS, '')
      end
    end
  end
end
@@ -0,0 +1,16 @@
1
module Rundown
  module Processors
    # Sentiment scoring: re-joins the processor's words into one sentence
    # and hands it to a parser object (SentimentParser unless another one is
    # injected).
    class Sentiment < Rundown::Processor
      attr_accessor :parser

      # @param words [String] text to score; forwarded to the base processor
      # @param parser [#parse] object that scores a sentence
      def initialize(words, parser=SentimentParser)
        super(words)
        @parser = parser
      end

      # @return the parser's result for the words joined by single spaces
      def process
        sentence = words.join(' ')
        parser.parse(sentence)
      end
    end
  end
end
@@ -1,3 +1,3 @@
1
1
  module Rundown
2
- VERSION = '0.0.1'
2
+ VERSION = '0.0.2'
3
3
  end
data/rundown.gemspec CHANGED
@@ -8,8 +8,8 @@ Gem::Specification.new do |spec|
8
8
  spec.version = Rundown::VERSION
9
9
  spec.authors = ["Jared Fraser"]
10
10
  spec.email = ["dev@jsf.io"]
11
- spec.description = %q{knows things}
12
- spec.summary = %q{knows things}
11
+ spec.description = %q{Extracts dates, phone numbers, sentiment and other items from naturally worded text.}
12
+ spec.summary = %q{Natural Language Processor}
13
13
  spec.homepage = ""
14
14
  spec.license = "MIT"
15
15
 
@@ -0,0 +1,18 @@
1
+ require 'spec_helper'
2
+
3
module Rundown
  module Processors
    describe Dates do
      describe 'month' do
        # Input sentence => date expected from the first reported occurrence.
        # NOTE(review): the second expectation is built from Time.now.year,
        # so it depends on the date the suite is run.
        examples = {
          "I will see you on 12/15/2013" => Date.new(2013, 12, 15),
          " the 28th of december." => Date.new(Time.now.year, 12, 28)
        }

        examples.each_pair do |input, expected|
          it "parse '#{input}' to '#{expected}'" do
            occurrence = Rundown::Processors::Dates.new(input).process.first
            expect(occurrence.start_date.to_date).to eql(expected)
          end
        end
      end
    end
  end
end
@@ -0,0 +1,9 @@
1
+ require 'spec_helper'
2
+
3
+ module Rundown
4
+ module Processors
5
+ describe Email do
6
+
7
+ end
8
+ end
9
+ end
@@ -0,0 +1,9 @@
1
+ require 'spec_helper'
2
+
3
+ module Rundown
4
+ module Processors
5
+ describe Phone do
6
+
7
+ end
8
+ end
9
+ end
@@ -0,0 +1,9 @@
1
+ require 'spec_helper'
2
+
3
+ module Rundown
4
+ module Processors
5
+ describe Sentiment do
6
+
7
+ end
8
+ end
9
+ end
data/spec/rundown_spec.rb CHANGED
@@ -5,24 +5,41 @@ describe Rundown do
5
5
  "I'm sorry, I'm extremely busy right now. I just looked at the clock, and it's 12:54 AM, I've still got a lot of work to do. Don't worry about the event tomorrow, it's been moved ahead a week, the 28th of december. Remember though, you've got to call to get a ticket soon, their # is 1-212-323-1239. Their website says it costs $23 per person.
6
6
  If you've got enough time, they have some more information on their website, http://theevent.com.
7
7
  Regards,
8
- David (david32@gmail.com)".split(/\s/)
8
+ David (david32@gmail.com)"
9
9
  }
10
10
 
11
11
  describe 'emails' do
12
- it { expect(Rundown::EmailProcessor.new(subject).process).to eql ["david32@gmail.com"]}
12
+ it { expect(Rundown::Processors::Email.new(subject).process).to eql ["david32@gmail.com"]}
13
13
  end
14
14
 
15
15
  describe 'dates' do
16
16
  # FIXME: remove dependence on start date knowledge
17
- it { expect(Rundown::DateProcessor.new(subject).process.map(&:start_date).map(&:date)).to include "20131228"}
17
+ it { expect(Rundown::Processors::Dates.new(subject).process.map(&:start_date).map(&:date)).to include "20131228"}
18
18
  end
19
19
 
20
20
  describe 'phones' do
21
- it { pending; expect(Rundown::PhoneProcessor.new(subject).process).to eql ["212-323-1239"]}
21
+ it { pending; expect(Rundown::Processors::Phone.new(subject).process).to eql ["212-323-1239"]}
22
22
  end
23
23
 
24
24
  describe 'sentiment' do
25
- it { expect(Rundown::SentimentProcessor.new(subject).process.floor).to eql(2) }
25
+ it { expect(Rundown::Processors::Sentiment.new(subject).process.floor).to eql(2) }
26
+ end
27
+
28
+ context 'basic string' do
29
+ subject { Rundown.parse("I'll see you on the 18th, give me a ring on 07912 345 678. - Jerertt, me@example.com") }
30
+
31
+ it 'extracts emails' do
32
+ expect(subject.emails).to eql(["me@example.com"])
33
+ end
34
+
35
+ it 'extracts phone numbers' do
36
+ pending
37
+ expect(subject.phones).to eql(["07912 345 678"])
38
+ end
39
+
40
+ it 'extracts sentiment' do
41
+ expect(subject.sentiment).to eql(0.791666666667)
42
+ end
26
43
  end
27
44
 
28
45
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: rundown
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.1
4
+ version: 0.0.2
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -107,7 +107,8 @@ dependencies:
107
107
  - - '>='
108
108
  - !ruby/object:Gem::Version
109
109
  version: '0'
110
- description: knows things
110
+ description: Extracts dates, phone numbers, sentiment and other items from naturally
111
+ worded text.
111
112
  email:
112
113
  - dev@jsf.io
113
114
  executables: []
@@ -121,14 +122,19 @@ files:
121
122
  - README.md
122
123
  - Rakefile
123
124
  - lib/rundown.rb
124
- - lib/rundown/date_processor.rb
125
- - lib/rundown/email_processor.rb
126
- - lib/rundown/phone_processor.rb
125
+ - lib/rundown/parser.rb
127
126
  - lib/rundown/processor.rb
128
- - lib/rundown/sentiment_processor.rb
127
+ - lib/rundown/processors/dates.rb
128
+ - lib/rundown/processors/email.rb
129
+ - lib/rundown/processors/phone.rb
130
+ - lib/rundown/processors/sentiment.rb
129
131
  - lib/rundown/version.rb
130
132
  - lib/rundown/words.rb
131
133
  - rundown.gemspec
134
+ - spec/processors/dates_spec.rb
135
+ - spec/processors/email_spec.rb
136
+ - spec/processors/phone_spec.rb
137
+ - spec/processors/sentiment_spec.rb
132
138
  - spec/rundown_spec.rb
133
139
  - spec/spec_helper.rb
134
140
  homepage: ''
@@ -146,7 +152,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
146
152
  version: '0'
147
153
  segments:
148
154
  - 0
149
- hash: -2506835466625535810
155
+ hash: -3027207295298503492
150
156
  required_rubygems_version: !ruby/object:Gem::Requirement
151
157
  none: false
152
158
  requirements:
@@ -155,13 +161,17 @@ required_rubygems_version: !ruby/object:Gem::Requirement
155
161
  version: '0'
156
162
  segments:
157
163
  - 0
158
- hash: -2506835466625535810
164
+ hash: -3027207295298503492
159
165
  requirements: []
160
166
  rubyforge_project:
161
167
  rubygems_version: 1.8.25
162
168
  signing_key:
163
169
  specification_version: 3
164
- summary: knows things
170
+ summary: Natural Language Processor
165
171
  test_files:
172
+ - spec/processors/dates_spec.rb
173
+ - spec/processors/email_spec.rb
174
+ - spec/processors/phone_spec.rb
175
+ - spec/processors/sentiment_spec.rb
166
176
  - spec/rundown_spec.rb
167
177
  - spec/spec_helper.rb
@@ -1,15 +0,0 @@
1
- module Rundown
2
- class DateProcessor < Processor
3
- attr_accessor :text, :parser
4
-
5
- def initialize(words, parser=Nickel)
6
- @text = words.join(' ')
7
- @parser = parser
8
- end
9
-
10
- def process
11
- parser.parse(text).occurrences
12
- end
13
-
14
- end
15
- end
@@ -1,27 +0,0 @@
1
- module Rundown
2
- class EmailProcessor < Processor
3
- REGEX = /\A[^@\s]+@([^@\s]+\.)+[^@\s]+\z/
4
-
5
- def cleanup_words
6
- words = @words.map { |word|
7
- word.gsub!(/\(|\)/, "")
8
- }
9
- end
10
-
11
- def process
12
- cleanup_words
13
- words.select { |word|
14
- Array(word.match(REGEX))[0]
15
- }.reject(&:empty?).map {|word|
16
- word.split('@')
17
- }.reject { |words|
18
- words.size < 2
19
- }.select { |words|
20
- x = Array(words.last).last.to_s.split('.').last
21
- x.length <= 4 && !x.match(/\d+/)
22
- }.map { |words|
23
- words.join("@")
24
- }
25
- end
26
- end
27
- end
@@ -1,18 +0,0 @@
1
- module Rundown
2
- class PhoneProcessor < Processor
3
- attr_accessor :validator, :text
4
-
5
- def initialize(words, validator=Phony)
6
- @text = words.join(' ')
7
- @validator = validator
8
- end
9
-
10
- def plausible?(number)
11
- validator.plausible?(number)
12
- end
13
-
14
- def process
15
- text.scan(/(\+?(\(|\)|[0-9]|\s|-|\.){4,20})/).select {|e| plausible?(e) }
16
- end
17
- end
18
- end
@@ -1,14 +0,0 @@
1
- module Rundown
2
- class SentimentProcessor < Processor
3
- attr_accessor :parser
4
-
5
- def initialize(words, parser=SentimentParser)
6
- @parser = parser
7
- super(words)
8
- end
9
-
10
- def process
11
- parser.parse(words.join(' '))
12
- end
13
- end
14
- end