rundown 0.0.1 → 0.0.2
Sign up to get free protection for your applications and to get access to all the features.
- data/Gemfile +2 -0
- data/README.md +41 -1
- data/lib/rundown.rb +9 -5
- data/lib/rundown/parser.rb +34 -0
- data/lib/rundown/processor.rb +1 -1
- data/lib/rundown/processors/dates.rb +17 -0
- data/lib/rundown/processors/email.rb +29 -0
- data/lib/rundown/processors/phone.rb +20 -0
- data/lib/rundown/processors/sentiment.rb +16 -0
- data/lib/rundown/version.rb +1 -1
- data/rundown.gemspec +2 -2
- data/spec/processors/dates_spec.rb +18 -0
- data/spec/processors/email_spec.rb +9 -0
- data/spec/processors/phone_spec.rb +9 -0
- data/spec/processors/sentiment_spec.rb +9 -0
- data/spec/rundown_spec.rb +22 -5
- metadata +19 -9
- data/lib/rundown/date_processor.rb +0 -15
- data/lib/rundown/email_processor.rb +0 -27
- data/lib/rundown/phone_processor.rb +0 -18
- data/lib/rundown/sentiment_processor.rb +0 -14
data/Gemfile
CHANGED
data/README.md
CHANGED
@@ -1,7 +1,47 @@
|
|
1
1
|
Rundown
|
2
2
|
=======
|
3
3
|
|
4
|
-
[![Build Status](https://travis-ci.org/modsognir/rundown.png)](https://travis-ci.org/modsognir/rundown)
|
4
|
+
[![Gem Version](https://badge.fury.io/rb/rundown.png)](http://badge.fury.io/rb/rundown) [![Build Status](https://travis-ci.org/modsognir/rundown.png)](https://travis-ci.org/modsognir/rundown)
|
5
|
+
|
6
|
+
Rundown is a simple Natural Language Processor built with Ruby, inspired by [Knwl.js](https://github.com/loadfive/Knwl.js). Rundown scans through text, user data, or just about anything for likely data of interest: phone numbers, dates, locations, emails, and times, as well as the likelihood of spam and the overall emotion.
|
7
|
+
|
8
|
+
## Installation
|
9
|
+
|
10
|
+
Add this line to your application's Gemfile:
|
11
|
+
|
12
|
+
gem 'rundown'
|
13
|
+
|
14
|
+
And then execute:
|
15
|
+
|
16
|
+
$ bundle
|
17
|
+
|
18
|
+
Or install it yourself as:
|
19
|
+
|
20
|
+
$ gem install rundown
|
21
|
+
|
22
|
+
## Usage
|
23
|
+
|
24
|
+
rd = Rundown.parse("I'll see you on the 18th, give me a ring on 07912 345 678. - Jerertt, me@example.com")
|
25
|
+
|
26
|
+
rd.emails
|
27
|
+
=> ["me@example.com"]
|
28
|
+
|
29
|
+
rd.phones
|
30
|
+
=> ["07912 345 678"]
|
31
|
+
|
32
|
+
rd.sentiment
|
33
|
+
=> -0.5333
|
34
|
+
|
35
|
+
rd.dates
|
36
|
+
=> [#<Date: 2013-12-18>]
|
37
|
+
|
38
|
+
## Contributing
|
39
|
+
|
40
|
+
1. Fork it
|
41
|
+
2. Create your feature branch (`git checkout -b my-new-feature`)
|
42
|
+
3. Commit your changes (`git commit -am 'Add some feature'`)
|
43
|
+
4. Push to the branch (`git push origin my-new-feature`)
|
44
|
+
5. Create new Pull Request
|
5
45
|
|
6
46
|
### Known Issues
|
7
47
|
|
data/lib/rundown.rb
CHANGED
@@ -4,13 +4,17 @@ require 'phony'
|
|
4
4
|
require 'nickel'
|
5
5
|
require 'sentiment_parser'
|
6
6
|
|
7
|
+
require 'rundown/parser'
|
7
8
|
require 'rundown/processor'
|
8
|
-
require 'rundown/
|
9
|
-
require 'rundown/
|
10
|
-
require 'rundown/
|
11
|
-
require 'rundown/
|
12
|
-
require 'rundown/sentiment_processor'
|
9
|
+
require 'rundown/processors/email'
|
10
|
+
require 'rundown/processors/dates'
|
11
|
+
require 'rundown/processors/phone'
|
12
|
+
require 'rundown/processors/sentiment'
|
13
13
|
|
14
14
|
module Rundown
|
15
15
|
module_function
|
16
|
+
|
17
|
+
def parse(text)
|
18
|
+
Parser.new(text)
|
19
|
+
end
|
16
20
|
end
|
@@ -0,0 +1,34 @@
|
|
1
|
+
module Rundown
|
2
|
+
class Parser
|
3
|
+
attr_accessor :text
|
4
|
+
|
5
|
+
def initialize(text)
|
6
|
+
@text = text
|
7
|
+
end
|
8
|
+
|
9
|
+
# def processors
|
10
|
+
# [
|
11
|
+
# ,
|
12
|
+
# Processors::Email,
|
13
|
+
# Processors::Phone,
|
14
|
+
# Processors::Sentiment
|
15
|
+
# ]
|
16
|
+
# end
|
17
|
+
|
18
|
+
def dates
|
19
|
+
@dates ||= Processors::Dates.new(text).process
|
20
|
+
end
|
21
|
+
|
22
|
+
def emails
|
23
|
+
@email ||= Processors::Email.new(text).process
|
24
|
+
end
|
25
|
+
|
26
|
+
def phones
|
27
|
+
@phone ||= Processors::Phone.new(text).process
|
28
|
+
end
|
29
|
+
|
30
|
+
def sentiment
|
31
|
+
@sentiment ||= Processors::Sentiment.new(text).process
|
32
|
+
end
|
33
|
+
end
|
34
|
+
end
|
data/lib/rundown/processor.rb
CHANGED
@@ -0,0 +1,17 @@
|
|
1
|
+
module Rundown
|
2
|
+
module Processors
|
3
|
+
class Dates < Rundown::Processor
|
4
|
+
attr_accessor :text, :parser
|
5
|
+
|
6
|
+
def initialize(words, parser=Nickel)
|
7
|
+
@text = words
|
8
|
+
@parser = parser
|
9
|
+
end
|
10
|
+
|
11
|
+
def process
|
12
|
+
parser.parse(text).occurrences
|
13
|
+
end
|
14
|
+
|
15
|
+
end
|
16
|
+
end
|
17
|
+
end
|
@@ -0,0 +1,29 @@
|
|
1
|
+
module Rundown
|
2
|
+
module Processors
|
3
|
+
class Email < Rundown::Processor
|
4
|
+
REGEX = /\A[^@\s]+@([^@\s]+\.)+[^@\s]+\z/
|
5
|
+
|
6
|
+
def cleanup_words
|
7
|
+
words = @words.map { |word|
|
8
|
+
word.gsub!(/\(|\)/, "")
|
9
|
+
}
|
10
|
+
end
|
11
|
+
|
12
|
+
def process
|
13
|
+
cleanup_words
|
14
|
+
words.select { |word|
|
15
|
+
Array(word.match(REGEX))[0]
|
16
|
+
}.reject(&:empty?).map {|word|
|
17
|
+
word.split('@')
|
18
|
+
}.reject { |words|
|
19
|
+
words.size < 2
|
20
|
+
}.select { |words|
|
21
|
+
x = Array(words.last).last.to_s.split('.').last
|
22
|
+
x.length <= 4 && !x.match(/\d+/)
|
23
|
+
}.map { |words|
|
24
|
+
words.join("@")
|
25
|
+
}
|
26
|
+
end
|
27
|
+
end
|
28
|
+
end
|
29
|
+
end
|
@@ -0,0 +1,20 @@
|
|
1
|
+
module Rundown
|
2
|
+
module Processors
|
3
|
+
class Phone < Rundown::Processor
|
4
|
+
attr_accessor :validator, :text
|
5
|
+
|
6
|
+
def initialize(words, validator=Phony)
|
7
|
+
@text = words
|
8
|
+
@validator = validator
|
9
|
+
end
|
10
|
+
|
11
|
+
def plausible?(number)
|
12
|
+
validator.plausible?(number)
|
13
|
+
end
|
14
|
+
|
15
|
+
def process
|
16
|
+
text.scan(/(\+?(\(|\)|[0-9]|\s|-|\.){4,20})/).select {|e| plausible?(e) }
|
17
|
+
end
|
18
|
+
end
|
19
|
+
end
|
20
|
+
end
|
@@ -0,0 +1,16 @@
|
|
1
|
+
module Rundown
|
2
|
+
module Processors
|
3
|
+
class Sentiment < Rundown::Processor
|
4
|
+
attr_accessor :parser
|
5
|
+
|
6
|
+
def initialize(words, parser=SentimentParser)
|
7
|
+
@parser = parser
|
8
|
+
super(words)
|
9
|
+
end
|
10
|
+
|
11
|
+
def process
|
12
|
+
parser.parse(words.join(' '))
|
13
|
+
end
|
14
|
+
end
|
15
|
+
end
|
16
|
+
end
|
data/lib/rundown/version.rb
CHANGED
data/rundown.gemspec
CHANGED
@@ -8,8 +8,8 @@ Gem::Specification.new do |spec|
|
|
8
8
|
spec.version = Rundown::VERSION
|
9
9
|
spec.authors = ["Jared Fraser"]
|
10
10
|
spec.email = ["dev@jsf.io"]
|
11
|
-
spec.description = %q{
|
12
|
-
spec.summary = %q{
|
11
|
+
spec.description = %q{Extracts dates, phone numbers, sentiment and other items from naturally worded text.}
|
12
|
+
spec.summary = %q{Natural Language Processor}
|
13
13
|
spec.homepage = ""
|
14
14
|
spec.license = "MIT"
|
15
15
|
|
@@ -0,0 +1,18 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
module Rundown
|
4
|
+
module Processors
|
5
|
+
describe Dates do
|
6
|
+
describe 'month' do
|
7
|
+
{
|
8
|
+
"I will see you on 12/15/2013" => Date.new(2013, 12, 15),
|
9
|
+
" the 28th of december." => Date.new(Time.now.year, 12, 28)
|
10
|
+
}.each do |input, expected|
|
11
|
+
it "parse '#{input}' to '#{expected}'" do
|
12
|
+
expect(Rundown::Processors::Dates.new(input).process.first.start_date.to_date).to eql(expected)
|
13
|
+
end
|
14
|
+
end
|
15
|
+
end
|
16
|
+
end
|
17
|
+
end
|
18
|
+
end
|
data/spec/rundown_spec.rb
CHANGED
@@ -5,24 +5,41 @@ describe Rundown do
|
|
5
5
|
"I'm sorry, I'm extremely busy right now. I just looked at the clock, and it's 12:54 AM, I've still got a lot of work to do. Don't worry about the event tomorrow, it's been moved ahead a week, the 28th of december. Remember though, you've got to call to get a ticket soon, their # is 1-212-323-1239. Their website says it costs $23 per person.
|
6
6
|
If you've got enough time, they have some more information on their website, http://theevent.com.
|
7
7
|
Regards,
|
8
|
-
David (david32@gmail.com)"
|
8
|
+
David (david32@gmail.com)"
|
9
9
|
}
|
10
10
|
|
11
11
|
describe 'emails' do
|
12
|
-
it { expect(Rundown::
|
12
|
+
it { expect(Rundown::Processors::Email.new(subject).process).to eql ["david32@gmail.com"]}
|
13
13
|
end
|
14
14
|
|
15
15
|
describe 'dates' do
|
16
16
|
# FIXME: remove dependence on start date knowledge
|
17
|
-
it { expect(Rundown::
|
17
|
+
it { expect(Rundown::Processors::Dates.new(subject).process.map(&:start_date).map(&:date)).to include "20131228"}
|
18
18
|
end
|
19
19
|
|
20
20
|
describe 'phones' do
|
21
|
-
it { pending; expect(Rundown::
|
21
|
+
it { pending; expect(Rundown::Processors::Phone.new(subject).process).to eql ["212-323-1239"]}
|
22
22
|
end
|
23
23
|
|
24
24
|
describe 'sentiment' do
|
25
|
-
it { expect(Rundown::
|
25
|
+
it { expect(Rundown::Processors::Sentiment.new(subject).process.floor).to eql(2) }
|
26
|
+
end
|
27
|
+
|
28
|
+
context 'basic string' do
|
29
|
+
subject { Rundown.parse("I'll see you on the 18th, give me a ring on 07912 345 678. - Jerertt, me@example.com") }
|
30
|
+
|
31
|
+
it 'extracts emails' do
|
32
|
+
expect(subject.emails).to eql(["me@example.com"])
|
33
|
+
end
|
34
|
+
|
35
|
+
it 'extracts phone numbers' do
|
36
|
+
pending
|
37
|
+
expect(subject.phones).to eql(["07912 345 678"])
|
38
|
+
end
|
39
|
+
|
40
|
+
it 'extracts sentiment' do
|
41
|
+
expect(subject.sentiment).to eql(0.791666666667)
|
42
|
+
end
|
26
43
|
end
|
27
44
|
|
28
45
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: rundown
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.
|
4
|
+
version: 0.0.2
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -107,7 +107,8 @@ dependencies:
|
|
107
107
|
- - '>='
|
108
108
|
- !ruby/object:Gem::Version
|
109
109
|
version: '0'
|
110
|
-
description:
|
110
|
+
description: Extracts dates, phone numbers, sentiment and other items from naturally
|
111
|
+
worded text.
|
111
112
|
email:
|
112
113
|
- dev@jsf.io
|
113
114
|
executables: []
|
@@ -121,14 +122,19 @@ files:
|
|
121
122
|
- README.md
|
122
123
|
- Rakefile
|
123
124
|
- lib/rundown.rb
|
124
|
-
- lib/rundown/
|
125
|
-
- lib/rundown/email_processor.rb
|
126
|
-
- lib/rundown/phone_processor.rb
|
125
|
+
- lib/rundown/parser.rb
|
127
126
|
- lib/rundown/processor.rb
|
128
|
-
- lib/rundown/
|
127
|
+
- lib/rundown/processors/dates.rb
|
128
|
+
- lib/rundown/processors/email.rb
|
129
|
+
- lib/rundown/processors/phone.rb
|
130
|
+
- lib/rundown/processors/sentiment.rb
|
129
131
|
- lib/rundown/version.rb
|
130
132
|
- lib/rundown/words.rb
|
131
133
|
- rundown.gemspec
|
134
|
+
- spec/processors/dates_spec.rb
|
135
|
+
- spec/processors/email_spec.rb
|
136
|
+
- spec/processors/phone_spec.rb
|
137
|
+
- spec/processors/sentiment_spec.rb
|
132
138
|
- spec/rundown_spec.rb
|
133
139
|
- spec/spec_helper.rb
|
134
140
|
homepage: ''
|
@@ -146,7 +152,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
146
152
|
version: '0'
|
147
153
|
segments:
|
148
154
|
- 0
|
149
|
-
hash: -
|
155
|
+
hash: -3027207295298503492
|
150
156
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
151
157
|
none: false
|
152
158
|
requirements:
|
@@ -155,13 +161,17 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
155
161
|
version: '0'
|
156
162
|
segments:
|
157
163
|
- 0
|
158
|
-
hash: -
|
164
|
+
hash: -3027207295298503492
|
159
165
|
requirements: []
|
160
166
|
rubyforge_project:
|
161
167
|
rubygems_version: 1.8.25
|
162
168
|
signing_key:
|
163
169
|
specification_version: 3
|
164
|
-
summary:
|
170
|
+
summary: Natural Language Processor
|
165
171
|
test_files:
|
172
|
+
- spec/processors/dates_spec.rb
|
173
|
+
- spec/processors/email_spec.rb
|
174
|
+
- spec/processors/phone_spec.rb
|
175
|
+
- spec/processors/sentiment_spec.rb
|
166
176
|
- spec/rundown_spec.rb
|
167
177
|
- spec/spec_helper.rb
|
@@ -1,27 +0,0 @@
|
|
1
|
-
module Rundown
|
2
|
-
class EmailProcessor < Processor
|
3
|
-
REGEX = /\A[^@\s]+@([^@\s]+\.)+[^@\s]+\z/
|
4
|
-
|
5
|
-
def cleanup_words
|
6
|
-
words = @words.map { |word|
|
7
|
-
word.gsub!(/\(|\)/, "")
|
8
|
-
}
|
9
|
-
end
|
10
|
-
|
11
|
-
def process
|
12
|
-
cleanup_words
|
13
|
-
words.select { |word|
|
14
|
-
Array(word.match(REGEX))[0]
|
15
|
-
}.reject(&:empty?).map {|word|
|
16
|
-
word.split('@')
|
17
|
-
}.reject { |words|
|
18
|
-
words.size < 2
|
19
|
-
}.select { |words|
|
20
|
-
x = Array(words.last).last.to_s.split('.').last
|
21
|
-
x.length <= 4 && !x.match(/\d+/)
|
22
|
-
}.map { |words|
|
23
|
-
words.join("@")
|
24
|
-
}
|
25
|
-
end
|
26
|
-
end
|
27
|
-
end
|
@@ -1,18 +0,0 @@
|
|
1
|
-
module Rundown
|
2
|
-
class PhoneProcessor < Processor
|
3
|
-
attr_accessor :validator, :text
|
4
|
-
|
5
|
-
def initialize(words, validator=Phony)
|
6
|
-
@text = words.join(' ')
|
7
|
-
@validator = validator
|
8
|
-
end
|
9
|
-
|
10
|
-
def plausible?(number)
|
11
|
-
validator.plausible?(number)
|
12
|
-
end
|
13
|
-
|
14
|
-
def process
|
15
|
-
text.scan(/(\+?(\(|\)|[0-9]|\s|-|\.){4,20})/).select {|e| plausible?(e) }
|
16
|
-
end
|
17
|
-
end
|
18
|
-
end
|