rundown 0.0.1 → 0.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/Gemfile +2 -0
- data/README.md +41 -1
- data/lib/rundown.rb +9 -5
- data/lib/rundown/parser.rb +34 -0
- data/lib/rundown/processor.rb +1 -1
- data/lib/rundown/processors/dates.rb +17 -0
- data/lib/rundown/processors/email.rb +29 -0
- data/lib/rundown/processors/phone.rb +20 -0
- data/lib/rundown/processors/sentiment.rb +16 -0
- data/lib/rundown/version.rb +1 -1
- data/rundown.gemspec +2 -2
- data/spec/processors/dates_spec.rb +18 -0
- data/spec/processors/email_spec.rb +9 -0
- data/spec/processors/phone_spec.rb +9 -0
- data/spec/processors/sentiment_spec.rb +9 -0
- data/spec/rundown_spec.rb +22 -5
- metadata +19 -9
- data/lib/rundown/date_processor.rb +0 -15
- data/lib/rundown/email_processor.rb +0 -27
- data/lib/rundown/phone_processor.rb +0 -18
- data/lib/rundown/sentiment_processor.rb +0 -14
data/Gemfile
CHANGED
data/README.md
CHANGED
@@ -1,7 +1,47 @@
|
|
1
1
|
Rundown
|
2
2
|
=======
|
3
3
|
|
4
|
-
[](https://travis-ci.org/modsognir/rundown)
|
4
|
+
[Gem Version](http://badge.fury.io/rb/rundown) [Build Status](https://travis-ci.org/modsognir/rundown)
|
5
|
+
|
6
|
+
Rundown is a simple Natural Language Processor built with Ruby, inspired by [Knwl.js](https://github.com/loadfive/Knwl.js). Rundown scans through text, user data, or just about anything for likely data of interest: phone numbers, dates, locations, emails, and times, as well as the likelihood of spam and overall emotion.
|
7
|
+
|
8
|
+
## Installation
|
9
|
+
|
10
|
+
Add this line to your application's Gemfile:
|
11
|
+
|
12
|
+
gem 'rundown'
|
13
|
+
|
14
|
+
And then execute:
|
15
|
+
|
16
|
+
$ bundle
|
17
|
+
|
18
|
+
Or install it yourself as:
|
19
|
+
|
20
|
+
$ gem install rundown
|
21
|
+
|
22
|
+
## Usage
|
23
|
+
|
24
|
+
rd = Rundown.parse("I'll see you on the 18th, give me a ring on 07912 345 678. - Jerertt, me@example.com")
|
25
|
+
|
26
|
+
rd.emails
|
27
|
+
=> ["me@example.com"]
|
28
|
+
|
29
|
+
rd.phones
|
30
|
+
=> ["07912 345 678"]
|
31
|
+
|
32
|
+
rd.sentiment
|
33
|
+
=> -0.5333
|
34
|
+
|
35
|
+
rd.dates
|
36
|
+
=> [#<Date: 2013-12-18>]
|
37
|
+
|
38
|
+
## Contributing
|
39
|
+
|
40
|
+
1. Fork it
|
41
|
+
2. Create your feature branch (`git checkout -b my-new-feature`)
|
42
|
+
3. Commit your changes (`git commit -am 'Add some feature'`)
|
43
|
+
4. Push to the branch (`git push origin my-new-feature`)
|
44
|
+
5. Create new Pull Request
|
5
45
|
|
6
46
|
### Known Issues
|
7
47
|
|
data/lib/rundown.rb
CHANGED
@@ -4,13 +4,17 @@ require 'phony'
|
|
4
4
|
require 'nickel'
|
5
5
|
require 'sentiment_parser'
|
6
6
|
|
7
|
+
require 'rundown/parser'
|
7
8
|
require 'rundown/processor'
|
8
|
-
require 'rundown/
|
9
|
-
require 'rundown/
|
10
|
-
require 'rundown/
|
11
|
-
require 'rundown/
|
12
|
-
require 'rundown/sentiment_processor'
|
9
|
+
require 'rundown/processors/email'
|
10
|
+
require 'rundown/processors/dates'
|
11
|
+
require 'rundown/processors/phone'
|
12
|
+
require 'rundown/processors/sentiment'
|
13
13
|
|
14
14
|
module Rundown
|
15
15
|
module_function
|
16
|
+
|
17
|
+
def parse(text)
|
18
|
+
Parser.new(text)
|
19
|
+
end
|
16
20
|
end
|
@@ -0,0 +1,34 @@
|
|
1
|
+
module Rundown
|
2
|
+
class Parser
|
3
|
+
attr_accessor :text
|
4
|
+
|
5
|
+
def initialize(text)
|
6
|
+
@text = text
|
7
|
+
end
|
8
|
+
|
9
|
+
# def processors
|
10
|
+
# [
|
11
|
+
# ,
|
12
|
+
# Processors::Email,
|
13
|
+
# Processors::Phone,
|
14
|
+
# Processors::Sentiment
|
15
|
+
# ]
|
16
|
+
# end
|
17
|
+
|
18
|
+
def dates
|
19
|
+
@dates ||= Processors::Dates.new(text).process
|
20
|
+
end
|
21
|
+
|
22
|
+
def emails
|
23
|
+
@email ||= Processors::Email.new(text).process
|
24
|
+
end
|
25
|
+
|
26
|
+
def phones
|
27
|
+
@phone ||= Processors::Phone.new(text).process
|
28
|
+
end
|
29
|
+
|
30
|
+
def sentiment
|
31
|
+
@sentiment ||= Processors::Sentiment.new(text).process
|
32
|
+
end
|
33
|
+
end
|
34
|
+
end
|
data/lib/rundown/processor.rb
CHANGED
@@ -0,0 +1,17 @@
|
|
1
|
+
module Rundown
|
2
|
+
module Processors
|
3
|
+
class Dates < Rundown::Processor
|
4
|
+
attr_accessor :text, :parser
|
5
|
+
|
6
|
+
def initialize(words, parser=Nickel)
|
7
|
+
@text = words
|
8
|
+
@parser = parser
|
9
|
+
end
|
10
|
+
|
11
|
+
def process
|
12
|
+
parser.parse(text).occurrences
|
13
|
+
end
|
14
|
+
|
15
|
+
end
|
16
|
+
end
|
17
|
+
end
|
@@ -0,0 +1,29 @@
|
|
1
|
+
module Rundown
|
2
|
+
module Processors
|
3
|
+
class Email < Rundown::Processor
|
4
|
+
REGEX = /\A[^@\s]+@([^@\s]+\.)+[^@\s]+\z/
|
5
|
+
|
6
|
+
def cleanup_words
|
7
|
+
words = @words.map { |word|
|
8
|
+
word.gsub!(/\(|\)/, "")
|
9
|
+
}
|
10
|
+
end
|
11
|
+
|
12
|
+
def process
|
13
|
+
cleanup_words
|
14
|
+
words.select { |word|
|
15
|
+
Array(word.match(REGEX))[0]
|
16
|
+
}.reject(&:empty?).map {|word|
|
17
|
+
word.split('@')
|
18
|
+
}.reject { |words|
|
19
|
+
words.size < 2
|
20
|
+
}.select { |words|
|
21
|
+
x = Array(words.last).last.to_s.split('.').last
|
22
|
+
x.length <= 4 && !x.match(/\d+/)
|
23
|
+
}.map { |words|
|
24
|
+
words.join("@")
|
25
|
+
}
|
26
|
+
end
|
27
|
+
end
|
28
|
+
end
|
29
|
+
end
|
@@ -0,0 +1,20 @@
|
|
1
|
+
module Rundown
|
2
|
+
module Processors
|
3
|
+
class Phone < Rundown::Processor
|
4
|
+
attr_accessor :validator, :text
|
5
|
+
|
6
|
+
def initialize(words, validator=Phony)
|
7
|
+
@text = words
|
8
|
+
@validator = validator
|
9
|
+
end
|
10
|
+
|
11
|
+
def plausible?(number)
|
12
|
+
validator.plausible?(number)
|
13
|
+
end
|
14
|
+
|
15
|
+
def process
|
16
|
+
text.scan(/(\+?(\(|\)|[0-9]|\s|-|\.){4,20})/).select {|e| plausible?(e) }
|
17
|
+
end
|
18
|
+
end
|
19
|
+
end
|
20
|
+
end
|
@@ -0,0 +1,16 @@
|
|
1
|
+
module Rundown
|
2
|
+
module Processors
|
3
|
+
class Sentiment < Rundown::Processor
|
4
|
+
attr_accessor :parser
|
5
|
+
|
6
|
+
def initialize(words, parser=SentimentParser)
|
7
|
+
@parser = parser
|
8
|
+
super(words)
|
9
|
+
end
|
10
|
+
|
11
|
+
def process
|
12
|
+
parser.parse(words.join(' '))
|
13
|
+
end
|
14
|
+
end
|
15
|
+
end
|
16
|
+
end
|
data/lib/rundown/version.rb
CHANGED
data/rundown.gemspec
CHANGED
@@ -8,8 +8,8 @@ Gem::Specification.new do |spec|
|
|
8
8
|
spec.version = Rundown::VERSION
|
9
9
|
spec.authors = ["Jared Fraser"]
|
10
10
|
spec.email = ["dev@jsf.io"]
|
11
|
-
spec.description = %q{
|
12
|
-
spec.summary = %q{
|
11
|
+
spec.description = %q{Extracts dates, phone numbers, sentiment and other items from naturally worded text.}
|
12
|
+
spec.summary = %q{Natural Language Processor}
|
13
13
|
spec.homepage = ""
|
14
14
|
spec.license = "MIT"
|
15
15
|
|
@@ -0,0 +1,18 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
module Rundown
|
4
|
+
module Processors
|
5
|
+
describe Dates do
|
6
|
+
describe 'month' do
|
7
|
+
{
|
8
|
+
"I will see you on 12/15/2013" => Date.new(2013, 12, 15),
|
9
|
+
" the 28th of december." => Date.new(Time.now.year, 12, 28)
|
10
|
+
}.each do |input, expected|
|
11
|
+
it "parse '#{input}' to '#{expected}'" do
|
12
|
+
expect(Rundown::Processors::Dates.new(input).process.first.start_date.to_date).to eql(expected)
|
13
|
+
end
|
14
|
+
end
|
15
|
+
end
|
16
|
+
end
|
17
|
+
end
|
18
|
+
end
|
data/spec/rundown_spec.rb
CHANGED
@@ -5,24 +5,41 @@ describe Rundown do
|
|
5
5
|
"I'm sorry, I'm extremely busy right now. I just looked at the clock, and it's 12:54 AM, I've still got a lot of work to do. Don't worry about the event tomorrow, it's been moved ahead a week, the 28th of december. Remember though, you've got to call to get a ticket soon, their # is 1-212-323-1239. Their website says it costs $23 per person.
|
6
6
|
If you've got enough time, they have some more information on their website, http://theevent.com.
|
7
7
|
Regards,
|
8
|
-
David (david32@gmail.com)"
|
8
|
+
David (david32@gmail.com)"
|
9
9
|
}
|
10
10
|
|
11
11
|
describe 'emails' do
|
12
|
-
it { expect(Rundown::
|
12
|
+
it { expect(Rundown::Processors::Email.new(subject).process).to eql ["david32@gmail.com"]}
|
13
13
|
end
|
14
14
|
|
15
15
|
describe 'dates' do
|
16
16
|
# FIXME: remove dependence on start date knowledge
|
17
|
-
it { expect(Rundown::
|
17
|
+
it { expect(Rundown::Processors::Dates.new(subject).process.map(&:start_date).map(&:date)).to include "20131228"}
|
18
18
|
end
|
19
19
|
|
20
20
|
describe 'phones' do
|
21
|
-
it { pending; expect(Rundown::
|
21
|
+
it { pending; expect(Rundown::Processors::Phone.new(subject).process).to eql ["212-323-1239"]}
|
22
22
|
end
|
23
23
|
|
24
24
|
describe 'sentiment' do
|
25
|
-
it { expect(Rundown::
|
25
|
+
it { expect(Rundown::Processors::Sentiment.new(subject).process.floor).to eql(2) }
|
26
|
+
end
|
27
|
+
|
28
|
+
context 'basic string' do
|
29
|
+
subject { Rundown.parse("I'll see you on the 18th, give me a ring on 07912 345 678. - Jerertt, me@example.com") }
|
30
|
+
|
31
|
+
it 'extracts emails' do
|
32
|
+
expect(subject.emails).to eql(["me@example.com"])
|
33
|
+
end
|
34
|
+
|
35
|
+
it 'extracts phone numbers' do
|
36
|
+
pending
|
37
|
+
expect(subject.phones).to eql(["07912 345 678"])
|
38
|
+
end
|
39
|
+
|
40
|
+
it 'extracts sentiment' do
|
41
|
+
expect(subject.sentiment).to eql(0.791666666667)
|
42
|
+
end
|
26
43
|
end
|
27
44
|
|
28
45
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: rundown
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.
|
4
|
+
version: 0.0.2
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -107,7 +107,8 @@ dependencies:
|
|
107
107
|
- - '>='
|
108
108
|
- !ruby/object:Gem::Version
|
109
109
|
version: '0'
|
110
|
-
description:
|
110
|
+
description: Extracts dates, phone numbers, sentiment and other items from naturally
|
111
|
+
worded text.
|
111
112
|
email:
|
112
113
|
- dev@jsf.io
|
113
114
|
executables: []
|
@@ -121,14 +122,19 @@ files:
|
|
121
122
|
- README.md
|
122
123
|
- Rakefile
|
123
124
|
- lib/rundown.rb
|
124
|
-
- lib/rundown/
|
125
|
-
- lib/rundown/email_processor.rb
|
126
|
-
- lib/rundown/phone_processor.rb
|
125
|
+
- lib/rundown/parser.rb
|
127
126
|
- lib/rundown/processor.rb
|
128
|
-
- lib/rundown/
|
127
|
+
- lib/rundown/processors/dates.rb
|
128
|
+
- lib/rundown/processors/email.rb
|
129
|
+
- lib/rundown/processors/phone.rb
|
130
|
+
- lib/rundown/processors/sentiment.rb
|
129
131
|
- lib/rundown/version.rb
|
130
132
|
- lib/rundown/words.rb
|
131
133
|
- rundown.gemspec
|
134
|
+
- spec/processors/dates_spec.rb
|
135
|
+
- spec/processors/email_spec.rb
|
136
|
+
- spec/processors/phone_spec.rb
|
137
|
+
- spec/processors/sentiment_spec.rb
|
132
138
|
- spec/rundown_spec.rb
|
133
139
|
- spec/spec_helper.rb
|
134
140
|
homepage: ''
|
@@ -146,7 +152,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
146
152
|
version: '0'
|
147
153
|
segments:
|
148
154
|
- 0
|
149
|
-
hash: -
|
155
|
+
hash: -3027207295298503492
|
150
156
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
151
157
|
none: false
|
152
158
|
requirements:
|
@@ -155,13 +161,17 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
155
161
|
version: '0'
|
156
162
|
segments:
|
157
163
|
- 0
|
158
|
-
hash: -
|
164
|
+
hash: -3027207295298503492
|
159
165
|
requirements: []
|
160
166
|
rubyforge_project:
|
161
167
|
rubygems_version: 1.8.25
|
162
168
|
signing_key:
|
163
169
|
specification_version: 3
|
164
|
-
summary:
|
170
|
+
summary: Natural Language Processor
|
165
171
|
test_files:
|
172
|
+
- spec/processors/dates_spec.rb
|
173
|
+
- spec/processors/email_spec.rb
|
174
|
+
- spec/processors/phone_spec.rb
|
175
|
+
- spec/processors/sentiment_spec.rb
|
166
176
|
- spec/rundown_spec.rb
|
167
177
|
- spec/spec_helper.rb
|
@@ -1,27 +0,0 @@
|
|
1
|
-
module Rundown
|
2
|
-
class EmailProcessor < Processor
|
3
|
-
REGEX = /\A[^@\s]+@([^@\s]+\.)+[^@\s]+\z/
|
4
|
-
|
5
|
-
def cleanup_words
|
6
|
-
words = @words.map { |word|
|
7
|
-
word.gsub!(/\(|\)/, "")
|
8
|
-
}
|
9
|
-
end
|
10
|
-
|
11
|
-
def process
|
12
|
-
cleanup_words
|
13
|
-
words.select { |word|
|
14
|
-
Array(word.match(REGEX))[0]
|
15
|
-
}.reject(&:empty?).map {|word|
|
16
|
-
word.split('@')
|
17
|
-
}.reject { |words|
|
18
|
-
words.size < 2
|
19
|
-
}.select { |words|
|
20
|
-
x = Array(words.last).last.to_s.split('.').last
|
21
|
-
x.length <= 4 && !x.match(/\d+/)
|
22
|
-
}.map { |words|
|
23
|
-
words.join("@")
|
24
|
-
}
|
25
|
-
end
|
26
|
-
end
|
27
|
-
end
|
@@ -1,18 +0,0 @@
|
|
1
|
-
module Rundown
|
2
|
-
class PhoneProcessor < Processor
|
3
|
-
attr_accessor :validator, :text
|
4
|
-
|
5
|
-
def initialize(words, validator=Phony)
|
6
|
-
@text = words.join(' ')
|
7
|
-
@validator = validator
|
8
|
-
end
|
9
|
-
|
10
|
-
def plausible?(number)
|
11
|
-
validator.plausible?(number)
|
12
|
-
end
|
13
|
-
|
14
|
-
def process
|
15
|
-
text.scan(/(\+?(\(|\)|[0-9]|\s|-|\.){4,20})/).select {|e| plausible?(e) }
|
16
|
-
end
|
17
|
-
end
|
18
|
-
end
|