natural 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- data/Gemfile +17 -0
- data/Gemfile.lock +59 -0
- data/LICENSE.txt +20 -0
- data/README.markdown +165 -0
- data/Rakefile +55 -0
- data/VERSION +1 -0
- data/features/natural.feature +9 -0
- data/features/step_definitions/natural_steps.rb +0 -0
- data/features/support/env.rb +16 -0
- data/lib/natural/array.rb +22 -0
- data/lib/natural/fragment.rb +332 -0
- data/lib/natural/fragments/example.rb +38 -0
- data/lib/natural/fragments/misc.rb +30 -0
- data/lib/natural/fragments/timeframes.rb +57 -0
- data/lib/natural/inflections.rb +6 -0
- data/lib/natural/string.rb +5 -0
- data/lib/natural.rb +147 -0
- data/natural.gemspec +88 -0
- data/spec/array_spec.rb +7 -0
- data/spec/natural_spec.rb +7 -0
- data/spec/spec_helper.rb +15 -0
- data/spec/string_spec.rb +10 -0
- metadata +183 -0
data/Gemfile
ADDED
@@ -0,0 +1,17 @@
|
|
1
|
+
source "http://rubygems.org"
|
2
|
+
# Add dependencies required to use your gem here.
|
3
|
+
gem 'rubytree' # store parse results in a tree
|
4
|
+
gem 'logger' # logging
|
5
|
+
gem 'map_by_method' # DRYs up map {|a| a.method}
|
6
|
+
gem 'activesupport' # singularize, underscore, etc.
|
7
|
+
|
8
|
+
# Add dependencies to develop your gem here.
|
9
|
+
# Include everything needed to run rake, tests, features, etc.
|
10
|
+
group :development do
|
11
|
+
gem "rspec", "~> 2.8.0"
|
12
|
+
gem "rdoc", "~> 3.12"
|
13
|
+
gem "cucumber", ">= 0"
|
14
|
+
gem "bundler", "~> 1.0.0"
|
15
|
+
gem "jeweler", "~> 1.8.3"
|
16
|
+
gem "simplecov", ">= 0"
|
17
|
+
end
|
data/Gemfile.lock
ADDED
@@ -0,0 +1,59 @@
|
|
1
|
+
GEM
|
2
|
+
remote: http://rubygems.org/
|
3
|
+
specs:
|
4
|
+
activesupport (3.2.1)
|
5
|
+
i18n (~> 0.6)
|
6
|
+
multi_json (~> 1.0)
|
7
|
+
builder (3.0.0)
|
8
|
+
cucumber (1.1.9)
|
9
|
+
builder (>= 2.1.2)
|
10
|
+
diff-lcs (>= 1.1.2)
|
11
|
+
gherkin (~> 2.9.0)
|
12
|
+
json (>= 1.4.6)
|
13
|
+
term-ansicolor (>= 1.0.6)
|
14
|
+
diff-lcs (1.1.3)
|
15
|
+
gherkin (2.9.0)
|
16
|
+
json (>= 1.4.6)
|
17
|
+
git (1.2.5)
|
18
|
+
i18n (0.6.0)
|
19
|
+
jeweler (1.8.3)
|
20
|
+
bundler (~> 1.0)
|
21
|
+
git (>= 1.2.5)
|
22
|
+
rake
|
23
|
+
rdoc
|
24
|
+
json (1.6.5)
|
25
|
+
logger (1.2.8)
|
26
|
+
map_by_method (0.8.3)
|
27
|
+
multi_json (1.1.0)
|
28
|
+
rake (0.9.2.2)
|
29
|
+
rdoc (3.12)
|
30
|
+
json (~> 1.4)
|
31
|
+
rspec (2.8.0)
|
32
|
+
rspec-core (~> 2.8.0)
|
33
|
+
rspec-expectations (~> 2.8.0)
|
34
|
+
rspec-mocks (~> 2.8.0)
|
35
|
+
rspec-core (2.8.0)
|
36
|
+
rspec-expectations (2.8.0)
|
37
|
+
diff-lcs (~> 1.1.2)
|
38
|
+
rspec-mocks (2.8.0)
|
39
|
+
rubytree (0.8.2)
|
40
|
+
simplecov (0.6.1)
|
41
|
+
multi_json (~> 1.0)
|
42
|
+
simplecov-html (~> 0.5.3)
|
43
|
+
simplecov-html (0.5.3)
|
44
|
+
term-ansicolor (1.0.7)
|
45
|
+
|
46
|
+
PLATFORMS
|
47
|
+
ruby
|
48
|
+
|
49
|
+
DEPENDENCIES
|
50
|
+
activesupport
|
51
|
+
bundler (~> 1.0.0)
|
52
|
+
cucumber
|
53
|
+
jeweler (~> 1.8.3)
|
54
|
+
logger
|
55
|
+
map_by_method
|
56
|
+
rdoc (~> 3.12)
|
57
|
+
rspec (~> 2.8.0)
|
58
|
+
rubytree
|
59
|
+
simplecov
|
data/LICENSE.txt
ADDED
@@ -0,0 +1,20 @@
|
|
1
|
+
Copyright (c) 2012 Scott Bonds
|
2
|
+
|
3
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
4
|
+
a copy of this software and associated documentation files (the
|
5
|
+
"Software"), to deal in the Software without restriction, including
|
6
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
7
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
8
|
+
permit persons to whom the Software is furnished to do so, subject to
|
9
|
+
the following conditions:
|
10
|
+
|
11
|
+
The above copyright notice and this permission notice shall be
|
12
|
+
included in all copies or substantial portions of the Software.
|
13
|
+
|
14
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
15
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
16
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
17
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
18
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
19
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
20
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/README.markdown
ADDED
@@ -0,0 +1,165 @@
|
|
1
|
+
# Natural
|
2
|
+
|
3
|
+
Natural provides a framework for answering 'naturally' worded questions like 'how many books did I buy last month' or 'list my Facebook friends'.
|
4
|
+
|
5
|
+
## Installation
|
6
|
+
|
7
|
+
$ gem install natural
|
8
|
+
|
9
|
+
## Example
|
10
|
+
|
11
|
+
$ require 'natural'
|
12
|
+
$ Natural.new('how many days of the week start with the letter T').answer
|
13
|
+
=> 2
|
14
|
+
|
15
|
+
## Example Log
|
16
|
+
|
17
|
+
[n][perf] matching took 1.6 seconds
|
18
|
+
[n][perf] scoring took 0.1 seconds
|
19
|
+
[n]
|
20
|
+
[n][scor] 45 how many | days of the week | start with the letter t
|
21
|
+
[n][scor] 41 days of the week | start with the letter t
|
22
|
+
[n][scor] 31 how many | days | week | start with the letter t
|
23
|
+
[n][scor] 30 how many | days | start with the letter t
|
24
|
+
[n][scor] 30 how many | week | start with the letter t
|
25
|
+
[n][scor] 29 how many | start with the letter t
|
26
|
+
[n][scor] 27 days | week | start with the letter t
|
27
|
+
[n][scor] 26 week | start with the letter t
|
28
|
+
[n][scor] 26 days | start with the letter t
|
29
|
+
[n][scor] 25 start with the letter t
|
30
|
+
[n][scor] 20 how many | days of the week
|
31
|
+
[n][scor] 19 how many | days of the week | commence
|
32
|
+
[n][scor] 19 how many | days of the week | begin
|
33
|
+
[n][scor] 16 days of the week
|
34
|
+
[n][scor] 15 days of the week | begin
|
35
|
+
[n][scor] 15 days of the week | commence
|
36
|
+
[n][scor] 06 how many | days | week
|
37
|
+
[n][scor] 05 how many | week
|
38
|
+
[n][scor] 05 how many | days | week | begin
|
39
|
+
[n][scor] 05 how many | days
|
40
|
+
[n][scor] 05 how many | days | week | commence
|
41
|
+
[n][scor] 04 how many | week | commence
|
42
|
+
[n][scor] 04 how many | days | commence
|
43
|
+
[n][scor] 04 how many
|
44
|
+
[n][scor] 04 how many | days | begin
|
45
|
+
[n][scor] 04 how many | week | begin
|
46
|
+
[n][scor] 03 how many | begin
|
47
|
+
[n][scor] 03 how many | commence
|
48
|
+
[n][scor] 02 days | week
|
49
|
+
[n][scor] 01 days
|
50
|
+
[n][scor] 01 week
|
51
|
+
[n][scor] 01 days | week | begin
|
52
|
+
[n][scor] 01 days | week | commence
|
53
|
+
[n][scor] 00 week | commence
|
54
|
+
[n][scor] 00 days | begin
|
55
|
+
[n][scor] 00 week | begin
|
56
|
+
[n][scor] 00 days | commence
|
57
|
+
[n][scor] -1 commence
|
58
|
+
[n][scor] -1 begin
|
59
|
+
[n]
|
60
|
+
[n][tree] * how many days of the week start with the letter T fragment (0..10)
|
61
|
+
[n][tree] |---> how many count (0..1)
|
62
|
+
[n][tree] |---> days of the week day_names (2..5)
|
63
|
+
[n][tree] +---+ start with the letter t starts_with_letter (6..10)
|
64
|
+
[n][tree] |---> start with the letter fragment (6..9)
|
65
|
+
[n][tree] +---> t fragment (10)
|
66
|
+
[n]
|
67
|
+
[n][orig] how many days of the week start with the letter T
|
68
|
+
[n][used] how many days of the week start with the letter t
|
69
|
+
|
70
|
+
## Creating Your Vocabulary
|
71
|
+
|
72
|
+
* create a class that inherits from Natural::Fragment
|
73
|
+
* override class method 'find' to specify which phrases it should match
|
74
|
+
- return a hash of all matches found so far, keys are the match class, values are the matches for that class
|
75
|
+
* optional: override instance method 'data' to specify which data a match adds to the answer
|
76
|
+
* optional: override instance method 'filter' to specify which method to call on each data set in the answer to filter out results
|
77
|
+
* optional: override instance method 'aggregator' to specify which method to call on each data set to aggregate results
|
78
|
+
* optional: override instance method 'score' to specify the relative value of a match
|
79
|
+
|
80
|
+
This is a bit easier to understand by looking at an example, take a gander at: lib/natural/fragments/example.rb
|
81
|
+
|
82
|
+
### Simple Fragments
|
83
|
+
|
84
|
+
class Letter < Natural::Fragment
|
85
|
+
def self.find(options)
|
86
|
+
super options.merge(:looking_for => ('a'..'z').to_a)
|
87
|
+
end
|
88
|
+
end
|
89
|
+
|
90
|
+
#### Plurals and Inflectors
|
91
|
+
|
92
|
+
Everything is singularized and downcased before being matched. lib/natural/inflections.rb can be used to customize the singularization behavior.
|
93
|
+
|
94
|
+
### Compound Fragments
|
95
|
+
|
96
|
+
class StartsWithLetter < Natural::Fragment
|
97
|
+
def self.find(options)
|
98
|
+
super options.merge(:looking_for => {:and => ['start with the letter', Letter]})
|
99
|
+
end
|
100
|
+
end
|
101
|
+
|
102
|
+
### Alternative Spellings, Synonyms, and Expansions
|
103
|
+
|
104
|
+
Natural.new('how many days of the wek beginn with the letter T').answer
|
105
|
+
|
106
|
+
[n][tree] * how many days of the wek beginn with the letter T fragment (0..10)
|
107
|
+
[n][tree] |---> how many count (0..1)
|
108
|
+
[n][tree] |---+ days of the week day_names (2..5)
|
109
|
+
[n][tree] | |---> days of the fragment (2..4)
|
110
|
+
[n][tree] +---+ week spelling (5)
|
111
|
+
[n][tree] +---> wek fragment (5)
|
112
|
+
[n][tree] +---+ start with the letter t starts_with_letter (6..10)
|
113
|
+
[n][tree] |---+ start with the letter fragment (6..9)
|
114
|
+
[n][tree] | |---+ start synonym (6)
|
115
|
+
[n][tree] | +---+ begin spelling (6)
|
116
|
+
[n][tree] +---> beginn fragment (6)
|
117
|
+
[n][tree] | +---> with the letter fragment (7..9)
|
118
|
+
[n][tree] +---> t fragment (10)
|
119
|
+
[n]
|
120
|
+
[n][orig] how many days of the wek begin with the letter T
|
121
|
+
[n][used] how many days of the week start with the letter t
|
122
|
+
|
123
|
+
Natural.new('movies').answer
|
124
|
+
|
125
|
+
[n][tree] * movies fragment (0)
|
126
|
+
[n][tree] +---+ blu-rays blu_ray (0)
|
127
|
+
[n][tree] +---+ blu-rays expansion (0)
|
128
|
+
[n][tree] +---> movies fragment (0)
|
129
|
+
|
130
|
+
### Scoring
|
131
|
+
|
132
|
+
## Generating the Answer
|
133
|
+
|
134
|
+
### Sets
|
135
|
+
|
136
|
+
### Filters
|
137
|
+
|
138
|
+
### Aggregators
|
139
|
+
|
140
|
+
### Putting Them Together
|
141
|
+
|
142
|
+
conform to the Natural way and use the built in answer method or navigate the tree and assemble sets, filters, and aggregators any way you want
|
143
|
+
|
144
|
+
## Performance
|
145
|
+
|
146
|
+
Natural has not (yet) been optimized for cpu or memory usage. Natural works best with short questions and a small vocabulary.
|
147
|
+
|
148
|
+
## Testing
|
149
|
+
|
150
|
+
Yah. Need to write these. :p
|
151
|
+
|
152
|
+
## Contributing to Natural
|
153
|
+
|
154
|
+
* Check out the latest master to make sure the feature hasn't been implemented or the bug hasn't been fixed yet.
|
155
|
+
* Check out the issue tracker to make sure someone hasn't already requested it and/or contributed it.
|
156
|
+
* Fork the project.
|
157
|
+
* Start a feature/bugfix branch.
|
158
|
+
* Commit and push until you are happy with your contribution.
|
159
|
+
* Make sure to add tests for it. This is important so I don't break it in a future version unintentionally.
|
160
|
+
* Please try not to mess with the Rakefile, version, or history. If you want to have your own version, or it is otherwise necessary, that is fine, but please isolate the change to its own commit so I can cherry-pick around it.
|
161
|
+
|
162
|
+
## Copyright
|
163
|
+
|
164
|
+
Copyright (c) 2012 Scott Bonds. See LICENSE.txt for
|
165
|
+
further details.
|
data/Rakefile
ADDED
@@ -0,0 +1,55 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
require 'rubygems'
|
4
|
+
require 'bundler'
|
5
|
+
begin
|
6
|
+
Bundler.setup(:default, :development)
|
7
|
+
rescue Bundler::BundlerError => e
|
8
|
+
$stderr.puts e.message
|
9
|
+
$stderr.puts "Run `bundle install` to install missing gems"
|
10
|
+
exit e.status_code
|
11
|
+
end
|
12
|
+
require 'rake'
|
13
|
+
|
14
|
+
require 'jeweler'
|
15
|
+
Jeweler::Tasks.new do |gem|
|
16
|
+
# gem is a Gem::Specification... see http://docs.rubygems.org/read/chapter/20 for more options
|
17
|
+
gem.name = "natural"
|
18
|
+
gem.homepage = "http://github.com/bonds/natural"
|
19
|
+
gem.license = "MIT"
|
20
|
+
gem.summary = "natural language parser"
|
21
|
+
gem.description = "Natural provides a framework for answering 'naturally' worded questions like 'how many books did I buy last month' or 'list my Facebook friends'."
|
22
|
+
gem.email = "scott@ggr.com"
|
23
|
+
gem.authors = ["Scott Bonds"]
|
24
|
+
# dependencies defined in Gemfile
|
25
|
+
end
|
26
|
+
Jeweler::RubygemsDotOrgTasks.new
|
27
|
+
|
28
|
+
require 'rspec/core'
|
29
|
+
require 'rspec/core/rake_task'
|
30
|
+
RSpec::Core::RakeTask.new(:spec) do |spec|
|
31
|
+
spec.pattern = FileList['spec/**/*_spec.rb']
|
32
|
+
end
|
33
|
+
|
34
|
+
RSpec::Core::RakeTask.new(:rcov) do |spec|
|
35
|
+
spec.pattern = 'spec/**/*_spec.rb'
|
36
|
+
spec.rcov = true
|
37
|
+
end
|
38
|
+
|
39
|
+
require 'cucumber/rake/task'
|
40
|
+
Cucumber::Rake::Task.new(:features)
|
41
|
+
|
42
|
+
task :default do
|
43
|
+
Rake::Task[:spec].invoke
|
44
|
+
Rake::Task[:features].invoke
|
45
|
+
end
|
46
|
+
|
47
|
+
require 'rdoc/task'
|
48
|
+
Rake::RDocTask.new do |rdoc|
|
49
|
+
version = File.exist?('VERSION') ? File.read('VERSION') : ""
|
50
|
+
|
51
|
+
rdoc.rdoc_dir = 'rdoc'
|
52
|
+
rdoc.title = "natural #{version}"
|
53
|
+
rdoc.rdoc_files.include('README*')
|
54
|
+
rdoc.rdoc_files.include('lib/**/*.rb')
|
55
|
+
end
|
data/VERSION
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
0.1.0
|
File without changes
|
@@ -0,0 +1,16 @@
|
|
1
|
+
require 'simplecov'
|
2
|
+
SimpleCov.start
|
3
|
+
|
4
|
+
require 'bundler'
|
5
|
+
begin
|
6
|
+
Bundler.setup(:default, :development)
|
7
|
+
rescue Bundler::BundlerError => e
|
8
|
+
$stderr.puts e.message
|
9
|
+
$stderr.puts "Run `bundle install` to install missing gems"
|
10
|
+
exit e.status_code
|
11
|
+
end
|
12
|
+
|
13
|
+
$LOAD_PATH.unshift(File.dirname(__FILE__) + '/../../lib')
|
14
|
+
require 'natural'
|
15
|
+
|
16
|
+
require 'rspec/expectations'
|
@@ -0,0 +1,22 @@
|
|
1
|
+
# from http://snippets.dzone.com/posts/show/4677
|
2
|
+
class Array
  # Collapses the array into a list of ranges of consecutive values.
  #
  # nil entries and duplicates are ignored and the remaining values are
  # sorted first, so the input order does not matter. Elements must respond
  # to #succ and be mutually comparable (e.g. Integers).
  #
  #   [1, 2, 3, 5, 6, 9].to_ranges  # => [1..3, 5..6, 9..9]
  #   [].to_ranges                  # => []
  #
  # Returns an Array of Range objects in ascending order.
  def to_ranges
    values = compact.uniq.sort
    ranges = []
    return ranges if values.empty?

    # Seed the left endpoint from the sorted copy, not from self: self may be
    # unsorted or start with nil, which would produce a wrong first range.
    left, right = values.first, nil
    values.each do |obj|
      # A gap between right and obj closes the current run and starts a new one.
      if right && obj != right.succ
        ranges << Range.new(left, right)
        left = obj
      end
      right = obj
    end
    ranges << Range.new(left, right)
    ranges
  end
end
|
@@ -0,0 +1,332 @@
|
|
1
|
+
class Natural
|
2
|
+
require 'tree'
|
3
|
+
|
4
|
+
class Fragment < Tree::TreeNode
|
5
|
+
attr_accessor :text, :score, :filter, :aggregator
|
6
|
+
|
7
|
+
def initialize(options={})
|
8
|
+
@ids = options[:ids]
|
9
|
+
self.text = options[:text]
|
10
|
+
super("#{GREEN}#{options[:text]}#{CLEAR} #{self.class.to_s.split('::').last.underscore} (#{self.id_range})", options[:text])
|
11
|
+
end
|
12
|
+
|
13
|
+
# recurse to the leaves and collect the ids of the underlying words
|
14
|
+
def ids
|
15
|
+
self.is_leaf? ? [@ids].flatten : self.children.inject([]) {|result, item| result += item.ids}
|
16
|
+
end
|
17
|
+
|
18
|
+
def ids=(values)
|
19
|
+
@ids = values
|
20
|
+
end
|
21
|
+
|
22
|
+
def id_range
|
23
|
+
@ids.size > 1 ? @ids.first..@ids.last : @ids.first
|
24
|
+
end
|
25
|
+
|
26
|
+
def all_filters
|
27
|
+
if self.is_leaf?
|
28
|
+
self.filter
|
29
|
+
else
|
30
|
+
self.children.inject('') do |result, item|
|
31
|
+
result = [result, self.filter, item.all_filters].select{|a| !a.blank?}.uniq.join('.')
|
32
|
+
end
|
33
|
+
end
|
34
|
+
end
|
35
|
+
|
36
|
+
# recurse to the leaves and print out all the words, applying all edits along the way
|
37
|
+
# Returns the words covered by this fragment as a single string.
#
# For a leaf, the text is taken from the topmost ancestor reachable through
# an unbroken chain of Natural::Alternative parents, i.e. the edited text,
# unless options[:without_edits] is set, in which case the leaf's own text
# is returned. For an inner node the children's strings are joined with
# single spaces.
#
# options - Hash; :without_edits => true returns the text as originally typed.
def to_s(options={})
  if self.is_leaf?
    location = self
    unless options[:without_edits]
      while !location.is_root? && location.parent.class < Natural::Alternative do
        location = location.parent
      end
    end
    location.text
  else
    # Pass the options down; otherwise :without_edits is silently dropped
    # as soon as the call leaves the node it was made on.
    self.children.map {|item| item.to_s(options)}.join(' ').strip
  end
end
|
50
|
+
|
51
|
+
def pretty_to_s(level=0)
|
52
|
+
result = ''
|
53
|
+
|
54
|
+
if is_root? || level == 0
|
55
|
+
result += '*'
|
56
|
+
else
|
57
|
+
result += "|" unless parent.is_last_sibling?
|
58
|
+
result += (' ' * ((level - 1) * 4 + (parent.is_last_sibling? ? 0 : -1)))
|
59
|
+
result += is_last_sibling? ? '+' : '|'
|
60
|
+
result += '---'
|
61
|
+
result += has_children? ? '+' : '>'
|
62
|
+
end
|
63
|
+
|
64
|
+
result += " #{name}\n"
|
65
|
+
children.each {|child| result += child.pretty_to_s(level+1)}
|
66
|
+
|
67
|
+
result
|
68
|
+
end
|
69
|
+
|
70
|
+
def data(context=nil)
|
71
|
+
nil
|
72
|
+
end
|
73
|
+
|
74
|
+
def score
|
75
|
+
self.to_s.split(' ').size ** 2
|
76
|
+
end
|
77
|
+
|
78
|
+
def clone(height=nil)
|
79
|
+
result = self.class.new(:ids => self.ids, :text => self.text)
|
80
|
+
if !height || height > 0
|
81
|
+
self.children.each do |child|
|
82
|
+
result << child.clone(height ? height-1 : nil)
|
83
|
+
end
|
84
|
+
end
|
85
|
+
result
|
86
|
+
end
|
87
|
+
|
88
|
+
# Searches options[:text] for occurrences of options[:looking_for] and
# returns a hash that maps match classes to arrays of matching fragments.
#
# options:
#   :text          - the sentence to search; it is split on spaces into words
#   :looking_for   - what to match: a String, an Array of Strings, a Fragment
#                    subclass, an Array or {:or => [...]} of alternatives, or
#                    {:and => [...]} for a sequence of strings and/or fragments
#   :matches       - matches found so far; reused as the result hash unless
#                    :merge_results is false
#   :merge_results - pass false to collect the results in a fresh hash
#   :match_class   - class to instantiate for each match (defaults to self)
#   :spellings, :synonyms, :expansions - vocabularies forwarded to nested searches
#
# Returns the hash of matches; when the requested class has already been
# searched, the existing :matches hash is returned untouched.
def self.find(options)
  text_to_search = options[:text]
  looking_for = options[:looking_for]
  old_matches = options[:matches] || {}
  if options[:matches] && (options[:merge_results].class == NilClass || options[:merge_results])
    new_matches = options[:matches]
  else
    new_matches = {}
  end
  match_class = options[:match_class] || self
  words = text_to_search.split(' ')

  case
  when looking_for.class == String || (looking_for.class == Array && looking_for.all? {|a| a.class == String})
    return old_matches if old_matches[match_class]
    looking_for = [looking_for] if looking_for.class != Array
    looking_for = looking_for.map{|a| a.singularize.downcase}

    # look for the longest possible matches and work our way down to the short ones
    0.upto(words.size-1) do |first|
      (words.size-1).downto(first) do |last|
        match = nil
        selection = words[(first..last)].join(' ').strip.downcase

        if looking_for.include?(selection.singularize.downcase)
          match = match_class.new(:ids => (first..last).to_a, :text => selection)
        end

        # didn't find a simple match, try swapping some or all words for alternatives and try again
        if !match && !(match_class < Natural::Alternative)
          fragments = old_matches.select {|k,v| k < Natural::Alternative && !v.blank?}.values.flatten.select {|a| a.ids.first >= first && a.ids.last <= last}

          # assemble a list of all the possible, non-overlapping swaps
          combinations = (1..fragments.size).inject([]) do |memo, i|
            fragments.combination(i).each do |combo|
              if !combo.combination(2).any? {|a| (a[0].ids.first..a[0].ids.last).overlaps?(a[1].ids.first..a[1].ids.last)}
                memo << combo
              end
            end
            memo
          end

          combinations.each do |combo|
            alternative_words = words.clone
            alternative_fragments = []

            combo.each do |fragment|
              alternative_words.slice!(fragment.ids.first..fragment.ids.last)
              alternative_words.insert(fragment.ids.first, fragment.to_s)
              alternative_fragments << fragment
            end
            alternative_selection = alternative_words[(first..last)].join(' ').strip.downcase

            if looking_for.include?(alternative_selection.singularize.downcase)
              match = match_class.new(:ids => (first..last).to_a, :text => alternative_selection)
              # words not covered by a swapped-in alternative become plain fragments
              leftovers = ((first..last).to_a - combo.map {|a| a.ids}.flatten).to_ranges
              leftovers.each do |range|
                alternative_fragments << Fragment.new(:ids => range.to_a, :text => words[range].join(' '))
              end
              alternative_fragments.sort_by {|a| a.ids.first}.each {|a| match << a}
            end

          end
        end

        new_matches[match_class] = [] if !new_matches[match_class]
        if match
          if match_class < Natural::Alternative
            new_matches = recurse_alternatives(match, options)
          else
            new_matches[match_class] << match
          end
        end
      end
    end

  # a Fragment subclass: delegate to that class's own find. The check is on
  # the class itself (looking_for.class is always Class for a class object).
  when looking_for.is_a?(Class) && looking_for <= Fragment

    return old_matches if old_matches[looking_for]
    new_matches = looking_for.find(:text => text_to_search, :matches => old_matches, :spellings => options[:spellings], :synonyms => options[:synonyms], :expansions => options[:expansions])

  when (looking_for.class == Hash && looking_for[:or]) || looking_for.class == Array

    # NOTE: for a {:or => [...]} hash, each yields the [:or, terms] pair; that
    # pair is searched as an Array, where :or matches no branch and is ignored.
    looking_for.each do |term|
      new_matches = Fragment.find(:text => text_to_search, :looking_for => term, :matches => old_matches, :match_class => match_class, :spellings => options[:spellings], :synonyms => options[:synonyms], :expansions => options[:expansions])
    end

  when looking_for.class == Hash && looking_for[:and] # look for a sequence of strings and/or fragments
    looking_for = looking_for[:and]
    # first we find the starting term
    if looking_for.first.class == Class && looking_for.first <= Fragment
      new_matches = looking_for.first.find(:text => text_to_search, :matches => old_matches, :spellings => options[:spellings], :synonyms => options[:synonyms], :expansions => options[:expansions])
      starting_term_matches = old_matches[looking_for.first]
    else
      starting_term_matches = Fragment.find(options.merge(:looking_for => looking_for.first, :merge_results => false)).values.first
    end

    # look for the next string/fragment in the sequence
    (starting_term_matches || []).each do |first_term|
      fragments = [first_term]
      looking_for[1..-1].each do |term|
        if term.class == Class && term <= Fragment
          new_matches = term.find(:text => text_to_search, :matches => old_matches, :spellings => options[:spellings], :synonyms => options[:synonyms], :expansions => options[:expansions]) if !old_matches[term]
          new_matches[term].each do |match|
            # only accept a match that starts right after the previous fragment
            if match.ids.first == fragments.select {|a| a}.last.ids.last + 1
              fragments << match
            end
          end
        elsif [Array, Hash, String].include?(term.class)
          term_updated = term.class == String ? [term] : term
          (Fragment.find(:text => text_to_search, :looking_for => term_updated, :spellings => options[:spellings], :synonyms => options[:synonyms], :expansions => options[:expansions]).values.first || []).each do |match|
            if match.ids.first == fragments.select {|a| a}.last.ids.last + 1
              fragments << Fragment.new(:ids => match.ids, :text => match.to_s)
            end
          end
        else # turn nils into fragments
          last_fragment = fragments.select {|a| a}.last
          id = last_fragment.ids.last + 1
          fragments << Fragment.new(:ids => [id], :text => words[id])
        end
      end

      # found a match
      looking_for_updated = looking_for.map{|a| [String, Array, Hash, NilClass].include?(a.class) ? Fragment : a}

      # the sequence matched only if every term produced exactly one fragment of the expected class
      if fragments.map{|a| a.class} == looking_for_updated
        ids = (fragments.first.ids.first..fragments.last.ids.last).to_a
        text = fragments.inject('') {|memo, fragment| memo += fragment.to_s + ' '}.strip
        match = match_class.new(:ids => ids, :text => text)
        fragments.each do |fragment|
          match << fragment
        end

        new_matches[match_class] = [] if !new_matches[match_class]
        if match
          new_matches[match_class] << match
        end
      end
    end
  end

  new_matches
end
|
231
|
+
|
232
|
+
# Records every replacement of an Alternative match in options[:matches],
# then searches each replacement's text with the Alternative subclasses not
# yet present in that replacement's tree, wrapping and recursing on whatever
# they find.
#
# match   - an Alternative fragment responding to #replacements(options)
# options - the options hash given to find; options[:matches] is updated in place
#
# Returns options[:matches].
def self.recurse_alternatives(match, options)
  new_matches = options[:matches]

  match.replacements(options).each do |replacement|
    # key on match.class; there is no match_class local in this method
    new_matches[match.class] = [] if !new_matches[match.class]
    new_matches[match.class] << replacement

    # only try alternative classes that do not already appear in this replacement's tree
    unused_alternatives = ObjectSpace.each_object(Class).select {|a| a < Natural::Alternative}
    replacement.breadth_each {|node| unused_alternatives -= [node.class]}

    unused_alternatives.each do |alternative|
      # find returns an empty hash when there is nothing to search, so values.first may be nil
      next_layer = alternative.find(options.merge(:text => replacement.to_s, :matches => {})).values.first || []
      next_layer.each do |frag|
        new_frag = frag.class.new(:ids => replacement.ids, :text => frag.text)
        new_frag << replacement.clone
        new_matches = recurse_alternatives(new_frag, options)
      end
    end
  end

  new_matches
end
|
254
|
+
|
255
|
+
end
|
256
|
+
|
257
|
+
class Word < Fragment
|
258
|
+
end
|
259
|
+
|
260
|
+
class Unused < Fragment
|
261
|
+
end
|
262
|
+
|
263
|
+
class Alternative < Fragment
|
264
|
+
def score
|
265
|
+
super - 2
|
266
|
+
end
|
267
|
+
end
|
268
|
+
|
269
|
+
# An alternative that maps a known misspelling back to its canonical word.
class Spelling < Alternative
  # Matches any misspelling listed in the values of options[:spellings]
  # (a hash of canonical word => array of misspellings).
  def self.find(options)
    super options.merge(:looking_for => options[:spellings].values.flatten)
  end

  # Returns the fragments that can stand in for this misspelling.
  #
  # For a node without children: a one-element array holding a new Spelling
  # fragment with the canonical text (pluralized when this fragment's text is
  # plural), whose child is a plain Fragment with the words as originally
  # typed. For a node that already has children: [self]. When no entry of
  # options[:spellings] lists this text: an empty array.
  def replacements(options)
    options[:spellings].each do |canonical, alternatives|
      next unless alternatives.include?(self.to_s.singularize.downcase)
      # return a flat list, as Synonym and Expansion do
      return [self] unless self.node_height == 0

      alternative_text = canonical
      alternative_text = alternative_text.pluralize if self.to_s.plural?
      alternative = self.class.new(:ids => self.ids, :text => alternative_text)
      alternative << Fragment.new(:ids => (alternative.ids.first..alternative.ids.last).to_a, :text => options[:text].split(' ')[alternative.ids.first..alternative.ids.last].join(' '))
      return [alternative]
    end

    # no canonical spelling lists this text: nothing to replace it with
    []
  end
end
|
290
|
+
|
291
|
+
# An alternative that swaps a word for the other members of its synonym group.
class Synonym < Alternative
  # Matches any word listed in the values of options[:synonyms]
  # (a hash whose values are arrays of interchangeable words).
  def self.find(options)
    super options.merge(:looking_for => options[:synonyms].values.flatten)
  end

  # Returns the fragments that can stand in for this word.
  #
  # For a node without children: one new Synonym fragment per other member of
  # the first group containing this text (pluralized when this fragment's
  # text is plural), each with a plain Fragment child holding the words as
  # originally typed. For a node that already has children: [self]. When no
  # group contains this text: an empty array.
  def replacements(options)
    options[:synonyms].values.each do |alternatives|
      if alternatives.include?(self.to_s.singularize.downcase)
        return (alternatives - [self.to_s]).map do |alternative_text|
          if self.node_height == 0
            alternative_text = alternative_text.pluralize if self.to_s.plural?
            alternative = self.class.new(:ids => self.ids, :text => alternative_text)
            alternative << Fragment.new(:ids => (alternative.ids.first..alternative.ids.last).to_a, :text => options[:text].split(' ')[alternative.ids.first..alternative.ids.last].join(' '))
            alternative
          else
            return [self]
          end
        end
      end
    end

    # no group matched: return an empty list rather than the value of #each
    # (the raw synonym groups), which callers would treat as fragments
    []
  end
end
|
312
|
+
|
313
|
+
# An alternative that widens a general term into its more specific terms.
class Expansion < Alternative
  # Matches any key of options[:expansions]
  # (a hash of general term => array of specific terms).
  def self.find(options)
    search_options = options.merge(:looking_for => options[:expansions].keys)
    super(search_options)
  end

  # Returns one new Expansion fragment per specific term registered for this
  # fragment's singularized, downcased text, each with a plain Fragment child
  # holding the words as originally typed. Returns [self] when this node
  # already has children, and an empty array when the text has no expansions.
  def replacements(options)
    expanded_terms = options[:expansions][self.to_s.singularize.downcase] || []
    expanded_terms.map do |term|
      term = term.pluralize if self.to_s.plural?
      # a node that already has children is returned as it is
      return [self] unless self.node_height == 0

      expanded = self.class.new(:ids => self.ids, :text => term)
      id_span = expanded.ids.first..expanded.ids.last
      original_words = options[:text].split(' ')[expanded.ids.first..expanded.ids.last].join(' ')
      expanded << Fragment.new(:ids => id_span.to_a, :text => original_words)
      expanded
    end
  end
end
|
331
|
+
|
332
|
+
end
|
@@ -0,0 +1,38 @@
|
|
1
|
+
# how many days of the week start with the letter t
|
2
|
+
|
3
|
+
# e.g. how many
|
4
|
+
class Count < Natural::Fragment
|
5
|
+
def self.find(options)
|
6
|
+
super options.merge(:looking_for => ['how many'])
|
7
|
+
end
|
8
|
+
def aggregator
|
9
|
+
'count'
|
10
|
+
end
|
11
|
+
end
|
12
|
+
|
13
|
+
# e.g. days of the week
|
14
|
+
class DayNames < Natural::Fragment
  # Matches the phrase 'days of the week'.
  def self.find(options)
    super options.merge(:looking_for => ['days of the week'])
  end

  # Returns the data set this fragment contributes: Date::DAYNAMES.
  #
  # context - unused here; it defaults to nil to match Natural::Fragment#data,
  #           so the method can be called without an argument like the parent's.
  def data(context=nil)
    Date::DAYNAMES
  end
end
|
22
|
+
|
23
|
+
# e.g. blu-ray
|
24
|
+
class BluRay < Natural::Fragment
|
25
|
+
def self.find(options)
|
26
|
+
super options.merge(:looking_for => ['blu-ray'])
|
27
|
+
end
|
28
|
+
end
|
29
|
+
|
30
|
+
# e.g. start with the letter t
|
31
|
+
class StartsWithLetter < Natural::Fragment
|
32
|
+
def self.find(options)
|
33
|
+
super options.merge(:looking_for => {:and => ['start with the letter', {:or => ('a'..'z').to_a}]})
|
34
|
+
end
|
35
|
+
def filter
|
36
|
+
"select {|a| a[0].downcase == '#{self.children.last.to_s.downcase}'}"
|
37
|
+
end
|
38
|
+
end
|
@@ -0,0 +1,30 @@
|
|
1
|
+
# e.g. last
|
2
|
+
class Determiner < Natural::Fragment
|
3
|
+
def self.find(options)
|
4
|
+
super options.merge(:looking_for => ['this', 'last', 'next'])
|
5
|
+
end
|
6
|
+
end
|
7
|
+
|
8
|
+
# e.g. over
|
9
|
+
class Preposition < Natural::Fragment
|
10
|
+
def self.find(options)
|
11
|
+
super options.merge(:looking_for => ['over', 'during', 'in', 'on', 'through'])
|
12
|
+
end
|
13
|
+
end
|
14
|
+
|
15
|
+
# e.g. I, we
|
16
|
+
class Pronoun < Natural::Fragment
|
17
|
+
def self.find(options)
|
18
|
+
super options.merge(:looking_for => ['i', 'we'])
|
19
|
+
end
|
20
|
+
end
|
21
|
+
|
22
|
+
# e.g. spent, total
|
23
|
+
class Sum < Natural::Fragment
|
24
|
+
def self.find(options)
|
25
|
+
super options.merge(:looking_for => {:or => ['spend', 'spent', 'total', {:and => ['how much did', Pronoun , 'spend']}, {:and => ['how much have', Pronoun, 'spent']}]})
|
26
|
+
end
|
27
|
+
def aggregator
|
28
|
+
'sum'
|
29
|
+
end
|
30
|
+
end
|
@@ -0,0 +1,57 @@
|
|
1
|
+
require 'date'
|
2
|
+
|
3
|
+
# e.g. yesterday
|
4
|
+
class RelativeDateName < Natural::Fragment
|
5
|
+
def self.find(options)
|
6
|
+
super options.merge(:looking_for => ['yesterday', 'today', 'tomorrow'])
|
7
|
+
end
|
8
|
+
end
|
9
|
+
|
10
|
+
# e.g. January
|
11
|
+
class MonthName < Natural::Fragment
  # Matches any month name from Date::MONTHNAMES, minus its leading nil placeholder.
  def self.find(options)
    month_names = Date::MONTHNAMES.compact
    super(options.merge(:looking_for => month_names))
  end
end
|
16
|
+
|
17
|
+
# e.g. Monday
|
18
|
+
class DayName < Natural::Fragment
|
19
|
+
def self.find(options)
|
20
|
+
super options.merge(:looking_for => Date::DAYNAMES)
|
21
|
+
end
|
22
|
+
end
|
23
|
+
|
24
|
+
# e.g. month
|
25
|
+
class UnitOfTime < Natural::Fragment
|
26
|
+
def self.find(options)
|
27
|
+
super options.merge(:looking_for => ['second', 'minute', 'hour', 'day', 'week', 'month', 'quarter', 'year', 'decade', 'century'])
|
28
|
+
end
|
29
|
+
end
|
30
|
+
|
31
|
+
# e.g. last month
|
32
|
+
class Dut < Natural::Fragment
|
33
|
+
def self.find(options)
|
34
|
+
super options.merge(:looking_for => {:and => [Determiner, UnitOfTime]})
|
35
|
+
end
|
36
|
+
end
|
37
|
+
|
38
|
+
# e.g. last January
# Compound fragment: a Determiner together with a MonthName.
class Dmn < Natural::Fragment
  # Runs the inherited finder with an :and pattern of the two fragment classes.
  def self.find(options)
    pattern = {:and => [Determiner, MonthName]}
    super(options.merge(:looking_for => pattern))
  end
end
|
44
|
+
|
45
|
+
# e.g. in January
# Compound fragment: a Preposition together with a MonthName.
class Pmn < Natural::Fragment
  # Runs the inherited finder with an :and pattern of the two fragment classes.
  def self.find(options)
    pattern = {:and => [Preposition, MonthName]}
    super(options.merge(:looking_for => pattern))
  end
end
|
51
|
+
|
52
|
+
# e.g. over the last month
# Compound fragment: a Preposition, the literal word 'the', and a Dut.
class Pdut < Natural::Fragment
  # Runs the inherited finder with an :and pattern of the three parts.
  def self.find(options)
    pattern = {:and => [Preposition, 'the', Dut]}
    super(options.merge(:looking_for => pattern))
  end
end
|
data/lib/natural.rb
ADDED
@@ -0,0 +1,147 @@
|
|
1
|
+
require 'natural/inflections'
|
2
|
+
require 'natural/string'
|
3
|
+
require 'natural/array'
|
4
|
+
require 'natural/fragment'
|
5
|
+
require 'natural/fragments/timeframes.rb'
|
6
|
+
require 'natural/fragments/misc.rb'
|
7
|
+
require 'natural/fragments/example.rb'
|
8
|
+
|
9
|
+
# Parses a free-form question into a tree of fragments and evaluates it.
#
# Matching is delegated to the Natural::Fragment subclasses. Among all
# combinations of non-overlapping matches, the one with the highest total
# score becomes the parse tree; words no fragment claimed are tagged Unused.
class Natural
  require 'map_by_method'
  require 'active_support/inflector'
  require 'active_support/core_ext'

  require 'logger'
  @@logger = Logger.new(STDOUT)
  @@logger.level = Logger::DEBUG

  # ANSI terminal colour escape codes.
  GREEN = "\e[32m"
  RED = "\e[31m"
  YELLOW = "\e[33m"
  CLEAR = "\e[0m"

  # Fallbacks handed to the fragment finders when the caller supplies no
  # :spellings, :synonyms or :expansions option.
  DEFAULT_SPELLINGS = {'week' => ['wek', 'weeek'], 'begin' => ['beginn', 'beegin']}
  DEFAULT_SYNONYMS = {'1' => ['start', 'begin', 'commence'], '2' => ['stop', 'end', 'finish', 'conclude']}
  DEFAULT_EXPANSIONS = {'food' => ['grocery', 'eat out', 'eating out', 'dining out', 'dine out', 'dine in'], 'music' => ['audio cd', 'audio tape'], 'movie' => ['blu-ray', 'dvd', 'video']}

  # text    - the question to parse; runs of spaces are squeezed and the ends
  #           are stripped before parsing.
  # options - Hash; the keys read by this class are :fragment_classes,
  #           :spellings, :synonyms, :expansions and :context.
  def initialize(text, options={})
    @text = text.squeeze(' ').strip
    @options = options

    @parse = parse
  end

  # Replaces the text and re-parses. The cached tree is discarded first
  # (otherwise #parse would return the tree built for the old text), and the
  # text gets the same whitespace normalization as in #initialize.
  def text=(text)
    @text = text.squeeze(' ').strip
    parse!
  end

  # Replaces the options and re-parses, discarding the cached tree.
  def options=(options)
    @options = options
    parse!
  end

  # Builds the parse tree for the current text and options, or returns the
  # cached tree when one exists. Use #parse! to force a rebuild.
  def parse
    return @parse if @parse

    start_at = Time.now
    words = @text.split(' ')
    all_word_ids = (0...words.size).to_a

    # search for all possible matches using all the different fragment classes
    matches_by_class = {}
    fragment_classes = @options[:fragment_classes] || ObjectSpace.each_object(Class)
    fragment_classes = fragment_classes.select {|a| a < Natural::Fragment && a != Natural::Unused}
    find_options = {
      :text => @text,
      :matches => matches_by_class,
      :spellings => @options[:spellings] || DEFAULT_SPELLINGS,
      :synonyms => @options[:synonyms] || DEFAULT_SYNONYMS,
      :expansions => @options[:expansions] || DEFAULT_EXPANSIONS
    }
    # Alternative subclasses are searched first, then the fragment classes.
    ObjectSpace.each_object(Class).select {|a| a < Natural::Alternative}.each do |klass|
      matches_by_class = klass.find(find_options)
    end
    fragment_classes.each do |klass|
      matches_by_class = klass.find(find_options)
    end

    matching_at = Time.now
    @@logger.debug "[n][perf] matching took #{(matching_at - start_at).seconds.round(1)} seconds"

    # find all valid combinations, choose the one with the highest score
    sequences = assemble_sequences(matches_by_class.values.flatten)
    sequences = sequences.uniq.sort {|a,b| b.map_by_score.sum <=> a.map_by_score.sum}
    # Work on a copy: the Unused fragments appended below must not leak into
    # the winning sequence that is logged with its score further down.
    fragments = (sequences.first || []).dup

    scoring_at = Time.now
    @@logger.debug "[n][perf] scoring took #{(scoring_at - matching_at).seconds.round(1)} seconds"
    @@logger.debug "[n]"

    # tag the leftover words as unused
    remaining_words = all_word_ids - fragments.map {|a| a.ids}.flatten
    remaining_words.each do |id|
      fragments << Unused.new(:ids => [id], :text => words[id])
    end

    # put the fragments we are using in order and assemble the final tree
    fragments = fragments.sort {|a,b| a.ids.first <=> b.ids.first}
    @parse = Fragment.new(:ids => all_word_ids, :text => @text)
    fragments.each {|a| @parse << a}

    sequences.each {|a| @@logger.debug "[n][scor] #{a.map_by_score.sum.to_s.rjust(2, '0')} #{a.sort{|b,c| b.ids.first <=> c.ids.first}.join(' | ')}"}
    @@logger.debug("[n]")
    @parse.pretty_to_s.each_line do |line|
      @@logger.debug("[n][tree] #{line.gsub("\n", '')}")
    end
    @@logger.debug("[n]")
    context_suffix = @options[:context] ? " (#{@options[:context]})" : ""
    @@logger.info("[n][orig] #{@text}" + context_suffix)
    @@logger.info("[n][used] #{interpretation}" + context_suffix)

    @parse
  end

  # Discards the cached tree and parses again.
  def parse!
    @parse = nil
    parse
  end

  # Evaluates the parse tree: collects the data of every top-level fragment,
  # then applies each fragment's filters and finally each aggregator.
  #
  # NOTE(review): filters and aggregators are strings run through eval as
  # "result.<string>". They come from the fragment classes (all_filters /
  # aggregator); confirm they can never contain user-supplied text.
  def answer
    result = @parse.children.map_by_data(@options[:context]).select{|a| !a.blank?}.flatten
    @parse.children.map_by_all_filters.select{|a| !a.blank?}.each {|f| result = eval("result.#{f}")}
    @parse.children.map_by_aggregator.select{|a| !a.blank?}.each {|a| result = eval("result.#{a}")}
    result
  end

  private

  # Renders the question as it was understood: fragments that contribute
  # filters, data or an aggregator are printed; every other fragment has its
  # letters replaced by '-' when crossout is true, or is left blank otherwise.
  def interpretation(crossout=true)
    rendered = @parse.children.map do |node|
      if !node.all_filters.blank? || node.data(@options[:context]) || node.aggregator
        node.to_s(:without_edits => true)
      elsif crossout == true
        node.to_s.gsub(/[a-zA-Z]/,'-')
      else
        ''
      end
    end

    rendered.join(' ').strip
  end

  # Recursively builds every combination of fragments whose word ids do not
  # overlap. Each returned sequence is ordered by the first word id of its
  # fragments; the same combination can appear more than once (the caller
  # removes duplicates).
  def assemble_sequences(left_to_try, sequence_so_far=[])
    sequences = []

    # Word ids already claimed by the sequence; computed once per call rather
    # than once per candidate fragment.
    used_ids = sequence_so_far.map {|a| a.ids}.flatten.uniq
    new_left_to_try = left_to_try.select {|a| (a.ids & used_ids).blank?}
    new_left_to_try.each do |fragment|
      new_sequence_so_far = sequence_so_far.dup << fragment
      sequences << new_sequence_so_far.sort{|a,b| a.ids.first <=> b.ids.first}
      sequences += assemble_sequences(new_left_to_try, new_sequence_so_far)
    end

    sequences
  end

end
|
data/natural.gemspec
ADDED
@@ -0,0 +1,88 @@
|
|
1
|
+
# Generated by jeweler
|
2
|
+
# DO NOT EDIT THIS FILE DIRECTLY
|
3
|
+
# Instead, edit Jeweler::Tasks in Rakefile, and run 'rake gemspec'
|
4
|
+
# -*- encoding: utf-8 -*-
|
5
|
+
|
6
|
+
Gem::Specification.new do |s|
  s.name = "natural"
  s.version = "0.1.0"

  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
  s.authors = ["Scott Bonds"]
  s.date = "2012-03-05"
  s.description = "Natural provides a framework for answering 'naturally' worded questions like 'how many books did I buy last month' or 'list my Facebook friends'."
  s.email = "scott@ggr.com"
  s.extra_rdoc_files = %w[
    LICENSE.txt
    README.markdown
  ]
  s.files = %w[
    Gemfile
    Gemfile.lock
    LICENSE.txt
    README.markdown
    Rakefile
    VERSION
    features/natural.feature
    features/step_definitions/natural_steps.rb
    features/support/env.rb
    lib/natural.rb
    lib/natural/array.rb
    lib/natural/fragment.rb
    lib/natural/fragments/example.rb
    lib/natural/fragments/misc.rb
    lib/natural/fragments/timeframes.rb
    lib/natural/inflections.rb
    lib/natural/string.rb
    natural.gemspec
    spec/array_spec.rb
    spec/natural_spec.rb
    spec/spec_helper.rb
    spec/string_spec.rb
  ]
  s.homepage = "http://github.com/bonds/natural"
  s.licenses = ["MIT"]
  s.require_paths = ["lib"]
  s.rubygems_version = "1.8.11"
  s.summary = "natural language parser"

  # [gem name, version requirement] pairs, in registration order.
  runtime_gems = [
    ["rubytree", ">= 0"],
    ["logger", ">= 0"],
    ["map_by_method", ">= 0"],
    ["activesupport", ">= 0"]
  ]
  development_gems = [
    ["rspec", "~> 2.8.0"],
    ["rdoc", "~> 3.12"],
    ["cucumber", ">= 0"],
    ["bundler", "~> 1.0.0"],
    ["jeweler", "~> 1.8.3"],
    ["simplecov", ">= 0"]
  ]

  # Runtime/development dependency types are only used when the specification
  # supports specification_version and RubyGems is at least 1.2.0.
  typed_dependencies = false
  if s.respond_to? :specification_version
    s.specification_version = 3
    typed_dependencies = Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0')
  end

  if typed_dependencies
    runtime_gems.each { |gem_name, requirement| s.add_runtime_dependency(gem_name, [requirement]) }
    development_gems.each { |gem_name, requirement| s.add_development_dependency(gem_name, [requirement]) }
  else
    # Otherwise every gem is registered as a plain dependency.
    (runtime_gems + development_gems).each { |gem_name, requirement| s.add_dependency(gem_name, [requirement]) }
  end
end
|
88
|
+
|
data/spec/array_spec.rb
ADDED
data/spec/spec_helper.rb
ADDED
@@ -0,0 +1,15 @@
|
|
1
|
+
# Coverage is started first, before the library under test is required below.
require 'simplecov'
SimpleCov.start

# Put the gem's lib directory and the spec directory itself on the load path.
spec_dir = File.dirname(__FILE__)
$LOAD_PATH.unshift(File.join(spec_dir, '..', 'lib'))
$LOAD_PATH.unshift(spec_dir)
require 'rspec'
require 'natural'

# Requires supporting files with custom matchers and macros, etc,
# in ./support/ and its subdirectories.
Dir["#{spec_dir}/support/**/*.rb"].each { |support_file| require support_file }

RSpec.configure do |config|

end
|
data/spec/string_spec.rb
ADDED
metadata
ADDED
@@ -0,0 +1,183 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: natural
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.1.0
|
5
|
+
prerelease:
|
6
|
+
platform: ruby
|
7
|
+
authors:
|
8
|
+
- Scott Bonds
|
9
|
+
autorequire:
|
10
|
+
bindir: bin
|
11
|
+
cert_chain: []
|
12
|
+
date: 2012-03-05 00:00:00.000000000 Z
|
13
|
+
dependencies:
|
14
|
+
- !ruby/object:Gem::Dependency
|
15
|
+
name: rubytree
|
16
|
+
requirement: &70176893369980 !ruby/object:Gem::Requirement
|
17
|
+
none: false
|
18
|
+
requirements:
|
19
|
+
- - ! '>='
|
20
|
+
- !ruby/object:Gem::Version
|
21
|
+
version: '0'
|
22
|
+
type: :runtime
|
23
|
+
prerelease: false
|
24
|
+
version_requirements: *70176893369980
|
25
|
+
- !ruby/object:Gem::Dependency
|
26
|
+
name: logger
|
27
|
+
requirement: &70176893369320 !ruby/object:Gem::Requirement
|
28
|
+
none: false
|
29
|
+
requirements:
|
30
|
+
- - ! '>='
|
31
|
+
- !ruby/object:Gem::Version
|
32
|
+
version: '0'
|
33
|
+
type: :runtime
|
34
|
+
prerelease: false
|
35
|
+
version_requirements: *70176893369320
|
36
|
+
- !ruby/object:Gem::Dependency
|
37
|
+
name: map_by_method
|
38
|
+
requirement: &70176893368200 !ruby/object:Gem::Requirement
|
39
|
+
none: false
|
40
|
+
requirements:
|
41
|
+
- - ! '>='
|
42
|
+
- !ruby/object:Gem::Version
|
43
|
+
version: '0'
|
44
|
+
type: :runtime
|
45
|
+
prerelease: false
|
46
|
+
version_requirements: *70176893368200
|
47
|
+
- !ruby/object:Gem::Dependency
|
48
|
+
name: activesupport
|
49
|
+
requirement: &70176893367300 !ruby/object:Gem::Requirement
|
50
|
+
none: false
|
51
|
+
requirements:
|
52
|
+
- - ! '>='
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: '0'
|
55
|
+
type: :runtime
|
56
|
+
prerelease: false
|
57
|
+
version_requirements: *70176893367300
|
58
|
+
- !ruby/object:Gem::Dependency
|
59
|
+
name: rspec
|
60
|
+
requirement: &70176893366660 !ruby/object:Gem::Requirement
|
61
|
+
none: false
|
62
|
+
requirements:
|
63
|
+
- - ~>
|
64
|
+
- !ruby/object:Gem::Version
|
65
|
+
version: 2.8.0
|
66
|
+
type: :development
|
67
|
+
prerelease: false
|
68
|
+
version_requirements: *70176893366660
|
69
|
+
- !ruby/object:Gem::Dependency
|
70
|
+
name: rdoc
|
71
|
+
requirement: &70176893365820 !ruby/object:Gem::Requirement
|
72
|
+
none: false
|
73
|
+
requirements:
|
74
|
+
- - ~>
|
75
|
+
- !ruby/object:Gem::Version
|
76
|
+
version: '3.12'
|
77
|
+
type: :development
|
78
|
+
prerelease: false
|
79
|
+
version_requirements: *70176893365820
|
80
|
+
- !ruby/object:Gem::Dependency
|
81
|
+
name: cucumber
|
82
|
+
requirement: &70176893063820 !ruby/object:Gem::Requirement
|
83
|
+
none: false
|
84
|
+
requirements:
|
85
|
+
- - ! '>='
|
86
|
+
- !ruby/object:Gem::Version
|
87
|
+
version: '0'
|
88
|
+
type: :development
|
89
|
+
prerelease: false
|
90
|
+
version_requirements: *70176893063820
|
91
|
+
- !ruby/object:Gem::Dependency
|
92
|
+
name: bundler
|
93
|
+
requirement: &70176893063100 !ruby/object:Gem::Requirement
|
94
|
+
none: false
|
95
|
+
requirements:
|
96
|
+
- - ~>
|
97
|
+
- !ruby/object:Gem::Version
|
98
|
+
version: 1.0.0
|
99
|
+
type: :development
|
100
|
+
prerelease: false
|
101
|
+
version_requirements: *70176893063100
|
102
|
+
- !ruby/object:Gem::Dependency
|
103
|
+
name: jeweler
|
104
|
+
requirement: &70176893062300 !ruby/object:Gem::Requirement
|
105
|
+
none: false
|
106
|
+
requirements:
|
107
|
+
- - ~>
|
108
|
+
- !ruby/object:Gem::Version
|
109
|
+
version: 1.8.3
|
110
|
+
type: :development
|
111
|
+
prerelease: false
|
112
|
+
version_requirements: *70176893062300
|
113
|
+
- !ruby/object:Gem::Dependency
|
114
|
+
name: simplecov
|
115
|
+
requirement: &70176893061480 !ruby/object:Gem::Requirement
|
116
|
+
none: false
|
117
|
+
requirements:
|
118
|
+
- - ! '>='
|
119
|
+
- !ruby/object:Gem::Version
|
120
|
+
version: '0'
|
121
|
+
type: :development
|
122
|
+
prerelease: false
|
123
|
+
version_requirements: *70176893061480
|
124
|
+
description: Natural provides a framework for answering 'naturally' worded questions
|
125
|
+
like 'how many books did I buy last month' or 'list my Facebook friends'.
|
126
|
+
email: scott@ggr.com
|
127
|
+
executables: []
|
128
|
+
extensions: []
|
129
|
+
extra_rdoc_files:
|
130
|
+
- LICENSE.txt
|
131
|
+
- README.markdown
|
132
|
+
files:
|
133
|
+
- Gemfile
|
134
|
+
- Gemfile.lock
|
135
|
+
- LICENSE.txt
|
136
|
+
- README.markdown
|
137
|
+
- Rakefile
|
138
|
+
- VERSION
|
139
|
+
- features/natural.feature
|
140
|
+
- features/step_definitions/natural_steps.rb
|
141
|
+
- features/support/env.rb
|
142
|
+
- lib/natural.rb
|
143
|
+
- lib/natural/array.rb
|
144
|
+
- lib/natural/fragment.rb
|
145
|
+
- lib/natural/fragments/example.rb
|
146
|
+
- lib/natural/fragments/misc.rb
|
147
|
+
- lib/natural/fragments/timeframes.rb
|
148
|
+
- lib/natural/inflections.rb
|
149
|
+
- lib/natural/string.rb
|
150
|
+
- natural.gemspec
|
151
|
+
- spec/array_spec.rb
|
152
|
+
- spec/natural_spec.rb
|
153
|
+
- spec/spec_helper.rb
|
154
|
+
- spec/string_spec.rb
|
155
|
+
homepage: http://github.com/bonds/natural
|
156
|
+
licenses:
|
157
|
+
- MIT
|
158
|
+
post_install_message:
|
159
|
+
rdoc_options: []
|
160
|
+
require_paths:
|
161
|
+
- lib
|
162
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
163
|
+
none: false
|
164
|
+
requirements:
|
165
|
+
- - ! '>='
|
166
|
+
- !ruby/object:Gem::Version
|
167
|
+
version: '0'
|
168
|
+
segments:
|
169
|
+
- 0
|
170
|
+
hash: -4600966347874648454
|
171
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
172
|
+
none: false
|
173
|
+
requirements:
|
174
|
+
- - ! '>='
|
175
|
+
- !ruby/object:Gem::Version
|
176
|
+
version: '0'
|
177
|
+
requirements: []
|
178
|
+
rubyforge_project:
|
179
|
+
rubygems_version: 1.8.11
|
180
|
+
signing_key:
|
181
|
+
specification_version: 3
|
182
|
+
summary: natural language parser
|
183
|
+
test_files: []
|