semr 0.0.1
Sign up to get free protection for your applications and to get access to all the features.
- data/History.txt +4 -0
- data/License +20 -0
- data/Manifest.txt +29 -0
- data/README.txt +54 -0
- data/Rakefile +7 -0
- data/example.rb +23 -0
- data/lib/semr.rb +21 -0
- data/lib/semr/concept.rb +30 -0
- data/lib/semr/dictionary.rb +31 -0
- data/lib/semr/expressions.rb +33 -0
- data/lib/semr/extensions/object.rb +12 -0
- data/lib/semr/extensions/string.rb +30 -0
- data/lib/semr/language.rb +47 -0
- data/lib/semr/normalizers.rb +31 -0
- data/lib/semr/phrase.rb +93 -0
- data/lib/semr/rails/model_inflector.rb +63 -0
- data/lib/semr/rails/model_synonym.rb +12 -0
- data/lib/semr/translation.rb +19 -0
- data/lib/semr/version.rb +9 -0
- data/setup.rb +1585 -0
- data/tasks/deployment.rake +34 -0
- data/tasks/environment.rake +7 -0
- data/tasks/rspec.rake +21 -0
- data/tasks/website.rake +17 -0
- data/website/index.html +141 -0
- data/website/index.txt +83 -0
- data/website/javascripts/rounded_corners_lite.inc.js +285 -0
- data/website/stylesheets/screen.css +138 -0
- data/website/template.html.erb +48 -0
- metadata +86 -0
data/History.txt
ADDED
data/License
ADDED
@@ -0,0 +1,20 @@
|
|
1
|
+
Copyright (c) 2008 Matthew Deiters
|
2
|
+
|
3
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
4
|
+
a copy of this software and associated documentation files (the
|
5
|
+
"Software"), to deal in the Software without restriction, including
|
6
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
7
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
8
|
+
permit persons to whom the Software is furnished to do so, subject to
|
9
|
+
the following conditions:
|
10
|
+
|
11
|
+
The above copyright notice and this permission notice shall be
|
12
|
+
included in all copies or substantial portions of the Software.
|
13
|
+
|
14
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
15
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
16
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
17
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
18
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
19
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
20
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/Manifest.txt
ADDED
@@ -0,0 +1,29 @@
|
|
1
|
+
History.txt
|
2
|
+
License
|
3
|
+
Manifest.txt
|
4
|
+
README.txt
|
5
|
+
Rakefile
|
6
|
+
example.rb
|
7
|
+
lib/semr.rb
|
8
|
+
lib/semr/concept.rb
|
9
|
+
lib/semr/dictionary.rb
|
10
|
+
lib/semr/expressions.rb
|
11
|
+
lib/semr/extensions/object.rb
|
12
|
+
lib/semr/extensions/string.rb
|
13
|
+
lib/semr/language.rb
|
14
|
+
lib/semr/normalizers.rb
|
15
|
+
lib/semr/phrase.rb
|
16
|
+
lib/semr/rails/model_inflector.rb
|
17
|
+
lib/semr/rails/model_synonym.rb
|
18
|
+
lib/semr/translation.rb
|
19
|
+
lib/semr/version.rb
|
20
|
+
setup.rb
|
21
|
+
tasks/deployment.rake
|
22
|
+
tasks/environment.rake
|
23
|
+
tasks/rspec.rake
|
24
|
+
tasks/website.rake
|
25
|
+
website/index.html
|
26
|
+
website/index.txt
|
27
|
+
website/javascripts/rounded_corners_lite.inc.js
|
28
|
+
website/stylesheets/screen.css
|
29
|
+
website/template.html.erb
|
data/README.txt
ADDED
@@ -0,0 +1,54 @@
|
|
1
|
+
Prerequisites
|
2
|
+
=============
|
3
|
+
|
4
|
+
The semr gem uses the Oniguruma library for regular expression features, such as named groups, that the Ruby 1.8 regular expression engine lacks. Oniguruma is built into Ruby 1.9, but it must be installed separately when running Ruby 1.8.
|
5
|
+
More info on gem: http://oniguruma.rubyforge.org/
|
6
|
+
|
7
|
+
On Windows
|
8
|
+
1. gem install oniguruma
|
9
|
+
|
10
|
+
On Mac
|
11
|
+
1. Unzip: /install/onig-5.9.1.tar
|
12
|
+
2. cd to /install/onig-5.9.1
|
13
|
+
3. Execute: ./configure
|
14
|
+
4. Execute: make
|
15
|
+
5. Execute: sudo make install
|
16
|
+
6. gem install oniguruma
|
17
|
+
|
18
|
+
Basics
|
19
|
+
======
|
20
|
+
|
21
|
+
See the example.rb for an example of creating a language (grammar).
|
22
|
+
|
23
|
+
Describe:
|
24
|
+
* Language
|
25
|
+
* Concept
|
26
|
+
- normalizers
|
27
|
+
- expressions
|
28
|
+
* Phrase
|
29
|
+
|
30
|
+
|
31
|
+
== LICENSE:
|
32
|
+
|
33
|
+
(The MIT License)
|
34
|
+
|
35
|
+
Copyright (c) 2008 Matthew Deiters
|
36
|
+
|
37
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
38
|
+
a copy of this software and associated documentation files (the
|
39
|
+
'Software'), to deal in the Software without restriction, including
|
40
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
41
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
42
|
+
permit persons to whom the Software is furnished to do so, subject to
|
43
|
+
the following conditions:
|
44
|
+
|
45
|
+
The above copyright notice and this permission notice shall be
|
46
|
+
included in all copies or substantial portions of the Software.
|
47
|
+
|
48
|
+
THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
|
49
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
50
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
51
|
+
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
52
|
+
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
53
|
+
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
54
|
+
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/Rakefile
ADDED
data/example.rb
ADDED
@@ -0,0 +1,23 @@
|
|
1
|
+
require 'rubygems'
require 'semr'

# Build a two-concept language: a numeric count and a small set of
# greetings, tied together by a single phrase.
GREETINGS = %w[hi goodbye hello] unless defined?(GREETINGS)

language = Semr::Language.create do
  concept :number, any_number, :normalize => as_fixnum
  concept :greeting, words(*GREETINGS)

  # The block receives one argument per concept, in phrase order.
  phrase 'say :greeting :number times' do |greeting, number|
    number.times do
      puts greeting
    end
  end
end

language.parse('say hello 6 times')
# hello
# hello
# hello
# hello
# hello
# hello

language.parse('say goodbye 2 times')
# goodbye
# goodbye
|
data/lib/semr.rb
ADDED
@@ -0,0 +1,21 @@
|
|
1
|
+
require 'rubygems'
|
2
|
+
require 'oniguruma' #http://oniguruma.rubyforge.org
|
3
|
+
|
4
|
+
|
5
|
+
$:.unshift(File.dirname(__FILE__)) unless
|
6
|
+
$:.include?(File.dirname(__FILE__)) || $:.include?(File.expand_path(File.dirname(__FILE__)))
|
7
|
+
|
8
|
+
require File.expand_path(File.dirname(__FILE__) + "/semr/dictionary")
|
9
|
+
require File.expand_path(File.dirname(__FILE__) + "/semr/translation")
|
10
|
+
require File.expand_path(File.dirname(__FILE__) + "/semr/expressions")
|
11
|
+
require File.expand_path(File.dirname(__FILE__) + "/semr/normalizers")
|
12
|
+
require File.expand_path(File.dirname(__FILE__) + "/semr/language")
|
13
|
+
require File.expand_path(File.dirname(__FILE__) + "/semr/concept")
|
14
|
+
require File.expand_path(File.dirname(__FILE__) + "/semr/phrase")
|
15
|
+
require File.expand_path(File.dirname(__FILE__) + "/semr/extensions/string")
|
16
|
+
require File.expand_path(File.dirname(__FILE__) + "/semr/extensions/object")
|
17
|
+
if defined? ActiveRecord
|
18
|
+
require File.expand_path(File.dirname(__FILE__) + "/semr/rails/model_inflector")
|
19
|
+
require File.expand_path(File.dirname(__FILE__) + "/semr/rails/model_synonym")
|
20
|
+
ActiveRecord::Base.extend Semr::Rails::ModelSynonym
|
21
|
+
end
|
data/lib/semr/concept.rb
ADDED
@@ -0,0 +1,30 @@
|
|
1
|
+
module Semr
  # A named building block of a language: a pattern definition plus optional
  # normalizers that post-process whatever text the pattern matched.
  class Concept
    attr_reader :name, :definition

    # name       - identifier used to refer to the concept.
    # definition - the pattern for the concept (Phrase calls #to_regexp on it).
    # options    - :normalize may hold a single callable or an Array of
    #              callables; each is invoked with the previous result.
    def initialize(name, definition, options = {})
      @name = name
      @definition = definition
      @options = options
    end

    # Runs the matched value through every configured normalizer, in order,
    # and returns the final result. Without normalizers the arrayified match
    # is returned unchanged.
    def normalize(match)
      result = arrayify(match)
      normalizers = @options[:normalize]
      return result unless normalizers

      normalizers = [normalizers] unless normalizers.is_a?(Array)
      normalizers.inject(result) { |value, normalizer| normalizer.call(value) }
    end

    # Reduces a match to plain values: a String is returned as is; for a
    # MatchData the participating capture groups are returned (a single
    # capture is unwrapped, several are returned as an Array).
    #
    # The captures are taken with MatchData#captures. Slicing with
    # `match[1..match.end]` treated the end *offset* of the match (a character
    # position) as a group index, and fails outright on a stock MatchData,
    # whose #end requires an argument.
    def arrayify(match)
      return match if match.kind_of?(String)

      captures = match.captures.compact
      captures.size == 1 ? captures.first : captures
    end
  end
end
|
@@ -0,0 +1,31 @@
|
|
1
|
+
module Semr
  # Process-wide table mapping a term to its root (synonym) term.
  class Dictionary
    class << self
      # Lazily created Hash of term => root.
      def internal_dictionary
        @internal_dictionary ||= {}
      end

      # Returns the root registered for +term+, or +term+ itself when nothing
      # is registered for it.
      def lookup(term)
        internal_dictionary[term] || term
      end

      # Follows registrations transitively until a term maps to itself, e.g.
      #   peoples => people
      #   people  => person
      #   person  => person DONE
      #
      # Terms already seen are remembered so that a cyclic registration
      # (a => b, b => a) ends the walk instead of looping forever; in that
      # case the term at which the cycle was re-entered is returned.
      def find_root(term)
        visited = {}
        until visited.has_key?(term)
          visited[term] = true
          root = lookup(term)
          return root if root == term
          term = root
        end
        term
      end

      # Records +root+ as the root of +term+, replacing any earlier entry.
      def register(term, root)
        # puts "TERM: #{term} ROOT: #{root}" if term == 'event' || root == 'event'
        internal_dictionary[term] = root
      end
    end
  end
end
|
@@ -0,0 +1,33 @@
|
|
1
|
+
module Semr
  # Helpers that return regular-expression source strings for use as concept
  # definitions.
  module Expressions
    # Pattern capturing any one of the given words as a whole word.
    #
    # The alternatives are wrapped in a non-capturing group so that both word
    # boundaries apply to every alternative. Without the group, `\b` bound
    # only to the first and last alternative, so words('hi', 'goodbye')
    # also matched inside "goodbyes" or "hills". The number of capturing
    # groups is unchanged.
    def word(*args)
      '(\b(?:' + args.join('|') + ')\b)'
    end
    alias :words :word
    alias :possible_words :word

    # Pattern capturing a run of word characters.
    def any_word
      # '(\b\w+\b)'
      '(\w+)'
    end

    # Pattern capturing a run of digits.
    # NOTE(review): `*` also accepts an empty run of digits; confirm whether
    # `+` was intended before tightening this.
    def any_number
      '([0-9]*)'
    end

    # Pattern capturing the words and whitespace between single quotes.
    def words_in_quotes
      '\'([\w\s]+)\''
    end

    # Pattern matching any sequence of the given words separated by
    # whitespace, commas or "and". Each word gets its own capturing group.
    def multiple_occurrences_of(*words)
      alternatives = words.map { |term| "(\\b#{term})" }
      '(?:(?:\s|,|and)|' + alternatives.join('|') + ')*'
    end

    # Delegates to Rails::ModelInflector.all, which lib/semr.rb only loads
    # when ActiveRecord is defined.
    def all_models
      Rails::ModelInflector.all
    end
  end
end
|
@@ -0,0 +1,12 @@
|
|
1
|
+
Object.class_eval do
  # Backport of instance_exec for interpreters that do not provide it.
  #
  # The definition is skipped when the method already exists, so the native
  # implementation is never replaced. The backport has to define a temporary
  # singleton method on the receiver, which cannot work for frozen objects or
  # for immediates such as Symbols and Integers.
  unless method_defined?(:instance_exec)
    # Evaluates +block+ with self set to the receiver, passing +args+ to it,
    # and returns the block's result.
    def instance_exec(*args, &block)
      # The temporary method name is derived from the current thread's id.
      mname = "__instance_exec_#{Thread.current.object_id.abs}"
      singleton = class << self; self; end
      singleton.class_eval { define_method(mname, &block) }
      begin
        send(mname, *args)
      ensure
        begin
          singleton.class_eval { undef_method(mname) }
        rescue NameError
          # Already removed, e.g. by a nested call on the same receiver.
        end
      end
    end
  end
end
|
@@ -0,0 +1,30 @@
|
|
1
|
+
String.class_eval do

  # Returns the colon-prefixed tokens of the string as Symbols, in order of
  # appearance, e.g. 'say :greeting :number times' => [:greeting, :number].
  def symbols
    #TODO: Enhance to handle completely with regex
    tokens = scan(/[:a-zA-Z0-9]+/)
    tokens.select { |token| token.starts_with?(':') }.map { |token| token.symbolize }
  end

  # Removes every colon and converts the remainder to a Symbol.
  def symbolize
    gsub(':', '').to_sym
  end

  # A String already is regular-expression source, so it is returned as is.
  def to_regexp
    to_s
  end

  # True when the string ends with +substr+. An empty +substr+ is a suffix of
  # every string; the earlier reverse-and-slice form returned false for it.
  def ends_with?(substr)
    substr.length <= length && self[length - substr.length, substr.length] == substr
  end

  # True when the string begins with +substr+. Slicing by start/length
  # (rather than 0..length-1, which becomes 0..-1 for an empty +substr+)
  # makes the empty prefix match as it should.
  def starts_with?(substr)
    self[0, substr.length] == substr
  end
  alias begins_with? starts_with?
  # Only supply start_with? where the interpreter lacks it: the native
  # method accepts several prefixes and must not be replaced by the
  # single-argument version above.
  alias_method :start_with?, :starts_with? unless method_defined?(:start_with?)

end
|
@@ -0,0 +1,47 @@
|
|
1
|
+
#support setting @instance variables in phrase blocks
|
2
|
+
module Semr
  # A language is a set of named concepts plus the phrases built from them.
  # Expressions and Normalizers are mixed in so that their helpers can be
  # called directly inside the definition block or grammar file.
  class Language
    include Expressions
    include Normalizers

    class << self
      # Builds a language from a block, a grammar file, or both (the block is
      # evaluated first). Both are evaluated in the context of the new
      # Language instance.
      #
      # The grammar file is read verbatim. Joining IO.readlines with "\n"
      # doubled every line break, because readlines keeps the terminators;
      # that altered multi-line string literals and shifted line numbers.
      # The file name is passed along so errors point into the grammar file.
      def create(grammer_file = nil, &block)
        language = Language.new
        language.instance_eval(&block) if block_given?
        unless grammer_file.nil?
          language.instance_eval(File.read(grammer_file), grammer_file.to_s)
        end
        language
      end
    end

    # Hash of concept keyword => Concept.
    def concepts
      @concepts ||= {}
    end

    # Phrases in definition order; the order decides which phrase handles a
    # statement when several could.
    def phrases
      @phrases ||= []
    end

    # Registers a concept under +keyword+, replacing any earlier definition.
    def concept(keyword, definition, options = {})
      concepts[keyword] = Concept.new(keyword, definition, options)
    end

    # Registers a phrase built from the concepts defined so far.
    def phrase(phrase, &block)
      phrases << Phrase.new(concepts, phrase, &block)
    end

    # Splits +statement+ on '.', and lets the first phrase that handles each
    # stripped sentence interpret it. Sentences no phrase handles are
    # skipped. Returns the Translation that collected the results.
    def parse(statement)
      translation = Translation.new
      sentences = statement.split('.').map { |sentence| sentence.strip } #downcase.
      sentences.each do |sentence|
        handler = phrases.find { |candidate| candidate.handles?(sentence) }
        next if handler.nil?

        translation.phrases_translated << handler
        handler.interpret(sentence, translation)
      end
      translation
    end
  end
end
|
@@ -0,0 +1,31 @@
|
|
1
|
+
module Semr
  # Factories for the procs accepted by a concept's :normalize option. Each
  # method returns a proc taking the matched value and returning the
  # normalized one.
  module Normalizers
    # Strips single quotes from the value.
    # NOTE(review): despite the name this removes every single quote, not
    # only the outer pair; confirm the intent before narrowing it.
    def by_removing_outer_quotes
      proc { |value| value.gsub("'", "") }
    end

    # Converts the value to a class via classify.constantize, which are not
    # core String methods (presumably ActiveSupport's; verify).
    def as_class
      proc { |value| value.classify.constantize }
    end

    # Converts the value with #to_i.
    def as_fixnum
      proc { |value| value.to_i }
    end

    # Splits a "a, b and c" style enumeration into stripped items.
    #
    # "and" only separates items when it stands alone as a word; splitting
    # on the bare letters broke up words that contain them, such as
    # "sandwiches" or "candy".
    def as_list
      proc { |value| value.split(/,|\band\b/).map { |item| item.strip } }
    end

    # Like as_list, then converts every item with classify.constantize.
    def as_list_of_classes
      proc { |value| value.split(/,|\band\b/).map { |item| item.strip.classify.constantize } }
    end

    # Replaces the value with its root term from the Dictionary.
    def lookup_synonyms
      proc { |value| Dictionary.find_root(value) }
    end

    # Wraps another normalizer so that it is applied to every item of a list.
    def each_item(block)
      proc { |value| value.map { |item| block.call(item) } }
    end
  end
end
|
data/lib/semr/phrase.rb
ADDED
@@ -0,0 +1,93 @@
|
|
1
|
+
module Semr
  # Raised when a phrase refers to a concept that has not been defined.
  class InvalidConceptError < RuntimeError; end

  # A sentence template such as 'say :greeting :number times'. Every :symbol
  # is replaced by a named group holding the concept's pattern, and the block
  # is run with the normalized matches when a statement fits.
  class Phrase
    attr_reader :regex, :block

    # ^ matches phrase from beginning, should we use $
    # regex = Regexp.new(phrase, Regexp::IGNORECASE) <- fall back when oniguruma not installed
    #
    # all_concepts - Hash of concept name => Concept.
    # phrase       - template String; <word> marks an optional word.
    # block        - called with one argument per concept, in phrase order.
    #
    # Raises InvalidConceptError when the phrase uses an unknown concept.
    def initialize(all_concepts, phrase, &block)
      refined_phrase = remove_optional_words(phrase)
      phrase.symbols.each do |symbol|
        concept = all_concepts[symbol]
        if concept.nil?
          raise InvalidConceptError, "Unable to create phrase because :#{symbol} concept has not been defined."
        end

        concepts << concept
        concept_matcher = "(?<#{symbol}>#{concept.definition.to_regexp})"
        # Block form: a replacement *string* would have its backslash
        # sequences (\1, \0, \\ ...) interpreted by gsub, mangling concept
        # definitions that contain them.
        refined_phrase = refined_phrase.gsub(":#{symbol}") { concept_matcher }
      end
      @original = "^#{refined_phrase}"
      @regex = Oniguruma::ORegexp.new(@original, :options => Oniguruma::OPTION_IGNORECASE)
      @block = block
    end

    # Concepts used by this phrase, in order of appearance.
    def concepts
      @concepts ||= []
    end

    # Rewrites each <word> marker (and one following whitespace character)
    # into an optional non-capturing group followed by optional whitespace.
    def remove_optional_words(phrase)
      phrase.gsub(/\<([\w]*)\>\s?/, '(?:\1)?\s?')
    end

    # True when the phrase's pattern matches +statement+.
    def handles?(statement)
      !regex.match(statement).nil?
    end

    # Collects the normalized value of every concept for each match of the
    # pattern in +statement+, then runs the phrase's block in the context of
    # +translation+ with those values as arguments.
    def interpret(statement, translation)
      args = []
      regex.scan(statement) do |match|
        # Go through the accessor: @concepts is still nil for a phrase that
        # uses no concepts at all.
        concepts.each do |concept|
          args << concept.normalize(match[concept.name])
        end
      end
      # args = args.first if args.size == 1
      translation.instance_exec(*args, &block)
    end

    # Prints the whole match and every capture group, each followed by a
    # separator line.
    def debug(match)
      match.to_a.each do |group|
        puts group
        puts ' ---- '
      end
    end

    # The phrase's pattern source wrapped in a group, so that a phrase can
    # itself serve as a definition.
    def to_regexp
      "(#{@original})"
    end
  end
end
|
62
|
+
# module Semr
|
63
|
+
# class Phrase
|
64
|
+
# attr_reader :regex, :block
|
65
|
+
#
|
66
|
+
# def initialize(phrase, &block)
|
67
|
+
# @original = phrase
|
68
|
+
# phrase = "^#{phrase}" #match phrase from beginning..$
|
69
|
+
# #@regex, @block = Regexp.new(phrase, Regexp::IGNORECASE), block
|
70
|
+
# @regex, @block = Oniguruma::ORegexp.new(phrase, :options => Oniguruma::OPTION_IGNORECASE), block
|
71
|
+
# end
|
72
|
+
#
|
73
|
+
# def handles?(statement)
|
74
|
+
# match = statement.match(regex)
|
75
|
+
# !match.nil?
|
76
|
+
# end
|
77
|
+
#
|
78
|
+
# def interpret(statement, translation)
|
79
|
+
# args = []
|
80
|
+
# statement.scan(regex) do |match|
|
81
|
+
# match = match.flatten.first if match.flatten.size == 1
|
82
|
+
# match.delete(nil) if match.kind_of?(Array)
|
83
|
+
# args << match
|
84
|
+
# end
|
85
|
+
# # puts args.inspect
|
86
|
+
# translation.instance_exec(*args.flatten, &block)
|
87
|
+
# end
|
88
|
+
#
|
89
|
+
# def to_regexp
|
90
|
+
# "(#{@original})"
|
91
|
+
# end
|
92
|
+
# end
|
93
|
+
# end
|