semr 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,4 @@
1
+ == 0.0.1 2008-05-08
2
+
3
+ * 1 major enhancement:
4
+ * Initial release
data/License ADDED
@@ -0,0 +1,20 @@
1
+ Copyright (c) 2008 Matthew Deiters
2
+
3
+ Permission is hereby granted, free of charge, to any person obtaining
4
+ a copy of this software and associated documentation files (the
5
+ "Software"), to deal in the Software without restriction, including
6
+ without limitation the rights to use, copy, modify, merge, publish,
7
+ distribute, sublicense, and/or sell copies of the Software, and to
8
+ permit persons to whom the Software is furnished to do so, subject to
9
+ the following conditions:
10
+
11
+ The above copyright notice and this permission notice shall be
12
+ included in all copies or substantial portions of the Software.
13
+
14
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
18
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
19
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
20
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
@@ -0,0 +1,29 @@
1
+ History.txt
2
+ License
3
+ Manifest.txt
4
+ README.txt
5
+ Rakefile
6
+ example.rb
7
+ lib/semr.rb
8
+ lib/semr/concept.rb
9
+ lib/semr/dictionary.rb
10
+ lib/semr/expressions.rb
11
+ lib/semr/extensions/object.rb
12
+ lib/semr/extensions/string.rb
13
+ lib/semr/language.rb
14
+ lib/semr/normalizers.rb
15
+ lib/semr/phrase.rb
16
+ lib/semr/rails/model_inflector.rb
17
+ lib/semr/rails/model_synonym.rb
18
+ lib/semr/translation.rb
19
+ lib/semr/version.rb
20
+ setup.rb
21
+ tasks/deployment.rake
22
+ tasks/environment.rake
23
+ tasks/rspec.rake
24
+ tasks/website.rake
25
+ website/index.html
26
+ website/index.txt
27
+ website/javascripts/rounded_corners_lite.inc.js
28
+ website/stylesheets/screen.css
29
+ website/template.html.erb
@@ -0,0 +1,54 @@
1
+ Prerequisites
2
+ =============
3
+
4
+ The semr gem uses the oniguruma library to take advantage of more mature regular expression features. Oniguruma is built into Ruby 1.9, but it must be installed separately when running Ruby 1.8.
5
+ More information about the oniguruma gem: http://oniguruma.rubyforge.org/
6
+
7
+ On Windows
8
+ 1. gem install oniguruma
9
+
10
+ On Mac
11
+ 1. Unzip: /install/onig-5.9.1.tar
12
+ 2. cd to /install/onig-5.9.1
13
+ 3. Execute: ./configure
14
+ 4. Execute: make
15
+ 5. Execute: sudo make install
16
+ 6. gem install oniguruma
17
+
18
+ Basics
19
+ ======
20
+
21
+ See example.rb for an example of creating a language (grammar).
22
+
23
+ Describe:
24
+ * Language
25
+ * Concept
26
+ - normalizers
27
+ - expressions
28
+ * Phrase
29
+
30
+
31
+ == LICENSE:
32
+
33
+ (The MIT License)
34
+
35
+ Copyright (c) 2008 Matthew Deiters
36
+
37
+ Permission is hereby granted, free of charge, to any person obtaining
38
+ a copy of this software and associated documentation files (the
39
+ 'Software'), to deal in the Software without restriction, including
40
+ without limitation the rights to use, copy, modify, merge, publish,
41
+ distribute, sublicense, and/or sell copies of the Software, and to
42
+ permit persons to whom the Software is furnished to do so, subject to
43
+ the following conditions:
44
+
45
+ The above copyright notice and this permission notice shall be
46
+ included in all copies or substantial portions of the Software.
47
+
48
+ THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
49
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
50
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
51
+ IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
52
+ CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
53
+ TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
54
+ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
@@ -0,0 +1,7 @@
1
# Project requirements first, then the Hoe-based gem configuration.
require 'config/requirements'
require 'config/hoe' # setup Hoe + all gem configuration

# Load every rake task file found anywhere under tasks/.
Dir['tasks/**/*.rake'].each do |rake_file|
  load rake_file
end

# Clear whatever the default task depended on (test/unit) and make it run
# the specs instead.
Rake::Task[:default].prerequisites.clear
task :default => :spec
@@ -0,0 +1,23 @@
1
require 'rubygems'
require 'semr'

# Define a tiny grammar with two concepts and one phrase that uses both.
greeter = Semr::Language.create do
  concept :number, any_number, :normalize => as_fixnum
  concept :greeting, words('hi', 'goodbye', 'hello')

  # The block receives one argument per concept, in the order they appear
  # in the phrase.
  phrase 'say :greeting :number times' do |salutation, count|
    count.times do
      puts salutation
    end
  end
end

greeter.parse('say hello 6 times')
# Output:
#   hello
#   hello
#   hello
#   hello
#   hello
#   hello

greeter.parse('say goodbye 2 times')
# Output:
#   goodbye
#   goodbye
@@ -0,0 +1,21 @@
1
require 'rubygems'
require 'oniguruma' #http://oniguruma.rubyforge.org

# Put this directory on the load path unless it is already there, either in
# its raw or its expanded form.
lib_dir = File.dirname(__FILE__)
unless $:.include?(lib_dir) || $:.include?(File.expand_path(lib_dir))
  $:.unshift(lib_dir)
end

# Core components, loaded in this fixed order.
%w[
  dictionary
  translation
  expressions
  normalizers
  language
  concept
  phrase
  extensions/string
  extensions/object
].each do |component|
  require File.expand_path(lib_dir + "/semr/#{component}")
end

# Rails integration is only wired up when ActiveRecord is already loaded.
if defined? ActiveRecord
  %w[model_inflector model_synonym].each do |component|
    require File.expand_path(lib_dir + "/semr/rails/#{component}")
  end
  ActiveRecord::Base.extend Semr::Rails::ModelSynonym
end
@@ -0,0 +1,30 @@
1
module Semr
  # A named piece of a grammar: a keyword, the pattern (+definition+) that
  # recognises it, and optional normalizers applied to whatever was matched.
  class Concept
    attr_reader :name, :definition

    # name       - identifier, exposed unchanged through #name.
    # definition - pattern, exposed unchanged through #definition.
    # options    - Hash; only :normalize is read. Its value may be a single
    #              callable or an Array of callables.
    def initialize(name, definition, options = {})
      @name = name
      @definition = definition
      @options = options
    end

    # Converts a raw match into the value passed on to the caller.
    #
    # match - a String, an object responding to #captures (e.g. MatchData),
    #         or nil.
    #
    # Returns nil when match is nil (nothing was captured, so there is nothing
    # to normalize). Otherwise returns the arrayified match after it has been
    # passed through every configured normalizer, in order.
    def normalize(match)
      return nil if match.nil?

      normalizers = @options[:normalize]
      return arrayify(match) unless normalizers

      normalizers = [normalizers] unless normalizers.is_a?(Array)
      normalizers.inject(arrayify(match)) { |value, normalizer| normalizer.call(value) }
    end

    # Reduces a match to its meaningful content.
    #
    # Strings (and nil) are returned untouched. For anything else the capture
    # groups are taken, groups that did not participate (nil) are dropped, and
    # a lone remaining capture is unwrapped from its array.
    #
    # #captures is used instead of slicing the match by its end offset: the
    # end offset is a character position, not a group count, so slicing by it
    # could silently drop groups, and a stock MatchData#end requires an
    # argument.
    def arrayify(match)
      return match if match.nil? || match.kind_of?(String)

      captured = match.captures.compact
      captured.size == 1 ? captured.first : captured
    end
  end
end
@@ -0,0 +1,31 @@
1
module Semr
  # Process-wide registry mapping a term to its root (synonym) term.
  class Dictionary
    class << self
      # The backing Hash of term => root, created on first use.
      def internal_dictionary
        @internal_dictionary ||= {}
      end

      # Returns the root registered for term, or term itself when nothing
      # (or a falsy value) is registered.
      def lookup(term)
        internal_dictionary[term] || term
      end

      # Follows registrations until a term maps to itself, e.g.
      #   peoples => people, people => person, person => person
      #
      # Registrations that form a cycle (a => b, b => a) would otherwise never
      # terminate, so every visited term is remembered and the walk stops at
      # the first term seen twice; that term is returned.
      def find_root(term)
        visited = {}
        until visited.key?(term)
          visited[term] = true
          root = lookup(term)
          return root if root == term

          term = root
        end
        term
      end

      # Records root as the root of term, replacing any earlier registration.
      def register(term, root)
        internal_dictionary[term] = root
      end
    end
  end
end
@@ -0,0 +1,33 @@
1
module Semr
  # Helpers returning regular-expression source strings used as concept
  # definitions.
  module Expressions
    # Pattern capturing any one of the given words as a whole word.
    #
    # The alternation is wrapped in a non-capturing group so both word
    # boundaries apply to every alternative; without it `\b` only bound to the
    # first and last word ("hi" would match inside "high"). The number of
    # capture groups is unchanged.
    def word(*args)
      '(\b(?:' + args.join('|') + ')\b)'
    end
    alias :words :word
    alias :possible_words :word

    # Pattern capturing a run of word characters.
    def any_word
      '(\w+)'
    end

    # Pattern capturing a run of digits.
    # NOTE(review): `*` also lets this match an empty string -- confirm that
    # is intended before tightening it to `+`.
    def any_number
      '([0-9]*)'
    end

    # Pattern capturing the word/whitespace text between two single quotes.
    def words_in_quotes
      '\'([\w\s]+)\''
    end

    # Pattern matching any repetition of the given words (each captured in
    # its own group) mixed with whitespace, commas and "and".
    def multiple_occurrences_of(*words)
      alternatives = words.map { |candidate| "(\\b#{candidate})" }
      '(?:(?:\s|,|and)|' + alternatives.join('|') + ')*'
    end

    # Delegates to Rails::ModelInflector.all, which is defined outside this
    # module.
    def all_models
      Rails::ModelInflector.all
    end
  end
end
@@ -0,0 +1,12 @@
1
# Fallback Object#instance_exec for Ruby versions that do not ship one.
#
# It is installed only when no instance_exec already exists: replacing a
# native implementation with this emulation would break calls on receivers
# whose singleton class cannot be modified (frozen objects, immediates).
unless Object.method_defined?(:instance_exec)
  Object.class_eval do
    # Runs block with self set to the receiver, passing args to the block.
    # Returns the block's result.
    def instance_exec(*args, &block)
      # Temporary method name, keyed by the current thread.
      mname = "__instance_exec_#{Thread.current.object_id.abs}"
      singleton = class << self; self; end
      singleton.class_eval { define_method(mname, &block) }
      begin
        send(mname, *args)
      ensure
        begin
          singleton.class_eval { undef_method(mname) }
        rescue NameError
          # A nested instance_exec on the same object and thread has already
          # removed the temporary method; nothing is left to clean up.
        end
      end
    end
  end
end
@@ -0,0 +1,30 @@
1
# Convenience helpers added to String for phrase parsing.
String.class_eval do
  # Returns the colon-prefixed tokens of the string as Symbols, in order of
  # appearance, e.g. "say :greeting :number times" => [:greeting, :number].
  #
  # NOTE(review): tokens are runs of letters, digits and colons only, so
  # ":first_name" is read as :first -- confirm whether underscores should be
  # accepted.
  def symbols
    scan(/[:a-zA-Z0-9]+/).
      select { |token| token.starts_with?(':') }.
      map { |token| token.symbolize }
  end

  # Removes every colon and converts the rest to a Symbol.
  def symbolize
    gsub(':', '').to_sym
  end

  # A String is already regular-expression source; returns to_s.
  def to_regexp
    to_s
  end

  # True when the string ends with substr. An empty substr always matches
  # (the previous slice-based check returned false for it).
  def ends_with?(substr)
    substr.length <= length && self[length - substr.length, substr.length] == substr
  end

  # True when the string begins with substr. An empty substr always matches
  # (the previous slice-based check returned false for it).
  def starts_with?(substr)
    self[0, substr.length] == substr
  end
  alias_method :begins_with?, :starts_with?
  # Only provide start_with? where Ruby lacks it; overriding the core method
  # would drop its support for multiple prefixes.
  alias_method :start_with?, :starts_with? unless method_defined?(:start_with?)
end
@@ -0,0 +1,47 @@
1
+ #support setting @instance variables in phrase blocks
2
module Semr
  # A grammar: a set of named concepts plus the phrases built from them.
  class Language
    include Expressions
    include Normalizers

    class << self
      # Builds a Language.
      #
      # grammer_file - optional path of a Ruby file whose contents are
      #                evaluated in the context of the new language.
      # block        - optional block evaluated in the same context, before
      #                the file.
      #
      # The file is read verbatim; joining IO.readlines with "\n" doubled
      # every newline, which shifted reported line numbers and altered
      # multi-line literals. The path is handed to instance_eval so errors
      # point at the grammar file.
      #
      # NOTE: the file is executed as Ruby code -- only load trusted grammars.
      def create(grammer_file = nil, &block)
        language = Language.new
        language.instance_eval(&block) if block_given?
        language.instance_eval(File.read(grammer_file), grammer_file.to_s) unless grammer_file.nil?
        language
      end
    end

    # Hash of keyword => Concept, created on first use.
    def concepts
      @concepts ||= {}
    end

    # Array of Phrase objects in definition order, created on first use.
    def phrases
      @phrases ||= []
    end

    # Defines (or redefines) the concept stored under keyword.
    def concept(keyword, definition, options = {})
      concepts[keyword] = Concept.new(keyword, definition, options)
    end

    # Adds a phrase built against the concepts defined so far.
    def phrase(phrase, &block)
      phrases << Phrase.new(concepts, phrase, &block)
    end

    # Splits statement on '.', strips each part, and lets the first phrase
    # that handles a part interpret it. Parts no phrase handles are skipped.
    #
    # Returns the Translation that collected the handled phrases.
    def parse(statement)
      translation = Translation.new
      statement.split('.').map { |part| part.strip }.each do |sentence|
        handler = phrases.find { |candidate| candidate.handles?(sentence) }
        next if handler.nil?

        translation.phrases_translated << handler
        handler.interpret(sentence, translation)
      end
      translation
    end
  end
end
@@ -0,0 +1,31 @@
1
module Semr
  # Factories for procs that post-process a matched value.
  module Normalizers
    # Separates list items: a comma, or "and" as a whole word. The word
    # boundaries keep words that merely contain "and" ("candy") intact.
    LIST_SEPARATOR = /,|\band\b/

    # Proc removing every single quote from the value.
    def by_removing_outer_quotes
      proc { |value| value.gsub("'", "") }
    end

    # Proc calling classify.constantize on the value; those methods are not
    # defined in this file.
    def as_class
      proc { |value| value.classify.constantize }
    end

    # Proc converting the value with to_i.
    def as_fixnum
      proc { |value| value.to_i }
    end

    # Proc splitting the value into stripped list items.
    def as_list
      proc { |value| value.split(LIST_SEPARATOR).map { |item| item.strip } }
    end

    # Proc splitting the value into list items and calling
    # classify.constantize on each stripped item.
    def as_list_of_classes
      proc { |value| value.split(LIST_SEPARATOR).map { |item| item.strip.classify.constantize } }
    end

    # Proc resolving the value through Dictionary.find_root.
    def lookup_synonyms
      proc { |value| Dictionary.find_root(value) }
    end

    # Proc applying block (any callable) to every item of the value.
    def each_item(block)
      proc { |value| value.map { |item| block.call(item) } }
    end
  end
end
@@ -0,0 +1,93 @@
1
module Semr
  # Raised when a phrase refers to a concept that has not been defined.
  class InvalidConceptError < RuntimeError; end

  # A sentence template: literal words, optional <words>, and :concept
  # placeholders, compiled into a case-insensitive Oniguruma expression
  # anchored at the start of the statement.
  class Phrase
    attr_reader :regex, :block

    # all_concepts - lookup (symbol => concept) of every defined concept.
    # phrase       - template String, e.g. 'say :greeting :number times'.
    # block        - code run by #interpret with the normalized matches.
    #
    # Raises InvalidConceptError if the template names an unknown concept.
    def initialize(all_concepts, phrase, &block)
      refined_phrase = remove_optional_words(phrase)
      phrase.symbols.each do |symbol|
        concept = all_concepts[symbol]
        if concept.nil?
          raise InvalidConceptError, "Unable to create phrase because :#{symbol} concept has not been defined."
        end

        concepts << concept
        concept_matcher = "(?<#{symbol}>#{concept.definition.to_regexp})"
        # The lookahead stops :num from being replaced inside :number. The
        # block form inserts the matcher verbatim, so backslashes in the
        # definition are not reinterpreted as replacement escapes.
        placeholder = /:#{Regexp.escape(symbol.to_s)}(?![a-zA-Z0-9])/
        refined_phrase = refined_phrase.gsub(placeholder) { concept_matcher }
      end
      @original = "^#{refined_phrase}"
      @regex = Oniguruma::ORegexp.new(@original, :options => Oniguruma::OPTION_IGNORECASE)
      @block = block
    end

    # Concepts referenced by this phrase, in template order.
    def concepts
      @concepts ||= []
    end

    # Rewrites each <word> (and one following whitespace character) into an
    # optional non-capturing group.
    def remove_optional_words(phrase)
      phrase.gsub(/\<([\w]*)\>\s?/, '(?:\1)?\s?')
    end

    # True when the compiled expression matches statement.
    def handles?(statement)
      !regex.match(statement).nil?
    end

    # Collects the normalized value of every concept from each match of
    # statement, then runs the phrase block in the context of translation
    # with those values as arguments. Returns the block's result.
    def interpret(statement, translation)
      args = []
      regex.scan(statement) do |match|
        # Use the accessor, not @concepts: the ivar is still nil for a phrase
        # that contains no concepts.
        concepts.each do |concept|
          args << concept.normalize(match[concept.name])
        end
      end
      translation.instance_exec(*args, &block)
    end

    # Prints each element of the match slice, followed by a separator line.
    def debug(match)
      match[0..match.end].each do |captured|
        puts captured
        puts ' ---- '
      end
    end

    # The anchored pattern source wrapped in a capture group.
    def to_regexp
      "(#{@original})"
    end
  end
end
62
+ # module Semr
63
+ # class Phrase
64
+ # attr_reader :regex, :block
65
+ #
66
+ # def initialize(phrase, &block)
67
+ # @original = phrase
68
+ # phrase = "^#{phrase}" #match phrase from beginning..$
69
+ # #@regex, @block = Regexp.new(phrase, Regexp::IGNORECASE), block
70
+ # @regex, @block = Oniguruma::ORegexp.new(phrase, :options => Oniguruma::OPTION_IGNORECASE), block
71
+ # end
72
+ #
73
+ # def handles?(statement)
74
+ # match = statement.match(regex)
75
+ # !match.nil?
76
+ # end
77
+ #
78
+ # def interpret(statement, translation)
79
+ # args = []
80
+ # statement.scan(regex) do |match|
81
+ # match = match.flatten.first if match.flatten.size == 1
82
+ # match.delete(nil) if match.kind_of?(Array)
83
+ # args << match
84
+ # end
85
+ # # puts args.inspect
86
+ # translation.instance_exec(*args.flatten, &block)
87
+ # end
88
+ #
89
+ # def to_regexp
90
+ # "(#{@original})"
91
+ # end
92
+ # end
93
+ # end