linguify 0.4.1 → 0.5.0

Sign up to get free protection for your applications and to get access to all the features.
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.4.1
1
+ 0.5.0
@@ -51,7 +51,7 @@ module Linguify
51
51
  :regexp => rule[:match].inspect,
52
52
  :args => rule[:match].match(str).to_a[1..-1]
53
53
 
54
- str.gsub!(rule[:match],reduction.to_rexp)
54
+ str = reduce_string(str,rule[:match],reduction.to_rexp)
55
55
  break if /^{.*}$/ =~ str
56
56
  end
57
57
 
@@ -84,7 +84,66 @@ module Linguify
84
84
  raise "hell"
85
85
  end
86
86
  end
87
-
87
+
88
+ # Reduce a string with a matching reduction expression
89
+ #
90
+ # @param [ String ] the sentence to reduce (haystack)
91
+ # @param [ Regexp ] the reduction expression (search needle)
92
+ # @param [ String ] the replacement
93
+ # @return [ String ] the reduced string
94
+ #
95
+ def reduce_string str,match_expression,reduction
96
+ match = match_expression.match(str).to_a
97
+ if match.size == 1
98
+ str.gsub(match_expression,reduction)
99
+ else
100
+ needle = match[0]
101
+ splitted = Linguified.informative_split(str,needle)
102
+
103
+ splitted.map{ |split| split.kind_of?(Symbol) ? reduction : split }.join
104
+ end
105
+ end
106
+
107
+ # Split a string by given search needle into an array with split indicators
108
+ #
109
+ # @param [ String ] the string to search (haystack)
110
+ # @param [ String ] needle (search needle)
111
+ # @return [ Array ] the remaining pieces and needle tags
112
+ #
113
+ def self.informative_split str,needle
114
+ splitted = str.split(needle)
115
+ if str.index(needle) > 0
116
+ if splitted.size & 1 == 0
117
+ splitted.map{ |m| [m,:needle] }.flatten[0..-2]
118
+ else
119
+ splitted.map{ |m| [m,:needle] }.flatten
120
+ end
121
+ else
122
+ if splitted.size > 0
123
+ splitted.map{ |m| [m,:needle] }.flatten[1..-2]
124
+ elsif str == needle
125
+ [ :needle ]
126
+ else
127
+ []
128
+ end
129
+ end
130
+ end
131
+
132
+ # Test if an informative split contains needles on word boundaries
133
+ #
134
+ # @param [ Array ] the splitted string
135
+ # @return [ Boolean ] true if so
136
+ #
137
+ def self.has_needle_on_word_boundary? splitted
138
+ splitted.each_with_index do |split,i|
139
+ if split.kind_of? String
140
+ word_bound = i == 0 ? split[-1] == ' ' : split[0] == ' ' || split[-1] == ' '
141
+ return true if word_bound
142
+ end
143
+ end
144
+ false
145
+ end
146
+
88
147
  # Find a reduction rule for the string
89
148
  #
90
149
  # @param [ String ] string A plain English string, or a plain English string with reductions in it.
@@ -92,7 +151,17 @@ module Linguify
92
151
  def find_rule str
93
152
  found = Linguify.rules.select do |rule|
94
153
  if rule[:match] =~ str
95
- true
154
+ # ok, it matched, but only allow matches with word boundaries
155
+ match = rule[:match].match(str).to_a
156
+ if match.size == 1
157
+ # one match means the search space contains just the needle, so it's a perfect match
158
+ true
159
+ else
160
+ # multiple matches, check if the needle is found on word boundaries
161
+ raise "uh?" unless match.size == 2
162
+ needle = match[1]
163
+ Linguified.has_needle_on_word_boundary? Linguified.informative_split(str,needle)
164
+ end
96
165
  else
97
166
  false
98
167
  end
data/linguify.gemspec CHANGED
@@ -5,11 +5,11 @@
5
5
 
6
6
  Gem::Specification.new do |s|
7
7
  s.name = "linguify"
8
- s.version = "0.4.1"
8
+ s.version = "0.5.0"
9
9
 
10
10
  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
11
11
  s.authors = ["Patrick Hanevold"]
12
- s.date = "2011-12-11"
12
+ s.date = "2012-01-13"
13
13
  s.description = "Linguify is a linguistic compiler allowing you to compile and execute plain english."
14
14
  s.email = "patrick.hanevold@gmail.com"
15
15
  s.extra_rdoc_files = [
@@ -25,6 +25,20 @@ require 'linguify'
25
25
 
26
26
  describe Linguify::Linguified, "#linguify" do
27
27
 
28
+ it "finds words in sentences" do
29
+ Linguify::Linguified.informative_split("I fight for the users","users").should == ["I fight for the ", :needle]
30
+ Linguify::Linguified.informative_split("I fight for the users","for the").should == ["I fight ", :needle, " users"]
31
+ Linguify::Linguified.informative_split("I fight for the users","t for the u").should == ["I figh", :needle, "sers"]
32
+ Linguify::Linguified.informative_split("I fight for the users","I").should == [:needle, " fight for the users"]
33
+ end
34
+
35
+ it "respects word boundaries" do
36
+ Linguify::Linguified.has_needle_on_word_boundary?(Linguify::Linguified.informative_split("I fight for the users","users")).should == true
37
+ Linguify::Linguified.has_needle_on_word_boundary?(Linguify::Linguified.informative_split("I fight for the users","for the")).should == true
38
+ Linguify::Linguified.has_needle_on_word_boundary?(Linguify::Linguified.informative_split("I fight for the users","t for the u")).should == false
39
+ Linguify::Linguified.has_needle_on_word_boundary?(Linguify::Linguified.informative_split("I fight for the users","I")).should == true
40
+ end
41
+
28
42
  it "should reduce multiple rules into ruby code" do
29
43
 
30
44
  reduce /all directories/ => 'directories' do
metadata CHANGED
@@ -2,7 +2,7 @@
2
2
  name: linguify
3
3
  version: !ruby/object:Gem::Version
4
4
  prerelease:
5
- version: 0.4.1
5
+ version: 0.5.0
6
6
  platform: ruby
7
7
  authors:
8
8
  - Patrick Hanevold
@@ -10,7 +10,7 @@ autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
12
 
13
- date: 2011-12-11 00:00:00 Z
13
+ date: 2012-01-13 00:00:00 Z
14
14
  dependencies:
15
15
  - !ruby/object:Gem::Dependency
16
16
  name: sourcify
@@ -115,7 +115,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
115
115
  requirements:
116
116
  - - ">="
117
117
  - !ruby/object:Gem::Version
118
- hash: -1257919837541355305
118
+ hash: 2420952439541187421
119
119
  segments:
120
120
  - 0
121
121
  version: "0"