linguify 0.4.1 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.4.1
1
+ 0.5.0
@@ -51,7 +51,7 @@ module Linguify
51
51
  :regexp => rule[:match].inspect,
52
52
  :args => rule[:match].match(str).to_a[1..-1]
53
53
 
54
- str.gsub!(rule[:match],reduction.to_rexp)
54
+ str = reduce_string(str,rule[:match],reduction.to_rexp)
55
55
  break if /^{.*}$/ =~ str
56
56
  end
57
57
 
@@ -84,7 +84,66 @@ module Linguify
84
84
  raise "hell"
85
85
  end
86
86
  end
87
-
87
+
88
+ # Reduce a string with a matching reduction expression
89
+ #
90
+ # @param [ String ] the sentence to reduce (haystack)
91
+ # @param [ Regexp ] the reduction expression (search needle)
92
+ # @param [ String ] the replacement
93
+ # @return [ String ] the reduced string
94
+ #
95
+ def reduce_string str,match_expression,reduction
96
+ match = match_expression.match(str).to_a
97
+ if match.size == 1
98
+ str.gsub(match_expression,reduction)
99
+ else
100
+ needle = match[0]
101
+ splitted = Linguified.informative_split(str,needle)
102
+
103
+ splitted.map{ |split| split.kind_of?(Symbol) ? reduction : split }.join
104
+ end
105
+ end
106
+
107
+ # Split a string by given search needle into an array with split indicators
108
+ #
109
+ # @param [ String ] the string to search (haystack)
110
+ # @param [ String ] needle (search needle)
111
+ # @return [ Array ] the remaining pieces and needle tags
112
+ #
113
+ def self.informative_split str,needle
114
+ splitted = str.split(needle)
115
+ if str.index(needle) > 0
116
+ if splitted.size & 1 == 0
117
+ splitted.map{ |m| [m,:needle] }.flatten[0..-2]
118
+ else
119
+ splitted.map{ |m| [m,:needle] }.flatten
120
+ end
121
+ else
122
+ if splitted.size > 0
123
+ splitted.map{ |m| [m,:needle] }.flatten[1..-2]
124
+ elsif str == needle
125
+ [ :needle ]
126
+ else
127
+ []
128
+ end
129
+ end
130
+ end
131
+
132
+ # Test if an informative split contains needles on word boundaries
133
+ #
134
+ # @param [ Array ] the splitted string
135
+ # @return [ Boolean ] true if so
136
+ #
137
+ def self.has_needle_on_word_boundary? splitted
138
+ splitted.each_with_index do |split,i|
139
+ if split.kind_of? String
140
+ word_bound = i == 0 ? split[-1] == ' ' : split[0] == ' ' || split[-1] == ' '
141
+ return true if word_bound
142
+ end
143
+ end
144
+ false
145
+ end
146
+
88
147
  # Find a reduction rule for the string
89
148
  #
90
149
  # @param [ String ] string A plain English string, or a plain English string with reductions in it.
@@ -92,7 +151,17 @@ module Linguify
92
151
  def find_rule str
93
152
  found = Linguify.rules.select do |rule|
94
153
  if rule[:match] =~ str
95
- true
154
+ # ok, it matched, but only allow matches on word boundaries
155
+ match = rule[:match].match(str).to_a
156
+ if match.size == 1
157
+ # one match means the search space contains just the needle, so it's a perfect match
158
+ true
159
+ else
160
+ # multiple matches, check if the needle is found on word boundaries
161
+ raise "uh?" unless match.size == 2
162
+ needle = match[1]
163
+ Linguified.has_needle_on_word_boundary? Linguified.informative_split(str,needle)
164
+ end
96
165
  else
97
166
  false
98
167
  end
data/linguify.gemspec CHANGED
@@ -5,11 +5,11 @@
5
5
 
6
6
  Gem::Specification.new do |s|
7
7
  s.name = "linguify"
8
- s.version = "0.4.1"
8
+ s.version = "0.5.0"
9
9
 
10
10
  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
11
11
  s.authors = ["Patrick Hanevold"]
12
- s.date = "2011-12-11"
12
+ s.date = "2012-01-13"
13
13
  s.description = "Linguify is a linguistic compiler allowing you to compile and execute plain english."
14
14
  s.email = "patrick.hanevold@gmail.com"
15
15
  s.extra_rdoc_files = [
@@ -25,6 +25,20 @@ require 'linguify'
25
25
 
26
26
  describe Linguify::Linguified, "#linguify" do
27
27
 
28
+ it "finds words in sentences" do
29
+ Linguify::Linguified.informative_split("I fight for the users","users").should == ["I fight for the ", :needle]
30
+ Linguify::Linguified.informative_split("I fight for the users","for the").should == ["I fight ", :needle, " users"]
31
+ Linguify::Linguified.informative_split("I fight for the users","t for the u").should == ["I figh", :needle, "sers"]
32
+ Linguify::Linguified.informative_split("I fight for the users","I").should == [:needle, " fight for the users"]
33
+ end
34
+
35
+ it "respects word boundaries" do
36
+ Linguify::Linguified.has_needle_on_word_boundary?(Linguify::Linguified.informative_split("I fight for the users","users")).should == true
37
+ Linguify::Linguified.has_needle_on_word_boundary?(Linguify::Linguified.informative_split("I fight for the users","for the")).should == true
38
+ Linguify::Linguified.has_needle_on_word_boundary?(Linguify::Linguified.informative_split("I fight for the users","t for the u")).should == false
39
+ Linguify::Linguified.has_needle_on_word_boundary?(Linguify::Linguified.informative_split("I fight for the users","I")).should == true
40
+ end
41
+
28
42
  it "should reduce multiple rules into ruby code" do
29
43
 
30
44
  reduce /all directories/ => 'directories' do
metadata CHANGED
@@ -2,7 +2,7 @@
2
2
  name: linguify
3
3
  version: !ruby/object:Gem::Version
4
4
  prerelease:
5
- version: 0.4.1
5
+ version: 0.5.0
6
6
  platform: ruby
7
7
  authors:
8
8
  - Patrick Hanevold
@@ -10,7 +10,7 @@ autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
12
 
13
- date: 2011-12-11 00:00:00 Z
13
+ date: 2012-01-13 00:00:00 Z
14
14
  dependencies:
15
15
  - !ruby/object:Gem::Dependency
16
16
  name: sourcify
@@ -115,7 +115,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
115
115
  requirements:
116
116
  - - ">="
117
117
  - !ruby/object:Gem::Version
118
- hash: -1257919837541355305
118
+ hash: 2420952439541187421
119
119
  segments:
120
120
  - 0
121
121
  version: "0"