blackwinter-perseus_match 0.0.4 → 0.0.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/README CHANGED
@@ -2,7 +2,7 @@
2
2
 
3
3
  == VERSION
4
4
 
5
- This documentation refers to perseus_match version 0.0.4
5
+ This documentation refers to perseus_match version 0.0.6
6
6
 
7
7
 
8
8
  == DESCRIPTION
@@ -8,6 +8,7 @@ require 'set'
8
8
  require 'rubygems'
9
9
  require 'nuggets/enumerable/minmax'
10
10
  require 'nuggets/numeric/duration'
11
+ require 'nuggets/string/evaluate'
11
12
 
12
13
  $: << File.join(File.dirname(__FILE__), '..', 'lib')
13
14
 
@@ -26,6 +27,7 @@ options = {
26
27
  :minimal => false,
27
28
  :separate => false,
28
29
  :lingo => false,
30
+ :format => nil,
29
31
  :check => false,
30
32
  :failed_only => false,
31
33
  :align => false,
@@ -84,6 +86,10 @@ OptionParser.new { |opts|
84
86
  options[:lingo] = true
85
87
  }
86
88
 
89
+ opts.on('-F', '--format FORMAT', 'Custom output format. Available placeholders:', ' %p = phrase', ' %P = phrase, CSV-ready', ' %t = target', ' %T = target, CSV-ready', ' %d = distance', ' %s = similarity') { |f|
90
+ options[:format] = f
91
+ }
92
+
87
93
  opts.separator ' '
88
94
  opts.separator ' * Checking pairs'
89
95
  opts.separator ' '
@@ -265,10 +271,37 @@ action = if options[:check]
265
271
  _action
266
272
  end
267
273
  else
268
- format =
269
- options[:lingo] ? lambda { |pm| "#{pm.phrase}*#{pm.target}" } :
270
- options[:sort] ? lambda { |pm| " #{[pm.target, pm.distance, pm.similarity].inspect}" } :
271
- lambda { |pm| [pm.phrase, pm.target, pm.distance, pm.similarity].inspect }
274
+ format = if _format = options[:format]
275
+ substitutions = {
276
+ 'p' => ['#{pm.phrase}', 's'],
277
+ 'P' => ['"#{pm.phrase.gsub(/"/, %q{""})}"', 's'],
278
+ 't' => ['#{pm.target}', 's'],
279
+ 'T' => ['"#{pm.target.gsub(/"/, %q{""})}"', 's'],
280
+ 'd' => ['#{pm.distance}', 'd'],
281
+ 's' => ['#{pm.similarity}', 'f']
282
+ }
283
+
284
+ lambda { |pm|
285
+ _format.gsub(/(%-?[.\d]*)([pPtTds])/) {
286
+ value, field = substitutions[$2]
287
+ "#{$1}#{field}" % value.evaluate(binding)
288
+ }
289
+ }
290
+ else
291
+ if options[:lingo]
292
+ if options[:minimal]
293
+ lambda { |pm| ["#{pm.phrase}*#{pm.target}", "#{pm.target}*#{pm.phrase}"] }
294
+ else
295
+ lambda { |pm| "#{pm.phrase}*#{pm.target}" }
296
+ end
297
+ else
298
+ if options[:sort]
299
+ lambda { |pm| " #{[pm.target, pm.distance, pm.similarity].inspect}" }
300
+ else
301
+ lambda { |pm| [pm.phrase, pm.target, pm.distance, pm.similarity].inspect }
302
+ end
303
+ end
304
+ end
272
305
 
273
306
  if options[:sort]
274
307
  lambda {
@@ -290,13 +323,14 @@ else
290
323
  PerseusMatch::List.pair(phrases, pm_options, list_options) { |pm|
291
324
  count_all += 1
292
325
 
293
- if separator && pm.phrase != previous_phrase ||= pm.phrase
294
- puts separator
295
- previous_phrase = pm.phrase
296
- end
297
-
298
326
  if pm.similarity >= threshold
299
327
  count += 1
328
+
329
+ if separator && pm.phrase != previous_phrase ||= pm.phrase
330
+ puts separator
331
+ previous_phrase = pm.phrase
332
+ end
333
+
300
334
  puts format[pm]
301
335
  end
302
336
  }
@@ -26,7 +26,7 @@
26
26
  ###############################################################################
27
27
  #++
28
28
 
29
- $KCODE = 'u'
29
+ $KCODE = 'u' unless RUBY_VERSION >= '1.9'
30
30
 
31
31
  require 'pathname'
32
32
  require 'rbconfig'
@@ -81,12 +81,16 @@ class PerseusMatch
81
81
  def self.tokenize(form, unknowns = false)
82
82
  return @tokens[form] if @tokens
83
83
 
84
- @_tokens, @tokens = {}, Hash.new { |h, k| h[k] = new(
85
- k, (@_tokens[k] || []) | k.scan(/\w+/).map { |i| @_tokens[i] }.flatten.compact
86
- )}
84
+ @_tokens, @tokens = {}, Hash.new { |h, k|
85
+ h[k] = new(
86
+ k, (@_tokens[k] || []) | (
87
+ k.scan(/\w+/) + k.scan(/[\w-]+/)
88
+ ).map { |i| @_tokens[i] }.flatten.compact
89
+ )
90
+ }
87
91
 
88
92
  parse = lambda { |x|
89
- x.each { |res|
93
+ x.each_line { |res|
90
94
  case res
91
95
  when /<(.*?)\s=\s\[(.*)\]>/
92
96
  a, b = $1, $2
@@ -130,10 +134,12 @@ class PerseusMatch
130
134
  file = temp.path
131
135
  end
132
136
 
137
+ ruby = Config::CONFIG.values_at('RUBY_INSTALL_NAME', 'EXEEXT').join
138
+
133
139
  begin
134
- Dir.chdir(LINGO_BASE) { parse[%x{
135
- #{Config::CONFIG['ruby_install_name']} lingo.rb -c "#{cfg.path}" < "#{file}"
136
- }] }
140
+ Dir.chdir(LINGO_BASE) {
141
+ parse[%x{#{ruby} lingo.rb -c "#{cfg.path}" < "#{file}"}]
142
+ }
137
143
  ensure
138
144
  cfg.unlink
139
145
  temp.unlink if temp
@@ -4,7 +4,7 @@ class PerseusMatch
4
4
 
5
5
  MAJOR = 0
6
6
  MINOR = 0
7
- TINY = 4
7
+ TINY = 6
8
8
 
9
9
  class << self
10
10
 
@@ -3,14 +3,14 @@ describe PerseusMatch::Cluster do
3
3
  it 'should accept limit option in sort_by' do
4
4
  PerseusMatch::Cluster.new(%w[foo bar]).sort_by(:similarity, :limit => 1).all? { |phrase, matches|
5
5
  matches.size.should == 1
6
- matches.size.should == matches.nitems
6
+ matches.should_not include(nil)
7
7
  }
8
8
  end
9
9
 
10
10
  it 'should accept threshold option in sort_by (1a)' do
11
11
  PerseusMatch::Cluster.new(%w[foo bar]).sort_by(:similarity, :threshold => 0.1).all? { |phrase, matches|
12
12
  matches.size.should == 1
13
- matches.size.should == matches.nitems
13
+ matches.should_not include(nil)
14
14
  matches.each { |match| match.target.should == phrase }
15
15
  }
16
16
  end
@@ -18,28 +18,28 @@ describe PerseusMatch::Cluster do
18
18
  it 'should accept threshold option in sort_by (1b)' do
19
19
  PerseusMatch::Cluster.new(%w[foo bar]).sort_by(:similarity, :threshold => 0).all? { |phrase, matches|
20
20
  matches.size.should == 2
21
- matches.size.should == matches.nitems
21
+ matches.should_not include(nil)
22
22
  }
23
23
  end
24
24
 
25
25
  it 'should accept threshold option in sort_by (2)' do
26
26
  PerseusMatch::Cluster.new(%w[foo bar]).sort_by(:target, :threshold => 'c').all? { |phrase, matches|
27
27
  matches.size.should == 1
28
- matches.size.should == matches.nitems
28
+ matches.should_not include(nil)
29
29
  }
30
30
  end
31
31
 
32
32
  it 'should accept both limit and threshold options in sort_by (1)' do
33
33
  PerseusMatch::Cluster.new(%w[foo bar]).sort_by(:target, :threshold => 'z', :limit => 1).all? { |phrase, matches|
34
34
  matches.size.should == 1
35
- matches.size.should == matches.nitems
35
+ matches.should_not include(nil)
36
36
  }
37
37
  end
38
38
 
39
39
  it 'should accept both limit and threshold options in sort_by (2)' do
40
40
  PerseusMatch::Cluster.new(%w[foo bar]).sort_by(:target, :threshold => 'a', :limit => 1).all? { |phrase, matches|
41
41
  matches.size.should be_zero
42
- matches.size.should == matches.nitems
42
+ matches.should_not include(nil)
43
43
  }
44
44
  end
45
45
 
@@ -1,3 +1,5 @@
1
+ # encoding: utf-8
2
+
1
3
  require 'rubygems'
2
4
  require 'nuggets/tempfile/open'
3
5
  require 'nuggets/util/i18n'
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: blackwinter-perseus_match
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.4
4
+ version: 0.0.6
5
5
  platform: ruby
6
6
  authors:
7
7
  - Jens Wille
@@ -9,7 +9,7 @@ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
11
 
12
- date: 2009-01-13 00:00:00 -08:00
12
+ date: 2009-01-26 00:00:00 -08:00
13
13
  default_executable: perseus_match
14
14
  dependencies:
15
15
  - !ruby/object:Gem::Dependency