blackwinter-perseus_match 0.0.4 → 0.0.6

Sign up to get free protection for your applications and to get access to all the features.
data/README CHANGED
@@ -2,7 +2,7 @@
2
2
 
3
3
  == VERSION
4
4
 
5
- This documentation refers to perseus_match version 0.0.4
5
+ This documentation refers to perseus_match version 0.0.6
6
6
 
7
7
 
8
8
  == DESCRIPTION
@@ -8,6 +8,7 @@ require 'set'
8
8
  require 'rubygems'
9
9
  require 'nuggets/enumerable/minmax'
10
10
  require 'nuggets/numeric/duration'
11
+ require 'nuggets/string/evaluate'
11
12
 
12
13
  $: << File.join(File.dirname(__FILE__), '..', 'lib')
13
14
 
@@ -26,6 +27,7 @@ options = {
26
27
  :minimal => false,
27
28
  :separate => false,
28
29
  :lingo => false,
30
+ :format => nil,
29
31
  :check => false,
30
32
  :failed_only => false,
31
33
  :align => false,
@@ -84,6 +86,10 @@ OptionParser.new { |opts|
84
86
  options[:lingo] = true
85
87
  }
86
88
 
89
+ opts.on('-F', '--format FORMAT', 'Custom output format. Available placeholders:', ' %p = phrase', ' %P = phrase, CSV-ready', ' %t = target', ' %T = target, CSV-ready', ' %d = distance', ' %s = similarity') { |f|
90
+ options[:format] = f
91
+ }
92
+
87
93
  opts.separator ' '
88
94
  opts.separator ' * Checking pairs'
89
95
  opts.separator ' '
@@ -265,10 +271,37 @@ action = if options[:check]
265
271
  _action
266
272
  end
267
273
  else
268
- format =
269
- options[:lingo] ? lambda { |pm| "#{pm.phrase}*#{pm.target}" } :
270
- options[:sort] ? lambda { |pm| " #{[pm.target, pm.distance, pm.similarity].inspect}" } :
271
- lambda { |pm| [pm.phrase, pm.target, pm.distance, pm.similarity].inspect }
274
+ format = if _format = options[:format]
275
+ substitutions = {
276
+ 'p' => ['#{pm.phrase}', 's'],
277
+ 'P' => ['"#{pm.phrase.gsub(/"/, %q{""})}"', 's'],
278
+ 't' => ['#{pm.target}', 's'],
279
+ 'T' => ['"#{pm.target.gsub(/"/, %q{""})}"', 's'],
280
+ 'd' => ['#{pm.distance}', 'd'],
281
+ 's' => ['#{pm.similarity}', 'f']
282
+ }
283
+
284
+ lambda { |pm|
285
+ _format.gsub(/(%-?[.\d]*)([pPtTds])/) {
286
+ value, field = substitutions[$2]
287
+ "#{$1}#{field}" % value.evaluate(binding)
288
+ }
289
+ }
290
+ else
291
+ if options[:lingo]
292
+ if options[:minimal]
293
+ lambda { |pm| ["#{pm.phrase}*#{pm.target}", "#{pm.target}*#{pm.phrase}"] }
294
+ else
295
+ lambda { |pm| "#{pm.phrase}*#{pm.target}" }
296
+ end
297
+ else
298
+ if options[:sort]
299
+ lambda { |pm| " #{[pm.target, pm.distance, pm.similarity].inspect}" }
300
+ else
301
+ lambda { |pm| [pm.phrase, pm.target, pm.distance, pm.similarity].inspect }
302
+ end
303
+ end
304
+ end
272
305
 
273
306
  if options[:sort]
274
307
  lambda {
@@ -290,13 +323,14 @@ else
290
323
  PerseusMatch::List.pair(phrases, pm_options, list_options) { |pm|
291
324
  count_all += 1
292
325
 
293
- if separator && pm.phrase != previous_phrase ||= pm.phrase
294
- puts separator
295
- previous_phrase = pm.phrase
296
- end
297
-
298
326
  if pm.similarity >= threshold
299
327
  count += 1
328
+
329
+ if separator && pm.phrase != previous_phrase ||= pm.phrase
330
+ puts separator
331
+ previous_phrase = pm.phrase
332
+ end
333
+
300
334
  puts format[pm]
301
335
  end
302
336
  }
@@ -26,7 +26,7 @@
26
26
  ###############################################################################
27
27
  #++
28
28
 
29
- $KCODE = 'u'
29
+ $KCODE = 'u' unless RUBY_VERSION >= '1.9'
30
30
 
31
31
  require 'pathname'
32
32
  require 'rbconfig'
@@ -81,12 +81,16 @@ class PerseusMatch
81
81
  def self.tokenize(form, unknowns = false)
82
82
  return @tokens[form] if @tokens
83
83
 
84
- @_tokens, @tokens = {}, Hash.new { |h, k| h[k] = new(
85
- k, (@_tokens[k] || []) | k.scan(/\w+/).map { |i| @_tokens[i] }.flatten.compact
86
- )}
84
+ @_tokens, @tokens = {}, Hash.new { |h, k|
85
+ h[k] = new(
86
+ k, (@_tokens[k] || []) | (
87
+ k.scan(/\w+/) + k.scan(/[\w-]+/)
88
+ ).map { |i| @_tokens[i] }.flatten.compact
89
+ )
90
+ }
87
91
 
88
92
  parse = lambda { |x|
89
- x.each { |res|
93
+ x.each_line { |res|
90
94
  case res
91
95
  when /<(.*?)\s=\s\[(.*)\]>/
92
96
  a, b = $1, $2
@@ -130,10 +134,12 @@ class PerseusMatch
130
134
  file = temp.path
131
135
  end
132
136
 
137
+ ruby = Config::CONFIG.values_at('RUBY_INSTALL_NAME', 'EXEEXT').join
138
+
133
139
  begin
134
- Dir.chdir(LINGO_BASE) { parse[%x{
135
- #{Config::CONFIG['ruby_install_name']} lingo.rb -c "#{cfg.path}" < "#{file}"
136
- }] }
140
+ Dir.chdir(LINGO_BASE) {
141
+ parse[%x{#{ruby} lingo.rb -c "#{cfg.path}" < "#{file}"}]
142
+ }
137
143
  ensure
138
144
  cfg.unlink
139
145
  temp.unlink if temp
@@ -4,7 +4,7 @@ class PerseusMatch
4
4
 
5
5
  MAJOR = 0
6
6
  MINOR = 0
7
- TINY = 4
7
+ TINY = 6
8
8
 
9
9
  class << self
10
10
 
@@ -3,14 +3,14 @@ describe PerseusMatch::Cluster do
3
3
  it 'should accept limit option in sort_by' do
4
4
  PerseusMatch::Cluster.new(%w[foo bar]).sort_by(:similarity, :limit => 1).all? { |phrase, matches|
5
5
  matches.size.should == 1
6
- matches.size.should == matches.nitems
6
+ matches.should_not include(nil)
7
7
  }
8
8
  end
9
9
 
10
10
  it 'should accept threshold option in sort_by (1a)' do
11
11
  PerseusMatch::Cluster.new(%w[foo bar]).sort_by(:similarity, :threshold => 0.1).all? { |phrase, matches|
12
12
  matches.size.should == 1
13
- matches.size.should == matches.nitems
13
+ matches.should_not include(nil)
14
14
  matches.each { |match| match.target.should == phrase }
15
15
  }
16
16
  end
@@ -18,28 +18,28 @@ describe PerseusMatch::Cluster do
18
18
  it 'should accept threshold option in sort_by (1b)' do
19
19
  PerseusMatch::Cluster.new(%w[foo bar]).sort_by(:similarity, :threshold => 0).all? { |phrase, matches|
20
20
  matches.size.should == 2
21
- matches.size.should == matches.nitems
21
+ matches.should_not include(nil)
22
22
  }
23
23
  end
24
24
 
25
25
  it 'should accept threshold option in sort_by (2)' do
26
26
  PerseusMatch::Cluster.new(%w[foo bar]).sort_by(:target, :threshold => 'c').all? { |phrase, matches|
27
27
  matches.size.should == 1
28
- matches.size.should == matches.nitems
28
+ matches.should_not include(nil)
29
29
  }
30
30
  end
31
31
 
32
32
  it 'should accept both limit and threshold options in sort_by (1)' do
33
33
  PerseusMatch::Cluster.new(%w[foo bar]).sort_by(:target, :threshold => 'z', :limit => 1).all? { |phrase, matches|
34
34
  matches.size.should == 1
35
- matches.size.should == matches.nitems
35
+ matches.should_not include(nil)
36
36
  }
37
37
  end
38
38
 
39
39
  it 'should accept both limit and threshold options in sort_by (2)' do
40
40
  PerseusMatch::Cluster.new(%w[foo bar]).sort_by(:target, :threshold => 'a', :limit => 1).all? { |phrase, matches|
41
41
  matches.size.should be_zero
42
- matches.size.should == matches.nitems
42
+ matches.should_not include(nil)
43
43
  }
44
44
  end
45
45
 
@@ -1,3 +1,5 @@
1
+ # encoding: utf-8
2
+
1
3
  require 'rubygems'
2
4
  require 'nuggets/tempfile/open'
3
5
  require 'nuggets/util/i18n'
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: blackwinter-perseus_match
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.4
4
+ version: 0.0.6
5
5
  platform: ruby
6
6
  authors:
7
7
  - Jens Wille
@@ -9,7 +9,7 @@ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
11
 
12
- date: 2009-01-13 00:00:00 -08:00
12
+ date: 2009-01-26 00:00:00 -08:00
13
13
  default_executable: perseus_match
14
14
  dependencies:
15
15
  - !ruby/object:Gem::Dependency