perseus_match 0.0.5 → 0.0.6

Sign up to get free protection for your applications and to get access to all the features.
data/README CHANGED
@@ -2,7 +2,7 @@
2
2
 
3
3
  == VERSION
4
4
 
5
- This documentation refers to perseus_match version 0.0.5
5
+ This documentation refers to perseus_match version 0.0.6
6
6
 
7
7
 
8
8
  == DESCRIPTION
data/bin/perseus_match CHANGED
@@ -8,6 +8,7 @@ require 'set'
8
8
  require 'rubygems'
9
9
  require 'nuggets/enumerable/minmax'
10
10
  require 'nuggets/numeric/duration'
11
+ require 'nuggets/string/evaluate'
11
12
 
12
13
  $: << File.join(File.dirname(__FILE__), '..', 'lib')
13
14
 
@@ -26,6 +27,7 @@ options = {
26
27
  :minimal => false,
27
28
  :separate => false,
28
29
  :lingo => false,
30
+ :format => nil,
29
31
  :check => false,
30
32
  :failed_only => false,
31
33
  :align => false,
@@ -84,6 +86,10 @@ OptionParser.new { |opts|
84
86
  options[:lingo] = true
85
87
  }
86
88
 
89
+ opts.on('-F', '--format FORMAT', 'Custom output format. Available placeholders:', ' %p = phrase', ' %P = phrase, CSV-ready', ' %t = target', ' %T = target, CSV-ready', ' %d = distance', ' %s = similarity') { |f|
90
+ options[:format] = f
91
+ }
92
+
87
93
  opts.separator ' '
88
94
  opts.separator ' * Checking pairs'
89
95
  opts.separator ' '
@@ -265,10 +271,37 @@ action = if options[:check]
265
271
  _action
266
272
  end
267
273
  else
268
- format =
269
- options[:lingo] ? lambda { |pm| "#{pm.phrase}*#{pm.target}" } :
270
- options[:sort] ? lambda { |pm| " #{[pm.target, pm.distance, pm.similarity].inspect}" } :
271
- lambda { |pm| [pm.phrase, pm.target, pm.distance, pm.similarity].inspect }
274
+ format = if _format = options[:format]
275
+ substitutions = {
276
+ 'p' => ['#{pm.phrase}', 's'],
277
+ 'P' => ['"#{pm.phrase.gsub(/"/, %q{""})}"', 's'],
278
+ 't' => ['#{pm.target}', 's'],
279
+ 'T' => ['"#{pm.target.gsub(/"/, %q{""})}"', 's'],
280
+ 'd' => ['#{pm.distance}', 'd'],
281
+ 's' => ['#{pm.similarity}', 'f']
282
+ }
283
+
284
+ lambda { |pm|
285
+ _format.gsub(/(%-?[.\d]*)([pPtTds])/) {
286
+ value, field = substitutions[$2]
287
+ "#{$1}#{field}" % value.evaluate(binding)
288
+ }
289
+ }
290
+ else
291
+ if options[:lingo]
292
+ if options[:minimal]
293
+ lambda { |pm| ["#{pm.phrase}*#{pm.target}", "#{pm.target}*#{pm.phrase}"] }
294
+ else
295
+ lambda { |pm| "#{pm.phrase}*#{pm.target}" }
296
+ end
297
+ else
298
+ if options[:sort]
299
+ lambda { |pm| " #{[pm.target, pm.distance, pm.similarity].inspect}" }
300
+ else
301
+ lambda { |pm| [pm.phrase, pm.target, pm.distance, pm.similarity].inspect }
302
+ end
303
+ end
304
+ end
272
305
 
273
306
  if options[:sort]
274
307
  lambda {
@@ -290,13 +323,14 @@ else
290
323
  PerseusMatch::List.pair(phrases, pm_options, list_options) { |pm|
291
324
  count_all += 1
292
325
 
293
- if separator && pm.phrase != previous_phrase ||= pm.phrase
294
- puts separator
295
- previous_phrase = pm.phrase
296
- end
297
-
298
326
  if pm.similarity >= threshold
299
327
  count += 1
328
+
329
+ if separator && pm.phrase != previous_phrase ||= pm.phrase
330
+ puts separator
331
+ previous_phrase = pm.phrase
332
+ end
333
+
300
334
  puts format[pm]
301
335
  end
302
336
  }
@@ -81,9 +81,13 @@ class PerseusMatch
81
81
  def self.tokenize(form, unknowns = false)
82
82
  return @tokens[form] if @tokens
83
83
 
84
- @_tokens, @tokens = {}, Hash.new { |h, k| h[k] = new(
85
- k, (@_tokens[k] || []) | k.scan(/\w+/).map { |i| @_tokens[i] }.flatten.compact
86
- )}
84
+ @_tokens, @tokens = {}, Hash.new { |h, k|
85
+ h[k] = new(
86
+ k, (@_tokens[k] || []) | (
87
+ k.scan(/\w+/) + k.scan(/[\w-]+/)
88
+ ).map { |i| @_tokens[i] }.flatten.compact
89
+ )
90
+ }
87
91
 
88
92
  parse = lambda { |x|
89
93
  x.each_line { |res|
@@ -130,10 +134,12 @@ class PerseusMatch
130
134
  file = temp.path
131
135
  end
132
136
 
137
+ ruby = Config::CONFIG.values_at('RUBY_INSTALL_NAME', 'EXEEXT').join
138
+
133
139
  begin
134
- Dir.chdir(LINGO_BASE) { parse[%x{
135
- #{Config::CONFIG['ruby_install_name']} lingo.rb -c "#{cfg.path}" < "#{file}"
136
- }] }
140
+ Dir.chdir(LINGO_BASE) {
141
+ parse[%x{#{ruby} lingo.rb -c "#{cfg.path}" < "#{file}"}]
142
+ }
137
143
  ensure
138
144
  cfg.unlink
139
145
  temp.unlink if temp
@@ -4,7 +4,7 @@ class PerseusMatch
4
4
 
5
5
  MAJOR = 0
6
6
  MINOR = 0
7
- TINY = 5
7
+ TINY = 6
8
8
 
9
9
  class << self
10
10
 
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: perseus_match
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.5
4
+ version: 0.0.6
5
5
  platform: ruby
6
6
  authors:
7
7
  - Jens Wille
@@ -9,7 +9,7 @@ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
11
 
12
- date: 2009-01-22 00:00:00 +01:00
12
+ date: 2009-01-26 00:00:00 +01:00
13
13
  default_executable:
14
14
  dependencies:
15
15
  - !ruby/object:Gem::Dependency