perseus_match 0.0.5 → 0.0.6

This diff shows the differences between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
data/README CHANGED
@@ -2,7 +2,7 @@
2
2
 
3
3
  == VERSION
4
4
 
5
- This documentation refers to perseus_match version 0.0.5
5
+ This documentation refers to perseus_match version 0.0.6
6
6
 
7
7
 
8
8
  == DESCRIPTION
data/bin/perseus_match CHANGED
@@ -8,6 +8,7 @@ require 'set'
8
8
  require 'rubygems'
9
9
  require 'nuggets/enumerable/minmax'
10
10
  require 'nuggets/numeric/duration'
11
+ require 'nuggets/string/evaluate'
11
12
 
12
13
  $: << File.join(File.dirname(__FILE__), '..', 'lib')
13
14
 
@@ -26,6 +27,7 @@ options = {
26
27
  :minimal => false,
27
28
  :separate => false,
28
29
  :lingo => false,
30
+ :format => nil,
29
31
  :check => false,
30
32
  :failed_only => false,
31
33
  :align => false,
@@ -84,6 +86,10 @@ OptionParser.new { |opts|
84
86
  options[:lingo] = true
85
87
  }
86
88
 
89
+ opts.on('-F', '--format FORMAT', 'Custom output format. Available placeholders:', ' %p = phrase', ' %P = phrase, CSV-ready', ' %t = target', ' %T = target, CSV-ready', ' %d = distance', ' %s = similarity') { |f|
90
+ options[:format] = f
91
+ }
92
+
87
93
  opts.separator ' '
88
94
  opts.separator ' * Checking pairs'
89
95
  opts.separator ' '
@@ -265,10 +271,37 @@ action = if options[:check]
265
271
  _action
266
272
  end
267
273
  else
268
- format =
269
- options[:lingo] ? lambda { |pm| "#{pm.phrase}*#{pm.target}" } :
270
- options[:sort] ? lambda { |pm| " #{[pm.target, pm.distance, pm.similarity].inspect}" } :
271
- lambda { |pm| [pm.phrase, pm.target, pm.distance, pm.similarity].inspect }
274
+ format = if _format = options[:format]
275
+ substitutions = {
276
+ 'p' => ['#{pm.phrase}', 's'],
277
+ 'P' => ['"#{pm.phrase.gsub(/"/, %q{""})}"', 's'],
278
+ 't' => ['#{pm.target}', 's'],
279
+ 'T' => ['"#{pm.target.gsub(/"/, %q{""})}"', 's'],
280
+ 'd' => ['#{pm.distance}', 'd'],
281
+ 's' => ['#{pm.similarity}', 'f']
282
+ }
283
+
284
+ lambda { |pm|
285
+ _format.gsub(/(%-?[.\d]*)([pPtTds])/) {
286
+ value, field = substitutions[$2]
287
+ "#{$1}#{field}" % value.evaluate(binding)
288
+ }
289
+ }
290
+ else
291
+ if options[:lingo]
292
+ if options[:minimal]
293
+ lambda { |pm| ["#{pm.phrase}*#{pm.target}", "#{pm.target}*#{pm.phrase}"] }
294
+ else
295
+ lambda { |pm| "#{pm.phrase}*#{pm.target}" }
296
+ end
297
+ else
298
+ if options[:sort]
299
+ lambda { |pm| " #{[pm.target, pm.distance, pm.similarity].inspect}" }
300
+ else
301
+ lambda { |pm| [pm.phrase, pm.target, pm.distance, pm.similarity].inspect }
302
+ end
303
+ end
304
+ end
272
305
 
273
306
  if options[:sort]
274
307
  lambda {
@@ -290,13 +323,14 @@ else
290
323
  PerseusMatch::List.pair(phrases, pm_options, list_options) { |pm|
291
324
  count_all += 1
292
325
 
293
- if separator && pm.phrase != previous_phrase ||= pm.phrase
294
- puts separator
295
- previous_phrase = pm.phrase
296
- end
297
-
298
326
  if pm.similarity >= threshold
299
327
  count += 1
328
+
329
+ if separator && pm.phrase != previous_phrase ||= pm.phrase
330
+ puts separator
331
+ previous_phrase = pm.phrase
332
+ end
333
+
300
334
  puts format[pm]
301
335
  end
302
336
  }
@@ -81,9 +81,13 @@ class PerseusMatch
81
81
  def self.tokenize(form, unknowns = false)
82
82
  return @tokens[form] if @tokens
83
83
 
84
- @_tokens, @tokens = {}, Hash.new { |h, k| h[k] = new(
85
- k, (@_tokens[k] || []) | k.scan(/\w+/).map { |i| @_tokens[i] }.flatten.compact
86
- )}
84
+ @_tokens, @tokens = {}, Hash.new { |h, k|
85
+ h[k] = new(
86
+ k, (@_tokens[k] || []) | (
87
+ k.scan(/\w+/) + k.scan(/[\w-]+/)
88
+ ).map { |i| @_tokens[i] }.flatten.compact
89
+ )
90
+ }
87
91
 
88
92
  parse = lambda { |x|
89
93
  x.each_line { |res|
@@ -130,10 +134,12 @@ class PerseusMatch
130
134
  file = temp.path
131
135
  end
132
136
 
137
+ ruby = Config::CONFIG.values_at('RUBY_INSTALL_NAME', 'EXEEXT').join
138
+
133
139
  begin
134
- Dir.chdir(LINGO_BASE) { parse[%x{
135
- #{Config::CONFIG['ruby_install_name']} lingo.rb -c "#{cfg.path}" < "#{file}"
136
- }] }
140
+ Dir.chdir(LINGO_BASE) {
141
+ parse[%x{#{ruby} lingo.rb -c "#{cfg.path}" < "#{file}"}]
142
+ }
137
143
  ensure
138
144
  cfg.unlink
139
145
  temp.unlink if temp
@@ -4,7 +4,7 @@ class PerseusMatch
4
4
 
5
5
  MAJOR = 0
6
6
  MINOR = 0
7
- TINY = 5
7
+ TINY = 6
8
8
 
9
9
  class << self
10
10
 
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: perseus_match
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.5
4
+ version: 0.0.6
5
5
  platform: ruby
6
6
  authors:
7
7
  - Jens Wille
@@ -9,7 +9,7 @@ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
11
 
12
- date: 2009-01-22 00:00:00 +01:00
12
+ date: 2009-01-26 00:00:00 +01:00
13
13
  default_executable:
14
14
  dependencies:
15
15
  - !ruby/object:Gem::Dependency