RubyGems - perseus_match - Versions diffs - 0.0.5 → 0.0.6 - Mend

perseus_match 0.0.5 → 0.0.6

This diff compares the contents of two publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.

Files changed (5)

data/README +1 -1
data/bin/perseus_match +43 -9
data/lib/perseus_match/token_set.rb +12 -6
data/lib/perseus_match/version.rb +1 -1
metadata +2 -2

data/README CHANGED Viewed

@@ -2,7 +2,7 @@
 == VERSION
-This documentation refers to perseus_match version 0.0.5
+This documentation refers to perseus_match version 0.0.6
 == DESCRIPTION

data/bin/perseus_match CHANGED Viewed

@@ -8,6 +8,7 @@ require 'set'
 require 'rubygems'
 require 'nuggets/enumerable/minmax'
 require 'nuggets/numeric/duration'
+require 'nuggets/string/evaluate'
 $: << File.join(File.dirname(__FILE__), '..', 'lib')
@@ -26,6 +27,7 @@ options = {
   :minimal      => false,
   :separate     => false,
   :lingo        => false,
+  :format       => nil,
   :check        => false,
   :failed_only  => false,
   :align        => false,
@@ -84,6 +86,10 @@ OptionParser.new { |opts|
     options[:lingo] = true
   }
+  opts.on('-F', '--format FORMAT', 'Custom output format. Available placeholders:', '  %p = phrase', '  %P = phrase, CSV-ready', '  %t = target', '  %T = target, CSV-ready', '  %d = distance', '  %s = similarity') { |f|
+    options[:format] = f
+  }
   opts.separator ' '
   opts.separator '  * Checking pairs'
   opts.separator ' '
@@ -265,10 +271,37 @@ action = if options[:check]
     _action
   end
 else
-  format =
-    options[:lingo] ? lambda { |pm| "#{pm.phrase}*#{pm.target}" } :
-    options[:sort]  ? lambda { |pm| "  #{[pm.target, pm.distance, pm.similarity].inspect}" } :
-                      lambda { |pm| [pm.phrase, pm.target, pm.distance, pm.similarity].inspect }
+  format = if _format = options[:format]
+    substitutions = {
+      'p' => ['#{pm.phrase}',                     's'],
+      'P' => ['"#{pm.phrase.gsub(/"/, %q{""})}"', 's'],
+      't' => ['#{pm.target}',                     's'],
+      'T' => ['"#{pm.target.gsub(/"/, %q{""})}"', 's'],
+      'd' => ['#{pm.distance}',                   'd'],
+      's' => ['#{pm.similarity}',                 'f']
+    }
+    lambda { |pm|
+      _format.gsub(/(%-?[.\d]*)([pPtTds])/) {
+        value, field = substitutions[$2]
+        "#{$1}#{field}" % value.evaluate(binding)
+      }
+    }
+  else
+    if options[:lingo]
+      if options[:minimal]
+        lambda { |pm| ["#{pm.phrase}*#{pm.target}", "#{pm.target}*#{pm.phrase}"] }
+      else
+        lambda { |pm| "#{pm.phrase}*#{pm.target}" }
+      end
+    else
+      if options[:sort]
+        lambda { |pm| "  #{[pm.target, pm.distance, pm.similarity].inspect}" }
+      else
+        lambda { |pm| [pm.phrase, pm.target, pm.distance, pm.similarity].inspect }
+      end
+    end
+  end
   if options[:sort]
     lambda {
@@ -290,13 +323,14 @@ else
       PerseusMatch::List.pair(phrases, pm_options, list_options) { |pm|
         count_all += 1
-        if separator && pm.phrase != previous_phrase ||= pm.phrase
-          puts separator
-          previous_phrase = pm.phrase
-        end
         if pm.similarity >= threshold
           count += 1
+          if separator && pm.phrase != previous_phrase ||= pm.phrase
+            puts separator
+            previous_phrase = pm.phrase
+          end
           puts format[pm]
         end
       }

data/lib/perseus_match/token_set.rb CHANGED Viewed

@@ -81,9 +81,13 @@ class PerseusMatch
     def self.tokenize(form, unknowns = false)
       return @tokens[form] if @tokens
-      @_tokens, @tokens = {}, Hash.new { |h, k| h[k] = new(
-        k, (@_tokens[k] || []) | k.scan(/\w+/).map { |i| @_tokens[i] }.flatten.compact
-      )}
+      @_tokens, @tokens = {}, Hash.new { |h, k|
+        h[k] = new(
+          k, (@_tokens[k] || []) | (
+            k.scan(/\w+/) + k.scan(/[\w-]+/)
+          ).map { |i| @_tokens[i] }.flatten.compact
+        )
+      }
       parse = lambda { |x|
         x.each_line { |res|
@@ -130,10 +134,12 @@ class PerseusMatch
           file = temp.path
         end
+        ruby = Config::CONFIG.values_at('RUBY_INSTALL_NAME', 'EXEEXT').join
         begin
-          Dir.chdir(LINGO_BASE) { parse[%x{
-            #{Config::CONFIG['ruby_install_name']} lingo.rb -c "#{cfg.path}" < "#{file}"
-          }] }
+          Dir.chdir(LINGO_BASE) {
+            parse[%x{#{ruby} lingo.rb -c "#{cfg.path}" < "#{file}"}]
+          }
         ensure
           cfg.unlink
           temp.unlink if temp

data/lib/perseus_match/version.rb CHANGED Viewed

@@ -4,7 +4,7 @@ class PerseusMatch
     MAJOR = 0
     MINOR = 0
-    TINY  = 5
+    TINY  = 6
     class << self

metadata CHANGED Viewed

@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: perseus_match
 version: !ruby/object:Gem::Version
-  version: 0.0.5
+  version: 0.0.6
 platform: ruby
 authors:
 - Jens Wille
@@ -9,7 +9,7 @@ autorequire:
 bindir: bin
 cert_chain: []
-date: 2009-01-22 00:00:00 +01:00
+date: 2009-01-26 00:00:00 +01:00
 default_executable:
 dependencies:
 - !ruby/object:Gem::Dependency