RubyGems - lumix - Versions diffs - 0.0.1 - Mend

lumix 0.0.1

Files changed (13) hide show

data/COPYING +18 -0
data/bin/lumix +4 -0
data/lib/lumix/concordancer.rb +263 -0
data/lib/lumix/filter.rb +60 -0
data/lib/lumix/gui.rb +148 -0
data/lib/lumix/main.rb +7 -0
data/lib/lumix/result_view.rb +93 -0
data/lib/lumix/schema/001_create_tables.rb +35 -0
data/lib/lumix/schema/002_categories.rb +28 -0
data/lib/lumix/textprocessing.rb +84 -0
data/spec/filter_spec.rb +47 -0
data/spec/text_snippet_spec.rb +52 -0
metadata +108 -0

data/COPYING ADDED Viewed

@@ -0,0 +1,18 @@
+Copyright (c) 2010 Michael Klaus
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to
+deal in the Software without restriction, including without limitation the
+rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+sell copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
+IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

data/bin/lumix ADDED Viewed

@@ -0,0 +1,4 @@
+#!/usr/bin/env jruby
+require 'rubygems'
+require 'lumix/gui'

data/lib/lumix/concordancer.rb ADDED Viewed

@@ -0,0 +1,263 @@
+#!/bin/env ruby
+# TODO take care of 's problem
+# TODO remove Word count line
+require 'rubygems'
+require 'digest/md5'
+require 'sequel'
+require 'sequel/extensions/migration'
+require 'lumix/textprocessing'
+require 'lumix/filter'
+DB_VERSION = 2
+class String
+  # Returns the hexadecimal MD5 digest of the string's current contents.
+  #
+  # The digest is computed on every call instead of being cached in an
+  # instance variable: a cached value would go stale once the string is
+  # mutated, and setting an instance variable fails on frozen strings.
+  def digest
+    Digest::MD5.hexdigest(self)
+  end
+end
+# A character range inside a named text, with helpers that render the hit
+# itself and a number of words to its left and right.
+class TextSnippet
+  attr_reader :name, :text, :begin, :end
+  # name:  label of the text the snippet belongs to
+  # text:  the complete text
+  # first: offset where the hit starts; last: offset just past the hit
+  def initialize(name, text, first, last)
+    @name, @text = name, text
+    @begin, @end = first, last
+  end
+  # The hit, with every whitespace run collapsed to one space.
+  def to_s
+    hit = @text[@begin...@end]
+    cleanup(hit)
+  end
+  # Up to +context+ whitespace-separated words preceding the hit, plus any
+  # partial word that touches the hit's left edge.
+  def left(context = 5)
+    before = @text[0...@begin]
+    cleanup(before[/((\S+\s+){0,#{context}}\S*)\z/m, 1])
+  end
+  # Any partial word touching the hit's right edge, followed by up to
+  # +context+ whitespace-separated words.
+  def right(context = 5)
+    after = @text[@end..-1]
+    cleanup(after[/\A(\S*(\s+\S+){0,#{context}})/m, 1])
+  end
+  # Collapses each run of whitespace (newlines and tabs included) into a
+  # single space.
+  def cleanup(txt)
+    txt.gsub(/\s+/, ' ')
+  end
+end
+# Progress report handed to a progress callback: the task symbol, the total
+# amount of work, optional task data and the amount of work done so far.
+Progress = Struct.new(:task, :work, :data, :done)
+# Stores raw texts and their tagged versions in a database, links the word
+# positions of both versions and searches the tagged text with Filter patterns.
+class Concordancer
+  attr_reader :db, :tp
+  attr_accessor :progress_proc
+  # Connects to db_uri (falling back to sqlite, see #connect) and loads the
+  # ids of all stored texts.
+  #
+  # options[:progress_proc] - callable that receives a Progress on every update
+  # options[:recreate]      - when truthy, drops all tables before migrating
+  def initialize(db_uri, options = {})
+    @progress_proc = options[:progress_proc]
+    @db = connect(db_uri) do |db|
+      db.tables.each{ |t| db.drop_table t } if options[:recreate]
+    end
+    @ids = db[:texts].map { |v| v[:id] }
+    @tp = TextProcessing.new
+  end
+  # True when the connection to the requested database failed and the sqlite
+  # fallback is in use.
+  def fallback?
+    @fallback
+  end
+  # Returns the id of the stored text whose digest matches the contents of
+  # file, or nil when that text has not been imported.
+  def get_id(file)
+    text = File.read(file)
+    saved = db[:texts][:digest => text.digest]
+    saved ? saved[:id] : nil
+  end
+  # Imports the given files and/or directories (expanded by
+  # TextProcessing#to_filelist), then links every text that has no
+  # associations yet.
+  def read(*files)
+    files = tp.to_filelist(files)
+    prog = Progress.new(:read, files.size)
+    puts "Reading #{files.size} files"
+    files.each_with_index do |file, index|
+      # read_file is defined on the Concordancer; TextProcessing has no such method
+      read_file(file)
+      progress(prog, index + 1)
+    end
+    link
+  end
+  # Stores the raw and the tagged version of file unless a text with the same
+  # digest is already in the database. The tagged file is created through the
+  # TextProcessing instance when it does not exist yet.
+  def read_file(file)
+    # read the raw text
+    text = File.read(file)
+    saved = db[:texts][:digest => text.digest]
+    unless saved
+      # retrieve the tagged version
+      tagged_file = tp.create_tagged_filename(file)
+      tp.process_file(file, tagged_file) unless File.exist?(tagged_file)
+      tagged = retag(File.read(tagged_file))
+      id = db[:texts].insert(:digest => text.digest, :text => text, :tagged => tagged, :filename => file, :tagged_filename => tagged_file)
+      @ids << id
+    end
+  end
+  # Returns the ids of all stored texts, one single-element array per row
+  # (callers flatten the result).
+  def all
+    db[:texts].select(:id).map{|v| v.values}
+  end
+  # Like #link, but deletes the existing associations of each text first so
+  # that every text is linked again.
+  def link!(*ids)
+    link(*ids) do |ds|
+      ds.delete
+    end
+  end
+  # Links the texts with the given ids (all texts when none are given).
+  # A text is only linked when it has no association rows; an optional block
+  # receives each text's association dataset before that check.
+  def link(*ids)
+    ids = all if ids.empty?
+    ids.flatten!
+    prog = Progress.new(:link, ids.size)
+    progress(prog)
+    ids.each_with_index do |id, index|
+      ds = db[:assoc].filter(:text_id => id)
+      yield ds if block_given?
+      link_text(id) if ds.empty?
+      progress(prog, index + 1)
+    end
+  end
+  # Searches the tagged version of every text for the given filter expression.
+  # Each hit is yielded as (text_snippet, tagged_snippet); without a block the
+  # snippets are printed. Returns the number of hits.
+  def find(filter)
+    texts = db[:texts]
+    prog = Progress.new(:search, texts.count, filter)
+    progress(prog)
+    re = Filter.to_re(filter)
+    index = 0
+    texts.inject(0) do |result, t|
+      t_id, text, tagged = t[:id], t[:text], t[:tagged]
+      # matches to ranges
+      results = []
+      tagged.scan(re) do |hit|
+        t_begin = $~.begin(0)
+        t_end = $~.end(0)
+        # TODO decouple database operations for performance
+        results << find_range(t_id, t_begin, t_end)
+      end
+      results.each do |f|
+        text_snippet = TextSnippet.new(File.basename(t[:filename]), text, f[:src_begin].to_i, f[:src_end].to_i)
+        tagged_snippet = TextSnippet.new(File.basename(t[:tagged_filename]), tagged, f[:tagged_begin].to_i, f[:tagged_end].to_i)
+        if block_given?
+          yield text_snippet, tagged_snippet
+        else
+          puts text_snippet
+          puts tagged_snippet
+          puts
+        end
+      end
+      progress prog, (index += 1)
+      # running total of hits over all texts
+      result + results.size
+    end
+  end
+  # Returns one row with the smallest src_begin/tagged_begin and the largest
+  # src_end/tagged_end of the associations of text t_id that overlap the
+  # tagged range t_begin...t_end.
+  def find_range(t_id, t_begin, t_end)
+    ds = db[:assoc].filter(:text_id => t_id).filter{tagged_end >= t_begin}.filter{tagged_begin < t_end}
+    ds.select{[{min(:src_begin) => :src_begin},{ max(:src_end) => :src_end}, {min(:tagged_begin) => :tagged_begin}, {max(:tagged_end) => :tagged_end}]}.first
+  end
+  private
+  # Updates prog with the given values and passes it to progress_proc, if one
+  # is set.
+  def progress(prog, done = 0, data = prog.data)
+    if progress_proc
+      prog.done = done
+      prog.data = data
+      progress_proc.call(prog)
+    end
+  end
+  # Opens the database, yields it to the caller's block, applies the
+  # migrations up to DB_VERSION and returns the connection. When the test
+  # query fails, a local sqlite database is used instead and fallback? turns
+  # true.
+  def connect(db_uri)
+    db = Sequel.connect(db_uri)
+    begin
+      db.get(1)
+      @fallback = false
+    # NOTE(review): rescuing Exception also traps interrupts and exit requests;
+    # kept as is because the fallback is a deliberate best-effort - consider
+    # narrowing it once the errors raised by the JDBC adapters are confirmed.
+    rescue Exception => e
+      puts 'Falling back to sqlite'
+      puts e
+      db = Sequel.connect('jdbc:sqlite://concordancer.db')
+      @fallback = true
+    end
+    yield db
+    migration_path = File.join(File.dirname(__FILE__), 'schema')
+    Sequel::Migrator.apply(db, migration_path, DB_VERSION)
+    return db
+  end
+  # Reduces every "word|lemma|tag|..." token of the tagger output to
+  # "word|tag "; an empty token (e.g. from a line break) becomes "\n".
+  def retag(text)
+    words = text.split(/[ \n]/).map do |word|
+      word.split(/\|/)
+    end
+    words.inject('') do |result, (word, lemma, tag, tag2)|
+      result + (word ? "#{word}|#{tag} " : "\n")
+    end
+  end
+  # Walks through the word|tag tokens of the tagged text, looks each word up
+  # in the raw text (continuing after the previous match) and stores the
+  # matching offsets of both versions in the assoc table.
+  def link_text(id)
+    ds = db[:texts][:id => id]
+    text, tagged = ds[:text], ds[:tagged]
+    puts "Linking text #{ds[:filename]}"
+    re = /([^\s\|]+)\|(\S+)/m
+    src_last = 0
+    position = 0
+    assoc = []
+    tagged.scan(re) do |word, tag|
+      tagged_begin = $~.begin(0)
+      tagged_end = $~.end(0)
+      # an underscore in a tagged word may stand for any whitespace in the raw text
+      word_re = Regexp.new(Regexp.escape(word).gsub(/_/, '\s*'))
+      src_match = text[src_last..-1].match(word_re) # find the word
+      if src_match
+        src_begin = src_last + src_match.begin(0)
+        src_end = src_last + src_match.end(0)
+        src_last = src_end
+        assoc << {:text_id => id, :position => position, :src_begin => src_begin, :src_end => src_end, :tagged_begin => tagged_begin, :tagged_end => tagged_end}
+      else
+        STDERR.puts "Could not find match for '#{word}' in text #{ds[:filename]}"
+      end
+      position += 1
+    end
+    db[:assoc].multi_insert(assoc)
+  rescue => e
+    # log the failure, then let the caller see the original exception
+    STDERR.puts e
+    STDERR.puts e.backtrace
+    raise
+  end
+end
+# Manual smoke test, run only when this file is executed directly: recreates
+# the database, imports the 'raw' directory and times one sample search.
+if __FILE__ == $0
+  report = lambda do |prog|
+    suffix = prog.data ? "(#{prog.data})" : ""
+    puts "#{prog.task}#{suffix} #{prog.done}/#{prog.work}"
+  end
+  #  uri = 'postgres://concordancer:concordancer@localhost:5433/concordancer'
+  uri = 'jdbc:postgresql://localhost:5433/concordancer?user=concordancer&password=concordancer'
+  concordancer = Concordancer.new(uri, :progress_proc => report, :recreate => true)
+  #puts c.filter_to_re('"sunt" APN NPN')
+  concordancer.read('raw')
+  #c.find(%q[("de")? (N*)+ "si" (N*){1,2} (AS*)?])
+  #c.link! #if RECREATE
+  #c.link c.all
+  #ds = db[:assoc].filter(:text_id => 1).order_by(:position).filter{tagged_end >= 150}.filter{tagged_begin < 330}
+  #puts ds.sql
+  #exit
+  started = Time.now
+  hits = []
+  count = concordancer.find(%q[(*){0,3} N* N* (*){0,3}]) do |text, tagged|
+    hits << "#{text}\n#{tagged}\n\n"
+  end
+  puts Time.now - started
+  puts "Results: #{ count }"
+  puts hits.join
+end

data/lib/lumix/filter.rb ADDED Viewed

@@ -0,0 +1,60 @@
+# Translates a search filter expression into a Regexp that is matched
+# against text made of "word|TAG" tokens.
+module Filter
+  class << self
+    # Names of the rewriting passes, in the order to_re applies them.
+    HANDLERS = %w[handle_wildcard handle_choice handle_literals
+              handle_dangling_tags handle_multiplicators assure_wordbounds]
+    # TODO refactor
+    # Runs the filter through every pass in HANDLERS and compiles the result.
+    # The pattern before each pass, the pass name and the final pattern are
+    # printed to stdout.
+    def to_re(filter)
+      pattern = filter
+      HANDLERS.each do |step|
+        puts pattern
+        puts "#{step} -->"
+        pattern = send(step, pattern)
+      end
+      puts pattern
+      Regexp.new(pattern)
+    end
+    # character wildcard replacement: a * that does not follow a closing
+    # parenthesis becomes a lazy character-class match
+    def handle_wildcard(re)
+      re.gsub(/([^\)])\*/) { Regexp.last_match(1) + '[^\b]*?' }
+    end
+    # Takes (!A B C) and turns it into a repeated group with one negative
+    # lookahead per listed tag
+    def handle_choice(re)
+      re.gsub(/\(\!([^\)]+)\)/) do
+        lookaheads = Regexp.last_match(1).split.map { |tag| "(?!#{tag}\\b)" }
+        "(?:#{lookaheads.join}\\S)*"
+      end
+    end
+    # transforms literals delimited by "" (optionally followed by |TAG) into
+    # a word\|tag pattern; spaces inside the literal become underscores
+    def handle_literals(re)
+      re.gsub(/\"([^\"]*)\"(?:\|(\S+?))?/) do
+        literal = Regexp.last_match(1)
+        tag = Regexp.last_match(2) || '\S+?'
+        "#{literal.gsub(/ /, '_')}\\|#{tag}"
+      end
+    end
+    # add wildcard word match on tag-only search criteria; criteria that
+    # already contain an escaped pipe are left untouched
+    def handle_dangling_tags(re)
+      parts = re.split(/ /).map do |criterion|
+        criterion.include?('\|') ? criterion : criterion.gsub(/(\(?)(\S+)/, '\1[^\s\|]+\|\2')
+      end
+      parts.join('\s+')
+    end
+    # Handles the + * ? and {} qualifiers by turning the qualified group into
+    # a non-capturing group wrapped in word boundaries
+    def handle_multiplicators(re)
+      re.gsub(/\(([^\)]+)(\)((\{[^\}]+\})|\*|\+|\?)\s?)/, '(?:\b\1\b\2')
+    end
+    # Wraps the whole pattern in word boundaries.
+    def assure_wordbounds(re)
+      "\\b#{re}\\b"
+    end
+  end
+end

data/lib/lumix/gui.rb ADDED Viewed

@@ -0,0 +1,148 @@
+require 'yaml'
+require 'lumix/concordancer'
+require 'sweet'
+require 'lumix/result_view'
+#Sweet.set_debug
+# Status texts shown for the task symbols carried by Progress reports.
+Texts = {:search => "Searching...", :read => "Importing files", :link => "Linking texts"}
+# Characters the progress indicator label cycles through.
+Indicator = %w'} ) ] | [ ( {'
+# Name of the YAML configuration file (relative to the working directory).
+CONF = 'config.yaml'
+ConfigStruct = Struct.new(:database_uri)
+# Loaded configuration; when loading raises for any reason, the default
+# PostgreSQL URI below is used instead.
+CConfig = YAML.load_file(CONF) rescue ConfigStruct.new('jdbc:postgresql://localhost:5432/concordancer?user=concordancer&password=concordancer')
+# Writes the current configuration (CConfig) as YAML to the file named by
+# CONF, replacing its previous contents.
+def save!
+  File.open(CONF, 'w') { |io| io.write(CConfig.to_yaml) }
+end
+# Main window: a filter input with a search button, the result table and a
+# status/progress line, plus the File menu for import, export and maintenance.
+Sweet.app :title => 'Ruby Concordancer', :width => 800, :height => 700, :layout => :grid.conf(:numColumns => 3) do
+  # The Concordancer, created on first use with the configured database URI.
+  def conc
+    @conc ||= Concordancer.new(CConfig.database_uri, :progress_proc => @progress_proc)#, :recreate => true)
+  end
+  # Progress callback handed to the Concordancer; updates the status label,
+  # the indicator and the progress bar inside a perform block.
+  @progress_proc = proc do |p|
+    task = Texts[p.task] || p.task
+    perform do
+      if p.done == p.work
+        @p_status.text = 'Done!'
+        @p_indicator.text = ''
+        @p_bar.fraction = 0
+      else
+        @p_status.text = task
+        @p_indicator.text = Indicator[p.done % Indicator.size]
+        @p_bar.fraction = p.done.to_f / p.work
+      end
+    end
+  end
+  # create the configuration file on first start
+  save! unless File.exist?(CONF)
+  menubar do
+    submenu '&File' do
+      submenu '&Import...' do
+        item('E&nglish texts') { import_chooser('en') }
+        item('&Romanian texts') { import_chooser('ro') }
+      end
+      item('&Export findings...') { export_findings }
+      separator
+      item('&Relink texts') { relink }
+      item('&Clear the database') { reconnect :recreate => true }
+      separator
+      item('E&xit') { exit }
+    end
+    #    submenu 'C&orpora' do
+    #      @m_cat = submenu '&Category' do
+    #        item('Cre&ate...') { create_category }
+    #        item('&Import...') { import_chooser }
+    #        separator
+    #        item('&Edit...') { edit_category }
+    #        item('&Delete') { delete_category }
+    #      end
+    #      @m_text = submenu '&Text' do
+    #        item('&Reimport...') { reimport_chooser }
+    #        item('&Delete') { delete_text }
+    #      end
+    #    end
+    #    @m_stats = submenu '&Statistics' do
+    #      item('&Editor') { script_editor }
+    #      separator
+    #      item('&Load Script...') { load_script }
+    #    end
+    #    submenu "&Help" do
+    #      separator
+    #      item('&About') { about }
+    #    end
+  end
+  tree :grid_data => {:align => [:fill, :fill], :span => [1, 2], :grab => [true, true]}
+  @filter = edit_line 'NSN NSN', :grid_data => {:align => [:fill, :center], :grab => true}, :max_size => 40 do
+    perform_search
+  end
+  button 'Search' do
+    perform_search
+  end
+  @results = table :columns => %w[Text Left Hit Right], :sort => true, :grid_data => {:align => [:fill, :fill], :span => 2, :grab => [true, true]}, :scroll => true
+  @counter = label :grid_data => {:span => 2, :align => :fill}
+  @p_status = label(:grid_data => {:align => [:fill, :bottom], :grab => true})
+  @p_bar = progress(:width => 50, :grid_data => {:align => [:right, :bottom]})
+  @p_indicator = label('  ',  :grid_data => {:align => [:right, :bottom]})
+  # Clears the result table and runs the search for the current filter text
+  # in a background thread; every hit is added to the table. `found` stays
+  # nil when the filter is empty.
+  def perform_search
+    filter = @filter.text
+    @results.data.clear
+    Thread.new do
+      unless filter.empty?
+        puts "finding #{filter}"
+        found = conc.find(filter) do |text, tagged|
+          @results.add_hit(text.name, text.left, text.to_s, text.right)
+        end
+      end
+      perform do
+        @counter.text = "#{found} matches"
+        @p_status.text = "Found #{found || 'no'} matches for #{filter}"
+      end
+    end
+  end
+  # Sets the tagger language and imports the 'raw' directory in a background
+  # thread.
+  def import_chooser(lang)
+    conc.tp.lang = lang
+    Thread.new(conc) do |conc|
+      conc.read('raw')
+    end
+  end
+  # Writes the unchecked result rows as tab-separated lines to a file named
+  # after the current filter.
+  # NOTE(review): columns 0..2 of the table are Text/Left/Hit, not
+  # Left/Hit/Right as the local names suggest - confirm which were intended.
+  def export_findings
+    filename = to_filename(@filter.text) + '.findings'
+    @p_status.text = "Exporting to #{filename}"
+    File.open(filename, 'w') do |f|
+      @results.items.each do |item|
+        unless item.getChecked
+          left, hit, right = (0..2).map{ |i| item.text(i) }
+          f.puts "#{left}\t#{hit}\t#{right}"
+        end
+      end
+    end
+    @p_status.text = "Done! Exported to file #{filename}"
+  end
+  # Deletes and rebuilds the associations of all texts in a background thread.
+  def relink
+    Thread.new(conc) do |conc|
+      conc.link!
+    end
+  end
+  # Turns a filter expression into a file name: whitespace runs become
+  # underscores; * . ? and " are removed.
+  def to_filename(filter)
+    filter.gsub(/\s+/, "_").gsub(/[\*\.\?\"]/, '')
+  end
+  # Replaces the current Concordancer with a new one built from opts
+  # (e.g. :recreate => true) plus the progress callback.
+  def reconnect(opts = {})
+    @conc = Concordancer.new(CConfig.database_uri, opts.merge(:progress_proc => @progress_proc))
+  end
+end

data/lib/lumix/main.rb ADDED Viewed

@@ -0,0 +1,7 @@
+#!/usr/bin/env jruby
+$: << File.join(File.dirname(__FILE__), '..')
+$: << File.join(File.dirname(__FILE__), '../../../Sweet/lib')
+require 'rubygems'
+require 'lumix/gui'

data/lib/lumix/result_view.rb ADDED Viewed

@@ -0,0 +1,93 @@
+# Reopens the SWT Table widget to back it with a Ruby array (data) whose rows
+# are rendered on demand through the SetData listener.
+class Java::OrgEclipseSwtWidgets::Table
+  attr_accessor :data, :tooltips
+  # Initializes the data and tooltip arrays, starts the redraw thread and
+  # registers the SetData and Resize listeners.
+  def sweeten(app, opts={}, &block)
+    @data = []
+    @tooltips = []
+    super
+    # polls the @dirty flag once per second until the widget is disposed
+    @redraw_thread = Thread.new do
+      while !isDisposed
+        if @dirty
+          @dirty = false
+          perform do
+            setItemCount data.size
+            # NOTE(review): clear_all is not a local variable here; under
+            # JRuby it presumably resolves to the Java clearAll method -
+            # confirm the intended condition
+            clearAll if clear_all
+          end
+        end
+        sleep 1 # TODO find a better alternative
+      end
+    end
+    # TODO implement tooltips
+    # fills a table item with the row stored at the item's index in data
+    addListener swt::SetData do |e|
+      item = e.item
+      index = indexOf(item)
+      item.setText(Array(data[index]).to_java(:string))
+    end
+    # on resize, scales every column except the last by its share of the
+    # previous table width; columns of width 0 get an equal share
+    addListener swt::Resize do |e|
+      default_weight = 1.0 / columns.size
+      current_width = @old_width
+      w = width
+      columns[0..-2].each do |c|
+        weight = c.width == 0 ? default_weight : c.width.to_f / current_width
+        c.width = w * weight
+      end
+      columns[columns.size - 1].pack
+      @old_width = w
+    end
+  end
+  # Creates one centered column per title and shows the header and grid lines.
+  def columns=(*titles)
+    if titles
+      titles.each do |title|
+        col = widgets::TableColumn.new(self, swt::CENTER)
+        col.setText title
+      end
+      setHeaderVisible true
+      setLinesVisible true
+    end
+  end
+  # Enables sorting by clicking a column header. sort is true or :all for
+  # every column, or an object indexed by column title to pick columns.
+  def sort=(sort)
+    sort = Hash.new(true) if [true, :all].member?(sort)
+    if sort
+      columns.each_with_index do |col, index|
+        if sort[col.text]
+          col.addListener swt::Selection do
+            if data
+              # re-sort the rows by the clicked column and redraw all items
+              @data = data.sort_by {|e| e[index] }
+              update :clear
+            end
+          end
+        end
+      end
+    end
+  end
+end
+# Registers :border, :virtual and :check as the :style entry of Sweet's
+# defaults for tables.
+::Sweet::WIDGET_DEFAULTS[:table] = {
+  :style => [:border, :virtual, :check]
+}
+# Registers the block handler and the extra methods for SWT tables.
+::Sweet::WIDGET_HACKS[Java::OrgEclipseSwtWidgets::Table] = {
+  :block_handler => :set_data,
+  :custom_code => proc {
+    # Sets the item count to the number of data rows; with a truthy clear_all
+    # all items are cleared as well. Does nothing on a disposed widget.
+    def update(clear_all = false)
+      return if isDisposed
+      setItemCount data.size
+      clearAll if clear_all
+    end
+    # Appends one row. The arguments are the cell values; an optional trailing
+    # hash may carry :data (row values) and :tooltips (defaults to the row
+    # values). Marks the table dirty so the redraw thread picks up the change.
+    def add_hit(*args)
+      # Hash === obj tests the class; obj === Hash would compare the
+      # argument with the Hash class itself and never recognize an options hash
+      opts = Hash === args.last ? args.pop : {}
+      d = opts[:data] || args
+      t = opts[:tooltips] || d
+      data << d
+      tooltips << t
+      @dirty = true
+    end
+  }
+}

data/lib/lumix/schema/001_create_tables.rb ADDED Viewed

@@ -0,0 +1,35 @@
+# Migration 1: creates the texts table (raw and tagged text per file) and the
+# assoc table that maps character ranges between both versions.
+class CreateTables < Sequel::Migration
+  def up
+    create_table :texts do
+      primary_key :id
+      # digest of the raw text, used to detect already imported texts
+      String :digest
+      String :text
+      String :tagged
+      String :filename
+      String :tagged_filename
+      index :digest
+    end
+    create_table :assoc do
+      primary_key :id
+      Integer :text_id, :references => :texts
+      # running number of the token within its text
+      Integer :position
+      # character range of the token in the raw text
+      Integer :src_begin
+      Integer :src_end
+      # character range of the token in the tagged text
+      Integer :tagged_begin
+      Integer :tagged_end
+      index [:text_id, :tagged_end]
+      index [:text_id, :tagged_begin]
+      index [:text_id, :position]
+    end
+  end
+  # Drops assoc first because it references texts.
+  def down
+    drop_table :assoc
+    drop_table :texts
+  end
+end

data/lib/lumix/schema/002_categories.rb ADDED Viewed

@@ -0,0 +1,28 @@
+# Migration 2: adds a self-referencing categories table and a category
+# reference on texts.
+class Categories < Sequel::Migration
+  def up
+    create_table :categories do
+      primary_key :id
+      # parent category; references the same table
+      Integer :parent_id, :references => :categories
+      String :name
+      String :key
+      index [:parent_id, :id]
+    end
+    alter_table :texts do
+      add_column :category_id, Integer, :references => :categories
+      add_index [:category_id, :id]
+    end
+  end
+  # Removes the category reference from texts before dropping categories.
+  def down
+    alter_table :texts do
+      drop_column :category_id
+    end
+    drop_table :categories
+  end
+end

data/lib/lumix/textprocessing.rb ADDED Viewed

@@ -0,0 +1,84 @@
+$KCODE='UTF8'
+require 'soap/wsdlDriver'
+# Client for the RACAI TextProcessing web service plus helpers for naming
+# and collecting the files to process.
+class TextProcessing
+  attr_accessor :lang
+  # lang: language code passed to the web service with every request
+  def initialize(lang = 'ro')
+    @lang = lang
+  end
+  # Returns the SOAP driver for the web service. It is built from the WSDL
+  # on first use and reused afterwards.
+  def rpc
+    return @rpc if @rpc
+    wsdl = SOAP::WSDLDriverFactory.new('http://www.racai.ro/webservices/TextProcessing.asmx?WSDL')
+    @rpc = wsdl.create_rpc_driver
+  end
+  # inserts "tagged" as the second to last part in the filename
+  # e.g.
+  #   test.txt -> test.tagged.txt
+  # special case when no extension is present:
+  #   README -> README.tagged
+  # NOTE(review): the name is split on every dot, so a dot in a directory
+  # name of an extension-less file is treated like an extension separator.
+  def create_tagged_filename(infile)
+    components = infile.split(/\./)
+    position = [1, components.size-1].max
+    components.insert position, 'tagged'
+    components.join '.'
+  end
+  # Expands the given files and directories into a flat, duplicate-free list
+  # of files; directories are searched recursively and names containing
+  # '.tagged' are left out.
+  def to_filelist(*files)
+    files = files.flatten.map do |filename|
+      if File.directory?  filename
+        Dir.glob File.join(filename, '**/*') # add all files from that directory
+      else
+        filename
+      end
+    end.flatten.compact.uniq # make sure every file is only processed once
+    files.delete_if { |filename| File.directory?(filename) ||  filename['.tagged']} # remove remaining folders
+  end
+  # the core processing routine using the webservice
+  def process(text)
+    response = rpc.Process(:input => text, :lang => lang)
+    response.processResult
+  end
+  # Processes the text read from standard input and prints the result.
+  def process_stdin
+    puts process($stdin.read)
+  end
+  # takes the text from infile and outputs the result into the outfile
+  def process_file(infile, outfile)
+    File.open(outfile, 'w') do |out|
+      out.write process(File.read(infile))
+    end
+  end
+end
+# Command line entry point: `[-lang LANG] [files or directories...]`.
+# Without file arguments the text is read from standard input.
+if __FILE__ == $0
+  args = ARGV
+  lang_given = args.first == '-lang'
+  args.shift if lang_given
+  tp = lang_given ? TextProcessing.new(args.shift) : TextProcessing.new
+  if args.empty?
+    tp.process_stdin
+  else
+    files = tp.to_filelist(args)
+    puts "Processing files:"
+    files.each do |source|
+      target = tp.create_tagged_filename(source)
+      puts "#{source} -> #{target}"
+      # existing tagged files are never overwritten
+      tp.process_file(source, target) unless File.exist?(target)
+    end
+  end
+end

data/spec/filter_spec.rb ADDED Viewed

@@ -0,0 +1,47 @@
+# Specs for Filter.to_re: each example compiles a filter expression and scans
+# the tagged sample text TXT with the resulting Regexp.
+require 'filter'
+puts RUBY_PLATFORM
+# Sample text in the "word|TAG" format the filters are matched against.
+TXT = "They|PPER3 have|AUXP business|NN uses|VERB3 derp|ADNE too|ADVE " +
+"Apr|NN 4th|CD 2007|M have|DMKD .|PERIOD"
+# Returns all matches of the compiled filter in TXT.
+def search(filter)
+  TXT.scan(Filter.to_re(filter))
+end
+describe Filter do
+  it "should find tags" do
+    search('NN').should == %w[business|NN Apr|NN]
+  end
+  it "should find words" do
+    search('"have"').should == %w[have|AUXP have|DMKD]
+  end
+  it "should find word and tag combinations" do
+    search('"have" NN "uses"').should == ['have|AUXP business|NN uses|VERB3']
+  end
+  it "should find wildcard tags" do
+    search('AU*').should == %w[have|AUXP]
+  end
+  it "should find exclusions" do
+    search('A(!UXP DNE)').should == %w[too|ADVE]
+  end
+  it "should find word|tag pairs" do
+    search('"have"|D*').should == %w[have|DMKD]
+  end
+  it "should find unlimited repetitions" do
+    search('(AD*)+').should == ['derp|ADNE too|ADVE']
+  end
+  it "should find limited repetitions" do
+    search('(AD*){3}').should == []
+  end
+end

data/spec/text_snippet_spec.rb ADDED Viewed

@@ -0,0 +1,52 @@
+# Specs for TextSnippet's hit and context rendering. The snippets are built
+# with the create_ts helper defined at the bottom of this file.
+require 'concordancer'
+describe TextSnippet do
+  before(:each) do
+  end
+  it "should handle umlauts properly" do
+    ts = create_ts('eins zwei drei vierß öfünfä ßechs sieben acht neun zehn', /öfünfä/)
+    ts.left(3).should == 'zwei drei vierß '
+    ts.to_s.should == 'öfünfä'
+    ts.right(3).should == ' ßechs sieben acht'
+  end
+  it "should handle partial words and umlauts properly" do
+    ts = create_ts('eins zwei drei vierß öfünfä ßechs sieben acht neun zehn', /fünf/)
+    ts.left(3).should == 'zwei drei vierß ö'
+    ts.to_s.should == 'fünf'
+    ts.right(3).should == 'ä ßechs sieben acht'
+  end
+  it "should have dynamic left context" do
+    ts = create_ts('one two three four five six seven eight nine ten', /five/)
+    ts.left(1).should == 'four '
+    ts.left(2).should == 'three four '
+    ts.left(10).should == 'one two three four '
+  end
+  it "should have dynamic right context" do
+    ts = create_ts('one two three four five six seven eight nine ten', /five/)
+    ts.right(1).should == ' six'
+    ts.right(2).should == ' six seven'
+    ts.right(10).should == ' six seven eight nine ten'
+  end
+  it "should work correctly with newlines" do
+    ts = create_ts("one two\n three four five six seven eight\n nine ten", /five/)
+    ts.left(1).should == 'four '
+    ts.right(1).should == ' six'
+  end
+  # NOTE(review): the expectations below contain two spaces where the text has
+  # "\n ", while TextSnippet#cleanup collapses every whitespace run into one
+  # space - confirm which behavior is intended.
+  it "should replace newlines and tabs with spaces" do
+    ts = create_ts("one two three\n four five six\n seven eight nine ten", /five/)
+    ts.left(2).should == 'three  four '
+    ts.right(2).should == ' six  seven'
+  end
+end
+# Builds a TextSnippet covering the first match of re in text.
+# TextSnippet#initialize takes (name, text, first, last); the examples do not
+# depend on the name, so a fixed placeholder is passed.
+def create_ts(text, re)
+  m = text.match(re)
+  TextSnippet.new 'spec', text, m.begin(0), m.end(0)
+end

metadata ADDED Viewed

@@ -0,0 +1,108 @@
+--- !ruby/object:Gem::Specification
+name: lumix
+version: !ruby/object:Gem::Version
+  prerelease: false
+  segments:
+    - 0
+    - 0
+    - 1
+  version: 0.0.1
+platform: ruby
+authors:
+  - Michael Klaus
+autorequire:
+bindir: bin
+cert_chain: []
+date: 2010-07-27 00:00:00 +02:00
+default_executable:
+dependencies:
+  - !ruby/object:Gem::Dependency
+    name: sweet
+    prerelease: false
+    requirement: &id001 !ruby/object:Gem::Requirement
+      requirements:
+        - - ">="
+          - !ruby/object:Gem::Version
+            segments:
+              - 0
+            version: "0"
+    type: :runtime
+    version_requirements: *id001
+  - !ruby/object:Gem::Dependency
+    name: sequel
+    prerelease: false
+    requirement: &id002 !ruby/object:Gem::Requirement
+      requirements:
+        - - ">="
+          - !ruby/object:Gem::Version
+            segments:
+              - 0
+            version: "0"
+    type: :runtime
+    version_requirements: *id002
+  - !ruby/object:Gem::Dependency
+    name: jdbc-postgres
+    prerelease: false
+    requirement: &id003 !ruby/object:Gem::Requirement
+      requirements:
+        - - ">="
+          - !ruby/object:Gem::Version
+            segments:
+              - 0
+            version: "0"
+    type: :runtime
+    version_requirements: *id003
+description: A concordancer for corpus-based linguistic research.
+email: Michael.Klaus@gmx.net
+executables:
+  - lumix
+extensions: []
+extra_rdoc_files: []
+files:
+  - COPYING
+  - bin/lumix
+  - spec/text_snippet_spec.rb
+  - spec/filter_spec.rb
+  - lib/lumix/filter.rb
+  - lib/lumix/result_view.rb
+  - lib/lumix/gui.rb
+  - lib/lumix/textprocessing.rb
+  - lib/lumix/main.rb
+  - lib/lumix/concordancer.rb
+  - lib/lumix/schema/001_create_tables.rb
+  - lib/lumix/schema/002_categories.rb
+has_rdoc: true
+homepage: http://github.org/QaDeS/lumix
+licenses: []
+post_install_message:
+rdoc_options: []
+require_paths:
+  - lib
+required_ruby_version: !ruby/object:Gem::Requirement
+  requirements:
+    - - ">="
+      - !ruby/object:Gem::Version
+        segments:
+          - 0
+        version: "0"
+required_rubygems_version: !ruby/object:Gem::Requirement
+  requirements:
+    - - ">="
+      - !ruby/object:Gem::Version
+        segments:
+          - 0
+        version: "0"
+requirements: []
+rubyforge_project:
+rubygems_version: 1.3.6
+signing_key:
+specification_version: 3
+summary: A concordancer for corpus-based linguistic research.
+test_files: []