RubyGems - picolena - Versions diffs - 0.1.1 → 0.1.2 - Mend

picolena 0.1.1 → 0.1.2

Files changed (70) hide show

data/lib/picolena/templates/app/models/index_reader.rb ADDED Viewed

@@ -0,0 +1,56 @@
+class IndexReader < Ferret::Index::Index
+  def initialize(params={})
+    # TODO: Remove those debug lines!
+    # puts "##################################################################Creating Reader!!!!!"
+    # Add needed parameters
+    params.merge!(:path => Picolena::IndexSavePath, :analyzer => Picolena::Analyzer)
+    # Creates the IndexReader
+    super(params)
+  end
+  # Returns the number of times a file is present in the index.
+  # index_reader.doc_freq(field, term) → integer
+  # Return the number of documents in which the term term appears in the field field.
+  def occurences_number(complete_path)
+    # complete_path_query = Ferret::Search::TermQuery.new(:complete_path, complete_path)
+    search_by_complete_path(complete_path).total_hits
+  end
+  def search_by_complete_path(complete_path)
+    search('complete_path:"'<<complete_path<<'"')
+  end
+  def delete_by_complete_path(complete_path)
+    search_by_complete_path(complete_path).hits.each{|hit|
+      delete(hit.doc)
+    }
+    close
+  end
+  # Validation methods.
+  def should_have_documents
+     raise IndexError, "no document found" unless has_documents?
+  end
+  # Returns true if there's at least one document indexed.
+  def has_documents?
+   size>0
+  end
+ class<<self
+   def ensure_existence
+     Indexer.index_every_directory(update=false) unless exists? or RAILS_ENV=="production"
+   end
+  def exists?
+     filename and File.exists?(filename)
+  end
+  def filename
+    Dir.glob(File.join(Picolena::IndexSavePath,'*.cfs')).first
+  end
+  end
+end

data/lib/picolena/templates/app/models/index_writer.rb ADDED Viewed

@@ -0,0 +1,36 @@
+class IndexWriter < Ferret::Index::IndexWriter
+  def initialize(params={})
+    # TODO: Remove those debug lines!
+    # puts "##################################################################Creating Writer!!!!!"
+    # Add needed parameters
+    params.merge!(:create_if_missing => true,
+                  :path              => Picolena::IndexSavePath,
+                  :analyzer          => Picolena::Analyzer
+                  # huge performance impact?
+                  # :auto_flush        => true
+                  )
+    # Creates the IndexWriter
+    super(params)
+    # Add required fields (content, filetype, probably_unique_id, ...)
+    add_fields!
+  end
+  def self.remove
+    Dir.glob(File.join(Picolena::IndexSavePath,'*')).each{|f| FileUtils.rm(f) if File.file?(f)}
+  end
+  private
+  def add_fields!
+    # No need to re-create any field.
+    return unless field_infos.fields.empty?
+    field_infos.add_field(:complete_path,      :store => :yes, :index => :yes)
+    field_infos.add_field(:content,            :store => :yes, :index => :yes)
+    field_infos.add_field(:basename,           :store => :no,  :index => :yes, :boost => 1.5)
+    field_infos.add_field(:file,               :store => :no,  :index => :yes, :boost => 1.5)
+    field_infos.add_field(:filetype,           :store => :no,  :index => :yes, :boost => 1.5)
+    field_infos.add_field(:date,               :store => :yes, :index => :yes)
+    field_infos.add_field(:probably_unique_id, :store => :no,  :index => :yes)
+    field_infos.add_field(:lang,               :store => :yes,  :index => :yes)
+  end
+end

data/lib/picolena/templates/app/models/indexer.rb ADDED Viewed

@@ -0,0 +1,142 @@
+class Indexer
+  # This regexp defines which files should *not* be indexed.
+  @@exclude          = /(Thumbs\.db)/
+  # Number of threads that will be used during indexing process
+  @@max_threads_number = 8
+  class << self
+    def fields_for(complete_path)
+      {
+        :complete_path      => complete_path,
+        :probably_unique_id => complete_path.base26_hash,
+        :file               => File.basename(complete_path),
+        :basename           => File.basename(complete_path, File.extname(complete_path)).gsub(/_/,' '),
+        :filetype           => File.extname(complete_path),
+        :date               => File.mtime(complete_path).strftime("%Y%m%d%H%M%S")
+      }
+    end
+    def index_every_directory(update=true)
+      log :debug => "Indexing every directory"
+      start=Time.now
+      @update = update
+      reset! unless update
+      Picolena::IndexedDirectories.each{|dir, alias_dir|
+        index_directory_with_multithreads(dir)
+      }
+      # FIXME: with those 2 lines,
+      writer.optimize
+      writer.close
+      # launching Indexer.index_every_directory twice in a row
+      # would raise a SEGFAULT:
+      # picolena/lib/picolena/templates/app/models/indexer.rb:27: [BUG] Segmentation fault
+      # ruby 1.8.6 (2007-06-07) [i486-linux]
+      #
+      # Aborted (core dumped)
+      #
+      # But without those 2 lines, specs don't pass anymore.
+      #
+      log :debug => "Indexing done in #{Time.now-start} s."
+    end
+    def index_directory_with_multithreads(dir)
+      # FIXME: Don't know why, but if more than one thread is created while update the index,
+      # indexer raises:
+      #
+      # current thread not owner
+      # /usr/lib/ruby/1.8/monitor.rb:278:in `mon_check_owner'
+      # /home/www/picolena/lib/picolena/templates/lib/core_exts.rb:32:in `join'
+      # ...
+      #
+      # So Index creation is multithreaded, Index update is monothreaded.
+      threads_number = @update ? 1 : @@max_threads_number
+      log :debug => "Indexing #{dir}, #{threads_number} thread(s)"
+      indexing_list=Dir[File.join(dir,"**/*")].select{|filename|
+        File.file?(filename) && filename !~ @@exclude
+      }
+      indexing_list_chunks=indexing_list.in_transposed_chunks(threads_number)
+      indexing_list_chunks.each_with_thread{|chunk|
+        chunk.each{|filename|
+          add_or_update_file(filename)
+        }
+      }
+    end
+    def add_or_update_file(complete_path)
+      should_be_added = true
+      if @update then
+        log :debug =>  "What to do with #{complete_path} ?"
+        occurences = reader.occurences_number(complete_path)
+        log :debug =>  "\tappears #{occurences} times in the index"
+        case occurences
+          when 0
+          #Nothing to do here, the file will be added.
+          when 1
+          d=Document.find_by_complete_path(complete_path)
+          if File.mtime(complete_path).strftime("%Y%m%d%H%M%S").to_i > d.mtime then
+            log :debug => "\thas been modified"
+            delete_file(complete_path)
+          else
+            should_be_added = false
+            log :debug => "\thas not been modified. leaving it"
+          end
+        else
+          delete_file(complete_path)
+        end
+      end
+      add_file(complete_path) if should_be_added
+    end
+    def add_file(complete_path)
+      log :debug => "Adding #{complete_path}"
+      mime_type=File.mime(complete_path)
+      fields = fields_for(complete_path)
+      begin
+        text, lang = PlainTextExtractor.extract_content_and_language_from(complete_path)
+        raise "\tempty document #{complete_path}" if text.strip.empty?
+        fields[:content] = text
+        log :debug => "language found: #{lang}" if lang
+        fields[:lang] = lang
+      rescue => e
+        log :debug => "\tindexing without content: #{e.message}"
+      end
+      writer << fields
+    end
+    def writer
+      @@writer ||= IndexWriter.new
+    end
+    def reader
+      @@reader ||= IndexReader.new
+    end
+    def reset!
+      log :debug => "Resetting Index"
+      @@writer=nil
+      @@reader=nil
+      IndexWriter.remove
+    end
+    def delete_file(complete_path)
+      log :debug => "\tRemoving from index"
+      reader.delete_by_complete_path(complete_path)
+    end
+    private
+    def log(hash)
+      hash.each{|level,message|
+        IndexerLogger.send(level,message)
+      }
+    end
+  end
+end

data/lib/picolena/templates/app/models/plain_text_extractor.rb ADDED Viewed

@@ -0,0 +1,122 @@
+require 'plain_text_extractor_DSL'
+class PlainTextExtractor
+  include PlainTextExtractorDSL
+  class<<self
+    # Returns every defined extractor
+    def all
+      Picolena::Extractors
+    end
+    # Add an extractor to the extractors list
+    def add(extractor)
+      all<<extractor
+    end
+    # Calls block for each extractor
+    def each(&block)
+      all.each(&block)
+    end
+    # Returns every required dependency for every defined extractor
+    def dependencies
+      @@dependencies||=all.collect{|extractor| extractor.dependencies}.flatten.compact.uniq.sort
+    end
+    # Returns every supported file extensions
+    def supported_extensions
+      @@supported_exts||=all.collect{|extractor| extractor.exts}.flatten.compact.uniq
+    end
+    # Finds which extractor should be used for a given file, according to its extension
+    # Raises if the file is unsupported.
+    def find_by_filename(filename)
+      ext=File.ext_as_sym(filename)
+      found_extractor=all.find{|extractor| extractor.exts.include?(ext)} || raise(ArgumentError, "no convertor for #{filename}")
+      found_extractor.source=filename
+      found_extractor
+    end
+    # Launches extractor on given file and outputs plain text result
+    def extract_content_from(source)
+      find_by_filename(source).extract_content
+    end
+    def extract_content_and_language_from(source)
+      find_by_filename(source).extract_content_and_language
+    end
+    def language_guesser
+      @@language_guesser||=('mguesser -n1' unless IO.popen("which mguesser"){|i| i.read}.empty?)
+    end
+  end
+  attr_accessor :source
+  # Parses command in order to know which programs are needed.
+  # rspec will then check that every dependecy is installed on the system
+  def dependencies
+    if command.is_a?(String) then
+      command.split(/\|\s*/).collect{|command_part| command_part.split(/ /).first}
+    else
+      @dependencies
+    end
+  end
+  ## Conversion part
+  # destination method can be used by some conversion command that cannot output to stdout (example?)
+  # a file containing plain text result will first be written by command, and then be read by extract_content.
+  def destination
+    require 'tmpdir'
+    @@temp_file_as_destination ||= File.join(Dir::tmpdir,"ferret_#{Time.now.to_i}")
+  end
+  # Replaces generic command with specific source and destination (if specified) files
+  def specific_command
+    command.sub('SOURCE','"'<<source<<'"').sub('DESTINATION','"'<<destination<<'"')
+  end
+  # Returns plain text content of source file
+  def extract_content
+    if command.is_a?(String) then
+      # If command is a String, launch it via system(command).
+      if command.include?('DESTINATION') then
+        # If command includes 'DESTINATION' keyword,
+        # launches the command and returns the content of
+        # DESTINATION file.
+        system(specific_command)
+        File.read_and_remove(destination)
+      else
+        # Otherwise, launches the command and returns STDOUT.
+        IO.popen(specific_command){|io| io.read}
+      end
+    else
+      # command is a Block.
+      # Returns the result of command.call,
+      # with source file as parameter.
+      command.call(source)
+    end
+  end
+  # Returns plain text content and language of source file,
+  # using mguesser to guess used language.
+  # This method only returns probable language if the content is bigger than 500 chars
+  # and if probability score is higher than 90%.
+  def extract_content_and_language
+    content=extract_content
+    # Language recognition is too unreliable for small files.
+    return [content, nil] unless Picolena::UseLanguageRecognition && PlainTextExtractor.language_guesser && content.size > 500
+    language=IO.popen(PlainTextExtractor.language_guesser,'w+'){|lang_guesser|
+      lang_guesser.write content
+      lang_guesser.close_write
+      output=lang_guesser.read
+      if output=~/^([01]\.\d+)\t(\w+)\t(\w+)/ then
+        score, lang, encoding = $1.to_f, $2, $3
+        # Language recognition isn't reliable if score is too low.
+        lang unless score<0.9
+      end
+    }
+    [content,language]
+  end
+end

data/lib/picolena/templates/app/models/query.rb ADDED Viewed

@@ -0,0 +1,31 @@
+class Query
+  class << self
+    # Returns a Ferret::Query from a raw String query.
+    def extract_from(raw_query)
+      parser.parse(convert_to_english(raw_query))
+    end
+    private
+    # Converts query keywords to english so they can be parsed by Ferret.
+    def convert_to_english(raw_query)
+      to_en={
+       /\b#{:AND.l}\b/=>'AND',
+       /\b#{:OR.l}\b/=>'OR',
+       /\b#{:NOT.l}\b/=>'NOT',
+       /(#{:filetype.l}):/=>'filetype:',
+       /#{:content.l}:/ => 'content:',
+       /#{:date.l}:/ => 'date:',
+       /\b#{:LIKE.l}\s+(\S+)/=>'\1~'
+      }
+      to_en.inject(raw_query){|mem,non_english_to_english_keyword|
+        mem.gsub(*non_english_to_english_keyword)
+      }
+    end
+    # Instantiates a QueryParser once, and keeps it in cache.
+    def parser
+      @@parser ||= Ferret::QueryParser.new(:fields => [:content, :file, :basename, :filetype, :date], :or_default => false, :analyzer=>Picolena::Analyzer)
+    end
+  end
+end

data/lib/picolena/templates/app/views/documents/_document.html.haml CHANGED Viewed

@@ -3,8 +3,8 @@
   %small=number_to_percentage(document.score*100, :precision=>1)
 =highlight_matching_content(document)
 %p=link_to_containing_directory(document)
-- if document.supported?
+-if document.supported?
   %p
     =link_to_plain_text_content(document)
     =link_to_cached_content(document)
-%hr/
+%hr/

data/lib/picolena/templates/config/environment.rb CHANGED Viewed

@@ -1,4 +1,4 @@
-%w(rubygems paginator pathname logger).each{|lib| require lib}
+%w(rubygems paginator fileutils pathname logger thread).each{|lib| require lib}
 # Uncomment below to force Rails into production mode when
 # you don't control web/app server and can't set it the proper way
@@ -7,7 +7,7 @@
 # Specifies gem version of Rails to use when vendor/rails is not present
 RAILS_GEM_VERSION = '2.0.2' unless defined? RAILS_GEM_VERSION
-IndexLogger=Logger.new($stdout)
+IndexerLogger=Logger.new($stdout)
 # Bootstrap the Rails environment, frameworks, and default configuration
 require File.join(File.dirname(__FILE__), 'boot')

data/lib/picolena/templates/config/environments/development.rb CHANGED Viewed

@@ -18,4 +18,4 @@ config.action_view.cache_template_extensions         = false
 config.action_mailer.raise_delivery_errors = false
-IndexLogger.level = Logger::DEBUG
+IndexerLogger.level = Logger::DEBUG

data/lib/picolena/templates/config/environments/production.rb CHANGED Viewed

@@ -18,4 +18,4 @@ config.action_view.cache_template_loading            = true
 # Disable delivery errors, bad email addresses will be ignored
 # config.action_mailer.raise_delivery_errors = false
-IndexLogger.level = Logger::INFO
+IndexerLogger.level = Logger::INFO

data/lib/picolena/templates/config/environments/test.rb CHANGED Viewed

@@ -22,4 +22,4 @@ config.action_controller.allow_forgery_protection    = false
 config.action_mailer.delivery_method = :test
-IndexLogger.level = Logger::WARN
+IndexerLogger.level = Logger::WARN

data/lib/picolena/templates/config/initializers/002_load_indexed_dirs.rb CHANGED Viewed

@@ -1,3 +1,4 @@
+module Picolena
 #Loading directories to be indexed
 indexed_dir_config_file='config/custom/indexed_directories.yml'
 IndexedDirectories={}
@@ -6,3 +7,4 @@ YAML.load_file(indexed_dir_config_file)[RAILS_ENV].each_pair{|abs_or_rel_path, a
 }
 IndexSavePath=File.join(IndexesSavePath,ENV["RAILS_ENV"] || "development")
+end

data/lib/picolena/templates/config/initializers/003_load_white_list_IPs.rb CHANGED Viewed

@@ -1,3 +1,4 @@
+module Picolena
 #Deny all, Allow only IPs described in config/custom/white_list_ip.yml
 white_list_ip_config_file='config/custom/white_list_ip.yml'
 WhiteListIPs=Regexp.new(
@@ -5,4 +6,5 @@ WhiteListIPs=Regexp.new(
       YAML.load_file(white_list_ip_config_file)["Allow"].collect{|ip|
         ip.downcase.include?("all") ? /.*/ : Regexp.escape(ip)
       }.join("|")<<")"
-  ) rescue /^(127\.0\.0\.1|0\.0\.0\.0)/
+  ) rescue /^(127\.0\.0\.1|0\.0\.0\.0)/
+end

data/lib/picolena/templates/config/initializers/004_load_plain_text_extractors.rb ADDED Viewed

@@ -0,0 +1,6 @@
+require 'core_exts'
+require 'plain_text_extractor_DSL'
+Picolena::Extractors=[]
+Dir.glob(File.join(RAILS_ROOT,'lib/plain_text_extractors/*.rb')).each{|extractor|
+  require extractor
+}

data/lib/picolena/templates/config/initializers/005_load_custom_title_and_names_and_links.rb CHANGED Viewed

@@ -1,5 +1,7 @@
+module Picolena
 custom_localization_yml=File.join(RAILS_ROOT,'config/custom/title_and_names_and_links.yml')
 YAML.load_file(custom_localization_yml).each{|key_name, custom_translation|
   Globalite.localizations[key_name.to_sym]=custom_translation unless custom_translation.blank?
 }
+end

data/lib/picolena/templates/config/initializers/006_load_icons.rb ADDED Viewed

@@ -0,0 +1,8 @@
+icons_config_file='config/custom/icons_and_filetypes.yml'
+FiletypeToIconSymbol={}
+YAML.load_file(icons_config_file).each_pair{|icon_name, filetypes|
+    icon_symbol=icon_name.to_sym
+    filetypes.split(/\s/).each{|filetype|
+      FiletypeToIconSymbol[filetype.downcase]=icon_symbol
+  }
+}

data/lib/picolena/templates/lib/core_exts.rb CHANGED Viewed

@@ -17,11 +17,30 @@ end
 class String
   # Creates a "probably unique" id with the desired length, composed only of lowercase letters.
-  def base26_hash(length=HashLength)
+  def base26_hash(length=Picolena::HashLength)
     Digest::MD5.hexdigest(self).to_i(16).to_s(26).tr('0-9a-p', 'a-z')[-length,length]
   end
 end
+module Enumerable
+  def each_with_thread(&block)
+    tds=self.collect{|elem|
+      Thread.new(elem) {|elem|
+        block.call(elem)
+      }
+    }
+    tds.each{|aThread| aThread.join}
+  end
+end
+class Array
+  def in_transposed_chunks(n)
+    s=self.size
+    i=n-s%n
+    (self+[nil]*i).enum_slice(n).to_a.transpose.collect{|e| e.compact}
+  end
+end
 class File
   def self.ext_as_sym(filename)
     File.extname(filename).sub(/^\./,'').downcase.to_sym rescue :no_extension

data/lib/picolena/templates/lib/plain_text_extractor_DSL.rb ADDED Viewed

@@ -0,0 +1,72 @@
+# Defines plain text extractors with DSL
+# For example, to convert "Microsoft Office Word document" to plain text
+#  PlainTextExtractor.new {
+#    every :doc, :dot
+#    as "application/msword"
+#    aka "Microsoft Office Word document"
+#    with "antiword SOURCE" => :on_linux, "some other command" => :on_windows
+#    which_should_for_example_extract 'district heating', :from => 'Types of malfunction in DH substations.doc'
+#    or_extract 'Basic Word template for Picolena specs', :from => 'office2003-word-template.dot'
+#  }
+module PlainTextExtractorDSL
+  attr_reader :exts, :mime_name, :description, :command, :content_and_file_examples
+  def initialize(&block)
+    @content_and_file_examples=[]
+    self.instance_eval(&block)
+    PlainTextExtractor.add(self)
+    MimeType.add(self.exts,self.mime_name)
+  end
+  def every(*exts)
+    @exts=exts
+  end
+  def as(mime_name)
+    @mime_name=mime_name
+  end
+  def aka(description)
+    @description=description
+  end
+  def which_requires(*dependencies)
+    @dependencies=dependencies
+  end
+  #used by rspec to test extractors:
+  #  which_should_for_example_extract 'in a pdf file', :from => 'basic.pdf'
+  #  or_extract 'some other stuff inside another pdf file', :from => 'yet_another.pdf'
+  #
+  #this spec will pass if 'basic.pdf' and 'yet_another.pdf' are included in an indexed directory, if every dependency is installed,
+  #and if plain text output from the extractor applied to 'basic.pdf' and 'yet_another.pdf' respectively include 'in a pdf file' and 'some other stuff inside another pdf file'
+  def which_should_for_example_extract(content, file)
+    @content_and_file_examples << [content,file[:from]]
+  end
+  #it allows to define specs in this way:
+  #  which_should_for_example_extract 'Hello world!', :from => 'hello.rb'
+  #  or_extract 'text inside!', :from => 'crossed.txt'
+  alias_method :or_extract, :which_should_for_example_extract
+  def with(command_as_hash_or_string=nil,&block)
+    #TODO: Find a better way to manage platforms, and include OS X, Vista, BSD...
+    platform=case RUBY_PLATFORM
+    when /linux/
+      :on_linux
+    when /win/
+      :on_windows
+    end
+    @command=case command_as_hash_or_string
+    when String
+      command_as_hash_or_string
+    when Hash
+      #dup must be used, otherwise @command gets frozen. No idea why though....
+      command_as_hash_or_string.invert[platform].dup
+    else
+      block || raise("No command defined for this extractor: #{description}")
+    end
+    @command<<' 2>/dev/null' if (@command.is_a?(String) && platform==:on_linux && !@command.include?('|'))
+  end
+end

data/lib/picolena/templates/lib/{filters → plain_text_extractors}/adobe.pdf.rb RENAMED Viewed

@@ -4,10 +4,10 @@
 #   Installation: Ubuntu  xpdf-utils package
 #   Home page: http://www.foolabs.com/xpdf/
-PlainText.extract {
-  from :pdf
+PlainTextExtractor.new {
+  every :pdf
   as "application/pdf"
   aka "Adobe Portable Document Format"
   with "pdftotext -enc UTF-8 SOURCE -" => :on_linux, "some other command" => :on_windows
   which_should_for_example_extract 'in a pdf file', :from => 'basic.pdf'
-}
+}

data/lib/picolena/templates/lib/{filters → plain_text_extractors}/html.rb RENAMED Viewed

@@ -1,5 +1,5 @@
-PlainText.extract {
-  from :html, :htm
+PlainTextExtractor.new {
+  every :html, :htm
   as "text/html"
   aka "HyperText Markup Language document"
   with {|source|

data/lib/picolena/templates/lib/{filters → plain_text_extractors}/ms.excel.rb RENAMED Viewed

@@ -1,7 +1,7 @@
 #Excel 97-2003
-PlainText.extract {
-  from :xls
+PlainTextExtractor.new {
+  every :xls
   as "application/excel"
   aka "Microsoft Office Excel document"
   with "xls2csv SOURCE 2>/dev/null | grep -i [a-z] | sed -e 's/\"//g' -e 's/,*$//' -e 's/,/ /g'" => :on_linux, "some other command" => :on_windows
@@ -11,8 +11,8 @@ PlainText.extract {
 #Excel 2007
 require 'zip/zip'
-PlainText.extract {
-  from :xlsx
+PlainTextExtractor.new {
+  every :xlsx
   as 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet'
   aka "Microsoft Office 2007 Excel spreadsheet"
   with {|source|

data/lib/picolena/templates/lib/{filters → plain_text_extractors}/ms.powerpoint.rb RENAMED Viewed

@@ -1,7 +1,7 @@
 #Powerpoint 97-2003
-PlainText.extract {
-  from :ppt, :pps
+PlainTextExtractor.new {
+  every :ppt, :pps
   as "application/powerpoint"
   aka "Microsoft Office Powerpoint document"
   with "catppt SOURCE" => :on_linux, "some other command" => :on_windows
@@ -13,8 +13,8 @@ PlainText.extract {
 #Powerpoint 2007
 require 'zip/zip'
-PlainText.extract {
-  from :pptx
+PlainTextExtractor.new {
+  every :pptx
   as 'application/vnd.openxmlformats-officedocument.presentationml.presentation' #could that mime BE any longer?
   aka "Microsoft Office 2007 Powerpoint document"
   with {|source|

data/lib/picolena/templates/lib/{filters → plain_text_extractors}/ms.rtf.rb RENAMED Viewed

@@ -4,8 +4,8 @@
 #   Installation: Ubuntu unrtf package
 #   http://www.gnu.org/software/unrtf/unrtf.html
-PlainText.extract {
-  from :rtf
+PlainTextExtractor.new {
+  every :rtf
   as "application/rtf"
   aka "Microsoft Rich Text Format"
   with "unrtf  SOURCE -t text" => :on_linux, "some other command" => :on_windows