RubyGems - libsvm_preprocessor - Versions diffs - 0.1 - Mend

libsvm_preprocessor 0.1

Files changed (18) hide show

checksums.yaml +7 -0
data/LICENSE +19 -0
data/README.md +0 -0
data/Rakefile +5 -0
data/bin/libsvm_pp +33 -0
data/lib/libsvm_preprocessor/cli.rb +57 -0
data/lib/libsvm_preprocessor/feature_generator.rb +45 -0
data/lib/libsvm_preprocessor/global.rb +3 -0
data/lib/libsvm_preprocessor/preprocessor.rb +136 -0
data/lib/libsvm_preprocessor/token_map.rb +30 -0
data/lib/libsvm_preprocessor/tokenizer.rb +44 -0
data/lib/libsvm_preprocessor/version.rb +3 -0
data/lib/libsvm_prerpocessor.rb +1 -0
data/spec/feature_generator_spec.rb +58 -0
data/spec/preprocessor_spec.rb +111 -0
data/spec/token_map_spec.rb +60 -0
data/spec/tokenizer_spec.rb +36 -0
metadata +90 -0

checksums.yaml ADDED Viewed

@@ -0,0 +1,7 @@
+---
+!binary "U0hBMQ==":
+  metadata.gz: ec1de3e3e31391a33e628f4dc4c6ace1b9c96cd3
+  data.tar.gz: e897e7149c5ace324fba715154402da3efc075d9
+!binary "U0hBNTEy":
+  metadata.gz: 122a7ad95b42b0b2429aa69d8cac690c90800f870f851ac56a665c69e2ba933cb770b484f45ada754f4a7336a1e2e28c98ce8407f27ab7a98797aca8b8562613
+  data.tar.gz: f725942158aab1a7d8a34105ccad1639664e961f99739f5a05bbe67da1089ef4427200721f0ba527d1aa7fff2b3c52b158b95e2ccbaa84d4bfac64cc847879c4

data/LICENSE ADDED Viewed

@@ -0,0 +1,19 @@
+Copyright (c) 2013 by Andrea Nodari
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.

data/README.md ADDED Viewed

File without changes

data/Rakefile ADDED Viewed

@@ -0,0 +1,5 @@
+require 'rspec/core/rake_task'
+# `rake spec` runs the RSpec suite; a bare `rake` does the same because
+# :spec is declared as the prerequisite of the default task.
+RSpec::Core::RakeTask.new(:spec)
+task default: :spec

data/bin/libsvm_pp ADDED Viewed

@@ -0,0 +1,33 @@
+#!/usr/bin/env ruby
+# encoding: utf-8
+#
+# Command-line entry point. Reads the tab-separated input file named on the
+# command line and prints (or writes, with -o) one libsvm line per row.
+#
+# Versions are compared as Gem::Version objects: a plain String comparison is
+# lexicographic, so '10.0.0' < '2.0.0' would wrongly be true.
+if Gem::Version.new(RUBY_VERSION) < Gem::Version.new('2.0.0')
+  puts 'This gem supports only Ruby 2.0.0+'
+  exit 1
+end
+# The library files use keyword arguments, so they are only loaded once the
+# version check above has passed.
+$LOAD_PATH.unshift(File.dirname(File.realpath(__FILE__)) + '/../lib')
+require 'csv'
+require 'libsvm_preprocessor/preprocessor'
+require 'libsvm_preprocessor/cli'
+# CLI.parse removes the switches it recognises from ARGV, so the first
+# remaining argument is the input path.
+options = CLI.parse(ARGV)
+input_path = ARGV[0]
+# With no filename at all input_path is nil and File.exist?(nil) raises
+# TypeError, so nil is rejected before the filesystem is consulted.
+if input_path.nil? || !File.exist?(input_path)
+  puts "Please insert a real input file."
+  exit 1
+end
+preprocessor = Preprocessor.new(options)
+preprocessor.use(input_path, testing: options[:testing])
+# NOTE(review): the commented-out draft below refers to names that are not
+# defined in this script (processor, OUTPUT_DIR, OPTIONS_INPUT); it looks
+# like an abandoned second pass over a test file - confirm before reviving.
+# output_dir = File.dirname(File.realpath(__FILE__)) + '/../output'
+# input_test  = ARGV[1]
+# output_test_path = "#{OUTPUT_DIR}/test.svm"
+# output_test  = File.open(output_test_path, "w")
+# CSV.foreach(input_test, OPTIONS_INPUT) do |row|
+#   vector = processor.toSVM(processor.push(row, testing: true))
+#   output_test.puts vector
+# end
+# output_test.close

data/lib/libsvm_preprocessor/cli.rb ADDED Viewed

@@ -0,0 +1,57 @@
+require 'optparse'
+# Parses the command-line switches of the libsvm_pp executable.
+class CLI
+  # Parses args destructively: recognised switches are removed from the
+  # Array, so whatever remains is the input filename.
+  #
+  # args - Array of command-line Strings, typically ARGV.
+  #
+  # Returns a Hash with the keys :mode, :lang, :stemming, :stopwords,
+  # :stopword, :testing, :numeric_type and :output.
+  def self.parse(args)
+    # NOTE(review): :lang defaults to a Symbol here, while Tokenizer falls
+    # back to the String "it" - confirm the stemmer and stopword filter
+    # accept both forms.
+    options = {
+      mode:         :unigram,
+      lang:         :it,
+      stemming:     false,
+      stopwords:    false,
+      stopword:     false,
+      testing:      false,
+      numeric_type: nil,
+      output:       nil
+    }
+    opt_parser = OptionParser.new do |opts|
+      opts.banner = "libsvm_pp [options] <filename>"
+      opts.on("-m [TYPE]", "--mode [TYPE]", [:unigram, :bigram],
+              "Select unigram (default) or bigram") do |mode|
+        options[:mode] = mode
+      end
+      opts.on("-s", "--stemming", "Use this you want stemming") do |s|
+        options[:stemming] = s
+      end
+      opts.on("-w", "--remove-stopwords",
+              "Use this if you want remove stopwords") do |w|
+        # Tokenizer and Preprocessor::OPTIONS_MAP read the singular
+        # :stopword key, so the flag is stored under it as well; with only
+        # :stopwords set the switch had no effect. The plural key is kept
+        # for callers that already read it.
+        options[:stopwords] = w
+        options[:stopword]  = w
+      end
+      opts.on("-t", "--testing",
+              "Use this to use testing mode") do |t|
+        options[:testing] = t
+      end
+      opts.on("-l [TYPE]", "--language", [:it, :en],
+              "Select your language it / en") do |l|
+        options[:lang] = l
+      end
+      opts.on("-n N", Integer, "Numeric mode") do |n|
+        options[:numeric_type] = n
+      end
+      opts.on("-o [output]", String, "output file") do |o|
+        options[:output] = o
+      end
+    end
+    opt_parser.parse!(args)
+    options
+  end
+end

data/lib/libsvm_preprocessor/feature_generator.rb ADDED Viewed

@@ -0,0 +1,45 @@
+# Builds n-gram features from an Array of terms and maps each n-gram to a
+# numeric id through a TokenMap.
+class FeatureGenerator
+  # options - Hash; only :mode is read here (:unigram, :bigram or :trichar).
+  #           A missing or nil :mode is set to :unigram in the Hash itself,
+  #           so the caller's Hash is modified.
+  def initialize(options = {})
+    @token_map = TokenMap.new
+    @options = options
+    @options[:mode] ||= :unigram
+  end
+  # Returns the n-gram => id Hash held by the underlying TokenMap.
+  def hash_of_ngrams
+    @token_map.hash_of_ngrams
+  end
+  # Maps the terms to an Array of { id => ngram } Hashes.
+  #
+  # ary_of_terms - Array of String terms.
+  # testing      - when true the TokenMap is asked not to learn new n-grams.
+  #
+  # Returns nil when :mode is none of the three supported values.
+  def features(ary_of_terms, testing: false)
+    ngrams =
+      case @options[:mode]
+      when :unigram then unigrams(ary_of_terms)
+      when :bigram  then unigrams(ary_of_terms) + bigrams(ary_of_terms)
+      when :trichar then trichar(ary_of_terms)
+      end
+    return unless ngrams
+    # The flag is forwarded for every mode; :trichar used to drop it, which
+    # let test data register new ids.
+    @token_map.token_map(ngrams, testing: testing)
+  end
+  # Joins the terms with single spaces and returns every run of three
+  # consecutive characters, each wrapped in a one-element Array. A joined
+  # string shorter than three characters is returned whole.
+  def trichar(ary_of_terms)
+    string = ary_of_terms.join(" ")
+    return [[string]] if string.size < 3
+    string.each_char.each_cons(3).map { |chars| [chars.join] }
+  end
+  # Wraps every term in a one-element Array.
+  def unigrams(ary_of_term)
+    ary_of_term.map { |term| [term] }
+  end
+  # Returns the pairs of adjacent terms. each_cons yields nothing for fewer
+  # than two terms, whereas the former slice-and-zip raised TypeError on an
+  # empty Array (ary[1..-1] is nil there).
+  def bigrams(ary)
+    ary.each_cons(2).to_a
+  end
+end

data/lib/libsvm_preprocessor/global.rb ADDED Viewed

@@ -0,0 +1,3 @@
+# Settings shared across the gem.
+module LibsvmPreprocessor
+  # CSV options used when reading input rows: fields are tab-separated and
+  # the first row is treated as data, not as a header.
+  CSV_OPTIONS = {
+    :col_sep => "\t",
+    :headers => false
+  }
+end

data/lib/libsvm_preprocessor/preprocessor.rb ADDED Viewed

@@ -0,0 +1,136 @@
+require 'csv'
+require 'libsvm_preprocessor/tokenizer'
+require 'libsvm_preprocessor/token_map'
+require 'libsvm_preprocessor/feature_generator'
+require 'libsvm_preprocessor/global'
+# Turns [category, text] rows into libsvm-style vectors. Categories get
+# integer ids in order of first appearance (starting at 0) and every vector
+# produced is kept, split into a :training and a :testing set.
+class Preprocessor
+  # categories        - Hash of category => integer id.
+  # instances         - Hash with :training / :testing Arrays of vectors.
+  # non_zero_features - Hash with :training / :testing totals of the number
+  #                     of distinct feature ids per pushed vector.
+  # options           - the options Hash this instance works with.
+  attr_reader :categories, :instances, :non_zero_features, :options
+  # Presets selectable through the :numeric_type option.
+  OPTIONS_MAP = {
+    0  => { lang: "it", mode: :unigram, stemming: false, stopword: false },
+    1  => { lang: "it", mode: :bigram, stemming: false, stopword: false },
+    2  => { lang: "it", mode: :unigram, stemming: true, stopword: false },
+    3  => { lang: "it", mode: :bigram, stemming: true, stopword: false },
+    4  => { lang: "it", mode: :unigram, stemming: false, stopword: true },
+    5  => { lang: "it", mode: :bigram, stemming: false, stopword: true },
+    6  => { lang: "it", mode: :unigram, stemming: true, stopword: true },
+    7  => { lang: "it", mode: :bigram, stemming: true, stopword: true },
+    8  => { lang: "it", mode: :trichar, stemming: true, stopword: true },
+    9  => { lang: "it", mode: :trichar, stemming: true, stopword: false },
+    10 => { lang: "it", mode: :trichar, stemming: false, stopword: true },
+    11 => { lang: "it", mode: :trichar, stemming: false, stopword: false },
+  }
+  # Number of presets in OPTIONS_MAP.
+  def self.options_map_size
+    OPTIONS_MAP.size
+  end
+  # Human-readable "key: value | key: value" rendering of one preset.
+  def self.options_map(key)
+    OPTIONS_MAP[key].map { |k, v| "#{k}: #{v}"}.join(" | ")
+  end
+  # options - Hash of settings, passed on to Tokenizer and FeatureGenerator.
+  #           When :numeric_type is set, the matching preset overrides
+  #           :lang, :mode, :stemming and :stopword.
+  #
+  # Raises ArgumentError when :numeric_type names no preset.
+  def initialize(options = {})
+    options = override_options(options) if options[:numeric_type]
+    @options    = options
+    @tokenizer  = Tokenizer.new(options)
+    @generator  = FeatureGenerator.new(options)
+    @non_zero_features   = { testing: 0, training: 0 }
+    @instances           = { testing: [], training: [] }
+    @categories          = {}
+    @current_category_id = -1
+  end
+  # Returns the n-gram => id Hash of the feature generator.
+  def hash_of_ngrams
+    @generator.hash_of_ngrams
+  end
+  # Returns a new Hash: the given options with the preset selected by
+  # :numeric_type laid on top. Merging (instead of returning the preset
+  # alone) keeps :output and the other caller-supplied keys, and leaves the
+  # Hashes stored in OPTIONS_MAP untouched.
+  #
+  # Raises ArgumentError when the preset does not exist.
+  def override_options(options)
+    key = options[:numeric_type]
+    preset = OPTIONS_MAP.fetch(key) do
+      raise ArgumentError,
+            "unknown numeric type #{key.inspect} (expected 0..#{OPTIONS_MAP.size - 1})"
+    end
+    options.merge(preset)
+  end
+  # Vectorizes one row and records it.
+  #
+  # data    - two-element Array: [category, text].
+  # testing - true to file the vector under :testing and to stop the
+  #           feature generator from learning new n-grams.
+  #
+  # Returns the vector [category_id, [{ feature_id => count }, ...]].
+  def push(data, testing: false)
+    category, string = data
+    # A category seen for the first time gets the next free id.
+    @categories[category] ||= next_category_id
+    v = vectorize(category, string, testing: testing)
+    set = testing ? :testing : :training
+    @instances[set] << v
+    @non_zero_features[set] += v.last.size
+    v
+  end
+  # Renders a vector as "<label>  <id>:<count> <id>:<count> ...".
+  def toSVM(vector)
+    label    = vector.first
+    features = vector.last
+    # the following line is made to have clean diff with libshorttext
+    return "#{label} " if features.empty?
+    pairs = features.map do |feature|
+      id, count = feature.first
+      "#{id}:#{count}"
+    end
+    "#{label}  #{pairs.join(' ')}"
+  end
+  # This method is only meant to stringify the vector in very same
+  # format of libsvm (in this way diff does not mess up)
+  def nice_string(v)
+    v[1] != "" ? v.join("  ") : "#{v[0]} "
+  end
+  # Converts every row of the file at input_path and writes the libsvm
+  # lines to the file named by the :output option, or to standard output
+  # when that option is not set.
+  def use(input_path, testing: false)
+    return write_svm(input_path, $stdout, testing) unless @options[:output]
+    # @options is a Hash, so the path is read with [] (".output" raised
+    # NoMethodError); the block form closes the file even on an exception.
+    File.open(@options[:output], "w") do |output_file|
+      write_svm(input_path, output_file, testing)
+    end
+  end
+  private
+  # Streams the rows of input_path through push/toSVM into io. The CSV
+  # options are passed as keywords, which Ruby 3 requires, and the double
+  # splat hands CSV a copy rather than the shared constant.
+  def write_svm(input_path, io, testing)
+    CSV.foreach(input_path, **::LibsvmPreprocessor::CSV_OPTIONS) do |row|
+      io.puts toSVM(push(row, testing: testing))
+    end
+  end
+  def vectorize(category, string, testing: false)
+    tokens   = @tokenizer.tokenize(string)
+    features = @generator.features(tokens, testing: testing)
+    [ @categories[category], count_frequency(features) ]
+  end
+  # Counts how often each feature id occurs, in a single pass.
+  #
+  # features - Array of { id => ngram } Hashes.
+  #
+  # Returns an Array of { id => count } Hashes sorted by id.
+  def count_frequency(features)
+    counts = features.each_with_object(Hash.new(0)) do |feature, acc|
+      acc[feature.keys.first] += 1
+    end
+    counts.sort.map { |id, count| { id => count } }
+  end
+  # Give the next category id available
+  def next_category_id
+    @current_category_id += 1
+  end
+end

data/lib/libsvm_preprocessor/token_map.rb ADDED Viewed

@@ -0,0 +1,30 @@
+# Hands out integer ids, starting at 1, to the distinct n-grams it is shown
+# and remembers them for later lookups.
+class TokenMap
+  # Hash of n-gram => id for everything learned so far.
+  attr_reader :hash_of_ngrams
+  def initialize
+    @current_ngram_id = 0
+    @hash_of_ngrams = {}
+  end
+  # Translates n-grams into { id => ngram } Hashes, in input order.
+  #
+  # ary_of_ngrams - Array of n-grams (each an Array of Strings).
+  # testing       - when false, unknown n-grams receive a fresh id; when
+  #                 true, unknown n-grams are left out of the result and
+  #                 nothing new is learned.
+  def token_map(ary_of_ngrams, testing: false)
+    return known_only(ary_of_ngrams) if testing
+    ary_of_ngrams.map do |ngram|
+      id = (@hash_of_ngrams[ngram] ||= next_ngram_id)
+      { id => ngram }
+    end
+  end
+  private
+  # Keeps only the n-grams that already have an id.
+  def known_only(ngrams)
+    ngrams.each_with_object([]) do |ngram, found|
+      id = @hash_of_ngrams[ngram]
+      found << { id => ngram } if id
+    end
+  end
+  # Give the next term id available
+  def next_ngram_id
+    @current_ngram_id += 1
+  end
+end

data/lib/libsvm_preprocessor/tokenizer.rb ADDED Viewed

@@ -0,0 +1,44 @@
+require 'lingua/stemmer'
+require 'stopwords'
+require 'unicode'
+# Splits raw text into lowercase alphabetic tokens, optionally removing
+# stopwords and stemming each token.
+class Tokenizer
+  # options - Hash read (and filled with defaults) by this class:
+  #           :stopword - true to drop stopwords (default false). The
+  #                       plural :stopwords key set by the CLI is honoured
+  #                       when :stopword itself is unset or false.
+  #           :stemming - true to stem every token (default false).
+  #           :lang     - language for both filters (default "it").
+  def initialize(options = {})
+    @options = options
+    @options[:stopword] ||= @options[:stopwords] || false
+    @options[:stemming] ||= false
+    @options[:lang]     ||= "it"
+    @filter  = Stopwords::Snowball::Filter.new(@options[:lang])
+    @stemmer = Lingua::Stemmer.new(language: @options[:lang])
+  end
+  # Returns the Array of tokens for string, after the optional stopword
+  # removal and stemming steps.
+  def tokenize(string)
+    result = process_text(string)
+    result = remove_stopwords(result) if @options[:stopword]
+    result = stem_each(result) if @options[:stemming]
+    result
+  end
+  # Lowercases the text, decomposes it with Unicode.nfd, turns every
+  # non-alphabetic character into a space and splits on whitespace.
+  #
+  # The argument is never modified: the former in-place downcase! changed
+  # the caller's string and raised on frozen strings. nil is treated as
+  # empty text and yields [].
+  #
+  # NOTE(review): digits are blanked together with punctuation, so the two
+  # letter/digit separating substitutions that used to follow could never
+  # match and are gone; likewise split(' ') already ignores repeated,
+  # leading and trailing blanks. If numbers were meant to survive, the
+  # character class needs to become [^[:alnum:]].
+  # NOTE(review): presumably the combining accents produced by nfd are also
+  # non-alphabetic and so become spaces (splitting words on inner accents) -
+  # confirm this is intended.
+  def process_text(string)
+    text = Unicode.nfd(string.to_s.downcase)
+    text.gsub(/[^[:alpha:]]/, ' ').split(' ')
+  end
+  # Remove stopwords according to the selected language
+  def remove_stopwords(ary)
+    @filter.filter(ary)
+  end
+  # Stem each word according to the selected language
+  def stem_each(ary)
+    ary.map { |term| @stemmer.stem(term) }
+  end
+end

data/lib/libsvm_preprocessor/version.rb ADDED Viewed

@@ -0,0 +1,3 @@
+# Namespace of the libsvm_preprocessor gem.
+module LibsvmPreprocessor
+  # Version string of the gem.
+  VERSION = "0.1"
+end

data/lib/libsvm_prerpocessor.rb ADDED Viewed

	@@ -0,0 +1 @@
1	+ require 'libsvm_preprocessor/preprocessor'

data/spec/feature_generator_spec.rb ADDED Viewed

@@ -0,0 +1,58 @@
+require 'rspec'
+require 'libsvm_preprocessor/preprocessor'
+# Checks the id-mapped features FeatureGenerator produces in its unigram,
+# bigram and trichar modes.
+describe FeatureGenerator do
+  let(:ary_of_terms) { %w[a b c] }
+  let(:ary) { %w[mar rosso] }
+  context "with default options" do
+    let(:generator) { described_class.new }
+    it "use unigrams" do
+      features = generator.features(ary_of_terms)
+      expect(features).to eq([{ 1 => ["a"] }, { 2 => ["b"] }, { 3 => ["c"] }])
+    end
+  end
+  context "using bigrams" do
+    let(:generator) { described_class.new(mode: :bigram) }
+    it "use bigrams" do
+      features = generator.features(ary_of_terms)
+      expect(features).to eq([
+        { 1 => ["a"] }, { 2 => ["b"] }, { 3 => ["c"] },
+        { 4 => %w[a b] }, { 5 => %w[b c] }
+      ])
+    end
+    it "use ingnore duplicates" do
+      features = generator.features(%w[a a])
+      expect(features).to eq([{ 1 => ["a"] }, { 1 => ["a"] }, { 2 => %w[a a] }])
+    end
+  end
+  context "using trichar" do
+    let(:generator) { described_class.new(mode: :trichar) }
+    it "use trichar" do
+      features = generator.features(ary)
+      expect(features).to eq([
+        { 1 => ["mar"] }, { 2 => ["ar "] }, { 3 => ["r r"] }, { 4 => [" ro"] },
+        { 5 => ["ros"] }, { 6 => ["oss"] }, { 7 => ["sso"] }
+      ])
+    end
+    it "ignore duplicates" do
+      features = generator.features(["aaaaa"])
+      expect(features).to eq([{ 1 => ["aaa"] }, { 1 => ["aaa"] }, { 1 => ["aaa"] }])
+    end
+    it "workarounds little word" do
+      # Fewer than three characters: the whole string is the only feature.
+      features = generator.features(["te"])
+      expect(features).to eq([{ 1 => ["te"] }])
+    end
+    it "workarounds little words" do
+      features = generator.features(%w[te ne])
+      expect(features).to eq([{ 1 => ["te "] }, { 2 => ["e n"] }, { 3 => [" ne"] }])
+    end
+  end
+end

data/spec/preprocessor_spec.rb ADDED Viewed

@@ -0,0 +1,111 @@
+require 'rspec'
+require 'libsvm_preprocessor/preprocessor'
+# Checks category mapping, vector construction, libsvm formatting and the
+# testing mode of Preprocessor, with unigram, trichar and bigram features.
+describe Preprocessor do
+  describe "default settings" do
+    let(:preproc) { described_class.new }
+    let(:p_trichar) { described_class.new(mode: :trichar) }
+    context "adding a text" do
+      it "maps new categories" do
+        preproc.push(["category", "bottiglia"])
+        expect(preproc.categories["category"]).to eq 0
+      end
+    end
+    context "with default settings" do
+      it "produce a new vector" do
+        vector = preproc.push(["category", "bottiglia"])
+        expect(vector).to eq([0, [{ 1 => 1 }]])
+      end
+      it "takes into account frequencies" do
+        vector = preproc.push(["category", "bottiglia bottiglia bottiglia"])
+        expect(vector).to eq([0, [{ 1 => 3 }]])
+      end
+      it "produce svm format" do
+        vector = preproc.push(["category", "bottiglia bottiglia bottiglia"])
+        expect(preproc.toSVM(vector)).to eq("0  1:3")
+      end
+    end
+    context "with trichar mode" do
+      it "produce a new vector with frequencies" do
+        vector = p_trichar.push(["category", "osso osso"])
+        counts = [{ 1 => 2 }, { 2 => 2 }, { 3 => 1 }, { 4 => 1 }, { 5 => 1 }]
+        expect(vector).to eq([0, counts])
+      end
+    end
+    context "when I am testing" do
+      it "ignore new words" do
+        vector = preproc.push(["category", "bottiglia"], testing: true)
+        expect(vector).to eq([0, []])
+      end
+      it "remembers the old ones" do
+        # Only "bottiglia" was learned during training, so "vetro" is dropped.
+        preproc.push(["category", "bottiglia"], testing: false)
+        vector = preproc.push(["category", "bottiglia vetro"], testing: true)
+        expect(vector).to eq([0, [{ 1 => 1 }]])
+      end
+      it "produce svm format with blank features" do
+        vector = preproc.push(["category", "bottiglia"], testing: true)
+        expect(preproc.toSVM(vector)).to eq("0 ")
+      end
+    end
+  end
+  describe "using bigrams as feature" do
+    let(:preproc) { described_class.new(mode: :bigram) }
+    context "adding a text" do
+      it "maps new categories" do
+        preproc.push(["category", "bottiglia"])
+        expect(preproc.categories["category"]).to eq 0
+      end
+    end
+    context "simple vectorization" do
+      it "produce a new vector" do
+        vector = preproc.push(["category", "bottiglia"])
+        expect(vector).to eq([0, [{ 1 => 1 }]])
+      end
+      it "takes into account frequencies" do
+        # Feature 1 is the unigram (3 times), feature 2 the repeated bigram.
+        vector = preproc.push(["category", "bottiglia bottiglia bottiglia"])
+        expect(vector).to eq([0, [{ 1 => 3 }, { 2 => 2 }]])
+      end
+      it "produce svm format" do
+        vector = preproc.push(["category", "bottiglia bottiglia bottiglia"])
+        expect(preproc.toSVM(vector)).to eq("0  1:3 2:2")
+      end
+    end
+    context "when I am testing" do
+      it "ignore new words" do
+        vector = preproc.push(["category", "bottiglia"], testing: true)
+        expect(vector).to eq([0, []])
+      end
+      it "remembers the old ones" do
+        preproc.push(["category", "bottiglia"], testing: false)
+        vector = preproc.push(["category", "bottiglia vetro"], testing: true)
+        expect(vector).to eq([0, [{ 1 => 1 }]])
+      end
+      it "produce svm format with blank features" do
+        vector = preproc.push(["category", "bottiglia"], testing: true)
+        expect(preproc.toSVM(vector)).to eq("0 ")
+      end
+    end
+  end
+end

data/spec/token_map_spec.rb ADDED Viewed

@@ -0,0 +1,60 @@
+require 'rspec'
+require 'libsvm_preprocessor/preprocessor'
+# Checks that TokenMap assigns ids in order of first appearance, reuses them
+# afterwards and learns nothing new in testing mode.
+describe TokenMap do
+  let(:token_map) { described_class.new }
+  context "it maps terms in new ids" do
+    it "maps new tokens" do
+      mapped = token_map.token_map([["bottiglia"], ["di"], ["vetro"]])
+      expect(mapped).to eq([{ 1 => ["bottiglia"] }, { 2 => ["di"] }, { 3 => ["vetro"] }])
+    end
+  end
+  context "it remembers old ids" do
+    it "maps new tokens" do
+      token_map.token_map([["bottiglia"], ["di"], ["vetro"]])
+      mapped = token_map.token_map([["bottiglia"], ["di"], ["plastica"]])
+      # Id 3 already belongs to "vetro", so "plastica" gets 4.
+      expect(mapped).to eq([{ 1 => ["bottiglia"] }, { 2 => ["di"] }, { 4 => ["plastica"] }])
+    end
+  end
+  context "it remembers old ids also with other trichars" do
+    it "maps new tokens" do
+      token_map.token_map([["abc"], ["bc "], ["c a"], [" ab"], ["abc"]])
+      mapped = token_map.token_map([["abc"], ["c a"], ["bot"]])
+      expect(mapped).to eq([{ 1 => ["abc"] }, { 3 => ["c a"] }, { 5 => ["bot"] }])
+    end
+  end
+  context "it ignores duplicates" do
+    it "maps new tokens" do
+      mapped = token_map.token_map([["bottiglia"], ["di"], ["plastica"], ["plastica"]])
+      expect(mapped).to eq([
+        { 1 => ["bottiglia"] }, { 2 => ["di"] },
+        { 3 => ["plastica"] }, { 3 => ["plastica"] }
+      ])
+    end
+  end
+  context "if I am creating a test file" do
+    before { token_map.token_map([["bottiglia"], ["di"], ["plastica"]]) }
+    it "does not consider new terms" do
+      mapped = token_map.token_map([["polenta"], ["valsugana"]], testing: true)
+      expect(mapped).to eq([])
+    end
+    it "does not consider new terms but remembers the old ones" do
+      mapped = token_map.token_map([["tappo"], ["plastica"]], testing: true)
+      expect(mapped).to eq([{ 3 => ["plastica"] }])
+    end
+  end
+end

data/spec/tokenizer_spec.rb ADDED Viewed

@@ -0,0 +1,36 @@
+require 'rspec'
+require 'libsvm_preprocessor/preprocessor'
+# Checks Tokenizer with its default settings, with stopword removal and
+# with stemming.
+describe Tokenizer do
+  let(:tokenizer) { Tokenizer.new }
+  context "tokenizer with default settings" do
+    it "tokenize a single word" do
+      tokens = tokenizer.tokenize("bottiglia")
+      expect(tokens).to eq(["bottiglia"])
+    end
+    # This example used to repeat the single-word case; it now feeds several
+    # words. With default settings nothing is filtered or stemmed.
+    it "tokenize multiple words" do
+      tokens = tokenizer.tokenize("bottiglia di vetro")
+      expect(tokens).to eq(["bottiglia", "di", "vetro"])
+    end
+  end
+  context "tokenizer with stopword removal" do
+    let(:tokenizer) { Tokenizer.new(stopword: true) }
+    it "tokenize removing stopwords" do
+      tokens = tokenizer.tokenize("bottiglia di vetro")
+      expect(tokens).to eq(["bottiglia", "vetro"])
+    end
+  end
+  # Was labelled "tokenizer with stopword removal" although it exercises the
+  # :stemming option.
+  context "tokenizer with stemming" do
+    let(:tokenizer) { Tokenizer.new(stemming: true) }
+    it "tokenize stemming each word" do
+      tokens = tokenizer.tokenize("bottiglia di vetro")
+      expect(tokens).to eq(["bottigl", "di", "vetr"])
+    end
+  end
+end

metadata ADDED Viewed

@@ -0,0 +1,90 @@
+--- !ruby/object:Gem::Specification
+name: libsvm_preprocessor
+version: !ruby/object:Gem::Version
+  version: '0.1'
+platform: ruby
+authors:
+- Andrea Nodari
+autorequire:
+bindir: bin
+cert_chain: []
+date: 2013-05-31 00:00:00.000000000 Z
+dependencies:
+- !ruby/object:Gem::Dependency
+  name: stopwords-filter
+  requirement: !ruby/object:Gem::Requirement
+    requirements:
+    - - "~>"
+      - !ruby/object:Gem::Version
+        version: 0.2.1
+  type: :runtime
+  prerelease: false
+  version_requirements: !ruby/object:Gem::Requirement
+    requirements:
+    - - "~>"
+      - !ruby/object:Gem::Version
+        version: 0.2.1
+- !ruby/object:Gem::Dependency
+  name: ruby-stemmer
+  requirement: !ruby/object:Gem::Requirement
+    requirements:
+    - - "~>"
+      - !ruby/object:Gem::Version
+        version: 0.9.3
+  type: :runtime
+  prerelease: false
+  version_requirements: !ruby/object:Gem::Requirement
+    requirements:
+    - - "~>"
+      - !ruby/object:Gem::Version
+        version: 0.9.3
+description: |2
+      It's a text preprocessor that generate a libsvm input file
+email: andrea.nodari91@gmail.com
+executables:
+- libsvm_pp
+extensions: []
+extra_rdoc_files: []
+files:
+- README.md
+- Rakefile
+- LICENSE
+- lib/libsvm_preprocessor/cli.rb
+- lib/libsvm_preprocessor/feature_generator.rb
+- lib/libsvm_preprocessor/global.rb
+- lib/libsvm_preprocessor/preprocessor.rb
+- lib/libsvm_preprocessor/token_map.rb
+- lib/libsvm_preprocessor/tokenizer.rb
+- lib/libsvm_preprocessor/version.rb
+- lib/libsvm_prerpocessor.rb
+- bin/libsvm_pp
+- spec/feature_generator_spec.rb
+- spec/preprocessor_spec.rb
+- spec/token_map_spec.rb
+- spec/tokenizer_spec.rb
+homepage: http://github.com/nodo/libsvm_preprocessor
+licenses:
+- MIT
+metadata: {}
+post_install_message:
+rdoc_options: []
+require_paths:
+- lib
+required_ruby_version: !ruby/object:Gem::Requirement
+  requirements:
+  - - ">="
+    - !ruby/object:Gem::Version
+      version: '0'
+required_rubygems_version: !ruby/object:Gem::Requirement
+  requirements:
+  - - ">="
+    - !ruby/object:Gem::Version
+      version: '0'
+requirements: []
+rubyforge_project:
+rubygems_version: 2.0.0.preview3.1
+signing_key:
+specification_version: 4
+summary: It's a text preprocessor that generate a libsvm input file
+test_files: []
+has_rdoc: false