RubyGems - tf-idf-similarity - Versions diffs - 0.1.4 → 0.1.5 - Mend

tf-idf-similarity 0.1.4 → 0.1.5

Files changed (21)

checksums.yaml +4 -4
data/.rspec +2 -0
data/.travis.yml +13 -9
data/.yardopts +0 -1
data/Gemfile +3 -3
data/LICENSE +1 -1
data/README.md +49 -23
data/lib/tf-idf-similarity.rb +2 -6
data/lib/tf-idf-similarity/bm25_model.rb +1 -1
data/lib/tf-idf-similarity/document.rb +1 -1
data/lib/tf-idf-similarity/extras/tf_idf_model.rb +1 -1
data/lib/tf-idf-similarity/matrix_methods.rb +1 -1
data/lib/tf-idf-similarity/token.rb +3 -6
data/lib/tf-idf-similarity/version.rb +1 -1
data/spec/extras/tf_idf_model_spec.rb +3 -3
data/spec/spec_helper.rb +5 -1
data/spec/token_spec.rb +8 -0
data/td-idf-similarity.gemspec +6 -8
metadata +17 -20
data/USAGE +0 -1
data/ext/mkrf_conf.rb +0 -15

checksums.yaml CHANGED

@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz: c0ba1f941db96541f035a283df336907bf941439
-  data.tar.gz: 22bbec24681023e880e1e4e3fa14d26356630021
+  metadata.gz: 736ca4c4b93d14ea046cbc4bdae930c8b88082be
+  data.tar.gz: 6b43e8356c59e0f48ac08f300186d4e12497368d
 SHA512:
-  metadata.gz: 9e7cca8d705d8080dff857d2d953a6f0091e361bb0693f0ce650e64a2f4633ad5db386fa41ea8b73ae1cfe839db8e4e9f56592c98b36cdc6ab756699ecfaa5f7
-  data.tar.gz: 3bcb9dcb07c9eb00c234920ff8d6340aac815c8181510d7ae65183e9b1d528001247439a86c6b603d973362bcee020eb0340558a9318693713cdaaa4b62a2ffd
+  metadata.gz: 635ea3047ba54a951020f95ab7e9412adf07a39d6042b85e605fcd0517345d506690bac11ab05f7f20e16f80106e95e8002fd1ae2ab4e466a27cc4f143ac15d6
+  data.tar.gz: 693ac6c70f9daf3f0a1ed06ba693d170654e7c871702641d598e59a6fc69cbd5316e76441da062aca05d0b8f67c0ba0c958a4115e2c9da17316a2ebef2190738

data/.rspec ADDED

	@@ -0,0 +1,2 @@
1	+ --color
2	+ --require spec_helper

data/.travis.yml CHANGED

@@ -1,21 +1,25 @@
+sudo: false
 language: ruby
+cache: bundler
 rvm:
-  - 1.9.2
   - 1.9.3
   - 2.0.0
   - 2.1.0
+  - 2.2.0
 env:
   - MATRIX_LIBRARY=gsl
   - MATRIX_LIBRARY=narray
   - MATRIX_LIBRARY=nmatrix
   - MATRIX_LIBRARY=matrix
+addons:
+  apt:
+    packages:
+    - gsl-bin
+    - libgsl0-dev
+    # Installing ATLAS will install BLAS.
+    - libatlas-dev
+    - libatlas-base-dev
+    - libatlas3gf-base
 before_install:
   - bundle config build.nmatrix --with-lapacklib
-  - if [ $MATRIX_LIBRARY = 'nmatrix' -o $MATRIX_LIBRARY = 'gsl' ]; then sudo apt-get update -qq; fi
-  - if [ $MATRIX_LIBRARY = 'gsl' ]; then sudo apt-get install gsl-bin libgsl0-dev; fi
-  # Installing ATLAS will install BLAS.
-  - if [ $MATRIX_LIBRARY = 'nmatrix' ]; then sudo apt-get install -qq libatlas-dev libatlas-base-dev libatlas3gf-base; fi
-  - if [ $MATRIX_LIBRARY = 'nmatrix' ]; then export CPLUS_INCLUDE_PATH=$CPLUS_INCLUDE_PATH:/usr/include/atlas; fi
-# Travis sometimes runs without Bundler.
-install: bundle
-script: bundle exec rake --trace
+  - export CPLUS_INCLUDE_PATH=$CPLUS_INCLUDE_PATH:/usr/include/atlas

data/.yardopts CHANGED

@@ -1,4 +1,3 @@
---no-private
 --hide-void-return
 --embed-mixin ClassMethods
 --markup=markdown

data/Gemfile CHANGED

@@ -1,8 +1,8 @@
-source "http://rubygems.org"
+source 'https://rubygems.org'
-gem 'rb-gsl', '~> 1.16.0.2'     if ENV['MATRIX_LIBRARY'] == 'gsl'
+gem 'rb-gsl', '~> 1.16.0.2' if ENV['MATRIX_LIBRARY'] == 'gsl'
 gem 'narray', '~> 0.6.0.0' if ENV['MATRIX_LIBRARY'] == 'narray'
-gem 'nmatrix', '~> 0.1.0.rc5'  if ENV['MATRIX_LIBRARY'] == 'nmatrix' && RUBY_VERSION >= '1.9'
+gem 'nmatrix', '~> 0.1.0.rc5' if ENV['MATRIX_LIBRARY'] == 'nmatrix' && RUBY_VERSION >= '1.9'
 # Specify your gem's dependencies in the gemspec
 gemspec

data/LICENSE CHANGED

@@ -1,4 +1,4 @@
-Copyright (c) 2012 Open North Inc.
+Copyright (c) 2012 James McKinney
 Permission is hereby granted, free of charge, to any person obtaining
 a copy of this software and associated documentation files (the

data/README.md CHANGED

@@ -1,12 +1,12 @@
 # Ruby Vector Space Model (VSM) with tf*idf weights
-[![Gem Version](https://badge.fury.io/rb/tf-idf-similarity.svg)](http://badge.fury.io/rb/tf-idf-similarity)
-[![Build Status](https://secure.travis-ci.org/opennorth/tf-idf-similarity.png)](http://travis-ci.org/opennorth/tf-idf-similarity)
-[![Dependency Status](https://gemnasium.com/opennorth/tf-idf-similarity.png)](https://gemnasium.com/opennorth/tf-idf-similarity)
-[![Coverage Status](https://coveralls.io/repos/opennorth/tf-idf-similarity/badge.png?branch=master)](https://coveralls.io/r/opennorth/tf-idf-similarity)
-[![Code Climate](https://codeclimate.com/github/opennorth/tf-idf-similarity.png)](https://codeclimate.com/github/opennorth/tf-idf-similarity)
+[![Gem Version](https://badge.fury.io/rb/tf-idf-similarity.svg)](https://badge.fury.io/rb/tf-idf-similarity)
+[![Build Status](https://secure.travis-ci.org/jpmckinney/tf-idf-similarity.png)](https://travis-ci.org/jpmckinney/tf-idf-similarity)
+[![Dependency Status](https://gemnasium.com/jpmckinney/tf-idf-similarity.png)](https://gemnasium.com/jpmckinney/tf-idf-similarity)
+[![Coverage Status](https://coveralls.io/repos/jpmckinney/tf-idf-similarity/badge.png)](https://coveralls.io/r/jpmckinney/tf-idf-similarity)
+[![Code Climate](https://codeclimate.com/github/jpmckinney/tf-idf-similarity.png)](https://codeclimate.com/github/jpmckinney/tf-idf-similarity)
-Calculates the similarity between texts using a [bag-of-words](http://en.wikipedia.org/wiki/Bag_of_words_model) [Vector Space Model](http://en.wikipedia.org/wiki/Vector_space_model) with [Term Frequency-Inverse Document Frequency (tf*idf)](http://en.wikipedia.org/wiki/Tf*idf) weights. If your use case demands performance, use [Lucene](http://lucene.apache.org/core/) (see below).
+Calculates the similarity between texts using a [bag-of-words](https://en.wikipedia.org/wiki/Bag_of_words_model) [Vector Space Model](https://en.wikipedia.org/wiki/Vector_space_model) with [Term Frequency-Inverse Document Frequency (tf*idf)](https://en.wikipedia.org/wiki/Tf–idf) weights. If your use case demands performance, use [Lucene](http://lucene.apache.org/core/) (see below).
 ## Usage
@@ -24,13 +24,13 @@ document3 = TfIdfSimilarity::Document.new("Nam scelerisque dui sed leo...")
 corpus = [document1, document2, document3]
 ```
-Create a document-term matrix using [Term Frequency-Inverse Document Frequency function](http://en.wikipedia.org/wiki/):
+Create a document-term matrix using [Term Frequency-Inverse Document Frequency function](https://en.wikipedia.org/wiki/Tf–idf):
 ```ruby
 model = TfIdfSimilarity::TfIdfModel.new(corpus)
 ```
-Or, create a document-term matrix using the [Okapi BM25 ranking function](http://en.wikipedia.org/wiki/Okapi_BM25):
+Or, create a document-term matrix using the [Okapi BM25 ranking function](https://en.wikipedia.org/wiki/Okapi_BM25):
 ```ruby
 model = TfIdfSimilarity::BM25Model.new(corpus)
@@ -58,16 +58,46 @@ end
 puts tfidf_by_term.sort_by{|_,tfidf| -tfidf}
 ```
+Tokenize a document yourself, for example by excluding stop words:
+```ruby
+require 'unicode_utils'
+text = "Lorem ipsum dolor sit amet..."
+tokens = UnicodeUtils.each_word(text).to_a - ['and', 'the', 'to']
+document1 = TfIdfSimilarity::Document.new(text, :tokens => tokens)
+```
+Provide, by yourself, the number of times each term appears and the number of tokens in the document:
+```ruby
+require 'unicode_utils'
+text = "Lorem ipsum dolor sit amet..."
+tokens = UnicodeUtils.each_word(text).to_a - ['and', 'the', 'to']
+term_counts = Hash.new(0)
+size = 0
+tokens.each do |token|
+  # Unless the token is numeric.
+  unless token[/\A\d+\z/]
+    # Remove all punctuation from tokens.
+    term_counts[token.gsub(/\p{Punct}/, '')] += 1
+    size += 1
+  end
+end
+document1 = TfIdfSimilarity::Document.new(text, :term_counts => term_counts, :size => size)
+```
 [Read the documentation at RubyDoc.info.](http://rubydoc.info/gems/tf-idf-similarity)
 ## Speed
 Instead of using the Ruby Standard Library's [Matrix](http://www.ruby-doc.org/stdlib-2.0/libdoc/matrix/rdoc/Matrix.html) class, you can use one of the [GNU Scientific Library (GSL)](http://www.gnu.org/software/gsl/), [NArray](http://narray.rubyforge.org/) or [NMatrix](https://github.com/SciRuby/nmatrix) (0.0.9 or greater) gems for faster matrix operations. For example:
-    require 'gsl'
-    model = TfIdfSimilarity::TfIdfModel.new(corpus, :library => :gsl)
+    require 'narray'
+    model = TfIdfSimilarity::TfIdfModel.new(corpus, :library => :narray)
-The NMatrix gem gives access to [Automatically Tuned Linear Algebra Software (ATLAS)](http://math-atlas.sourceforge.net/), which you may know of through [Linear Algebra PACKage (LAPACK)](http://www.netlib.org/lapack/) or [Basic Linear Algebra Subprograms (BLAS)](http://www.netlib.org/blas/). Follow [these instructions](https://github.com/SciRuby/nmatrix#synopsis) to install the NMatrix gem. You may need [additional instructions for Mac OS X Lion](https://github.com/SciRuby/nmatrix/wiki/Installation).
+NArray seems to have the best performance of the three libraries.
+The NMatrix gem gives access to [Automatically Tuned Linear Algebra Software (ATLAS)](http://math-atlas.sourceforge.net/), which you may know of through [Linear Algebra PACKage (LAPACK)](http://www.netlib.org/lapack/) or [Basic Linear Algebra Subprograms (BLAS)](http://www.netlib.org/blas/). Follow [these instructions](https://github.com/SciRuby/nmatrix#installation) to install the NMatrix gem.
 ## Extras
@@ -76,7 +106,7 @@ You can access more term frequency, document frequency, and normalization formul
     require 'tf-idf-similarity/extras/document'
     require 'tf-idf-similarity/extras/tf_idf_model'
-The default tf*idf formula follows the [Lucene Conceptual Scoring Formula](http://lucene.apache.org/core/4_0_0-BETA/core/org/apache/lucene/search/similarities/TFIDFSimilarity.html).
+The default tf*idf formula follows the [Lucene Conceptual Scoring Formula](http://lucene.apache.org/core/4_0_0/core/org/apache/lucene/search/similarities/TFIDFSimilarity.html).
 ## Why?
@@ -115,17 +145,13 @@ Adapters for the following projects were also considered:
 ## Further Reading
-Lucene implements many more [similarity functions](http://lucene.apache.org/core/4_0_0-BETA/core/org/apache/lucene/search/similarities/Similarity.html), such as:
-* a [divergence from randomness (DFR) framework](http://lucene.apache.org/core/4_0_0-BETA/core/org/apache/lucene/search/similarities/DFRSimilarity.html)
-* a [framework for the family of information-based models](http://lucene.apache.org/core/4_0_0-BETA/core/org/apache/lucene/search/similarities/IBSimilarity.html)
-* a [language model with Bayesian smoothing using Dirichlet priors](http://lucene.apache.org/core/4_0_0-BETA/core/org/apache/lucene/search/similarities/LMDirichletSimilarity.html)
-* a [language model with Jelinek-Mercer smoothing](http://lucene.apache.org/core/4_0_0-BETA/core/org/apache/lucene/search/similarities/LMJelinekMercerSimilarity.html)
-Lucene can even [combine similarity measures](http://lucene.apache.org/core/4_0_0-BETA/core/org/apache/lucene/search/similarities/MultiSimilarity.html).
+Lucene implements many more [similarity functions](http://lucene.apache.org/core/4_0_0/core/org/apache/lucene/search/similarities/Similarity.html), such as:
-## Bugs? Questions?
+* a [divergence from randomness (DFR) framework](http://lucene.apache.org/core/4_0_0/core/org/apache/lucene/search/similarities/DFRSimilarity.html)
+* a [framework for the family of information-based models](http://lucene.apache.org/core/4_0_0/core/org/apache/lucene/search/similarities/IBSimilarity.html)
+* a [language model with Bayesian smoothing using Dirichlet priors](http://lucene.apache.org/core/4_0_0/core/org/apache/lucene/search/similarities/LMDirichletSimilarity.html)
+* a [language model with Jelinek-Mercer smoothing](http://lucene.apache.org/core/4_0_0/core/org/apache/lucene/search/similarities/LMJelinekMercerSimilarity.html)
-This gem's main repository is on GitHub: [http://github.com/opennorth/tf-idf-similarity](http://github.com/opennorth/tf-idf-similarity), where your contributions, forks, bug reports, feature requests, and feedback are greatly welcomed.
+Lucene can even [combine similarity measures](http://lucene.apache.org/core/4_0_0/core/org/apache/lucene/search/similarities/MultiSimilarity.html).
-Copyright (c) 2012 Open North Inc., released under the MIT license
+Copyright (c) 2012 James McKinney, released under the MIT license

data/lib/tf-idf-similarity.rb CHANGED

@@ -1,12 +1,8 @@
 require 'forwardable'
 require 'set'
-begin
-  require 'unicode_utils/downcase'
-  require 'unicode_utils/each_word'
-rescue LoadError
-  # Ruby 1.8
-end
+require 'unicode_utils/downcase'
+require 'unicode_utils/each_word'
 module TfIdfSimilarity
 end

data/lib/tf-idf-similarity/bm25_model.rb CHANGED

@@ -1,7 +1,7 @@
 # A document-term matrix using the BM25 function.
 #
 # @see http://lucene.apache.org/core/4_0_0-BETA/core/org/apache/lucene/search/similarities/BM25Similarity.html
-# @see http://en.wikipedia.org/wiki/Okapi_BM25
+# @see https://en.wikipedia.org/wiki/Okapi_BM25
 module TfIdfSimilarity
   class BM25Model < Model
     # Return the term's inverse document frequency.

data/lib/tf-idf-similarity/document.rb CHANGED

@@ -76,7 +76,7 @@ module TfIdfSimilarity
     # @see http://unicode.org/reports/tr29/#Default_Word_Boundaries
     # @see http://wiki.apache.org/solr/AnalyzersTokenizersTokenFilters#solr.StandardTokenizerFactory
     def tokenize(text)
-      @tokens || defined?(UnicodeUtils) && UnicodeUtils.each_word(text) || text.split(/\b/) # @todo Ruby 1.8 has no good word boundary code
+      @tokens || UnicodeUtils.each_word(text)
     end
   end
 end

data/lib/tf-idf-similarity/extras/tf_idf_model.rb CHANGED

@@ -110,7 +110,7 @@ module TfIdfSimilarity
     end
     alias_method :binary_tf, :binary_term_frequency
-    # @see http://en.wikipedia.org/wiki/Tf*idf
+    # @see https://en.wikipedia.org/wiki/Tf*idf
     # @see http://nlp.stanford.edu/IR-book/html/htmledition/maximum-tf-normalization-1.html
     def normalized_term_frequency(document, term, a = 0)
       a + (1 - a) * document.term_count(term) / document.maximum_term_count

data/lib/tf-idf-similarity/matrix_methods.rb CHANGED

@@ -34,7 +34,7 @@ module TfIdfSimilarity
           elsif column.respond_to?(:normalize)
             column.normalize
           else
-            column * (1 / Math.sqrt(column.inner_product(column))) # 1.8 does define division
+            column / Math.sqrt(column.inner_product(column))
           end
         end)
       end

data/lib/tf-idf-similarity/token.rb CHANGED

@@ -9,7 +9,7 @@
 # @see http://wiki.apache.org/solr/AnalyzersTokenizersTokenFilters#solr.WordDelimiterFilterFactory
 # @see http://wiki.apache.org/solr/AnalyzersTokenizersTokenFilters#solr.SynonymFilterFactory
 module TfIdfSimilarity
-  class Token < String
+  class Token < SimpleDelegator
     # Returns a falsy value if all its characters are numbers, punctuation,
     # whitespace or control characters.
     #
@@ -35,10 +35,7 @@ module TfIdfSimilarity
     #
     # @see http://wiki.apache.org/solr/AnalyzersTokenizersTokenFilters#solr.LowerCaseFilterFactory
     def lowercase_filter
-      self.class.new(defined?(UnicodeUtils) ? UnicodeUtils.downcase(self) : tr(
-        "ÀÁÂÃÄÅĀĂĄÇĆĈĊČÐĎĐÈÉÊËĒĔĖĘĚĜĞĠĢĤĦÌÍÎÏĨĪĬĮĴĶĹĻĽĿŁÑŃŅŇŊÒÓÔÕÖØŌŎŐŔŖŘŚŜŞŠŢŤŦÙÚÛÜŨŪŬŮŰŲŴÝŶŸŹŻŽ",
-        "àáâãäåāăąçćĉċčðďđèéêëēĕėęěĝğġģĥħìíîïĩīĭįĵķĺļľŀłñńņňŋòóôõöøōŏőŕŗřśŝşšţťŧùúûüũūŭůűųŵýŷÿźżž"
-      ).downcase)
+      self.class.new(UnicodeUtils.downcase(self))
     end
     # Returns a string with no English possessive or periods in acronyms.
@@ -47,7 +44,7 @@ module TfIdfSimilarity
     #
     # @see http://wiki.apache.org/solr/AnalyzersTokenizersTokenFilters#solr.ClassicFilterFactory
     def classic_filter
-      self.class.new(self.gsub('.', '').chomp("'s"))
+      self.class.new(self.gsub('.', '').sub(/['`’]s\z/, ''))
     end
   end
 end

data/lib/tf-idf-similarity/version.rb CHANGED

@@ -1,3 +1,3 @@
 module TfIdfSimilarity
-  VERSION = "0.1.4"
+  VERSION = "0.1.5"
 end

data/spec/extras/tf_idf_model_spec.rb CHANGED

@@ -33,7 +33,7 @@ module TfIdfSimilarity
         build_model(documents)
       end
-      pending "Add #search"
+      skip "Add #search"
     end
     # @see https://github.com/bbcrd/Similarity/blob/master/test/test_corpus.rb
@@ -98,7 +98,7 @@ module TfIdfSimilarity
       end
       it 'should return the similarity matrix' do
-        pending "Calculate the tf*idf matrix like the similarity gem does"
+        skip "Calculate the tf*idf matrix like the similarity gem does"
       end
       it 'should return the number of documents in which a term appears' do
@@ -113,7 +113,7 @@ module TfIdfSimilarity
       end
       it 'should return the document vector' do
-        pending "Calculate the tf*idf matrix like the similarity gem does"
+        skip "Calculate the tf*idf matrix like the similarity gem does"
       end
     end

data/spec/spec_helper.rb CHANGED

@@ -1,7 +1,11 @@
 require 'rubygems'
+require 'simplecov'
 require 'coveralls'
-Coveralls.wear!
+SimpleCov.formatter = Coveralls::SimpleCov::Formatter
+SimpleCov.start do
+  add_filter 'spec'
+end
 require 'rspec'
 require File.dirname(__FILE__) + '/../lib/tf-idf-similarity'

data/spec/token_spec.rb CHANGED

@@ -28,6 +28,14 @@ module TfIdfSimilarity
         Token.new("foo's").classic_filter.should == 'foo'
       end
+      it 'should remove ending possessives with nonstandard apostrophe 1' do
+        Token.new("foo`s").classic_filter.should == 'foo'
+      end
+      it 'should remove ending possessives with nonstandard apostrophe 2' do
+        Token.new("foo’s").classic_filter.should == 'foo'
+      end
       it 'should not remove infix possessives' do
         Token.new("foo's bar").classic_filter.should == "foo's bar"
       end

data/td-idf-similarity.gemspec CHANGED

@@ -5,9 +5,8 @@ Gem::Specification.new do |s|
   s.name        = "tf-idf-similarity"
   s.version     = TfIdfSimilarity::VERSION
   s.platform    = Gem::Platform::RUBY
-  s.authors     = ["Open North"]
-  s.email       = ["info@opennorth.ca"]
-  s.homepage    = "http://github.com/opennorth/tf-idf-similarity"
+  s.authors     = ["James McKinney"]
+  s.homepage    = "https://github.com/jpmckinney/tf-idf-similarity"
   s.summary     = %q{Calculates the similarity between texts using tf*idf}
   s.license     = 'MIT'
@@ -16,10 +15,9 @@ Gem::Specification.new do |s|
   s.executables   = `git ls-files -- bin/*`.split("\n").map{ |f| File.basename(f) }
   s.require_paths = ["lib"]
-  s.add_development_dependency('rspec', '~> 2.10')
-  s.add_development_dependency('rake')
-  s.add_development_dependency('coveralls')
-  s.add_development_dependency('mime-types', '~> 1.25') # 2.0 requires Ruby 1.9.2
+  s.add_runtime_dependency('unicode_utils', '~> 1.4')
-  s.extensions = ['ext/mkrf_conf.rb']
+  s.add_development_dependency('coveralls')
+  s.add_development_dependency('rake')
+  s.add_development_dependency('rspec', '~> 2.10')
 end

metadata CHANGED

@@ -1,31 +1,31 @@
 --- !ruby/object:Gem::Specification
 name: tf-idf-similarity
 version: !ruby/object:Gem::Version
-  version: 0.1.4
+  version: 0.1.5
 platform: ruby
 authors:
-- Open North
+- James McKinney
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2014-10-11 00:00:00.000000000 Z
+date: 2016-01-18 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
-  name: rspec
+  name: unicode_utils
   requirement: !ruby/object:Gem::Requirement
     requirements:
     - - "~>"
       - !ruby/object:Gem::Version
-        version: '2.10'
-  type: :development
+        version: '1.4'
+  type: :runtime
   prerelease: false
   version_requirements: !ruby/object:Gem::Requirement
     requirements:
     - - "~>"
       - !ruby/object:Gem::Version
-        version: '2.10'
+        version: '1.4'
 - !ruby/object:Gem::Dependency
-  name: rake
+  name: coveralls
   requirement: !ruby/object:Gem::Requirement
     requirements:
     - - ">="
@@ -39,7 +39,7 @@ dependencies:
       - !ruby/object:Gem::Version
         version: '0'
 - !ruby/object:Gem::Dependency
-  name: coveralls
+  name: rake
   requirement: !ruby/object:Gem::Requirement
     requirements:
     - - ">="
@@ -53,36 +53,33 @@ dependencies:
       - !ruby/object:Gem::Version
         version: '0'
 - !ruby/object:Gem::Dependency
-  name: mime-types
+  name: rspec
   requirement: !ruby/object:Gem::Requirement
     requirements:
     - - "~>"
       - !ruby/object:Gem::Version
-        version: '1.25'
+        version: '2.10'
   type: :development
   prerelease: false
   version_requirements: !ruby/object:Gem::Requirement
     requirements:
     - - "~>"
       - !ruby/object:Gem::Version
-        version: '1.25'
+        version: '2.10'
 description:
-email:
-- info@opennorth.ca
+email:
 executables: []
-extensions:
-- ext/mkrf_conf.rb
+extensions: []
 extra_rdoc_files: []
 files:
 - ".gitignore"
+- ".rspec"
 - ".travis.yml"
 - ".yardopts"
 - Gemfile
 - LICENSE
 - README.md
 - Rakefile
-- USAGE
-- ext/mkrf_conf.rb
 - lib/tf-idf-similarity.rb
 - lib/tf-idf-similarity/bm25_model.rb
 - lib/tf-idf-similarity/document.rb
@@ -102,7 +99,7 @@ files:
 - spec/tf_idf_model_spec.rb
 - spec/token_spec.rb
 - td-idf-similarity.gemspec
-homepage: http://github.com/opennorth/tf-idf-similarity
+homepage: https://github.com/jpmckinney/tf-idf-similarity
 licenses:
 - MIT
 metadata: {}
@@ -122,7 +119,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
       version: '0'
 requirements: []
 rubyforge_project:
-rubygems_version: 2.2.2
+rubygems_version: 2.4.5
 signing_key:
 specification_version: 4
 summary: Calculates the similarity between texts using tf*idf

data/USAGE DELETED

	@@ -1 +0,0 @@
1	- See README.md for full usage details.

data/ext/mkrf_conf.rb DELETED

@@ -1,15 +0,0 @@
-# @see http://www.programmersparadox.com/2012/05/21/gemspec-loading-dependent-gems-based-on-the-users-system/
-require 'rubygems/dependency_installer.rb'
-installer = Gem::DependencyInstaller.new
-begin
-  unless RUBY_VERSION < '1.9'
-    installer.install('unicode_utils', '>=0')
-  end
-rescue
-  exit(1)
-end
-f = File.open(File.join(File.dirname(__FILE__), "Rakefile"), "w")
-f.write("task :default\n")
-f.close