RubyGems - tf-idf-similarity - Versions diffs - 0.1.4 → 0.1.5 - Mend

tf-idf-similarity 0.1.4 → 0.1.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (21)

checksums.yaml +4 -4
data/.rspec +2 -0
data/.travis.yml +13 -9
data/.yardopts +0 -1
data/Gemfile +3 -3
data/LICENSE +1 -1
data/README.md +49 -23
data/lib/tf-idf-similarity.rb +2 -6
data/lib/tf-idf-similarity/bm25_model.rb +1 -1
data/lib/tf-idf-similarity/document.rb +1 -1
data/lib/tf-idf-similarity/extras/tf_idf_model.rb +1 -1
data/lib/tf-idf-similarity/matrix_methods.rb +1 -1
data/lib/tf-idf-similarity/token.rb +3 -6
data/lib/tf-idf-similarity/version.rb +1 -1
data/spec/extras/tf_idf_model_spec.rb +3 -3
data/spec/spec_helper.rb +5 -1
data/spec/token_spec.rb +8 -0
data/td-idf-similarity.gemspec +6 -8
metadata +17 -20
data/USAGE +0 -1
data/ext/mkrf_conf.rb +0 -15

checksums.yaml CHANGED

@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz: c0ba1f941db96541f035a283df336907bf941439
-  data.tar.gz: 22bbec24681023e880e1e4e3fa14d26356630021
+  metadata.gz: 736ca4c4b93d14ea046cbc4bdae930c8b88082be
+  data.tar.gz: 6b43e8356c59e0f48ac08f300186d4e12497368d
 SHA512:
-  metadata.gz: 9e7cca8d705d8080dff857d2d953a6f0091e361bb0693f0ce650e64a2f4633ad5db386fa41ea8b73ae1cfe839db8e4e9f56592c98b36cdc6ab756699ecfaa5f7
-  data.tar.gz: 3bcb9dcb07c9eb00c234920ff8d6340aac815c8181510d7ae65183e9b1d528001247439a86c6b603d973362bcee020eb0340558a9318693713cdaaa4b62a2ffd
+  metadata.gz: 635ea3047ba54a951020f95ab7e9412adf07a39d6042b85e605fcd0517345d506690bac11ab05f7f20e16f80106e95e8002fd1ae2ab4e466a27cc4f143ac15d6
+  data.tar.gz: 693ac6c70f9daf3f0a1ed06ba693d170654e7c871702641d598e59a6fc69cbd5316e76441da062aca05d0b8f67c0ba0c958a4115e2c9da17316a2ebef2190738

data/.rspec ADDED

@@ -0,0 +1,2 @@
+--color
+--require spec_helper

data/.travis.yml CHANGED

@@ -1,21 +1,25 @@
+sudo: false
 language: ruby
+cache: bundler
 rvm:
-  - 1.9.2
   - 1.9.3
   - 2.0.0
   - 2.1.0
+  - 2.2.0
 env:
   - MATRIX_LIBRARY=gsl
   - MATRIX_LIBRARY=narray
   - MATRIX_LIBRARY=nmatrix
   - MATRIX_LIBRARY=matrix
+addons:
+  apt:
+    packages:
+    - gsl-bin
+    - libgsl0-dev
+    # Installing ATLAS will install BLAS.
+    - libatlas-dev
+    - libatlas-base-dev
+    - libatlas3gf-base
 before_install:
   - bundle config build.nmatrix --with-lapacklib
-  - if [ $MATRIX_LIBRARY = 'nmatrix' -o $MATRIX_LIBRARY = 'gsl' ]; then sudo apt-get update -qq; fi
-  - if [ $MATRIX_LIBRARY = 'gsl' ]; then sudo apt-get install gsl-bin libgsl0-dev; fi
-  # Installing ATLAS will install BLAS.
-  - if [ $MATRIX_LIBRARY = 'nmatrix' ]; then sudo apt-get install -qq libatlas-dev libatlas-base-dev libatlas3gf-base; fi
-  - if [ $MATRIX_LIBRARY = 'nmatrix' ]; then export CPLUS_INCLUDE_PATH=$CPLUS_INCLUDE_PATH:/usr/include/atlas; fi
-# Travis sometimes runs without Bundler.
-install: bundle
-script: bundle exec rake --trace
+  - export CPLUS_INCLUDE_PATH=$CPLUS_INCLUDE_PATH:/usr/include/atlas

data/.yardopts CHANGED

@@ -1,4 +1,3 @@
---no-private
 --hide-void-return
 --embed-mixin ClassMethods
 --markup=markdown

data/Gemfile CHANGED

@@ -1,8 +1,8 @@
-source "http://rubygems.org"
+source 'https://rubygems.org'
-gem 'rb-gsl', '~> 1.16.0.2'     if ENV['MATRIX_LIBRARY'] == 'gsl'
+gem 'rb-gsl', '~> 1.16.0.2' if ENV['MATRIX_LIBRARY'] == 'gsl'
 gem 'narray', '~> 0.6.0.0' if ENV['MATRIX_LIBRARY'] == 'narray'
-gem 'nmatrix', '~> 0.1.0.rc5'  if ENV['MATRIX_LIBRARY'] == 'nmatrix' && RUBY_VERSION >= '1.9'
+gem 'nmatrix', '~> 0.1.0.rc5' if ENV['MATRIX_LIBRARY'] == 'nmatrix' && RUBY_VERSION >= '1.9'
 # Specify your gem's dependencies in the gemspec
 gemspec

data/LICENSE CHANGED

@@ -1,4 +1,4 @@
-Copyright (c) 2012 Open North Inc.
+Copyright (c) 2012 James McKinney
 Permission is hereby granted, free of charge, to any person obtaining
 a copy of this software and associated documentation files (the

data/README.md CHANGED

@@ -1,12 +1,12 @@
 # Ruby Vector Space Model (VSM) with tf*idf weights
-[![Gem Version](https://badge.fury.io/rb/tf-idf-similarity.svg)](http://badge.fury.io/rb/tf-idf-similarity)
-[![Build Status](https://secure.travis-ci.org/opennorth/tf-idf-similarity.png)](http://travis-ci.org/opennorth/tf-idf-similarity)
-[![Dependency Status](https://gemnasium.com/opennorth/tf-idf-similarity.png)](https://gemnasium.com/opennorth/tf-idf-similarity)
-[![Coverage Status](https://coveralls.io/repos/opennorth/tf-idf-similarity/badge.png?branch=master)](https://coveralls.io/r/opennorth/tf-idf-similarity)
-[![Code Climate](https://codeclimate.com/github/opennorth/tf-idf-similarity.png)](https://codeclimate.com/github/opennorth/tf-idf-similarity)
+[![Gem Version](https://badge.fury.io/rb/tf-idf-similarity.svg)](https://badge.fury.io/rb/tf-idf-similarity)
+[![Build Status](https://secure.travis-ci.org/jpmckinney/tf-idf-similarity.png)](https://travis-ci.org/jpmckinney/tf-idf-similarity)
+[![Dependency Status](https://gemnasium.com/jpmckinney/tf-idf-similarity.png)](https://gemnasium.com/jpmckinney/tf-idf-similarity)
+[![Coverage Status](https://coveralls.io/repos/jpmckinney/tf-idf-similarity/badge.png)](https://coveralls.io/r/jpmckinney/tf-idf-similarity)
+[![Code Climate](https://codeclimate.com/github/jpmckinney/tf-idf-similarity.png)](https://codeclimate.com/github/jpmckinney/tf-idf-similarity)
-Calculates the similarity between texts using a [bag-of-words](http://en.wikipedia.org/wiki/Bag_of_words_model) [Vector Space Model](http://en.wikipedia.org/wiki/Vector_space_model) with [Term Frequency-Inverse Document Frequency (tf*idf)](http://en.wikipedia.org/wiki/Tf*idf) weights. If your use case demands performance, use [Lucene](http://lucene.apache.org/core/) (see below).
+Calculates the similarity between texts using a [bag-of-words](https://en.wikipedia.org/wiki/Bag_of_words_model) [Vector Space Model](https://en.wikipedia.org/wiki/Vector_space_model) with [Term Frequency-Inverse Document Frequency (tf*idf)](https://en.wikipedia.org/wiki/Tf–idf) weights. If your use case demands performance, use [Lucene](http://lucene.apache.org/core/) (see below).
 ## Usage
@@ -24,13 +24,13 @@ document3 = TfIdfSimilarity::Document.new("Nam scelerisque dui sed leo...")
 corpus = [document1, document2, document3]
 ```
-Create a document-term matrix using [Term Frequency-Inverse Document Frequency function](http://en.wikipedia.org/wiki/):
+Create a document-term matrix using [Term Frequency-Inverse Document Frequency function](https://en.wikipedia.org/wiki/Tf–idf):
 ```ruby
 model = TfIdfSimilarity::TfIdfModel.new(corpus)
 ```
-Or, create a document-term matrix using the [Okapi BM25 ranking function](http://en.wikipedia.org/wiki/Okapi_BM25):
+Or, create a document-term matrix using the [Okapi BM25 ranking function](https://en.wikipedia.org/wiki/Okapi_BM25):
 ```ruby
 model = TfIdfSimilarity::BM25Model.new(corpus)
@@ -58,16 +58,46 @@ end
 puts tfidf_by_term.sort_by{|_,tfidf| -tfidf}
 ```
+Tokenize a document yourself, for example by excluding stop words:
+```ruby
+require 'unicode_utils'
+text = "Lorem ipsum dolor sit amet..."
+tokens = UnicodeUtils.each_word(text).to_a - ['and', 'the', 'to']
+document1 = TfIdfSimilarity::Document.new(text, :tokens => tokens)
+```
+Provide, by yourself, the number of times each term appears and the number of tokens in the document:
+```ruby
+require 'unicode_utils'
+text = "Lorem ipsum dolor sit amet..."
+tokens = UnicodeUtils.each_word(text).to_a - ['and', 'the', 'to']
+term_counts = Hash.new(0)
+size = 0
+tokens.each do |token|
+  # Unless the token is numeric.
+  unless token[/\A\d+\z/]
+    # Remove all punctuation from tokens.
+    term_counts[token.gsub(/\p{Punct}/, '')] += 1
+    size += 1
+  end
+end
+document1 = TfIdfSimilarity::Document.new(text, :term_counts => term_counts, :size => size)
+```
 [Read the documentation at RubyDoc.info.](http://rubydoc.info/gems/tf-idf-similarity)
 ## Speed
 Instead of using the Ruby Standard Library's [Matrix](http://www.ruby-doc.org/stdlib-2.0/libdoc/matrix/rdoc/Matrix.html) class, you can use one of the [GNU Scientific Library (GSL)](http://www.gnu.org/software/gsl/), [NArray](http://narray.rubyforge.org/) or [NMatrix](https://github.com/SciRuby/nmatrix) (0.0.9 or greater) gems for faster matrix operations. For example:
-    require 'gsl'
-    model = TfIdfSimilarity::TfIdfModel.new(corpus, :library => :gsl)
+    require 'narray'
+    model = TfIdfSimilarity::TfIdfModel.new(corpus, :library => :narray)
-The NMatrix gem gives access to [Automatically Tuned Linear Algebra Software (ATLAS)](http://math-atlas.sourceforge.net/), which you may know of through [Linear Algebra PACKage (LAPACK)](http://www.netlib.org/lapack/) or [Basic Linear Algebra Subprograms (BLAS)](http://www.netlib.org/blas/). Follow [these instructions](https://github.com/SciRuby/nmatrix#synopsis) to install the NMatrix gem. You may need [additional instructions for Mac OS X Lion](https://github.com/SciRuby/nmatrix/wiki/Installation).
+NArray seems to have the best performance of the three libraries.
+The NMatrix gem gives access to [Automatically Tuned Linear Algebra Software (ATLAS)](http://math-atlas.sourceforge.net/), which you may know of through [Linear Algebra PACKage (LAPACK)](http://www.netlib.org/lapack/) or [Basic Linear Algebra Subprograms (BLAS)](http://www.netlib.org/blas/). Follow [these instructions](https://github.com/SciRuby/nmatrix#installation) to install the NMatrix gem.
 ## Extras
@@ -76,7 +106,7 @@ You can access more term frequency, document frequency, and normalization formul
     require 'tf-idf-similarity/extras/document'
     require 'tf-idf-similarity/extras/tf_idf_model'
-The default tf*idf formula follows the [Lucene Conceptual Scoring Formula](http://lucene.apache.org/core/4_0_0-BETA/core/org/apache/lucene/search/similarities/TFIDFSimilarity.html).
+The default tf*idf formula follows the [Lucene Conceptual Scoring Formula](http://lucene.apache.org/core/4_0_0/core/org/apache/lucene/search/similarities/TFIDFSimilarity.html).
 ## Why?
@@ -115,17 +145,13 @@ Adapters for the following projects were also considered:
 ## Further Reading
-Lucene implements many more [similarity functions](http://lucene.apache.org/core/4_0_0-BETA/core/org/apache/lucene/search/similarities/Similarity.html), such as:
-* a [divergence from randomness (DFR) framework](http://lucene.apache.org/core/4_0_0-BETA/core/org/apache/lucene/search/similarities/DFRSimilarity.html)
-* a [framework for the family of information-based models](http://lucene.apache.org/core/4_0_0-BETA/core/org/apache/lucene/search/similarities/IBSimilarity.html)
-* a [language model with Bayesian smoothing using Dirichlet priors](http://lucene.apache.org/core/4_0_0-BETA/core/org/apache/lucene/search/similarities/LMDirichletSimilarity.html)
-* a [language model with Jelinek-Mercer smoothing](http://lucene.apache.org/core/4_0_0-BETA/core/org/apache/lucene/search/similarities/LMJelinekMercerSimilarity.html)
-Lucene can even [combine similarity measures](http://lucene.apache.org/core/4_0_0-BETA/core/org/apache/lucene/search/similarities/MultiSimilarity.html).
+Lucene implements many more [similarity functions](http://lucene.apache.org/core/4_0_0/core/org/apache/lucene/search/similarities/Similarity.html), such as:
-## Bugs? Questions?
+* a [divergence from randomness (DFR) framework](http://lucene.apache.org/core/4_0_0/core/org/apache/lucene/search/similarities/DFRSimilarity.html)
+* a [framework for the family of information-based models](http://lucene.apache.org/core/4_0_0/core/org/apache/lucene/search/similarities/IBSimilarity.html)
+* a [language model with Bayesian smoothing using Dirichlet priors](http://lucene.apache.org/core/4_0_0/core/org/apache/lucene/search/similarities/LMDirichletSimilarity.html)
+* a [language model with Jelinek-Mercer smoothing](http://lucene.apache.org/core/4_0_0/core/org/apache/lucene/search/similarities/LMJelinekMercerSimilarity.html)
-This gem's main repository is on GitHub: [http://github.com/opennorth/tf-idf-similarity](http://github.com/opennorth/tf-idf-similarity), where your contributions, forks, bug reports, feature requests, and feedback are greatly welcomed.
+Lucene can even [combine similarity measures](http://lucene.apache.org/core/4_0_0/core/org/apache/lucene/search/similarities/MultiSimilarity.html).
-Copyright (c) 2012 Open North Inc., released under the MIT license
+Copyright (c) 2012 James McKinney, released under the MIT license

data/lib/tf-idf-similarity.rb CHANGED

@@ -1,12 +1,8 @@
 require 'forwardable'
 require 'set'
-begin
-  require 'unicode_utils/downcase'
-  require 'unicode_utils/each_word'
-rescue LoadError
-  # Ruby 1.8
-end
+require 'unicode_utils/downcase'
+require 'unicode_utils/each_word'
 module TfIdfSimilarity
 end

data/lib/tf-idf-similarity/bm25_model.rb CHANGED

@@ -1,7 +1,7 @@
 # A document-term matrix using the BM25 function.
 #
 # @see http://lucene.apache.org/core/4_0_0-BETA/core/org/apache/lucene/search/similarities/BM25Similarity.html
-# @see http://en.wikipedia.org/wiki/Okapi_BM25
+# @see https://en.wikipedia.org/wiki/Okapi_BM25
 module TfIdfSimilarity
   class BM25Model < Model
     # Return the term's inverse document frequency.

data/lib/tf-idf-similarity/document.rb CHANGED

@@ -76,7 +76,7 @@ module TfIdfSimilarity
     # @see http://unicode.org/reports/tr29/#Default_Word_Boundaries
     # @see http://wiki.apache.org/solr/AnalyzersTokenizersTokenFilters#solr.StandardTokenizerFactory
     def tokenize(text)
-      @tokens || defined?(UnicodeUtils) && UnicodeUtils.each_word(text) || text.split(/\b/) # @todo Ruby 1.8 has no good word boundary code
+      @tokens || UnicodeUtils.each_word(text)
     end
   end
 end

data/lib/tf-idf-similarity/extras/tf_idf_model.rb CHANGED

@@ -110,7 +110,7 @@ module TfIdfSimilarity
     end
     alias_method :binary_tf, :binary_term_frequency
-    # @see http://en.wikipedia.org/wiki/Tf*idf
+    # @see https://en.wikipedia.org/wiki/Tf*idf
     # @see http://nlp.stanford.edu/IR-book/html/htmledition/maximum-tf-normalization-1.html
     def normalized_term_frequency(document, term, a = 0)
       a + (1 - a) * document.term_count(term) / document.maximum_term_count

data/lib/tf-idf-similarity/matrix_methods.rb CHANGED

@@ -34,7 +34,7 @@ module TfIdfSimilarity
           elsif column.respond_to?(:normalize)
             column.normalize
           else
-            column * (1 / Math.sqrt(column.inner_product(column))) # 1.8 does define division
+            column / Math.sqrt(column.inner_product(column))
           end
         end)
       end

data/lib/tf-idf-similarity/token.rb CHANGED

@@ -9,7 +9,7 @@
 # @see http://wiki.apache.org/solr/AnalyzersTokenizersTokenFilters#solr.WordDelimiterFilterFactory
 # @see http://wiki.apache.org/solr/AnalyzersTokenizersTokenFilters#solr.SynonymFilterFactory
 module TfIdfSimilarity
-  class Token < String
+  class Token < SimpleDelegator
     # Returns a falsy value if all its characters are numbers, punctuation,
     # whitespace or control characters.
     #
@@ -35,10 +35,7 @@ module TfIdfSimilarity
     #
     # @see http://wiki.apache.org/solr/AnalyzersTokenizersTokenFilters#solr.LowerCaseFilterFactory
     def lowercase_filter
-      self.class.new(defined?(UnicodeUtils) ? UnicodeUtils.downcase(self) : tr(
-        "ÀÁÂÃÄÅĀĂĄÇĆĈĊČÐĎĐÈÉÊËĒĔĖĘĚĜĞĠĢĤĦÌÍÎÏĨĪĬĮĴĶĹĻĽĿŁÑŃŅŇŊÒÓÔÕÖØŌŎŐŔŖŘŚŜŞŠŢŤŦÙÚÛÜŨŪŬŮŰŲŴÝŶŸŹŻŽ",
-        "àáâãäåāăąçćĉċčðďđèéêëēĕėęěĝğġģĥħìíîïĩīĭįĵķĺļľŀłñńņňŋòóôõöøōŏőŕŗřśŝşšţťŧùúûüũūŭůűųŵýŷÿźżž"
-      ).downcase)
+      self.class.new(UnicodeUtils.downcase(self))
     end
     # Returns a string with no English possessive or periods in acronyms.
@@ -47,7 +44,7 @@ module TfIdfSimilarity
     #
     # @see http://wiki.apache.org/solr/AnalyzersTokenizersTokenFilters#solr.ClassicFilterFactory
     def classic_filter
-      self.class.new(self.gsub('.', '').chomp("'s"))
+      self.class.new(self.gsub('.', '').sub(/['`’]s\z/, ''))
     end
   end
 end

data/lib/tf-idf-similarity/version.rb CHANGED

@@ -1,3 +1,3 @@
 module TfIdfSimilarity
-  VERSION = "0.1.4"
+  VERSION = "0.1.5"
 end

data/spec/extras/tf_idf_model_spec.rb CHANGED

@@ -33,7 +33,7 @@ module TfIdfSimilarity
         build_model(documents)
       end
-      pending "Add #search"
+      skip "Add #search"
     end
     # @see https://github.com/bbcrd/Similarity/blob/master/test/test_corpus.rb
@@ -98,7 +98,7 @@ module TfIdfSimilarity
       end
       it 'should return the similarity matrix' do
-        pending "Calculate the tf*idf matrix like the similarity gem does"
+        skip "Calculate the tf*idf matrix like the similarity gem does"
       end
       it 'should return the number of documents in which a term appears' do
@@ -113,7 +113,7 @@ module TfIdfSimilarity
       end
       it 'should return the document vector' do
-        pending "Calculate the tf*idf matrix like the similarity gem does"
+        skip "Calculate the tf*idf matrix like the similarity gem does"
       end
     end

data/spec/spec_helper.rb CHANGED

@@ -1,7 +1,11 @@
 require 'rubygems'
+require 'simplecov'
 require 'coveralls'
-Coveralls.wear!
+SimpleCov.formatter = Coveralls::SimpleCov::Formatter
+SimpleCov.start do
+  add_filter 'spec'
+end
 require 'rspec'
 require File.dirname(__FILE__) + '/../lib/tf-idf-similarity'

data/spec/token_spec.rb CHANGED

@@ -28,6 +28,14 @@ module TfIdfSimilarity
         Token.new("foo's").classic_filter.should == 'foo'
       end
+      it 'should remove ending possessives with nonstandard apostrophe 1' do
+        Token.new("foo`s").classic_filter.should == 'foo'
+      end
+      it 'should remove ending possessives with nonstandard apostrophe 2' do
+        Token.new("foo’s").classic_filter.should == 'foo'
+      end
       it 'should not remove infix possessives' do
         Token.new("foo's bar").classic_filter.should == "foo's bar"
       end

data/td-idf-similarity.gemspec CHANGED

@@ -5,9 +5,8 @@ Gem::Specification.new do |s|
   s.name        = "tf-idf-similarity"
   s.version     = TfIdfSimilarity::VERSION
   s.platform    = Gem::Platform::RUBY
-  s.authors     = ["Open North"]
-  s.email       = ["info@opennorth.ca"]
-  s.homepage    = "http://github.com/opennorth/tf-idf-similarity"
+  s.authors     = ["James McKinney"]
+  s.homepage    = "https://github.com/jpmckinney/tf-idf-similarity"
   s.summary     = %q{Calculates the similarity between texts using tf*idf}
   s.license     = 'MIT'
@@ -16,10 +15,9 @@ Gem::Specification.new do |s|
   s.executables   = `git ls-files -- bin/*`.split("\n").map{ |f| File.basename(f) }
   s.require_paths = ["lib"]
-  s.add_development_dependency('rspec', '~> 2.10')
-  s.add_development_dependency('rake')
-  s.add_development_dependency('coveralls')
-  s.add_development_dependency('mime-types', '~> 1.25') # 2.0 requires Ruby 1.9.2
+  s.add_runtime_dependency('unicode_utils', '~> 1.4')
-  s.extensions = ['ext/mkrf_conf.rb']
+  s.add_development_dependency('coveralls')
+  s.add_development_dependency('rake')
+  s.add_development_dependency('rspec', '~> 2.10')
 end

metadata CHANGED

@@ -1,31 +1,31 @@
 --- !ruby/object:Gem::Specification
 name: tf-idf-similarity
 version: !ruby/object:Gem::Version
-  version: 0.1.4
+  version: 0.1.5
 platform: ruby
 authors:
-- Open North
+- James McKinney
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2014-10-11 00:00:00.000000000 Z
+date: 2016-01-18 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
-  name: rspec
+  name: unicode_utils
   requirement: !ruby/object:Gem::Requirement
     requirements:
     - - "~>"
       - !ruby/object:Gem::Version
-        version: '2.10'
-  type: :development
+        version: '1.4'
+  type: :runtime
   prerelease: false
   version_requirements: !ruby/object:Gem::Requirement
     requirements:
     - - "~>"
       - !ruby/object:Gem::Version
-        version: '2.10'
+        version: '1.4'
 - !ruby/object:Gem::Dependency
-  name: rake
+  name: coveralls
   requirement: !ruby/object:Gem::Requirement
     requirements:
     - - ">="
@@ -39,7 +39,7 @@ dependencies:
       - !ruby/object:Gem::Version
         version: '0'
 - !ruby/object:Gem::Dependency
-  name: coveralls
+  name: rake
   requirement: !ruby/object:Gem::Requirement
     requirements:
     - - ">="
@@ -53,36 +53,33 @@ dependencies:
       - !ruby/object:Gem::Version
         version: '0'
 - !ruby/object:Gem::Dependency
-  name: mime-types
+  name: rspec
   requirement: !ruby/object:Gem::Requirement
     requirements:
     - - "~>"
       - !ruby/object:Gem::Version
-        version: '1.25'
+        version: '2.10'
   type: :development
   prerelease: false
   version_requirements: !ruby/object:Gem::Requirement
     requirements:
     - - "~>"
       - !ruby/object:Gem::Version
-        version: '1.25'
+        version: '2.10'
 description:
-email:
-- info@opennorth.ca
+email:
 executables: []
-extensions:
-- ext/mkrf_conf.rb
+extensions: []
 extra_rdoc_files: []
 files:
 - ".gitignore"
+- ".rspec"
 - ".travis.yml"
 - ".yardopts"
 - Gemfile
 - LICENSE
 - README.md
 - Rakefile
-- USAGE
-- ext/mkrf_conf.rb
 - lib/tf-idf-similarity.rb
 - lib/tf-idf-similarity/bm25_model.rb
 - lib/tf-idf-similarity/document.rb
@@ -102,7 +99,7 @@ files:
 - spec/tf_idf_model_spec.rb
 - spec/token_spec.rb
 - td-idf-similarity.gemspec
-homepage: http://github.com/opennorth/tf-idf-similarity
+homepage: https://github.com/jpmckinney/tf-idf-similarity
 licenses:
 - MIT
 metadata: {}
@@ -122,7 +119,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
       version: '0'
 requirements: []
 rubyforge_project:
-rubygems_version: 2.2.2
+rubygems_version: 2.4.5
 signing_key:
 specification_version: 4
 summary: Calculates the similarity between texts using tf*idf

data/USAGE DELETED

@@ -1 +0,0 @@
-See README.md for full usage details.

data/ext/mkrf_conf.rb DELETED

@@ -1,15 +0,0 @@
-# @see http://www.programmersparadox.com/2012/05/21/gemspec-loading-dependent-gems-based-on-the-users-system/
-require 'rubygems/dependency_installer.rb'
-installer = Gem::DependencyInstaller.new
-begin
-  unless RUBY_VERSION < '1.9'
-    installer.install('unicode_utils', '>=0')
-  end
-rescue
-  exit(1)
-end
-f = File.open(File.join(File.dirname(__FILE__), "Rakefile"), "w")
-f.write("task :default\n")
-f.close